tests

rebase
2026-02-02 15:03:38 +00:00 · 2025-09-12 15:46:49 -04:00 · 2025-09-12 14:15:16 -04:00 · 2025-09-12 14:02:12 -04:00 · 2025-09-12 14:01:34 -04:00 · 2025-09-12 14:01:22 -04:00
5 changed files with 334 additions and 198 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -462,7 +462,6 @@ impl Session {
            tools_config: ToolsConfig::new(&ToolsConfigParams {
                model_family: &config.model_family,
                approval_policy,
-                sandbox_policy: sandbox_policy.clone(),
                include_plan_tool: config.include_plan_tool,
                include_apply_patch_tool: config.include_apply_patch_tool,
                include_web_search_request: config.tools_web_search_request,
@@ -686,12 +685,6 @@ impl Session {
        if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
            items.push(UserInstructions::new(user_instructions.to_string()).into());
        }
-        items.push(ResponseItem::from(EnvironmentContext::new(
-            Some(turn_context.cwd.clone()),
-            Some(turn_context.approval_policy),
-            Some(turn_context.sandbox_policy.clone()),
-            Some(self.user_shell.clone()),
-        )));
        items
    }

@@ -1143,7 +1136,6 @@ async fn submission_loop(
                let tools_config = ToolsConfig::new(&ToolsConfigParams {
                    model_family: &effective_family,
                    approval_policy: new_approval_policy,
-                    sandbox_policy: new_sandbox_policy.clone(),
                    include_plan_tool: config.include_plan_tool,
                    include_apply_patch_tool: config.include_apply_patch_tool,
                    include_web_search_request: config.tools_web_search_request,
@@ -1165,27 +1157,18 @@ async fn submission_loop(

                // Install the new persistent context for subsequent tasks/turns.
                turn_context = Arc::new(new_turn_context);
-
-                // Optionally persist changes to model / effort
-                if cwd.is_some() || approval_policy.is_some() || sandbox_policy.is_some() {
-                    sess.record_conversation_items(&[ResponseItem::from(EnvironmentContext::new(
-                        cwd,
-                        approval_policy,
-                        sandbox_policy,
-                        // Shell is not configurable from turn to turn
-                        None,
-                    ))])
-                    .await;
-                }
            }
            Op::UserInput { items } => {
-                // attempt to inject input into current task
-                if let Err(items) = sess.inject_input(items) {
-                    // no current task, spawn a new one
-                    let task =
-                        AgentTask::spawn(sess.clone(), Arc::clone(&turn_context), sub.id, items);
-                    sess.set_task(task);
-                }
+                submit_user_input(
+                    turn_context.cwd.clone(),
+                    turn_context.approval_policy,
+                    turn_context.sandbox_policy.clone(),
+                    &sess,
+                    &turn_context,
+                    sub.id.clone(),
+                    items,
+                )
+                .await;
            }
            Op::UserTurn {
                items,
@@ -1230,7 +1213,6 @@ async fn submission_loop(
                        tools_config: ToolsConfig::new(&ToolsConfigParams {
                            model_family: &model_family,
                            approval_policy,
-                            sandbox_policy: sandbox_policy.clone(),
                            include_plan_tool: config.include_plan_tool,
                            include_apply_patch_tool: config.include_apply_patch_tool,
                            include_web_search_request: config.tools_web_search_request,
@@ -1247,11 +1229,16 @@ async fn submission_loop(
                        shell_environment_policy: turn_context.shell_environment_policy.clone(),
                        cwd,
                    };
-                    // TODO: record the new environment context in the conversation history
-                    // no current task, spawn a new one with the per‑turn context
-                    let task =
-                        AgentTask::spawn(sess.clone(), Arc::new(fresh_turn_context), sub.id, items);
-                    sess.set_task(task);
+                    submit_user_input(
+                        fresh_turn_context.cwd.clone(),
+                        fresh_turn_context.approval_policy,
+                        fresh_turn_context.sandbox_policy.clone(),
+                        &sess,
+                        &Arc::new(fresh_turn_context),
+                        sub.id.clone(),
+                        items,
+                    )
+                    .await;
                }
            }
            Op::ExecApproval { id, decision } => match decision {
@@ -2826,6 +2813,29 @@ async fn handle_sandbox_error(
    }
 }

+async fn submit_user_input(
+    cwd: PathBuf,
+    approval_policy: AskForApproval,
+    sandbox_policy: SandboxPolicy,
+    sess: &Arc<Session>,
+    turn_context: &Arc<TurnContext>,
+    sub_id: String,
+    items: Vec<InputItem>,
+) {
+    sess.record_conversation_items(&[ResponseItem::from(EnvironmentContext::new(
+        Some(cwd),
+        Some(approval_policy),
+        Some(sandbox_policy),
+        Some(sess.user_shell.clone()),
+    ))])
+    .await;
+    if let Err(items) = sess.inject_input(items) {
+        // no current task, spawn a new one
+        let task = AgentTask::spawn(Arc::clone(sess), Arc::clone(turn_context), sub_id, items);
+        sess.set_task(task);
+    }
+}
+
 fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
    let ExecToolCallOutput {
        aggregated_output, ..
--- a/codex-rs/core/src/openai_tools.rs
+++ b/codex-rs/core/src/openai_tools.rs
@@ -8,7 +8,6 @@ use std::collections::HashMap;
 use crate::model_family::ModelFamily;
 use crate::plan_tool::PLAN_TOOL;
 use crate::protocol::AskForApproval;
-use crate::protocol::SandboxPolicy;
 use crate::tool_apply_patch::ApplyPatchToolType;
 use crate::tool_apply_patch::create_apply_patch_freeform_tool;
 use crate::tool_apply_patch::create_apply_patch_json_tool;
@@ -58,7 +57,7 @@ pub(crate) enum OpenAiTool {
 #[derive(Debug, Clone)]
 pub enum ConfigShellToolType {
    DefaultShell,
-    ShellWithRequest { sandbox_policy: SandboxPolicy },
+    ShellWithRequest,
    LocalShell,
    StreamableShell,
 }
@@ -76,7 +75,6 @@ pub(crate) struct ToolsConfig {
 pub(crate) struct ToolsConfigParams<'a> {
    pub(crate) model_family: &'a ModelFamily,
    pub(crate) approval_policy: AskForApproval,
-    pub(crate) sandbox_policy: SandboxPolicy,
    pub(crate) include_plan_tool: bool,
    pub(crate) include_apply_patch_tool: bool,
    pub(crate) include_web_search_request: bool,
@@ -90,7 +88,6 @@ impl ToolsConfig {
        let ToolsConfigParams {
            model_family,
            approval_policy,
-            sandbox_policy,
            include_plan_tool,
            include_apply_patch_tool,
            include_web_search_request,
@@ -106,9 +103,7 @@ impl ToolsConfig {
            ConfigShellToolType::DefaultShell
        };
        if matches!(approval_policy, AskForApproval::OnRequest) && !use_streamable_shell_tool {
-            shell_type = ConfigShellToolType::ShellWithRequest {
-                sandbox_policy: sandbox_policy.clone(),
-            }
+            shell_type = ConfigShellToolType::ShellWithRequest;
        }

        let apply_patch_tool_type = match model_family.apply_patch_tool_type {
@@ -251,7 +246,9 @@ fn create_unified_exec_tool() -> OpenAiTool {
    })
 }

-fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
+const SHELL_TOOL_DESCRIPTION: &str = r#"Runs a shell command and returns its output"#;
+
+fn create_shell_tool_for_request() -> OpenAiTool {
    let mut properties = BTreeMap::new();
    properties.insert(
        "command".to_string(),
@@ -263,79 +260,29 @@ fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
    properties.insert(
        "workdir".to_string(),
        JsonSchema::String {
-            description: Some("The working directory to execute the command in".to_string()),
+            description: Some("Working directory to execute the command in.".to_string()),
        },
    );
    properties.insert(
        "timeout_ms".to_string(),
        JsonSchema::Number {
-            description: Some("The timeout for the command in milliseconds".to_string()),
+            description: Some("Timeout for the command in milliseconds.".to_string()),
        },
    );
-
-    if matches!(sandbox_policy, SandboxPolicy::WorkspaceWrite { .. }) {
-        properties.insert(
+    properties.insert(
        "with_escalated_permissions".to_string(),
        JsonSchema::Boolean {
-            description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
+            description: Some("Request escalated permissions, only for when a command would otherwise be blocked by the sandbox.".to_string()),
        },
    );
-        properties.insert(
+    properties.insert(
        "justification".to_string(),
        JsonSchema::String {
-            description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
+            description: Some("Required if and only if with_escalated_permissions == true. One sentence explaining why escalation is needed (e.g., write outside CWD, network fetch, git commit).".to_string()),
        },
    );
-    }

-    let description = match sandbox_policy {
-        SandboxPolicy::WorkspaceWrite {
-            network_access,
-            ..
-        } => {
-            let network_line = if !network_access {
-                "\n    - Commands that require network access"
-            } else {
-                ""
-            };
-
-            format!(
-                r#"
-The shell tool is used to execute shell commands.
- When invoking the shell tool, your call will be running in a sandbox, and some shell commands will require escalated privileges:
-  - Types of actions that require escalated privileges:
-    - Writing files other than those in the writable roots (see the environment context for the allowed directories){network_line}
-  - Examples of commands that require escalated privileges:
-    - git commit
-    - npm install or pnpm install
-    - cargo build
-    - cargo test
- When invoking a command that will require escalated privileges:
-  - Provide the with_escalated_permissions parameter with the boolean value true
-  - Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter."#,
-            )
-        }
-        SandboxPolicy::DangerFullAccess => {
-            "Runs a shell command and returns its output.".to_string()
-        }
-        SandboxPolicy::ReadOnly => {
-            r#"
-The shell tool is used to execute shell commands.
- When invoking the shell tool, your call will be running in a sandbox, and some shell commands (including apply_patch) will require escalated permissions:
-  - Types of actions that require escalated privileges:
-    - Writing files
-    - Applying patches
-  - Examples of commands that require escalated privileges:
-    - apply_patch
-    - git commit
-    - npm install or pnpm install
-    - cargo build
-    - cargo test
- When invoking a command that will require escalated privileges:
-  - Provide the with_escalated_permissions parameter with the boolean value true
-  - Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter"#.to_string()
-        }
-    };
+    let description = SHELL_TOOL_DESCRIPTION.to_string();

    OpenAiTool::Function(ResponsesApiTool {
        name: "shell".to_string(),
@@ -348,7 +295,6 @@ The shell tool is used to execute shell commands.
        },
    })
 }
-
 fn create_view_image_tool() -> OpenAiTool {
    // Support only local filesystem path.
    let mut properties = BTreeMap::new();
@@ -589,8 +535,8 @@ pub(crate) fn get_openai_tools(
            ConfigShellToolType::DefaultShell => {
                tools.push(create_shell_tool());
            }
-            ConfigShellToolType::ShellWithRequest { sandbox_policy } => {
-                tools.push(create_shell_tool_for_sandbox(sandbox_policy));
+            ConfigShellToolType::ShellWithRequest => {
+                tools.push(create_shell_tool_for_request());
            }
            ConfigShellToolType::LocalShell => {
                tools.push(OpenAiTool::LocalShell {});
@@ -686,7 +632,6 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: true,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -708,7 +653,6 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: true,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -730,7 +674,6 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -836,7 +779,6 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: false,
@@ -914,7 +856,6 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -977,7 +918,6 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -1035,7 +975,6 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -1096,7 +1035,6 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -1150,13 +1088,7 @@ mod tests {

    #[test]
    fn test_shell_tool_for_sandbox_workspace_write() {
-        let sandbox_policy = SandboxPolicy::WorkspaceWrite {
-            writable_roots: vec!["workspace".into()],
-            network_access: false,
-            exclude_tmpdir_env_var: false,
-            exclude_slash_tmp: false,
-        };
-        let tool = super::create_shell_tool_for_sandbox(&sandbox_policy);
+        let tool = super::create_shell_tool_for_request();
        let OpenAiTool::Function(ResponsesApiTool {
            description, name, ..
        }) = &tool
@@ -1165,26 +1097,13 @@ mod tests {
        };
        assert_eq!(name, "shell");

-        let expected = r#"
-The shell tool is used to execute shell commands.
- When invoking the shell tool, your call will be running in a sandbox, and some shell commands will require escalated privileges:
-  - Types of actions that require escalated privileges:
-    - Writing files other than those in the writable roots (see the environment context for the allowed directories)
-    - Commands that require network access
-  - Examples of commands that require escalated privileges:
-    - git commit
-    - npm install or pnpm install
-    - cargo build
-    - cargo test
- When invoking a command that will require escalated privileges:
-  - Provide the with_escalated_permissions parameter with the boolean value true
-  - Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter."#;
+        let expected = super::SHELL_TOOL_DESCRIPTION;
        assert_eq!(description, expected);
    }

    #[test]
    fn test_shell_tool_for_sandbox_readonly() {
-        let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::ReadOnly);
+        let tool = super::create_shell_tool_for_request();
        let OpenAiTool::Function(ResponsesApiTool {
            description, name, ..
        }) = &tool
@@ -1193,27 +1112,13 @@ The shell tool is used to execute shell commands.
        };
        assert_eq!(name, "shell");

-        let expected = r#"
-The shell tool is used to execute shell commands.
- When invoking the shell tool, your call will be running in a sandbox, and some shell commands (including apply_patch) will require escalated permissions:
-  - Types of actions that require escalated privileges:
-    - Writing files
-    - Applying patches
-  - Examples of commands that require escalated privileges:
-    - apply_patch
-    - git commit
-    - npm install or pnpm install
-    - cargo build
-    - cargo test
- When invoking a command that will require escalated privileges:
-  - Provide the with_escalated_permissions parameter with the boolean value true
-  - Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter"#;
+        let expected = super::SHELL_TOOL_DESCRIPTION;
        assert_eq!(description, expected);
    }

    #[test]
    fn test_shell_tool_for_sandbox_danger_full_access() {
-        let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::DangerFullAccess);
+        let tool = super::create_shell_tool_for_request();
        let OpenAiTool::Function(ResponsesApiTool {
            description, name, ..
        }) = &tool
@@ -1222,6 +1127,7 @@ The shell tool is used to execute shell commands.
        };
        assert_eq!(name, "shell");

-        assert_eq!(description, "Runs a shell command and returns its output.");
+        let expected = super::SHELL_TOOL_DESCRIPTION;
+        assert_eq!(description, expected);
    }
 }
--- a/codex-rs/core/src/project_doc.rs
+++ b/codex-rs/core/src/project_doc.rs
@@ -26,7 +26,7 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";

 /// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
 /// string of instructions.
-pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
+pub async fn get_user_instructions(config: &Config) -> Option<String> {
    match read_project_docs(config).await {
        Ok(Some(project_doc)) => match &config.user_instructions {
            Some(original_instructions) => Some(format!(
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -4,9 +4,11 @@ use codex_core::ModelProviderInfo;
 use codex_core::NewConversation;
 use codex_core::WireApi;
 use codex_core::built_in_model_providers;
+use codex_core::project_doc::get_user_instructions;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
+use codex_core::shell::default_user_shell;
 use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
@@ -221,6 +223,8 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
    };
    let codex_home = TempDir::new().unwrap();
    let mut config = load_default_config_for_test(&codex_home);
+    let cwd = TempDir::new().unwrap();
+    config.cwd = cwd.path().to_path_buf();
    config.model_provider = model_provider;
    config.experimental_resume = Some(session_path.clone());
    // Also configure user instructions to ensure they are NOT delivered on resume.
@@ -259,6 +263,29 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {

    let request = &server.received_requests().await.unwrap()[0];
    let request_body = request.body_json::<serde_json::Value>().unwrap();
+
+    // Build expected environment context for this turn.
+    let shell = default_user_shell().await;
+    let shell_line = match shell.name() {
+        Some(name) => format!("  <shell>{name}</shell>\n"),
+        None => String::new(),
+    };
+    let expected_env_text_turn = format!(
+        r#"<environment_context>
+  <cwd>{}</cwd>
+  <approval_policy>on-request</approval_policy>
+  <sandbox_mode>read-only</sandbox_mode>
+  <network_access>restricted</network_access>
+{}</environment_context>"#,
+        cwd.path().to_string_lossy(),
+        shell_line.as_str(),
+    );
+    let expected_env_msg_turn = json!({
+        "type": "message",
+        "role": "user",
+        "content": [ { "type": "input_text", "text": expected_env_text_turn } ]
+    });
+
    let expected_input = json!([
        {
            "type": "message",
@@ -270,12 +297,14 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
            "role": "assistant",
            "content": [{ "type": "output_text", "text": "resumed assistant message" }]
        },
+        expected_env_msg_turn,
        {
            "type": "message",
            "role": "user",
            "content": [{ "type": "input_text", "text": "hello" }]
        }
    ]);
+
    assert_eq!(request_body["input"], expected_input);
 }

@@ -838,7 +867,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
        conversation: codex,
        ..
    } = conversation_manager
-        .new_conversation(config)
+        .new_conversation(config.clone())
        .await
        .expect("create new conversation");

@@ -873,34 +902,49 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
    let requests = server.received_requests().await.unwrap();
    assert_eq!(requests.len(), 3, "expected 3 requests (one per turn)");

-    // Replace full-array compare with tail-only raw JSON compare using a single hard-coded value.
-    let r3_tail_expected = json!([
-        {
-            "type": "message",
-            "role": "user",
-            "content": [{"type":"input_text","text":"U1"}]
-        },
-        {
-            "type": "message",
-            "role": "assistant",
-            "content": [{"type":"output_text","text":"Hey there!\n"}]
-        },
-        {
-            "type": "message",
-            "role": "user",
-            "content": [{"type":"input_text","text":"U2"}]
-        },
-        {
-            "type": "message",
-            "role": "assistant",
-            "content": [{"type":"output_text","text":"Hey there!\n"}]
-        },
-        {
-            "type": "message",
-            "role": "user",
-            "content": [{"type":"input_text","text":"U3"}]
-        }
-    ]);
+    // Build expected environment context dynamically to avoid OS-dependent flakiness.
+    let user_instructions = get_user_instructions(&config).await;
+    let shell = default_user_shell().await;
+    let shell_line = match shell.name() {
+        Some(name) => format!("  <shell>{name}</shell>\n"),
+        None => String::new(),
+    };
+    let expected_env_text = format!(
+        r#"<environment_context>
+  <cwd>{}</cwd>
+  <approval_policy>on-request</approval_policy>
+  <sandbox_mode>read-only</sandbox_mode>
+  <network_access>restricted</network_access>
+{}</environment_context>"#,
+        std::env::current_dir().unwrap().to_string_lossy(),
+        shell_line.as_str(),
+    );
+    let expected_env_msg = json!({
+        "type": "message",
+        "role": "user",
+        "content": [ { "type": "input_text", "text": expected_env_text } ]
+    });
+    // Wrap user instructions in the XML container to match the raw/ingest view
+    let expected_ui_text = format!(
+        "<user_instructions>\n\n{}\n\n</user_instructions>",
+        user_instructions.clone().unwrap()
+    );
+    let expected_ui_msg = json!({
+        "type": "message",
+        "role": "user",
+        "content": [ { "type": "input_text", "text": expected_ui_text } ]
+    });
+
+    let expected_full = json!([
+        expected_ui_msg,
+        expected_env_msg.clone(),
+        {"type":"message","role":"user","content":[{"type":"input_text","text":"U1"}]},
+        {"type":"message","role":"assistant","content":[{"type":"output_text","text":"Hey there!\n"}]},
+        expected_env_msg.clone(),
+        {"type":"message","role":"user","content":[{"type":"input_text","text":"U2"}]},
+        {"type":"message","role":"assistant","content":[{"type":"output_text","text":"Hey there!\n"}]},
+        expected_env_msg,
+        {"type":"message","role":"user","content":[{"type":"input_text","text":"U3"}]}]);

    let r3_input_array = requests[2]
        .body_json::<serde_json::Value>()
@@ -909,12 +953,6 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
        .and_then(|v| v.as_array())
        .cloned()
        .expect("r3 missing input array");
-    // skipping earlier context and developer messages
-    let tail_len = r3_tail_expected.as_array().unwrap().len();
-    let actual_tail = &r3_input_array[r3_input_array.len() - tail_len..];
-    assert_eq!(
-        serde_json::Value::Array(actual_tail.to_vec()),
-        r3_tail_expected,
-        "request 3 tail mismatch",
-    );
+
+    assert_eq!(json!(r3_input_array), expected_full);
 }
--- a/codex-rs/core/tests/suite/prompt_caching.rs
+++ b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -270,8 +270,13 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
    assert_eq!(requests.len(), 2, "expected two POST requests");

    let shell = default_user_shell().await;
+    let shell_line = match shell.name() {
+        Some(name) => format!("  <shell>{name}</shell>\n"),
+        None => String::new(),
+    };

-    let expected_env_text = format!(
+    // Per-turn environment context includes the shell tag.
+    let expected_env_text_turn = format!(
        r#"<environment_context>
  <cwd>{}</cwd>
  <approval_policy>on-request</approval_policy>
@@ -279,18 +284,15 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
  <network_access>restricted</network_access>
 {}</environment_context>"#,
        cwd.path().to_string_lossy(),
-        match shell.name() {
-            Some(name) => format!("  <shell>{name}</shell>\n"),
-            None => String::new(),
-        }
+        shell_line.as_str(),
    );
    let expected_ui_text =
        "<user_instructions>\n\nbe consistent and helpful\n\n</user_instructions>";

-    let expected_env_msg = serde_json::json!({
+    let expected_env_msg_turn = serde_json::json!({
        "type": "message",
        "role": "user",
-        "content": [ { "type": "input_text", "text": expected_env_text } ]
+        "content": [ { "type": "input_text", "text": expected_env_text_turn } ]
    });
    let expected_ui_msg = serde_json::json!({
        "type": "message",
@@ -304,11 +306,29 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
        "content": [ { "type": "input_text", "text": "hello 1" } ]
    });
    let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
+    let body1_input = body1["input"].as_array().unwrap();
    assert_eq!(
        body1["input"],
-        serde_json::json!([expected_ui_msg, expected_env_msg, expected_user_message_1])
+        serde_json::json!([
+            expected_ui_msg,
+            expected_env_msg_turn,
+            expected_user_message_1
+        ])
    );

+    let env_texts: Vec<&str> = body1_input
+        .iter()
+        .filter_map(|msg| {
+            msg.get("content")
+                .and_then(|content| content.as_array())
+                .and_then(|content| content.first())
+                .and_then(|item| item.get("text"))
+                .and_then(|text| text.as_str())
+        })
+        .filter(|text| text.starts_with("<environment_context>"))
+        .collect();
+    assert_eq!(env_texts, vec![expected_env_text_turn.as_str()]);
+
    let expected_user_message_2 = serde_json::json!({
        "type": "message",
        "role": "user",
@@ -318,7 +338,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
    let expected_body2 = serde_json::json!(
        [
            body1["input"].as_array().unwrap().as_slice(),
-            [expected_user_message_2].as_slice(),
+            [expected_env_msg_turn, expected_user_message_2].as_slice(),
        ]
        .concat()
    );
@@ -423,19 +443,28 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
        "role": "user",
        "content": [ { "type": "input_text", "text": "hello 2" } ]
    });
+    let shell = default_user_shell().await;
+    let shell_line = match shell.name() {
+        Some(name) => format!("  <shell>{name}</shell>\n"),
+        None => String::new(),
+    };
+
    // After overriding the turn context, the environment context should be emitted again
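-    // reflecting the new approval policy and sandbox settings. Omit cwd because it did
-    // not change.
+    // reflecting the new approval policy and sandbox settings. The full context is
+    // re-sent, so cwd and the shell tag are included even though cwd did not change.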
    let expected_env_text_2 = format!(
        r#"<environment_context>
+  <cwd>{}</cwd>
  <approval_policy>never</approval_policy>
  <sandbox_mode>workspace-write</sandbox_mode>
  <network_access>enabled</network_access>
  <writable_roots>
    <root>{}</root>
  </writable_roots>
-</environment_context>"#,
-        writable.path().to_string_lossy()
+{}</environment_context>"#,
+        cwd.path().to_string_lossy(),
+        writable.path().to_string_lossy(),
+        shell_line.as_str()
    );
    let expected_env_msg_2 = serde_json::json!({
        "type": "message",
@@ -546,12 +575,165 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
        "role": "user",
        "content": [ { "type": "input_text", "text": "hello 2" } ]
    });
+    let shell = default_user_shell().await;
+    let shell_line = match shell.name() {
+        Some(name) => format!("  <shell>{name}</shell>\n"),
+        None => String::new(),
+    };
+    let expected_env_text_2 = format!(
+        r#"<environment_context>
+  <cwd>{}</cwd>
+  <approval_policy>never</approval_policy>
+  <sandbox_mode>workspace-write</sandbox_mode>
+  <network_access>enabled</network_access>
+  <writable_roots>
+    <root>{}</root>
+  </writable_roots>
+{}</environment_context>"#,
+        new_cwd.path().to_string_lossy(),
+        writable.path().to_string_lossy(),
+        shell_line.as_str()
+    );
+    let expected_env_msg_2 = serde_json::json!({
+        "type": "message",
+        "role": "user",
+        "content": [ { "type": "input_text", "text": expected_env_text_2 } ]
+    });
    let expected_body2 = serde_json::json!(
        [
            body1["input"].as_array().unwrap().as_slice(),
-            [expected_user_message_2].as_slice(),
+            [expected_env_msg_2, expected_user_message_2].as_slice(),
        ]
        .concat()
    );
    assert_eq!(body2["input"], expected_body2);
 }
+
+/// Checks that the `tools` array sent in the request body is unaffected by approval-policy
+/// changes.
+///
+/// For every ordered pair of distinct `AskForApproval` values the test overrides the turn
+/// context to the first policy, runs a turn, overrides to the second policy, runs another
+/// turn, and finally asserts that the two recorded request bodies carry the same `tools` value.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn tools_stable_across_all_approval_policy_transitions() {
+    use pretty_assertions::assert_eq;
+
+    // Every ordered (from, to) pair of distinct policies; self transitions are excluded.
+    let policies = [
+        AskForApproval::UnlessTrusted,
+        AskForApproval::OnFailure,
+        AskForApproval::OnRequest,
+        AskForApproval::Never,
+    ];
+    let transitions: Vec<(AskForApproval, AskForApproval)> = policies
+        .iter()
+        .flat_map(|&from| {
+            policies
+                .iter()
+                .filter(move |&&to| from != to)
+                .map(move |&to| (from, to))
+        })
+        .collect();
+    // Each transition produces one request before and one after the policy switch.
+    let expected_request_count = transitions.len() * 2;
+
+    let server = MockServer::start().await;
+    let sse_response = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp"), "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(sse_response)
+        .expect(expected_request_count as u64)
+        .mount(&server)
+        .await;
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    config.model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    config.user_instructions = Some("be consistent and helpful".to_string());
+    // Keep the tool set stable and minimal.
+    config.include_plan_tool = false;
+    config.include_apply_patch_tool = false;
+    config.tools_web_search_request = false;
+    config.use_experimental_unified_exec_tool = true; // policy-independent tool
+
+    let conversation = ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"))
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    for (i, &(from, to)) in transitions.iter().enumerate() {
+        // Phase "a" pins the starting policy; phase "b" switches to the target policy.
+        let phases = [
+            (from, format!("turn {i}-a")),
+            (to, format!("turn {i}-b")),
+        ];
+        for (policy, text) in phases {
+            conversation
+                .submit(Op::OverrideTurnContext {
+                    cwd: None,
+                    approval_policy: Some(policy),
+                    sandbox_policy: None,
+                    model: None,
+                    effort: None,
+                    summary: None,
+                })
+                .await
+                .unwrap();
+
+            conversation
+                .submit(Op::UserInput {
+                    items: vec![InputItem::Text { text }],
+                })
+                .await
+                .unwrap();
+            wait_for_event(&conversation, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+        }
+    }
+
+    // The recorded requests come in consecutive (before, after) pairs, one pair per transition.
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(
+        requests.len(),
+        expected_request_count,
+        "expected 2 requests per transition"
+    );
+
+    for (i, (pair, transition)) in requests.chunks_exact(2).zip(&transitions).enumerate() {
+        let before = pair[0].body_json::<serde_json::Value>().unwrap();
+        let after = pair[1].body_json::<serde_json::Value>().unwrap();
+        assert_eq!(
+            before["tools"], after["tools"],
+            "tools changed between requests for transition #{i}: {:?}",
+            transition
+        );
+    }
+}
Author	SHA1	Message	Date
Ahmed Ibrahim	570639cf98	tests	2025-09-12 15:46:49 -04:00
Ahmed Ibrahim	1c50fbb8a7	tests	2025-09-12 14:15:16 -04:00
Ahmed Ibrahim	3316d04ed4	rebase	2025-09-12 14:02:12 -04:00
Ahmed Ibrahim	67a8566f59	Merge branch 'patch-tools' of https://github.com/openai/codex into patch-tools	2025-09-12 14:01:34 -04:00
Ahmed Ibrahim	2d36621f48	Merge branch 'main' into patch-tools	2025-09-12 14:01:22 -04:00
Ahmed Ibrahim	0a70810fc0	squash: 19 commit(s) since origin/main - fmt + clippy: codex-core deterministic shell tool tests, conflict cleanup - patch-tools - fix tests - add tests - add tests - patch-tools - patch-tools - patch-tools - patch-tools - patch-tools - patch-tools - patch-tools - patch-tools - patch-tools - clippy - add tests - clippy - clippy - add tests	2025-09-12 14:00:10 -04:00
Ahmed Ibrahim	b5cf9e09ff	add tests	2025-09-12 13:54:38 -04:00
Ahmed Ibrahim	b2067c73d9	clippy	2025-09-12 13:50:23 -04:00
Ahmed Ibrahim	13e8771ee9	clippy	2025-09-12 13:50:16 -04:00
Ahmed Ibrahim	6577197fa4	add tests	2025-09-12 13:29:31 -04:00
Ahmed Ibrahim	fd1e12f34e	clippy	2025-09-12 12:27:20 -04:00
Ahmed Ibrahim	9580603fed	patch-tools	2025-09-12 11:53:34 -04:00
Ahmed Ibrahim	da38a8f56a	patch-tools	2025-09-12 11:51:08 -04:00
Ahmed Ibrahim	552a438cc9	patch-tools	2025-09-12 11:50:18 -04:00
Ahmed Ibrahim	a36a273d4e	patch-tools	2025-09-12 11:49:46 -04:00
Ahmed Ibrahim	6884c6ccf6	patch-tools	2025-09-12 11:49:14 -04:00
Ahmed Ibrahim	1e5a613c55	patch-tools	2025-09-12 11:48:01 -04:00
Ahmed Ibrahim	4fee2ca3fd	patch-tools	2025-09-12 11:44:40 -04:00
Ahmed Ibrahim	3318cf9369	patch-tools	2025-09-12 11:40:23 -04:00
Ahmed Ibrahim	5ba0bcf035	patch-tools	2025-09-12 11:36:57 -04:00
Ahmed Ibrahim	6d55ef62f9	add tests	2025-09-12 11:30:10 -04:00
Ahmed Ibrahim	cecf3a82a6	add tests	2025-09-12 11:29:25 -04:00
Dylan Hurd	9a7266a33f	fix tests	2025-09-11 15:56:27 -07:00
Ahmed Ibrahim	2abad8fece	patch-tools	2025-09-11 18:47:38 -04:00
Ahmed Ibrahim	0d4a25b981	fmt + clippy: codex-core deterministic shell tool tests, conflict cleanup	2025-09-11 15:40:25 -07:00