mirror of
https://github.com/openai/codex.git
synced 2026-04-23 22:24:57 +00:00
Compare commits
25 Commits
dev/window
...
patch-tool
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
570639cf98 | ||
|
|
1c50fbb8a7 | ||
|
|
3316d04ed4 | ||
|
|
67a8566f59 | ||
|
|
2d36621f48 | ||
|
|
0a70810fc0 | ||
|
|
b5cf9e09ff | ||
|
|
b2067c73d9 | ||
|
|
13e8771ee9 | ||
|
|
6577197fa4 | ||
|
|
fd1e12f34e | ||
|
|
9580603fed | ||
|
|
da38a8f56a | ||
|
|
552a438cc9 | ||
|
|
a36a273d4e | ||
|
|
6884c6ccf6 | ||
|
|
1e5a613c55 | ||
|
|
4fee2ca3fd | ||
|
|
3318cf9369 | ||
|
|
5ba0bcf035 | ||
|
|
6d55ef62f9 | ||
|
|
cecf3a82a6 | ||
|
|
9a7266a33f | ||
|
|
2abad8fece | ||
|
|
0d4a25b981 |
@@ -462,7 +462,6 @@ impl Session {
|
|||||||
tools_config: ToolsConfig::new(&ToolsConfigParams {
|
tools_config: ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &config.model_family,
|
model_family: &config.model_family,
|
||||||
approval_policy,
|
approval_policy,
|
||||||
sandbox_policy: sandbox_policy.clone(),
|
|
||||||
include_plan_tool: config.include_plan_tool,
|
include_plan_tool: config.include_plan_tool,
|
||||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||||
include_web_search_request: config.tools_web_search_request,
|
include_web_search_request: config.tools_web_search_request,
|
||||||
@@ -686,12 +685,6 @@ impl Session {
|
|||||||
if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
|
if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
|
||||||
items.push(UserInstructions::new(user_instructions.to_string()).into());
|
items.push(UserInstructions::new(user_instructions.to_string()).into());
|
||||||
}
|
}
|
||||||
items.push(ResponseItem::from(EnvironmentContext::new(
|
|
||||||
Some(turn_context.cwd.clone()),
|
|
||||||
Some(turn_context.approval_policy),
|
|
||||||
Some(turn_context.sandbox_policy.clone()),
|
|
||||||
Some(self.user_shell.clone()),
|
|
||||||
)));
|
|
||||||
items
|
items
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1143,7 +1136,6 @@ async fn submission_loop(
|
|||||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &effective_family,
|
model_family: &effective_family,
|
||||||
approval_policy: new_approval_policy,
|
approval_policy: new_approval_policy,
|
||||||
sandbox_policy: new_sandbox_policy.clone(),
|
|
||||||
include_plan_tool: config.include_plan_tool,
|
include_plan_tool: config.include_plan_tool,
|
||||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||||
include_web_search_request: config.tools_web_search_request,
|
include_web_search_request: config.tools_web_search_request,
|
||||||
@@ -1165,27 +1157,18 @@ async fn submission_loop(
|
|||||||
|
|
||||||
// Install the new persistent context for subsequent tasks/turns.
|
// Install the new persistent context for subsequent tasks/turns.
|
||||||
turn_context = Arc::new(new_turn_context);
|
turn_context = Arc::new(new_turn_context);
|
||||||
|
|
||||||
// Optionally persist changes to model / effort
|
|
||||||
if cwd.is_some() || approval_policy.is_some() || sandbox_policy.is_some() {
|
|
||||||
sess.record_conversation_items(&[ResponseItem::from(EnvironmentContext::new(
|
|
||||||
cwd,
|
|
||||||
approval_policy,
|
|
||||||
sandbox_policy,
|
|
||||||
// Shell is not configurable from turn to turn
|
|
||||||
None,
|
|
||||||
))])
|
|
||||||
.await;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Op::UserInput { items } => {
|
Op::UserInput { items } => {
|
||||||
// attempt to inject input into current task
|
submit_user_input(
|
||||||
if let Err(items) = sess.inject_input(items) {
|
turn_context.cwd.clone(),
|
||||||
// no current task, spawn a new one
|
turn_context.approval_policy,
|
||||||
let task =
|
turn_context.sandbox_policy.clone(),
|
||||||
AgentTask::spawn(sess.clone(), Arc::clone(&turn_context), sub.id, items);
|
&sess,
|
||||||
sess.set_task(task);
|
&turn_context,
|
||||||
}
|
sub.id.clone(),
|
||||||
|
items,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
Op::UserTurn {
|
Op::UserTurn {
|
||||||
items,
|
items,
|
||||||
@@ -1230,7 +1213,6 @@ async fn submission_loop(
|
|||||||
tools_config: ToolsConfig::new(&ToolsConfigParams {
|
tools_config: ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy,
|
approval_policy,
|
||||||
sandbox_policy: sandbox_policy.clone(),
|
|
||||||
include_plan_tool: config.include_plan_tool,
|
include_plan_tool: config.include_plan_tool,
|
||||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||||
include_web_search_request: config.tools_web_search_request,
|
include_web_search_request: config.tools_web_search_request,
|
||||||
@@ -1247,11 +1229,16 @@ async fn submission_loop(
|
|||||||
shell_environment_policy: turn_context.shell_environment_policy.clone(),
|
shell_environment_policy: turn_context.shell_environment_policy.clone(),
|
||||||
cwd,
|
cwd,
|
||||||
};
|
};
|
||||||
// TODO: record the new environment context in the conversation history
|
submit_user_input(
|
||||||
// no current task, spawn a new one with the per‑turn context
|
fresh_turn_context.cwd.clone(),
|
||||||
let task =
|
fresh_turn_context.approval_policy,
|
||||||
AgentTask::spawn(sess.clone(), Arc::new(fresh_turn_context), sub.id, items);
|
fresh_turn_context.sandbox_policy.clone(),
|
||||||
sess.set_task(task);
|
&sess,
|
||||||
|
&Arc::new(fresh_turn_context),
|
||||||
|
sub.id.clone(),
|
||||||
|
items,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Op::ExecApproval { id, decision } => match decision {
|
Op::ExecApproval { id, decision } => match decision {
|
||||||
@@ -2826,6 +2813,29 @@ async fn handle_sandbox_error(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn submit_user_input(
|
||||||
|
cwd: PathBuf,
|
||||||
|
approval_policy: AskForApproval,
|
||||||
|
sandbox_policy: SandboxPolicy,
|
||||||
|
sess: &Arc<Session>,
|
||||||
|
turn_context: &Arc<TurnContext>,
|
||||||
|
sub_id: String,
|
||||||
|
items: Vec<InputItem>,
|
||||||
|
) {
|
||||||
|
sess.record_conversation_items(&[ResponseItem::from(EnvironmentContext::new(
|
||||||
|
Some(cwd),
|
||||||
|
Some(approval_policy),
|
||||||
|
Some(sandbox_policy),
|
||||||
|
Some(sess.user_shell.clone()),
|
||||||
|
))])
|
||||||
|
.await;
|
||||||
|
if let Err(items) = sess.inject_input(items) {
|
||||||
|
// no current task, spawn a new one
|
||||||
|
let task = AgentTask::spawn(Arc::clone(sess), Arc::clone(turn_context), sub_id, items);
|
||||||
|
sess.set_task(task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||||
let ExecToolCallOutput {
|
let ExecToolCallOutput {
|
||||||
aggregated_output, ..
|
aggregated_output, ..
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ use std::collections::HashMap;
|
|||||||
use crate::model_family::ModelFamily;
|
use crate::model_family::ModelFamily;
|
||||||
use crate::plan_tool::PLAN_TOOL;
|
use crate::plan_tool::PLAN_TOOL;
|
||||||
use crate::protocol::AskForApproval;
|
use crate::protocol::AskForApproval;
|
||||||
use crate::protocol::SandboxPolicy;
|
|
||||||
use crate::tool_apply_patch::ApplyPatchToolType;
|
use crate::tool_apply_patch::ApplyPatchToolType;
|
||||||
use crate::tool_apply_patch::create_apply_patch_freeform_tool;
|
use crate::tool_apply_patch::create_apply_patch_freeform_tool;
|
||||||
use crate::tool_apply_patch::create_apply_patch_json_tool;
|
use crate::tool_apply_patch::create_apply_patch_json_tool;
|
||||||
@@ -58,7 +57,7 @@ pub(crate) enum OpenAiTool {
|
|||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum ConfigShellToolType {
|
pub enum ConfigShellToolType {
|
||||||
DefaultShell,
|
DefaultShell,
|
||||||
ShellWithRequest { sandbox_policy: SandboxPolicy },
|
ShellWithRequest,
|
||||||
LocalShell,
|
LocalShell,
|
||||||
StreamableShell,
|
StreamableShell,
|
||||||
}
|
}
|
||||||
@@ -76,7 +75,6 @@ pub(crate) struct ToolsConfig {
|
|||||||
pub(crate) struct ToolsConfigParams<'a> {
|
pub(crate) struct ToolsConfigParams<'a> {
|
||||||
pub(crate) model_family: &'a ModelFamily,
|
pub(crate) model_family: &'a ModelFamily,
|
||||||
pub(crate) approval_policy: AskForApproval,
|
pub(crate) approval_policy: AskForApproval,
|
||||||
pub(crate) sandbox_policy: SandboxPolicy,
|
|
||||||
pub(crate) include_plan_tool: bool,
|
pub(crate) include_plan_tool: bool,
|
||||||
pub(crate) include_apply_patch_tool: bool,
|
pub(crate) include_apply_patch_tool: bool,
|
||||||
pub(crate) include_web_search_request: bool,
|
pub(crate) include_web_search_request: bool,
|
||||||
@@ -90,7 +88,6 @@ impl ToolsConfig {
|
|||||||
let ToolsConfigParams {
|
let ToolsConfigParams {
|
||||||
model_family,
|
model_family,
|
||||||
approval_policy,
|
approval_policy,
|
||||||
sandbox_policy,
|
|
||||||
include_plan_tool,
|
include_plan_tool,
|
||||||
include_apply_patch_tool,
|
include_apply_patch_tool,
|
||||||
include_web_search_request,
|
include_web_search_request,
|
||||||
@@ -106,9 +103,7 @@ impl ToolsConfig {
|
|||||||
ConfigShellToolType::DefaultShell
|
ConfigShellToolType::DefaultShell
|
||||||
};
|
};
|
||||||
if matches!(approval_policy, AskForApproval::OnRequest) && !use_streamable_shell_tool {
|
if matches!(approval_policy, AskForApproval::OnRequest) && !use_streamable_shell_tool {
|
||||||
shell_type = ConfigShellToolType::ShellWithRequest {
|
shell_type = ConfigShellToolType::ShellWithRequest;
|
||||||
sandbox_policy: sandbox_policy.clone(),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let apply_patch_tool_type = match model_family.apply_patch_tool_type {
|
let apply_patch_tool_type = match model_family.apply_patch_tool_type {
|
||||||
@@ -251,7 +246,9 @@ fn create_unified_exec_tool() -> OpenAiTool {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
|
const SHELL_TOOL_DESCRIPTION: &str = r#"Runs a shell command and returns its output"#;
|
||||||
|
|
||||||
|
fn create_shell_tool_for_request() -> OpenAiTool {
|
||||||
let mut properties = BTreeMap::new();
|
let mut properties = BTreeMap::new();
|
||||||
properties.insert(
|
properties.insert(
|
||||||
"command".to_string(),
|
"command".to_string(),
|
||||||
@@ -263,79 +260,29 @@ fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
|
|||||||
properties.insert(
|
properties.insert(
|
||||||
"workdir".to_string(),
|
"workdir".to_string(),
|
||||||
JsonSchema::String {
|
JsonSchema::String {
|
||||||
description: Some("The working directory to execute the command in".to_string()),
|
description: Some("Working directory to execute the command in.".to_string()),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
properties.insert(
|
properties.insert(
|
||||||
"timeout_ms".to_string(),
|
"timeout_ms".to_string(),
|
||||||
JsonSchema::Number {
|
JsonSchema::Number {
|
||||||
description: Some("The timeout for the command in milliseconds".to_string()),
|
description: Some("Timeout for the command in milliseconds.".to_string()),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
properties.insert(
|
||||||
if matches!(sandbox_policy, SandboxPolicy::WorkspaceWrite { .. }) {
|
|
||||||
properties.insert(
|
|
||||||
"with_escalated_permissions".to_string(),
|
"with_escalated_permissions".to_string(),
|
||||||
JsonSchema::Boolean {
|
JsonSchema::Boolean {
|
||||||
description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
|
description: Some("Request escalated permissions, only for when a command would otherwise be blocked by the sandbox.".to_string()),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
properties.insert(
|
properties.insert(
|
||||||
"justification".to_string(),
|
"justification".to_string(),
|
||||||
JsonSchema::String {
|
JsonSchema::String {
|
||||||
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
description: Some("Required if and only if with_escalated_permissions == true. One sentence explaining why escalation is needed (e.g., write outside CWD, network fetch, git commit).".to_string()),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
}
|
|
||||||
|
|
||||||
let description = match sandbox_policy {
|
let description = SHELL_TOOL_DESCRIPTION.to_string();
|
||||||
SandboxPolicy::WorkspaceWrite {
|
|
||||||
network_access,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
let network_line = if !network_access {
|
|
||||||
"\n - Commands that require network access"
|
|
||||||
} else {
|
|
||||||
""
|
|
||||||
};
|
|
||||||
|
|
||||||
format!(
|
|
||||||
r#"
|
|
||||||
The shell tool is used to execute shell commands.
|
|
||||||
- When invoking the shell tool, your call will be running in a sandbox, and some shell commands will require escalated privileges:
|
|
||||||
- Types of actions that require escalated privileges:
|
|
||||||
- Writing files other than those in the writable roots (see the environment context for the allowed directories){network_line}
|
|
||||||
- Examples of commands that require escalated privileges:
|
|
||||||
- git commit
|
|
||||||
- npm install or pnpm install
|
|
||||||
- cargo build
|
|
||||||
- cargo test
|
|
||||||
- When invoking a command that will require escalated privileges:
|
|
||||||
- Provide the with_escalated_permissions parameter with the boolean value true
|
|
||||||
- Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter."#,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
SandboxPolicy::DangerFullAccess => {
|
|
||||||
"Runs a shell command and returns its output.".to_string()
|
|
||||||
}
|
|
||||||
SandboxPolicy::ReadOnly => {
|
|
||||||
r#"
|
|
||||||
The shell tool is used to execute shell commands.
|
|
||||||
- When invoking the shell tool, your call will be running in a sandbox, and some shell commands (including apply_patch) will require escalated permissions:
|
|
||||||
- Types of actions that require escalated privileges:
|
|
||||||
- Writing files
|
|
||||||
- Applying patches
|
|
||||||
- Examples of commands that require escalated privileges:
|
|
||||||
- apply_patch
|
|
||||||
- git commit
|
|
||||||
- npm install or pnpm install
|
|
||||||
- cargo build
|
|
||||||
- cargo test
|
|
||||||
- When invoking a command that will require escalated privileges:
|
|
||||||
- Provide the with_escalated_permissions parameter with the boolean value true
|
|
||||||
- Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter"#.to_string()
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
OpenAiTool::Function(ResponsesApiTool {
|
OpenAiTool::Function(ResponsesApiTool {
|
||||||
name: "shell".to_string(),
|
name: "shell".to_string(),
|
||||||
@@ -348,7 +295,6 @@ The shell tool is used to execute shell commands.
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_view_image_tool() -> OpenAiTool {
|
fn create_view_image_tool() -> OpenAiTool {
|
||||||
// Support only local filesystem path.
|
// Support only local filesystem path.
|
||||||
let mut properties = BTreeMap::new();
|
let mut properties = BTreeMap::new();
|
||||||
@@ -589,8 +535,8 @@ pub(crate) fn get_openai_tools(
|
|||||||
ConfigShellToolType::DefaultShell => {
|
ConfigShellToolType::DefaultShell => {
|
||||||
tools.push(create_shell_tool());
|
tools.push(create_shell_tool());
|
||||||
}
|
}
|
||||||
ConfigShellToolType::ShellWithRequest { sandbox_policy } => {
|
ConfigShellToolType::ShellWithRequest => {
|
||||||
tools.push(create_shell_tool_for_sandbox(sandbox_policy));
|
tools.push(create_shell_tool_for_request());
|
||||||
}
|
}
|
||||||
ConfigShellToolType::LocalShell => {
|
ConfigShellToolType::LocalShell => {
|
||||||
tools.push(OpenAiTool::LocalShell {});
|
tools.push(OpenAiTool::LocalShell {});
|
||||||
@@ -686,7 +632,6 @@ mod tests {
|
|||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: true,
|
include_plan_tool: true,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -708,7 +653,6 @@ mod tests {
|
|||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: true,
|
include_plan_tool: true,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -730,7 +674,6 @@ mod tests {
|
|||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -836,7 +779,6 @@ mod tests {
|
|||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: false,
|
include_web_search_request: false,
|
||||||
@@ -914,7 +856,6 @@ mod tests {
|
|||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -977,7 +918,6 @@ mod tests {
|
|||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -1035,7 +975,6 @@ mod tests {
|
|||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -1096,7 +1035,6 @@ mod tests {
|
|||||||
let config = ToolsConfig::new(&ToolsConfigParams {
|
let config = ToolsConfig::new(&ToolsConfigParams {
|
||||||
model_family: &model_family,
|
model_family: &model_family,
|
||||||
approval_policy: AskForApproval::Never,
|
approval_policy: AskForApproval::Never,
|
||||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
||||||
include_plan_tool: false,
|
include_plan_tool: false,
|
||||||
include_apply_patch_tool: false,
|
include_apply_patch_tool: false,
|
||||||
include_web_search_request: true,
|
include_web_search_request: true,
|
||||||
@@ -1150,13 +1088,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_shell_tool_for_sandbox_workspace_write() {
|
fn test_shell_tool_for_sandbox_workspace_write() {
|
||||||
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
|
let tool = super::create_shell_tool_for_request();
|
||||||
writable_roots: vec!["workspace".into()],
|
|
||||||
network_access: false,
|
|
||||||
exclude_tmpdir_env_var: false,
|
|
||||||
exclude_slash_tmp: false,
|
|
||||||
};
|
|
||||||
let tool = super::create_shell_tool_for_sandbox(&sandbox_policy);
|
|
||||||
let OpenAiTool::Function(ResponsesApiTool {
|
let OpenAiTool::Function(ResponsesApiTool {
|
||||||
description, name, ..
|
description, name, ..
|
||||||
}) = &tool
|
}) = &tool
|
||||||
@@ -1165,26 +1097,13 @@ mod tests {
|
|||||||
};
|
};
|
||||||
assert_eq!(name, "shell");
|
assert_eq!(name, "shell");
|
||||||
|
|
||||||
let expected = r#"
|
let expected = super::SHELL_TOOL_DESCRIPTION;
|
||||||
The shell tool is used to execute shell commands.
|
|
||||||
- When invoking the shell tool, your call will be running in a sandbox, and some shell commands will require escalated privileges:
|
|
||||||
- Types of actions that require escalated privileges:
|
|
||||||
- Writing files other than those in the writable roots (see the environment context for the allowed directories)
|
|
||||||
- Commands that require network access
|
|
||||||
- Examples of commands that require escalated privileges:
|
|
||||||
- git commit
|
|
||||||
- npm install or pnpm install
|
|
||||||
- cargo build
|
|
||||||
- cargo test
|
|
||||||
- When invoking a command that will require escalated privileges:
|
|
||||||
- Provide the with_escalated_permissions parameter with the boolean value true
|
|
||||||
- Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter."#;
|
|
||||||
assert_eq!(description, expected);
|
assert_eq!(description, expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_shell_tool_for_sandbox_readonly() {
|
fn test_shell_tool_for_sandbox_readonly() {
|
||||||
let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::ReadOnly);
|
let tool = super::create_shell_tool_for_request();
|
||||||
let OpenAiTool::Function(ResponsesApiTool {
|
let OpenAiTool::Function(ResponsesApiTool {
|
||||||
description, name, ..
|
description, name, ..
|
||||||
}) = &tool
|
}) = &tool
|
||||||
@@ -1193,27 +1112,13 @@ The shell tool is used to execute shell commands.
|
|||||||
};
|
};
|
||||||
assert_eq!(name, "shell");
|
assert_eq!(name, "shell");
|
||||||
|
|
||||||
let expected = r#"
|
let expected = super::SHELL_TOOL_DESCRIPTION;
|
||||||
The shell tool is used to execute shell commands.
|
|
||||||
- When invoking the shell tool, your call will be running in a sandbox, and some shell commands (including apply_patch) will require escalated permissions:
|
|
||||||
- Types of actions that require escalated privileges:
|
|
||||||
- Writing files
|
|
||||||
- Applying patches
|
|
||||||
- Examples of commands that require escalated privileges:
|
|
||||||
- apply_patch
|
|
||||||
- git commit
|
|
||||||
- npm install or pnpm install
|
|
||||||
- cargo build
|
|
||||||
- cargo test
|
|
||||||
- When invoking a command that will require escalated privileges:
|
|
||||||
- Provide the with_escalated_permissions parameter with the boolean value true
|
|
||||||
- Include a short, 1 sentence explanation for why we need to run with_escalated_permissions in the justification parameter"#;
|
|
||||||
assert_eq!(description, expected);
|
assert_eq!(description, expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_shell_tool_for_sandbox_danger_full_access() {
|
fn test_shell_tool_for_sandbox_danger_full_access() {
|
||||||
let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::DangerFullAccess);
|
let tool = super::create_shell_tool_for_request();
|
||||||
let OpenAiTool::Function(ResponsesApiTool {
|
let OpenAiTool::Function(ResponsesApiTool {
|
||||||
description, name, ..
|
description, name, ..
|
||||||
}) = &tool
|
}) = &tool
|
||||||
@@ -1222,6 +1127,7 @@ The shell tool is used to execute shell commands.
|
|||||||
};
|
};
|
||||||
assert_eq!(name, "shell");
|
assert_eq!(name, "shell");
|
||||||
|
|
||||||
assert_eq!(description, "Runs a shell command and returns its output.");
|
let expected = super::SHELL_TOOL_DESCRIPTION;
|
||||||
|
assert_eq!(description, expected);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
|
|||||||
|
|
||||||
/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
|
||||||
/// string of instructions.
|
/// string of instructions.
|
||||||
pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
|
pub async fn get_user_instructions(config: &Config) -> Option<String> {
|
||||||
match read_project_docs(config).await {
|
match read_project_docs(config).await {
|
||||||
Ok(Some(project_doc)) => match &config.user_instructions {
|
Ok(Some(project_doc)) => match &config.user_instructions {
|
||||||
Some(original_instructions) => Some(format!(
|
Some(original_instructions) => Some(format!(
|
||||||
|
|||||||
@@ -4,9 +4,11 @@ use codex_core::ModelProviderInfo;
|
|||||||
use codex_core::NewConversation;
|
use codex_core::NewConversation;
|
||||||
use codex_core::WireApi;
|
use codex_core::WireApi;
|
||||||
use codex_core::built_in_model_providers;
|
use codex_core::built_in_model_providers;
|
||||||
|
use codex_core::project_doc::get_user_instructions;
|
||||||
use codex_core::protocol::EventMsg;
|
use codex_core::protocol::EventMsg;
|
||||||
use codex_core::protocol::InputItem;
|
use codex_core::protocol::InputItem;
|
||||||
use codex_core::protocol::Op;
|
use codex_core::protocol::Op;
|
||||||
|
use codex_core::shell::default_user_shell;
|
||||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||||
use core_test_support::load_default_config_for_test;
|
use core_test_support::load_default_config_for_test;
|
||||||
use core_test_support::load_sse_fixture_with_id;
|
use core_test_support::load_sse_fixture_with_id;
|
||||||
@@ -221,6 +223,8 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
|
|||||||
};
|
};
|
||||||
let codex_home = TempDir::new().unwrap();
|
let codex_home = TempDir::new().unwrap();
|
||||||
let mut config = load_default_config_for_test(&codex_home);
|
let mut config = load_default_config_for_test(&codex_home);
|
||||||
|
let cwd = TempDir::new().unwrap();
|
||||||
|
config.cwd = cwd.path().to_path_buf();
|
||||||
config.model_provider = model_provider;
|
config.model_provider = model_provider;
|
||||||
config.experimental_resume = Some(session_path.clone());
|
config.experimental_resume = Some(session_path.clone());
|
||||||
// Also configure user instructions to ensure they are NOT delivered on resume.
|
// Also configure user instructions to ensure they are NOT delivered on resume.
|
||||||
@@ -259,6 +263,29 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
|
|||||||
|
|
||||||
let request = &server.received_requests().await.unwrap()[0];
|
let request = &server.received_requests().await.unwrap()[0];
|
||||||
let request_body = request.body_json::<serde_json::Value>().unwrap();
|
let request_body = request.body_json::<serde_json::Value>().unwrap();
|
||||||
|
|
||||||
|
// Build expected environment context for this turn.
|
||||||
|
let shell = default_user_shell().await;
|
||||||
|
let shell_line = match shell.name() {
|
||||||
|
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||||
|
None => String::new(),
|
||||||
|
};
|
||||||
|
let expected_env_text_turn = format!(
|
||||||
|
r#"<environment_context>
|
||||||
|
<cwd>{}</cwd>
|
||||||
|
<approval_policy>on-request</approval_policy>
|
||||||
|
<sandbox_mode>read-only</sandbox_mode>
|
||||||
|
<network_access>restricted</network_access>
|
||||||
|
{}</environment_context>"#,
|
||||||
|
cwd.path().to_string_lossy(),
|
||||||
|
shell_line.as_str(),
|
||||||
|
);
|
||||||
|
let expected_env_msg_turn = json!({
|
||||||
|
"type": "message",
|
||||||
|
"role": "user",
|
||||||
|
"content": [ { "type": "input_text", "text": expected_env_text_turn } ]
|
||||||
|
});
|
||||||
|
|
||||||
let expected_input = json!([
|
let expected_input = json!([
|
||||||
{
|
{
|
||||||
"type": "message",
|
"type": "message",
|
||||||
@@ -270,12 +297,14 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
|
|||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"content": [{ "type": "output_text", "text": "resumed assistant message" }]
|
"content": [{ "type": "output_text", "text": "resumed assistant message" }]
|
||||||
},
|
},
|
||||||
|
expected_env_msg_turn,
|
||||||
{
|
{
|
||||||
"type": "message",
|
"type": "message",
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [{ "type": "input_text", "text": "hello" }]
|
"content": [{ "type": "input_text", "text": "hello" }]
|
||||||
}
|
}
|
||||||
]);
|
]);
|
||||||
|
|
||||||
assert_eq!(request_body["input"], expected_input);
|
assert_eq!(request_body["input"], expected_input);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -838,7 +867,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
|
|||||||
conversation: codex,
|
conversation: codex,
|
||||||
..
|
..
|
||||||
} = conversation_manager
|
} = conversation_manager
|
||||||
.new_conversation(config)
|
.new_conversation(config.clone())
|
||||||
.await
|
.await
|
||||||
.expect("create new conversation");
|
.expect("create new conversation");
|
||||||
|
|
||||||
@@ -873,34 +902,49 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
|
|||||||
let requests = server.received_requests().await.unwrap();
|
let requests = server.received_requests().await.unwrap();
|
||||||
assert_eq!(requests.len(), 3, "expected 3 requests (one per turn)");
|
assert_eq!(requests.len(), 3, "expected 3 requests (one per turn)");
|
||||||
|
|
||||||
// Replace full-array compare with tail-only raw JSON compare using a single hard-coded value.
|
// Build expected environment context dynamically to avoid OS-dependent flakiness.
|
||||||
let r3_tail_expected = json!([
|
let user_instructions = get_user_instructions(&config).await;
|
||||||
{
|
let shell = default_user_shell().await;
|
||||||
"type": "message",
|
let shell_line = match shell.name() {
|
||||||
"role": "user",
|
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||||
"content": [{"type":"input_text","text":"U1"}]
|
None => String::new(),
|
||||||
},
|
};
|
||||||
{
|
let expected_env_text = format!(
|
||||||
"type": "message",
|
r#"<environment_context>
|
||||||
"role": "assistant",
|
<cwd>{}</cwd>
|
||||||
"content": [{"type":"output_text","text":"Hey there!\n"}]
|
<approval_policy>on-request</approval_policy>
|
||||||
},
|
<sandbox_mode>read-only</sandbox_mode>
|
||||||
{
|
<network_access>restricted</network_access>
|
||||||
"type": "message",
|
{}</environment_context>"#,
|
||||||
"role": "user",
|
std::env::current_dir().unwrap().to_string_lossy(),
|
||||||
"content": [{"type":"input_text","text":"U2"}]
|
shell_line.as_str(),
|
||||||
},
|
);
|
||||||
{
|
let expected_env_msg = json!({
|
||||||
"type": "message",
|
"type": "message",
|
||||||
"role": "assistant",
|
"role": "user",
|
||||||
"content": [{"type":"output_text","text":"Hey there!\n"}]
|
"content": [ { "type": "input_text", "text": expected_env_text } ]
|
||||||
},
|
});
|
||||||
{
|
// Wrap user instructions in the XML container to match the raw/ingest view
|
||||||
"type": "message",
|
let expected_ui_text = format!(
|
||||||
"role": "user",
|
"<user_instructions>\n\n{}\n\n</user_instructions>",
|
||||||
"content": [{"type":"input_text","text":"U3"}]
|
user_instructions.clone().unwrap()
|
||||||
}
|
);
|
||||||
]);
|
let expected_ui_msg = json!({
|
||||||
|
"type": "message",
|
||||||
|
"role": "user",
|
||||||
|
"content": [ { "type": "input_text", "text": expected_ui_text } ]
|
||||||
|
});
|
||||||
|
|
||||||
|
let expected_full = json!([
|
||||||
|
expected_ui_msg,
|
||||||
|
expected_env_msg.clone(),
|
||||||
|
{"type":"message","role":"user","content":[{"type":"input_text","text":"U1"}]},
|
||||||
|
{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Hey there!\n"}]},
|
||||||
|
expected_env_msg.clone(),
|
||||||
|
{"type":"message","role":"user","content":[{"type":"input_text","text":"U2"}]},
|
||||||
|
{"type":"message","role":"assistant","content":[{"type":"output_text","text":"Hey there!\n"}]},
|
||||||
|
expected_env_msg,
|
||||||
|
{"type":"message","role":"user","content":[{"type":"input_text","text":"U3"}]}]);
|
||||||
|
|
||||||
let r3_input_array = requests[2]
|
let r3_input_array = requests[2]
|
||||||
.body_json::<serde_json::Value>()
|
.body_json::<serde_json::Value>()
|
||||||
@@ -909,12 +953,6 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
|
|||||||
.and_then(|v| v.as_array())
|
.and_then(|v| v.as_array())
|
||||||
.cloned()
|
.cloned()
|
||||||
.expect("r3 missing input array");
|
.expect("r3 missing input array");
|
||||||
// skipping earlier context and developer messages
|
|
||||||
let tail_len = r3_tail_expected.as_array().unwrap().len();
|
assert_eq!(json!(r3_input_array), expected_full);
|
||||||
let actual_tail = &r3_input_array[r3_input_array.len() - tail_len..];
|
|
||||||
assert_eq!(
|
|
||||||
serde_json::Value::Array(actual_tail.to_vec()),
|
|
||||||
r3_tail_expected,
|
|
||||||
"request 3 tail mismatch",
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -270,8 +270,13 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
|||||||
assert_eq!(requests.len(), 2, "expected two POST requests");
|
assert_eq!(requests.len(), 2, "expected two POST requests");
|
||||||
|
|
||||||
let shell = default_user_shell().await;
|
let shell = default_user_shell().await;
|
||||||
|
let shell_line = match shell.name() {
|
||||||
|
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||||
|
None => String::new(),
|
||||||
|
};
|
||||||
|
|
||||||
let expected_env_text = format!(
|
// Per-turn environment context includes the shell tag.
|
||||||
|
let expected_env_text_turn = format!(
|
||||||
r#"<environment_context>
|
r#"<environment_context>
|
||||||
<cwd>{}</cwd>
|
<cwd>{}</cwd>
|
||||||
<approval_policy>on-request</approval_policy>
|
<approval_policy>on-request</approval_policy>
|
||||||
@@ -279,18 +284,15 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
|||||||
<network_access>restricted</network_access>
|
<network_access>restricted</network_access>
|
||||||
{}</environment_context>"#,
|
{}</environment_context>"#,
|
||||||
cwd.path().to_string_lossy(),
|
cwd.path().to_string_lossy(),
|
||||||
match shell.name() {
|
shell_line.as_str(),
|
||||||
Some(name) => format!(" <shell>{name}</shell>\n"),
|
|
||||||
None => String::new(),
|
|
||||||
}
|
|
||||||
);
|
);
|
||||||
let expected_ui_text =
|
let expected_ui_text =
|
||||||
"<user_instructions>\n\nbe consistent and helpful\n\n</user_instructions>";
|
"<user_instructions>\n\nbe consistent and helpful\n\n</user_instructions>";
|
||||||
|
|
||||||
let expected_env_msg = serde_json::json!({
|
let expected_env_msg_turn = serde_json::json!({
|
||||||
"type": "message",
|
"type": "message",
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [ { "type": "input_text", "text": expected_env_text } ]
|
"content": [ { "type": "input_text", "text": expected_env_text_turn } ]
|
||||||
});
|
});
|
||||||
let expected_ui_msg = serde_json::json!({
|
let expected_ui_msg = serde_json::json!({
|
||||||
"type": "message",
|
"type": "message",
|
||||||
@@ -304,11 +306,29 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
|||||||
"content": [ { "type": "input_text", "text": "hello 1" } ]
|
"content": [ { "type": "input_text", "text": "hello 1" } ]
|
||||||
});
|
});
|
||||||
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
|
let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||||
|
let body1_input = body1["input"].as_array().unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
body1["input"],
|
body1["input"],
|
||||||
serde_json::json!([expected_ui_msg, expected_env_msg, expected_user_message_1])
|
serde_json::json!([
|
||||||
|
expected_ui_msg,
|
||||||
|
expected_env_msg_turn,
|
||||||
|
expected_user_message_1
|
||||||
|
])
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let env_texts: Vec<&str> = body1_input
|
||||||
|
.iter()
|
||||||
|
.filter_map(|msg| {
|
||||||
|
msg.get("content")
|
||||||
|
.and_then(|content| content.as_array())
|
||||||
|
.and_then(|content| content.first())
|
||||||
|
.and_then(|item| item.get("text"))
|
||||||
|
.and_then(|text| text.as_str())
|
||||||
|
})
|
||||||
|
.filter(|text| text.starts_with("<environment_context>"))
|
||||||
|
.collect();
|
||||||
|
assert_eq!(env_texts, vec![expected_env_text_turn.as_str()]);
|
||||||
|
|
||||||
let expected_user_message_2 = serde_json::json!({
|
let expected_user_message_2 = serde_json::json!({
|
||||||
"type": "message",
|
"type": "message",
|
||||||
"role": "user",
|
"role": "user",
|
||||||
@@ -318,7 +338,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
|||||||
let expected_body2 = serde_json::json!(
|
let expected_body2 = serde_json::json!(
|
||||||
[
|
[
|
||||||
body1["input"].as_array().unwrap().as_slice(),
|
body1["input"].as_array().unwrap().as_slice(),
|
||||||
[expected_user_message_2].as_slice(),
|
[expected_env_msg_turn, expected_user_message_2].as_slice(),
|
||||||
]
|
]
|
||||||
.concat()
|
.concat()
|
||||||
);
|
);
|
||||||
@@ -423,19 +443,28 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
|
|||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
||||||
});
|
});
|
||||||
|
let shell = default_user_shell().await;
|
||||||
|
let shell_line = match shell.name() {
|
||||||
|
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||||
|
None => String::new(),
|
||||||
|
};
|
||||||
|
|
||||||
// After overriding the turn context, the environment context should be emitted again
|
// After overriding the turn context, the environment context should be emitted again
|
||||||
// reflecting the new approval policy and sandbox settings. Omit cwd because it did
|
// reflecting the new approval policy and sandbox settings. Omit cwd because it did
|
||||||
// not change.
|
// not change.
|
||||||
let expected_env_text_2 = format!(
|
let expected_env_text_2 = format!(
|
||||||
r#"<environment_context>
|
r#"<environment_context>
|
||||||
|
<cwd>{}</cwd>
|
||||||
<approval_policy>never</approval_policy>
|
<approval_policy>never</approval_policy>
|
||||||
<sandbox_mode>workspace-write</sandbox_mode>
|
<sandbox_mode>workspace-write</sandbox_mode>
|
||||||
<network_access>enabled</network_access>
|
<network_access>enabled</network_access>
|
||||||
<writable_roots>
|
<writable_roots>
|
||||||
<root>{}</root>
|
<root>{}</root>
|
||||||
</writable_roots>
|
</writable_roots>
|
||||||
</environment_context>"#,
|
{}</environment_context>"#,
|
||||||
writable.path().to_string_lossy()
|
cwd.path().to_string_lossy(),
|
||||||
|
writable.path().to_string_lossy(),
|
||||||
|
shell_line.as_str()
|
||||||
);
|
);
|
||||||
let expected_env_msg_2 = serde_json::json!({
|
let expected_env_msg_2 = serde_json::json!({
|
||||||
"type": "message",
|
"type": "message",
|
||||||
@@ -546,12 +575,165 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
|
|||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
||||||
});
|
});
|
||||||
|
let shell = default_user_shell().await;
|
||||||
|
let shell_line = match shell.name() {
|
||||||
|
Some(name) => format!(" <shell>{name}</shell>\n"),
|
||||||
|
None => String::new(),
|
||||||
|
};
|
||||||
|
let expected_env_text_2 = format!(
|
||||||
|
r#"<environment_context>
|
||||||
|
<cwd>{}</cwd>
|
||||||
|
<approval_policy>never</approval_policy>
|
||||||
|
<sandbox_mode>workspace-write</sandbox_mode>
|
||||||
|
<network_access>enabled</network_access>
|
||||||
|
<writable_roots>
|
||||||
|
<root>{}</root>
|
||||||
|
</writable_roots>
|
||||||
|
{}</environment_context>"#,
|
||||||
|
new_cwd.path().to_string_lossy(),
|
||||||
|
writable.path().to_string_lossy(),
|
||||||
|
shell_line.as_str()
|
||||||
|
);
|
||||||
|
let expected_env_msg_2 = serde_json::json!({
|
||||||
|
"type": "message",
|
||||||
|
"role": "user",
|
||||||
|
"content": [ { "type": "input_text", "text": expected_env_text_2 } ]
|
||||||
|
});
|
||||||
let expected_body2 = serde_json::json!(
|
let expected_body2 = serde_json::json!(
|
||||||
[
|
[
|
||||||
body1["input"].as_array().unwrap().as_slice(),
|
body1["input"].as_array().unwrap().as_slice(),
|
||||||
[expected_user_message_2].as_slice(),
|
[expected_env_msg_2, expected_user_message_2].as_slice(),
|
||||||
]
|
]
|
||||||
.concat()
|
.concat()
|
||||||
);
|
);
|
||||||
assert_eq!(body2["input"], expected_body2);
|
assert_eq!(body2["input"], expected_body2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn tools_stable_across_all_approval_policy_transitions() {
|
||||||
|
use pretty_assertions::assert_eq;
|
||||||
|
|
||||||
|
let server = MockServer::start().await;
|
||||||
|
|
||||||
|
let sse = sse_completed("resp");
|
||||||
|
let template = ResponseTemplate::new(200)
|
||||||
|
.insert_header("content-type", "text/event-stream")
|
||||||
|
.set_body_raw(sse, "text/event-stream");
|
||||||
|
|
||||||
|
// Build all transitions FROM each to each other (exclude self transitions)
|
||||||
|
let policies = vec![
|
||||||
|
AskForApproval::UnlessTrusted,
|
||||||
|
AskForApproval::OnFailure,
|
||||||
|
AskForApproval::OnRequest,
|
||||||
|
AskForApproval::Never,
|
||||||
|
];
|
||||||
|
let mut transitions: Vec<(AskForApproval, AskForApproval)> = Vec::new();
|
||||||
|
for &from in &policies {
|
||||||
|
for &to in &policies {
|
||||||
|
if from != to {
|
||||||
|
transitions.push((from, to));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expect 2 POSTs per transition
|
||||||
|
Mock::given(method("POST"))
|
||||||
|
.and(path("/v1/responses"))
|
||||||
|
.respond_with(template)
|
||||||
|
.expect((transitions.len() * 2) as u64)
|
||||||
|
.mount(&server)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let model_provider = ModelProviderInfo {
|
||||||
|
base_url: Some(format!("{}/v1", server.uri())),
|
||||||
|
..built_in_model_providers()["openai"].clone()
|
||||||
|
};
|
||||||
|
|
||||||
|
let cwd = TempDir::new().unwrap();
|
||||||
|
let codex_home = TempDir::new().unwrap();
|
||||||
|
let mut config = load_default_config_for_test(&codex_home);
|
||||||
|
config.cwd = cwd.path().to_path_buf();
|
||||||
|
config.model_provider = model_provider;
|
||||||
|
config.user_instructions = Some("be consistent and helpful".to_string());
|
||||||
|
// Keep tools stable and minimal
|
||||||
|
config.include_plan_tool = false;
|
||||||
|
config.include_apply_patch_tool = false;
|
||||||
|
config.tools_web_search_request = false;
|
||||||
|
config.use_experimental_unified_exec_tool = true; // policy-independent tool
|
||||||
|
|
||||||
|
let conversation_manager =
|
||||||
|
ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
|
||||||
|
let codex = conversation_manager
|
||||||
|
.new_conversation(config)
|
||||||
|
.await
|
||||||
|
.expect("create new conversation")
|
||||||
|
.conversation;
|
||||||
|
|
||||||
|
for (i, (from, to)) in transitions.iter().enumerate() {
|
||||||
|
// Ensure a known starting policy for this pair
|
||||||
|
codex
|
||||||
|
.submit(Op::OverrideTurnContext {
|
||||||
|
cwd: None,
|
||||||
|
approval_policy: Some(*from),
|
||||||
|
sandbox_policy: None,
|
||||||
|
model: None,
|
||||||
|
effort: None,
|
||||||
|
summary: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
codex
|
||||||
|
.submit(Op::UserInput {
|
||||||
|
items: vec![InputItem::Text {
|
||||||
|
text: format!("turn {i}-a"),
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||||
|
|
||||||
|
// Override to the target policy and send next turn
|
||||||
|
codex
|
||||||
|
.submit(Op::OverrideTurnContext {
|
||||||
|
cwd: None,
|
||||||
|
approval_policy: Some(*to),
|
||||||
|
sandbox_policy: None,
|
||||||
|
model: None,
|
||||||
|
effort: None,
|
||||||
|
summary: None,
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
codex
|
||||||
|
.submit(Op::UserInput {
|
||||||
|
items: vec![InputItem::Text {
|
||||||
|
text: format!("turn {i}-b"),
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify tool arrays are identical across each pair of requests
|
||||||
|
let requests = server.received_requests().await.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
requests.len(),
|
||||||
|
transitions.len() * 2,
|
||||||
|
"expected 2 requests per transition"
|
||||||
|
);
|
||||||
|
|
||||||
|
for i in 0..transitions.len() {
|
||||||
|
let body_a = requests[2 * i].body_json::<serde_json::Value>().unwrap();
|
||||||
|
let body_b = requests[2 * i + 1]
|
||||||
|
.body_json::<serde_json::Value>()
|
||||||
|
.unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
body_a["tools"], body_b["tools"],
|
||||||
|
"tools changed between requests for transition #{i}: {:?}",
|
||||||
|
transitions[i]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user