mirror of
https://github.com/openai/codex.git
synced 2026-04-17 19:24:47 +00:00
Compare commits
12 Commits
dev/shaqay
...
feat/add-s
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ad2eb4ca5a | ||
|
|
e3772c7eca | ||
|
|
d347bc8761 | ||
|
|
08bf18a078 | ||
|
|
a9a7cc1e16 | ||
|
|
c65b7e1df6 | ||
|
|
39e83fee71 | ||
|
|
6ac7664d4f | ||
|
|
b082630ec1 | ||
|
|
8ba4ae3bf3 | ||
|
|
4c5d2f4f07 | ||
|
|
bccfefd43c |
@@ -33,6 +33,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to execute a different argv vector instead of the original one-time request.",
|
||||
"properties": {
|
||||
"approved_override_command": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"approved_override_command"
|
||||
],
|
||||
"title": "ApprovedOverrideCommandReviewDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",
|
||||
|
||||
@@ -228,6 +228,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User approved the command, but wants to execute a different argv vector.",
|
||||
"properties": {
|
||||
"acceptWithOverrideCommand": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"acceptWithOverrideCommand"
|
||||
],
|
||||
"title": "AcceptWithOverrideCommandCommandExecutionApprovalDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
|
||||
"enum": [
|
||||
|
||||
@@ -10,6 +10,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User approved the command, but wants to execute a different argv vector.",
|
||||
"properties": {
|
||||
"acceptWithOverrideCommand": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"acceptWithOverrideCommand"
|
||||
],
|
||||
"title": "AcceptWithOverrideCommandCommandExecutionApprovalDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
|
||||
"enum": [
|
||||
|
||||
@@ -33,6 +33,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to execute a different argv vector instead of the original one-time request.",
|
||||
"properties": {
|
||||
"approved_override_command": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"approved_override_command"
|
||||
],
|
||||
"title": "ApprovedOverrideCommandReviewDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",
|
||||
|
||||
@@ -294,6 +294,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User approved the command, but wants to execute a different argv vector.",
|
||||
"properties": {
|
||||
"acceptWithOverrideCommand": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"acceptWithOverrideCommand"
|
||||
],
|
||||
"title": "AcceptWithOverrideCommandCommandExecutionApprovalDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
|
||||
"enum": [
|
||||
|
||||
@@ -1639,6 +1639,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User approved the command, but wants to execute a different argv vector.",
|
||||
"properties": {
|
||||
"acceptWithOverrideCommand": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"acceptWithOverrideCommand"
|
||||
],
|
||||
"title": "AcceptWithOverrideCommandCommandExecutionApprovalDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
|
||||
"enum": [
|
||||
@@ -3392,6 +3417,31 @@
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to execute a different argv vector instead of the original one-time request.",
|
||||
"properties": {
|
||||
"approved_override_command": {
|
||||
"properties": {
|
||||
"command": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"command"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"approved_override_command"
|
||||
],
|
||||
"title": "ApprovedOverrideCommandReviewDecision",
|
||||
"type": "object"
|
||||
},
|
||||
{
|
||||
"additionalProperties": false,
|
||||
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",
|
||||
|
||||
@@ -7,4 +7,4 @@ import type { NetworkPolicyAmendment } from "./NetworkPolicyAmendment";
|
||||
/**
|
||||
* User's decision in response to an ExecApprovalRequest.
|
||||
*/
|
||||
export type ReviewDecision = "approved" | { "approved_execpolicy_amendment": { proposed_execpolicy_amendment: ExecPolicyAmendment, } } | "approved_for_session" | { "network_policy_amendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "denied" | "abort";
|
||||
export type ReviewDecision = "approved" | { "approved_override_command": { command: Array<string>, } } | { "approved_execpolicy_amendment": { proposed_execpolicy_amendment: ExecPolicyAmendment, } } | "approved_for_session" | { "network_policy_amendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "denied" | "abort";
|
||||
|
||||
@@ -4,4 +4,4 @@
|
||||
import type { ExecPolicyAmendment } from "./ExecPolicyAmendment";
|
||||
import type { NetworkPolicyAmendment } from "./NetworkPolicyAmendment";
|
||||
|
||||
export type CommandExecutionApprovalDecision = "accept" | "acceptForSession" | { "acceptWithExecpolicyAmendment": { execpolicy_amendment: ExecPolicyAmendment, } } | { "applyNetworkPolicyAmendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "decline" | "cancel";
|
||||
export type CommandExecutionApprovalDecision = "accept" | { "acceptWithOverrideCommand": { command: Array<string>, } } | "acceptForSession" | { "acceptWithExecpolicyAmendment": { execpolicy_amendment: ExecPolicyAmendment, } } | { "applyNetworkPolicyAmendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "decline" | "cancel";
|
||||
|
||||
@@ -959,6 +959,8 @@ pub struct ConfigEdit {
|
||||
pub enum CommandExecutionApprovalDecision {
|
||||
/// User approved the command.
|
||||
Accept,
|
||||
/// User approved the command, but wants to execute a different argv vector.
|
||||
AcceptWithOverrideCommand { command: Vec<String> },
|
||||
/// User approved the command and future prompts in the same session-scoped
|
||||
/// approval cache should run without prompting.
|
||||
AcceptForSession,
|
||||
@@ -981,6 +983,9 @@ impl From<CoreReviewDecision> for CommandExecutionApprovalDecision {
|
||||
fn from(value: CoreReviewDecision) -> Self {
|
||||
match value {
|
||||
CoreReviewDecision::Approved => Self::Accept,
|
||||
CoreReviewDecision::ApprovedOverrideCommand { command } => {
|
||||
Self::AcceptWithOverrideCommand { command }
|
||||
}
|
||||
CoreReviewDecision::ApprovedExecpolicyAmendment {
|
||||
proposed_execpolicy_amendment,
|
||||
} => Self::AcceptWithExecpolicyAmendment {
|
||||
@@ -6033,6 +6038,27 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn command_execution_request_approval_response_accepts_override_command() {
|
||||
let response = serde_json::from_value::<CommandExecutionRequestApprovalResponse>(json!({
|
||||
"decision": {
|
||||
"acceptWithOverrideCommand": {
|
||||
"command": ["echo", "hi"]
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("override command response should deserialize");
|
||||
|
||||
assert_eq!(
|
||||
response,
|
||||
CommandExecutionRequestApprovalResponse {
|
||||
decision: CommandExecutionApprovalDecision::AcceptWithOverrideCommand {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
},
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn permissions_request_approval_response_accepts_partial_macos_grants() {
|
||||
let cases = vec![
|
||||
|
||||
@@ -897,7 +897,7 @@ When an upstream HTTP status is available (for example, from the Responses API o
|
||||
Certain actions (shell commands or modifying files) may require explicit user approval depending on the user's config. When `turn/start` is used, the app-server drives an approval flow by sending a server-initiated JSON-RPC request to the client. The client must respond to tell Codex whether to proceed. UIs should present these requests inline with the active turn so users can review the proposed command or diff before choosing.
|
||||
|
||||
- Requests include `threadId` and `turnId`—use them to scope UI state to the active conversation.
|
||||
- Respond with a single `{ "decision": ... }` payload. Command approvals support `accept`, `acceptForSession`, `acceptWithExecpolicyAmendment`, `applyNetworkPolicyAmendment`, `decline`, or `cancel`. The server resumes or declines the work and ends the item with `item/completed`.
|
||||
- Respond with a single `{ "decision": ... }` payload. Command approvals support `accept`, `acceptWithOverrideCommand`, `acceptForSession`, `acceptWithExecpolicyAmendment`, `applyNetworkPolicyAmendment`, `decline`, or `cancel`. The server resumes or declines the work and ends the item with `item/completed`.
|
||||
|
||||
### Command execution approvals
|
||||
|
||||
@@ -905,7 +905,7 @@ Order of messages:
|
||||
|
||||
1. `item/started` — shows the pending `commandExecution` item with `command`, `cwd`, and other fields so you can render the proposed action.
|
||||
2. `item/commandExecution/requestApproval` (request) — carries the same `itemId`, `threadId`, `turnId`, optionally `approvalId` (for subcommand callbacks), and `reason`. For normal command approvals, it also includes `command`, `cwd`, and `commandActions` for friendly display. When `initialize.params.capabilities.experimentalApi = true`, it may also include experimental `additionalPermissions` describing requested per-command sandbox access; any filesystem paths in that payload are absolute on the wire, and network access is represented as `additionalPermissions.network.enabled`. For network-only approvals, those command fields may be omitted and `networkApprovalContext` is provided instead. Optional persistence hints may also be included via `proposedExecpolicyAmendment` and `proposedNetworkPolicyAmendments`. Clients can prefer `availableDecisions` when present to render the exact set of choices the server wants to expose, while still falling back to the older heuristics if it is omitted.
|
||||
3. Client response — for example `{ "decision": "accept" }`, `{ "decision": "acceptForSession" }`, `{ "decision": { "acceptWithExecpolicyAmendment": { "execpolicy_amendment": [...] } } }`, `{ "decision": { "applyNetworkPolicyAmendment": { "network_policy_amendment": { "host": "example.com", "action": "allow" } } } }`, `{ "decision": "decline" }`, or `{ "decision": "cancel" }`.
|
||||
3. Client response — for example `{ "decision": "accept" }`, `{ "decision": { "acceptWithOverrideCommand": { "command": ["echo", "hi"] } } }`, `{ "decision": "acceptForSession" }`, `{ "decision": { "acceptWithExecpolicyAmendment": { "execpolicy_amendment": [...] } } }`, `{ "decision": { "applyNetworkPolicyAmendment": { "network_policy_amendment": { "host": "example.com", "action": "allow" } } } }`, `{ "decision": "decline" }`, or `{ "decision": "cancel" }`.
|
||||
4. `serverRequest/resolved` — `{ threadId, requestId }` confirms the pending request has been resolved or cleared, including lifecycle cleanup on turn start/complete/interrupt.
|
||||
5. `item/completed` — final `commandExecution` item with `status: "completed" | "failed" | "declined"` and execution output. Render this as the authoritative result.
|
||||
|
||||
|
||||
@@ -2408,6 +2408,69 @@ fn map_file_change_approval_decision(
|
||||
}
|
||||
}
|
||||
|
||||
fn map_command_execution_approval_decision(
|
||||
decision: CommandExecutionApprovalDecision,
|
||||
allow_override_command: bool,
|
||||
) -> (ReviewDecision, Option<CommandExecutionStatus>) {
|
||||
match decision {
|
||||
CommandExecutionApprovalDecision::Accept => (ReviewDecision::Approved, None),
|
||||
CommandExecutionApprovalDecision::AcceptWithOverrideCommand { command } => {
|
||||
if !allow_override_command {
|
||||
error!(
|
||||
"failed to deserialize CommandExecutionRequestApprovalResponse: override command is not allowed for network-only approvals"
|
||||
);
|
||||
(
|
||||
ReviewDecision::Denied,
|
||||
Some(CommandExecutionStatus::Declined),
|
||||
)
|
||||
} else if command.is_empty() {
|
||||
error!(
|
||||
"failed to deserialize CommandExecutionRequestApprovalResponse: override command cannot be empty"
|
||||
);
|
||||
(
|
||||
ReviewDecision::Denied,
|
||||
Some(CommandExecutionStatus::Declined),
|
||||
)
|
||||
} else {
|
||||
(ReviewDecision::ApprovedOverrideCommand { command }, None)
|
||||
}
|
||||
}
|
||||
CommandExecutionApprovalDecision::AcceptForSession => {
|
||||
(ReviewDecision::ApprovedForSession, None)
|
||||
}
|
||||
CommandExecutionApprovalDecision::AcceptWithExecpolicyAmendment {
|
||||
execpolicy_amendment,
|
||||
} => (
|
||||
ReviewDecision::ApprovedExecpolicyAmendment {
|
||||
proposed_execpolicy_amendment: execpolicy_amendment.into_core(),
|
||||
},
|
||||
None,
|
||||
),
|
||||
CommandExecutionApprovalDecision::ApplyNetworkPolicyAmendment {
|
||||
network_policy_amendment,
|
||||
} => {
|
||||
let completion_status = match network_policy_amendment.action {
|
||||
V2NetworkPolicyRuleAction::Allow => None,
|
||||
V2NetworkPolicyRuleAction::Deny => Some(CommandExecutionStatus::Declined),
|
||||
};
|
||||
(
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
network_policy_amendment: network_policy_amendment.into_core(),
|
||||
},
|
||||
completion_status,
|
||||
)
|
||||
}
|
||||
CommandExecutionApprovalDecision::Decline => (
|
||||
ReviewDecision::Denied,
|
||||
Some(CommandExecutionStatus::Declined),
|
||||
),
|
||||
CommandExecutionApprovalDecision::Cancel => (
|
||||
ReviewDecision::Abort,
|
||||
Some(CommandExecutionStatus::Declined),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn on_file_change_request_approval_response(
|
||||
event_turn_id: String,
|
||||
@@ -2502,44 +2565,12 @@ async fn on_command_execution_request_approval_response(
|
||||
}
|
||||
});
|
||||
|
||||
let decision = response.decision;
|
||||
|
||||
let (decision, completion_status) = match decision {
|
||||
CommandExecutionApprovalDecision::Accept => (ReviewDecision::Approved, None),
|
||||
CommandExecutionApprovalDecision::AcceptForSession => {
|
||||
(ReviewDecision::ApprovedForSession, None)
|
||||
}
|
||||
CommandExecutionApprovalDecision::AcceptWithExecpolicyAmendment {
|
||||
execpolicy_amendment,
|
||||
} => (
|
||||
ReviewDecision::ApprovedExecpolicyAmendment {
|
||||
proposed_execpolicy_amendment: execpolicy_amendment.into_core(),
|
||||
},
|
||||
None,
|
||||
),
|
||||
CommandExecutionApprovalDecision::ApplyNetworkPolicyAmendment {
|
||||
network_policy_amendment,
|
||||
} => {
|
||||
let completion_status = match network_policy_amendment.action {
|
||||
V2NetworkPolicyRuleAction::Allow => None,
|
||||
V2NetworkPolicyRuleAction::Deny => Some(CommandExecutionStatus::Declined),
|
||||
};
|
||||
(
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
network_policy_amendment: network_policy_amendment.into_core(),
|
||||
},
|
||||
completion_status,
|
||||
)
|
||||
}
|
||||
CommandExecutionApprovalDecision::Decline => (
|
||||
ReviewDecision::Denied,
|
||||
Some(CommandExecutionStatus::Declined),
|
||||
),
|
||||
CommandExecutionApprovalDecision::Cancel => (
|
||||
ReviewDecision::Abort,
|
||||
Some(CommandExecutionStatus::Declined),
|
||||
),
|
||||
};
|
||||
let (decision, completion_status) = map_command_execution_approval_decision(
|
||||
response.decision,
|
||||
// Network-only prompts omit the completion item, and they
|
||||
// must not allow command override responses.
|
||||
completion_item.is_some(),
|
||||
);
|
||||
(decision, completion_status)
|
||||
}
|
||||
Ok(Err(err)) if is_turn_transition_server_request_error(&err) => return,
|
||||
@@ -2884,6 +2915,19 @@ mod tests {
|
||||
assert_eq!(completion_status, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn command_execution_override_is_rejected_for_network_only_prompts() {
|
||||
let (decision, completion_status) = map_command_execution_approval_decision(
|
||||
CommandExecutionApprovalDecision::AcceptWithOverrideCommand {
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
},
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(decision, ReviewDecision::Denied);
|
||||
assert_eq!(completion_status, Some(CommandExecutionStatus::Declined));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_server_elicitation_turn_transition_error_maps_to_cancel() {
|
||||
let error = JSONRPCErrorError {
|
||||
|
||||
@@ -111,6 +111,7 @@ fn is_shell_tool_name(name: &str) -> bool {
|
||||
struct ExecOutputJson {
|
||||
output: String,
|
||||
metadata: ExecOutputMetadataJson,
|
||||
executed_command: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
@@ -131,6 +132,9 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String {
|
||||
"Wall time: {} seconds",
|
||||
parsed.metadata.duration_seconds
|
||||
));
|
||||
if let Some(executed_command) = parsed.executed_command.as_ref() {
|
||||
sections.push(format!("Executed command: {}", executed_command.join(" ")));
|
||||
}
|
||||
|
||||
let mut output = parsed.output.clone();
|
||||
if let Some((stripped, total_lines)) = strip_total_output_header(&parsed.output) {
|
||||
|
||||
@@ -196,6 +196,44 @@ fn reserializes_shell_outputs_for_function_and_custom_tool_calls() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reserializes_shell_outputs_preserve_executed_command_when_present() {
|
||||
let raw_output = r#"{"output":"override-ok","metadata":{"exit_code":0,"duration_seconds":0.5},"executed_command":["/bin/echo","override-ok"]}"#;
|
||||
let expected_output = "Exit code: 0\nWall time: 0.5 seconds\nExecuted command: /bin/echo override-ok\nOutput:\noverride-ok";
|
||||
let mut items = vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "shell".to_string(),
|
||||
namespace: None,
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload::from_text(raw_output.to_string()),
|
||||
},
|
||||
];
|
||||
|
||||
reserialize_shell_outputs(&mut items);
|
||||
|
||||
assert_eq!(
|
||||
items,
|
||||
vec![
|
||||
ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "shell".to_string(),
|
||||
namespace: None,
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
},
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-1".to_string(),
|
||||
output: FunctionCallOutputPayload::from_text(expected_output.to_string()),
|
||||
},
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_search_output_namespace_serializes_with_deferred_child_tools() {
|
||||
let namespace = tools::ToolSearchOutputTool::Namespace(tools::ResponsesApiNamespace {
|
||||
|
||||
@@ -2877,6 +2877,7 @@ impl Session {
|
||||
});
|
||||
let available_decisions = available_decisions.unwrap_or_else(|| {
|
||||
ExecApprovalRequestEvent::default_available_decisions(
|
||||
&command,
|
||||
network_approval_context.as_ref(),
|
||||
proposed_execpolicy_amendment.as_ref(),
|
||||
proposed_network_policy_amendments.as_deref(),
|
||||
|
||||
@@ -700,9 +700,9 @@ async fn maybe_auto_review_mcp_request_user_input(
|
||||
ReviewDecision::Approved
|
||||
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
|
||||
| ReviewDecision::NetworkPolicyAmendment { .. } => MCP_TOOL_APPROVAL_ACCEPT.to_string(),
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
MCP_TOOL_APPROVAL_DECLINE_SYNTHETIC.to_string()
|
||||
}
|
||||
ReviewDecision::ApprovedOverrideCommand { .. }
|
||||
| ReviewDecision::Denied
|
||||
| ReviewDecision::Abort => MCP_TOOL_APPROVAL_DECLINE_SYNTHETIC.to_string(),
|
||||
};
|
||||
Some(RequestUserInputResponse {
|
||||
answers: HashMap::from([(
|
||||
|
||||
@@ -690,7 +690,9 @@ fn mcp_tool_approval_decision_from_guardian(decision: ReviewDecision) -> McpTool
|
||||
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
|
||||
| ReviewDecision::NetworkPolicyAmendment { .. } => McpToolApprovalDecision::Accept,
|
||||
ReviewDecision::ApprovedForSession => McpToolApprovalDecision::AcceptForSession,
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => McpToolApprovalDecision::Decline,
|
||||
ReviewDecision::ApprovedOverrideCommand { .. }
|
||||
| ReviewDecision::Denied
|
||||
| ReviewDecision::Abort => McpToolApprovalDecision::Decline,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -602,6 +602,12 @@ fn guardian_review_decision_maps_to_mcp_tool_decision() {
|
||||
mcp_tool_approval_decision_from_guardian(ReviewDecision::Approved),
|
||||
McpToolApprovalDecision::Accept
|
||||
);
|
||||
assert_eq!(
|
||||
mcp_tool_approval_decision_from_guardian(ReviewDecision::ApprovedOverrideCommand {
|
||||
command: vec!["echo".to_string(), "override".to_string()],
|
||||
}),
|
||||
McpToolApprovalDecision::Decline
|
||||
);
|
||||
assert_eq!(
|
||||
mcp_tool_approval_decision_from_guardian(ReviewDecision::Denied),
|
||||
McpToolApprovalDecision::Decline
|
||||
|
||||
@@ -90,6 +90,7 @@ pub(crate) async fn emit_exec_command_begin(
|
||||
pub(crate) enum ToolEmitter {
|
||||
Shell {
|
||||
command: Vec<String>,
|
||||
is_command_overridden: bool,
|
||||
cwd: PathBuf,
|
||||
source: ExecCommandSource,
|
||||
parsed_cmd: Vec<ParsedCommand>,
|
||||
@@ -111,6 +112,7 @@ pub(crate) enum ToolEmitter {
|
||||
impl ToolEmitter {
|
||||
pub fn shell(
|
||||
command: Vec<String>,
|
||||
is_command_overridden: bool,
|
||||
cwd: PathBuf,
|
||||
source: ExecCommandSource,
|
||||
freeform: bool,
|
||||
@@ -118,6 +120,7 @@ impl ToolEmitter {
|
||||
let parsed_cmd = parse_command(&command);
|
||||
Self::Shell {
|
||||
command,
|
||||
is_command_overridden,
|
||||
cwd,
|
||||
source,
|
||||
parsed_cmd,
|
||||
@@ -153,6 +156,7 @@ impl ToolEmitter {
|
||||
(
|
||||
Self::Shell {
|
||||
command,
|
||||
is_command_overridden: _,
|
||||
cwd,
|
||||
source,
|
||||
parsed_cmd,
|
||||
@@ -289,10 +293,30 @@ impl ToolEmitter {
|
||||
ctx: ToolEventCtx<'_>,
|
||||
) -> String {
|
||||
match self {
|
||||
Self::Shell { freeform: true, .. } => {
|
||||
super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
|
||||
}
|
||||
_ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
|
||||
Self::Shell {
|
||||
freeform: true,
|
||||
command,
|
||||
is_command_overridden,
|
||||
..
|
||||
} => super::format_exec_output_for_model_freeform(
|
||||
output,
|
||||
ctx.turn.truncation_policy,
|
||||
is_command_overridden.then_some(command.as_slice()),
|
||||
),
|
||||
Self::Shell {
|
||||
command,
|
||||
is_command_overridden,
|
||||
..
|
||||
} => super::format_exec_output_for_model_structured(
|
||||
output,
|
||||
ctx.turn.truncation_policy,
|
||||
is_command_overridden.then_some(command.as_slice()),
|
||||
),
|
||||
_ => super::format_exec_output_for_model_structured(
|
||||
output,
|
||||
ctx.turn.truncation_policy,
|
||||
None,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -215,6 +215,8 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
turn: turn.clone(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
command_override: None,
|
||||
effective_command: None,
|
||||
};
|
||||
let out = orchestrator
|
||||
.run(
|
||||
@@ -319,6 +321,8 @@ pub(crate) async fn intercept_apply_patch(
|
||||
turn: turn.clone(),
|
||||
call_id: call_id.to_string(),
|
||||
tool_name: tool_name.to_string(),
|
||||
command_override: None,
|
||||
effective_command: None,
|
||||
};
|
||||
let out = orchestrator
|
||||
.run(
|
||||
|
||||
@@ -223,6 +223,7 @@ fn default_runtime_manager(codex_home: std::path::PathBuf) -> ArtifactRuntimeMan
|
||||
async fn emit_exec_begin(session: &Session, turn: &TurnContext, call_id: &str) {
|
||||
let emitter = ToolEmitter::shell(
|
||||
vec![ARTIFACTS_TOOL_NAME.to_string()],
|
||||
false,
|
||||
turn.cwd.clone(),
|
||||
ExecCommandSource::Agent,
|
||||
true,
|
||||
@@ -249,6 +250,7 @@ async fn emit_exec_end(
|
||||
};
|
||||
let emitter = ToolEmitter::shell(
|
||||
vec![ARTIFACTS_TOOL_NAME.to_string()],
|
||||
false,
|
||||
turn.cwd.clone(),
|
||||
ExecCommandSource::Agent,
|
||||
true,
|
||||
|
||||
@@ -61,6 +61,7 @@ async fn emit_js_repl_exec_begin(
|
||||
) {
|
||||
let emitter = ToolEmitter::shell(
|
||||
vec!["js_repl".to_string()],
|
||||
false,
|
||||
turn.cwd.clone(),
|
||||
ExecCommandSource::Agent,
|
||||
false,
|
||||
@@ -80,6 +81,7 @@ async fn emit_js_repl_exec_end(
|
||||
let exec_output = build_js_repl_exec_output(output, error, duration);
|
||||
let emitter = ToolEmitter::shell(
|
||||
vec!["js_repl".to_string()],
|
||||
false,
|
||||
turn.cwd.clone(),
|
||||
ExecCommandSource::Agent,
|
||||
false,
|
||||
|
||||
@@ -411,14 +411,15 @@ impl ShellHandler {
|
||||
}
|
||||
|
||||
let source = ExecCommandSource::Agent;
|
||||
let emitter = ToolEmitter::shell(
|
||||
let begin_emitter = ToolEmitter::shell(
|
||||
exec_params.command.clone(),
|
||||
false,
|
||||
exec_params.cwd.clone(),
|
||||
source,
|
||||
freeform,
|
||||
);
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
|
||||
emitter.begin(event_ctx).await;
|
||||
begin_emitter.begin(event_ctx).await;
|
||||
|
||||
let exec_approval_requirement = session
|
||||
.services
|
||||
@@ -462,11 +463,15 @@ impl ShellHandler {
|
||||
}
|
||||
}
|
||||
};
|
||||
let effective_command =
|
||||
std::sync::Arc::new(tokio::sync::Mutex::new(exec_params.command.clone()));
|
||||
let tool_ctx = ToolCtx {
|
||||
session: session.clone(),
|
||||
turn: turn.clone(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name,
|
||||
command_override: None,
|
||||
effective_command: Some(std::sync::Arc::clone(&effective_command)),
|
||||
};
|
||||
let out = orchestrator
|
||||
.run(
|
||||
@@ -478,8 +483,16 @@ impl ShellHandler {
|
||||
)
|
||||
.await
|
||||
.map(|result| result.output);
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
let effective_command = effective_command.lock().await.clone();
|
||||
let is_command_overridden = effective_command != exec_params.command;
|
||||
let finish_emitter = ToolEmitter::shell(
|
||||
effective_command,
|
||||
is_command_overridden,
|
||||
exec_params.cwd.clone(),
|
||||
source,
|
||||
freeform,
|
||||
);
|
||||
let content = finish_emitter.finish(event_ctx, out).await?;
|
||||
Ok(FunctionToolOutput::from_text(content, Some(true)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,17 @@
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_protocol::models::ShellCommandToolCallParams;
|
||||
use codex_protocol::models::function_call_output_content_items_to_text;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::ExecCommandStatus;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::codex::make_session_and_context_with_rx;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::powershell::try_find_powershell_executable_blocking;
|
||||
@@ -13,8 +20,15 @@ use crate::sandboxing::SandboxPermissions;
|
||||
use crate::shell::Shell;
|
||||
use crate::shell::ShellType;
|
||||
use crate::shell_snapshot::ShellSnapshot;
|
||||
use crate::state::ActiveTurn;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handlers::ShellCommandHandler;
|
||||
use crate::tools::handlers::ShellHandler;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use tokio::sync::watch;
|
||||
use tokio::time::timeout;
|
||||
|
||||
/// The logic for is_known_safe_command() has heuristics for known shells,
|
||||
/// so we must ensure the commands generated by [ShellCommandHandler] can be
|
||||
@@ -63,6 +77,10 @@ fn assert_safe(shell: &Shell, command: &str) {
|
||||
));
|
||||
}
|
||||
|
||||
fn tool_output_text(output: &crate::tools::context::FunctionToolOutput) -> String {
|
||||
function_call_output_content_items_to_text(&output.body).expect("tool output text")
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn shell_command_handler_to_exec_params_uses_session_shell_and_turn_context() {
|
||||
let (session, turn_context) = make_session_and_context().await;
|
||||
@@ -178,3 +196,437 @@ fn shell_command_handler_rejects_login_when_disallowed() {
|
||||
"unexpected error: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn shell_handler_retry_emits_single_begin_before_approval_request() {
|
||||
let (session, mut turn_context, rx) = make_session_and_context_with_rx().await;
|
||||
*session.active_turn.lock().await = Some(ActiveTurn::default());
|
||||
Arc::get_mut(&mut turn_context)
|
||||
.expect("single turn context ref")
|
||||
.approval_policy
|
||||
.set(AskForApproval::OnFailure)
|
||||
.expect("test setup should allow updating approval policy");
|
||||
|
||||
let temp_dir = tempfile::tempdir_in(
|
||||
turn_context
|
||||
.cwd
|
||||
.parent()
|
||||
.expect("workspace cwd should have a parent"),
|
||||
)
|
||||
.expect("create temp dir outside workspace");
|
||||
let file_path = temp_dir.path().join("outside-sandbox.txt");
|
||||
let expected_stdout = "hello from outside sandbox\n".to_string();
|
||||
std::fs::write(&file_path, expected_stdout.as_bytes()).expect("write readable file");
|
||||
|
||||
let command = if cfg!(windows) {
|
||||
vec![
|
||||
"cmd.exe".to_string(),
|
||||
"/C".to_string(),
|
||||
format!("type \"{}\"", file_path.display()),
|
||||
]
|
||||
} else {
|
||||
vec!["/bin/cat".to_string(), file_path.display().to_string()]
|
||||
};
|
||||
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let call_id = "call-1".to_string();
|
||||
let handler = ShellHandler;
|
||||
let handle: tokio::task::JoinHandle<
|
||||
Result<crate::tools::context::FunctionToolOutput, crate::function_tool::FunctionCallError>,
|
||||
> = tokio::spawn({
|
||||
let session = Arc::clone(&session);
|
||||
let turn_context = Arc::clone(&turn_context);
|
||||
let tracker = Arc::clone(&tracker);
|
||||
let command = command.clone();
|
||||
let call_id = call_id.clone();
|
||||
async move {
|
||||
handler
|
||||
.handle(ToolInvocation {
|
||||
session,
|
||||
turn: turn_context.clone(),
|
||||
tracker,
|
||||
call_id: call_id.clone(),
|
||||
tool_name: "shell".to_string(),
|
||||
tool_namespace: None,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": command,
|
||||
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
|
||||
"timeout_ms": 1_000u64,
|
||||
"sandbox_permissions": SandboxPermissions::UseDefault,
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
}
|
||||
});
|
||||
|
||||
let mut begin_count = 0usize;
|
||||
let mut begin_idx = None;
|
||||
let mut event_idx = 0usize;
|
||||
let approval_idx = loop {
|
||||
let event = timeout(Duration::from_secs(5), rx.recv())
|
||||
.await
|
||||
.expect("timed out waiting for approval flow event")
|
||||
.expect("approval flow event missing");
|
||||
match event.msg {
|
||||
EventMsg::ExecCommandBegin(begin) if begin.call_id == call_id => {
|
||||
begin_count += 1;
|
||||
if begin_idx.is_none() {
|
||||
begin_idx = Some(event_idx);
|
||||
}
|
||||
}
|
||||
EventMsg::ExecApprovalRequest(request) if request.call_id == call_id => {
|
||||
break event_idx;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
event_idx += 1;
|
||||
};
|
||||
|
||||
assert_eq!(begin_count, 1);
|
||||
assert_eq!(begin_idx, Some(0));
|
||||
assert!(
|
||||
approval_idx > begin_idx.expect("begin event should exist"),
|
||||
"approval request should follow the begin event"
|
||||
);
|
||||
|
||||
session
|
||||
.notify_approval(&call_id, ReviewDecision::Approved)
|
||||
.await;
|
||||
|
||||
let end_event = loop {
|
||||
let event = timeout(Duration::from_secs(5), rx.recv())
|
||||
.await
|
||||
.expect("timed out waiting for exec end event")
|
||||
.expect("exec end event missing");
|
||||
match event.msg {
|
||||
EventMsg::ExecCommandBegin(begin) if begin.call_id == call_id => {
|
||||
begin_count += 1;
|
||||
}
|
||||
EventMsg::ExecCommandEnd(end) if end.call_id == call_id => {
|
||||
break end;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
};
|
||||
|
||||
let _response = timeout(Duration::from_secs(5), handle)
|
||||
.await
|
||||
.expect("timed out waiting for shell handler")
|
||||
.expect("shell handler join error")
|
||||
.expect("shell handler should succeed");
|
||||
|
||||
assert_eq!(begin_count, 1);
|
||||
assert_eq!(end_event.status, ExecCommandStatus::Completed);
|
||||
assert_eq!(end_event.command, command);
|
||||
assert_eq!(end_event.stdout, expected_stdout);
|
||||
assert_eq!(end_event.exit_code, 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn shell_handler_retry_override_keeps_single_begin_and_reports_override() {
|
||||
let (session, mut turn_context, rx) = make_session_and_context_with_rx().await;
|
||||
*session.active_turn.lock().await = Some(ActiveTurn::default());
|
||||
Arc::get_mut(&mut turn_context)
|
||||
.expect("single turn context ref")
|
||||
.approval_policy
|
||||
.set(AskForApproval::OnFailure)
|
||||
.expect("test setup should allow updating approval policy");
|
||||
|
||||
let temp_dir = tempfile::tempdir_in(
|
||||
turn_context
|
||||
.cwd
|
||||
.parent()
|
||||
.expect("workspace cwd should have a parent"),
|
||||
)
|
||||
.expect("create temp dir outside workspace");
|
||||
let file_path = temp_dir.path().join("outside-sandbox.txt");
|
||||
std::fs::write(&file_path, b"should not run\n").expect("write readable file");
|
||||
|
||||
let requested_command = if cfg!(windows) {
|
||||
vec![
|
||||
"cmd.exe".to_string(),
|
||||
"/C".to_string(),
|
||||
format!("type \"{}\"", file_path.display()),
|
||||
]
|
||||
} else {
|
||||
vec!["/bin/cat".to_string(), file_path.display().to_string()]
|
||||
};
|
||||
let override_command = if cfg!(windows) {
|
||||
vec![
|
||||
"cmd.exe".to_string(),
|
||||
"/C".to_string(),
|
||||
"echo override-after-retry".to_string(),
|
||||
]
|
||||
} else {
|
||||
vec!["/bin/echo".to_string(), "override-after-retry".to_string()]
|
||||
};
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let call_id = "call-retry-override".to_string();
|
||||
let handler = ShellHandler;
|
||||
let handle: tokio::task::JoinHandle<
|
||||
Result<crate::tools::context::FunctionToolOutput, crate::function_tool::FunctionCallError>,
|
||||
> = tokio::spawn({
|
||||
let session = Arc::clone(&session);
|
||||
let turn_context = Arc::clone(&turn_context);
|
||||
let tracker = Arc::clone(&tracker);
|
||||
let requested_command = requested_command.clone();
|
||||
let call_id = call_id.clone();
|
||||
async move {
|
||||
handler
|
||||
.handle(ToolInvocation {
|
||||
session,
|
||||
turn: turn_context.clone(),
|
||||
tracker,
|
||||
call_id: call_id.clone(),
|
||||
tool_name: "shell".to_string(),
|
||||
tool_namespace: None,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": requested_command,
|
||||
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
|
||||
"timeout_ms": 1_000u64,
|
||||
"sandbox_permissions": SandboxPermissions::UseDefault,
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
}
|
||||
});
|
||||
|
||||
let mut begin_count = 0usize;
|
||||
let begin_event = loop {
|
||||
let event = timeout(Duration::from_secs(5), rx.recv())
|
||||
.await
|
||||
.expect("timed out waiting for begin event")
|
||||
.expect("begin event missing");
|
||||
if let EventMsg::ExecCommandBegin(begin) = event.msg
|
||||
&& begin.call_id == call_id
|
||||
{
|
||||
begin_count += 1;
|
||||
break begin;
|
||||
}
|
||||
};
|
||||
|
||||
let approval_event = loop {
|
||||
let event = timeout(Duration::from_secs(5), rx.recv())
|
||||
.await
|
||||
.expect("timed out waiting for retry approval request")
|
||||
.expect("retry approval request missing");
|
||||
match event.msg {
|
||||
EventMsg::ExecCommandBegin(begin) if begin.call_id == call_id => {
|
||||
begin_count += 1;
|
||||
}
|
||||
EventMsg::ExecApprovalRequest(request) if request.call_id == call_id => {
|
||||
break request;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
};
|
||||
|
||||
assert_eq!(begin_count, 1);
|
||||
assert_eq!(begin_event.command, requested_command);
|
||||
assert_eq!(approval_event.command, requested_command);
|
||||
|
||||
session
|
||||
.notify_approval(
|
||||
&call_id,
|
||||
ReviewDecision::ApprovedOverrideCommand {
|
||||
command: override_command.clone(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let end_event = loop {
|
||||
let event = timeout(Duration::from_secs(5), rx.recv())
|
||||
.await
|
||||
.expect("timed out waiting for exec end event")
|
||||
.expect("exec end event missing");
|
||||
match event.msg {
|
||||
EventMsg::ExecCommandBegin(begin) if begin.call_id == call_id => {
|
||||
begin_count += 1;
|
||||
}
|
||||
EventMsg::ExecCommandEnd(end) if end.call_id == call_id => {
|
||||
break end;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
};
|
||||
|
||||
let response = timeout(Duration::from_secs(5), handle)
|
||||
.await
|
||||
.expect("timed out waiting for shell handler")
|
||||
.expect("shell handler join error");
|
||||
|
||||
let duration_seconds = (end_event.duration.as_secs_f32() * 10.0).round() / 10.0;
|
||||
let expected_body = serde_json::json!({
|
||||
"output": end_event.aggregated_output,
|
||||
"metadata": {
|
||||
"exit_code": end_event.exit_code,
|
||||
"duration_seconds": duration_seconds,
|
||||
},
|
||||
"executed_command": override_command,
|
||||
});
|
||||
let tool_output = match response {
|
||||
Ok(output) => tool_output_text(&output),
|
||||
Err(crate::function_tool::FunctionCallError::RespondToModel(message)) => message,
|
||||
Err(err) => panic!("unexpected shell handler error: {err:?}"),
|
||||
};
|
||||
|
||||
assert_eq!(begin_count, 1);
|
||||
assert_eq!(
|
||||
end_event.command,
|
||||
expected_body["executed_command"]
|
||||
.as_array()
|
||||
.expect("array")
|
||||
.iter()
|
||||
.map(|value| value.as_str().expect("string").to_string())
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
assert_eq!(
|
||||
serde_json::from_str::<serde_json::Value>(&tool_output).expect("valid json"),
|
||||
expected_body
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn shell_handler_retry_override_reapplies_sandbox_selection() {
|
||||
let (session, mut turn_context, rx) = make_session_and_context_with_rx().await;
|
||||
*session.active_turn.lock().await = Some(ActiveTurn::default());
|
||||
Arc::get_mut(&mut turn_context)
|
||||
.expect("single turn context ref")
|
||||
.approval_policy
|
||||
.set(AskForApproval::OnFailure)
|
||||
.expect("test setup should allow updating approval policy");
|
||||
|
||||
let temp_dir = tempfile::tempdir_in(
|
||||
turn_context
|
||||
.cwd
|
||||
.parent()
|
||||
.expect("workspace cwd should have a parent"),
|
||||
)
|
||||
.expect("create temp dir outside workspace");
|
||||
let requested_path = temp_dir.path().join("requested-outside-sandbox.txt");
|
||||
let override_path = temp_dir.path().join("override-outside-sandbox.txt");
|
||||
std::fs::write(&requested_path, b"requested should stay sandboxed\n")
|
||||
.expect("write requested file");
|
||||
std::fs::write(&override_path, b"override should stay sandboxed\n")
|
||||
.expect("write override file");
|
||||
|
||||
let requested_command = if cfg!(windows) {
|
||||
vec![
|
||||
"cmd.exe".to_string(),
|
||||
"/C".to_string(),
|
||||
format!("type \"{}\"", requested_path.display()),
|
||||
]
|
||||
} else {
|
||||
vec!["/bin/cat".to_string(), requested_path.display().to_string()]
|
||||
};
|
||||
let override_command = if cfg!(windows) {
|
||||
vec![
|
||||
"cmd.exe".to_string(),
|
||||
"/C".to_string(),
|
||||
format!("type \"{}\"", override_path.display()),
|
||||
]
|
||||
} else {
|
||||
vec!["/bin/cat".to_string(), override_path.display().to_string()]
|
||||
};
|
||||
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let call_id = "call-retry-override-sandboxed".to_string();
|
||||
let handler = ShellHandler;
|
||||
let handle: tokio::task::JoinHandle<
|
||||
Result<crate::tools::context::FunctionToolOutput, crate::function_tool::FunctionCallError>,
|
||||
> = tokio::spawn({
|
||||
let session = Arc::clone(&session);
|
||||
let turn_context = Arc::clone(&turn_context);
|
||||
let tracker = Arc::clone(&tracker);
|
||||
let requested_command = requested_command.clone();
|
||||
let call_id = call_id.clone();
|
||||
async move {
|
||||
handler
|
||||
.handle(ToolInvocation {
|
||||
session,
|
||||
turn: turn_context.clone(),
|
||||
tracker,
|
||||
call_id: call_id.clone(),
|
||||
tool_name: "shell".to_string(),
|
||||
tool_namespace: None,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": requested_command,
|
||||
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
|
||||
"timeout_ms": 1_000u64,
|
||||
"sandbox_permissions": SandboxPermissions::UseDefault,
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await
|
||||
}
|
||||
});
|
||||
|
||||
let begin_event = loop {
|
||||
let event = timeout(Duration::from_secs(5), rx.recv())
|
||||
.await
|
||||
.expect("timed out waiting for begin event")
|
||||
.expect("begin event missing");
|
||||
if let EventMsg::ExecCommandBegin(begin) = event.msg
|
||||
&& begin.call_id == call_id
|
||||
{
|
||||
break begin;
|
||||
}
|
||||
};
|
||||
|
||||
let approval_event = loop {
|
||||
let event = timeout(Duration::from_secs(5), rx.recv())
|
||||
.await
|
||||
.expect("timed out waiting for retry approval request")
|
||||
.expect("retry approval request missing");
|
||||
if let EventMsg::ExecApprovalRequest(request) = event.msg
|
||||
&& request.call_id == call_id
|
||||
{
|
||||
break request;
|
||||
}
|
||||
};
|
||||
|
||||
assert_eq!(begin_event.command, requested_command);
|
||||
assert_eq!(approval_event.command, requested_command);
|
||||
|
||||
session
|
||||
.notify_approval(
|
||||
&call_id,
|
||||
ReviewDecision::ApprovedOverrideCommand {
|
||||
command: override_command.clone(),
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let end_event = loop {
|
||||
let event = timeout(Duration::from_secs(5), rx.recv())
|
||||
.await
|
||||
.expect("timed out waiting for exec end event")
|
||||
.expect("exec end event missing");
|
||||
if let EventMsg::ExecCommandEnd(end) = event.msg
|
||||
&& end.call_id == call_id
|
||||
{
|
||||
break end;
|
||||
}
|
||||
};
|
||||
|
||||
let response = timeout(Duration::from_secs(5), handle)
|
||||
.await
|
||||
.expect("timed out waiting for shell handler")
|
||||
.expect("shell handler join error");
|
||||
|
||||
assert!(
|
||||
response.is_err(),
|
||||
"shell handler should keep override command sandboxed"
|
||||
);
|
||||
|
||||
assert_eq!(end_event.status, ExecCommandStatus::Failed);
|
||||
assert_eq!(end_event.command, override_command);
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ use crate::exec::ExecToolCallOutput;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use crate::truncate::formatted_truncate_text;
|
||||
use crate::truncate::truncate_text;
|
||||
use codex_shell_command::parse_command::shlex_join;
|
||||
pub use router::ToolRouter;
|
||||
use serde::Serialize;
|
||||
|
||||
@@ -32,6 +33,7 @@ pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
|
||||
pub fn format_exec_output_for_model_structured(
|
||||
exec_output: &ExecToolCallOutput,
|
||||
truncation_policy: TruncationPolicy,
|
||||
executed_command: Option<&[String]>,
|
||||
) -> String {
|
||||
let ExecToolCallOutput {
|
||||
exit_code,
|
||||
@@ -49,6 +51,8 @@ pub fn format_exec_output_for_model_structured(
|
||||
struct ExecOutput<'a> {
|
||||
output: &'a str,
|
||||
metadata: ExecMetadata,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
executed_command: Option<&'a [String]>,
|
||||
}
|
||||
|
||||
// round to 1 decimal place
|
||||
@@ -62,6 +66,7 @@ pub fn format_exec_output_for_model_structured(
|
||||
exit_code: *exit_code,
|
||||
duration_seconds,
|
||||
},
|
||||
executed_command,
|
||||
};
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
@@ -71,6 +76,7 @@ pub fn format_exec_output_for_model_structured(
|
||||
pub fn format_exec_output_for_model_freeform(
|
||||
exec_output: &ExecToolCallOutput,
|
||||
truncation_policy: TruncationPolicy,
|
||||
executed_command: Option<&[String]>,
|
||||
) -> String {
|
||||
// round to 1 decimal place
|
||||
let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0;
|
||||
@@ -83,6 +89,9 @@ pub fn format_exec_output_for_model_freeform(
|
||||
|
||||
let mut sections = Vec::new();
|
||||
|
||||
if let Some(command) = executed_command {
|
||||
sections.push(format!("Executed command: {}", shlex_join(command)));
|
||||
}
|
||||
sections.push(format!("Exit code: {}", exec_output.exit_code));
|
||||
sections.push(format!("Wall time: {duration_seconds} seconds"));
|
||||
if total_lines != formatted_output.lines().count() {
|
||||
@@ -117,3 +126,49 @@ fn build_content_with_timeout(exec_output: &ExecToolCallOutput) -> String {
|
||||
exec_output.aggregated_output.text.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::time::Duration;
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_for_model_structured_includes_executed_command_when_present() {
|
||||
let exec_output = ExecToolCallOutput {
|
||||
aggregated_output: crate::exec::StreamOutput::new("override-ok\n".to_string()),
|
||||
duration: Duration::from_millis(100),
|
||||
..ExecToolCallOutput::default()
|
||||
};
|
||||
|
||||
let formatted = format_exec_output_for_model_structured(
|
||||
&exec_output,
|
||||
TruncationPolicy::Bytes(1024),
|
||||
Some(&["/bin/echo".to_string(), "override-ok".to_string()]),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
formatted,
|
||||
r#"{"output":"override-ok\n","metadata":{"exit_code":0,"duration_seconds":0.1},"executed_command":["/bin/echo","override-ok"]}"#
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_for_model_freeform_includes_executed_command_when_present() {
|
||||
let exec_output = ExecToolCallOutput {
|
||||
aggregated_output: crate::exec::StreamOutput::new("override-ok\n".to_string()),
|
||||
duration: Duration::from_millis(100),
|
||||
..ExecToolCallOutput::default()
|
||||
};
|
||||
|
||||
let formatted = format_exec_output_for_model_freeform(
|
||||
&exec_output,
|
||||
TruncationPolicy::Bytes(1024),
|
||||
Some(&["/bin/echo".to_string(), "override-ok".to_string()]),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
formatted,
|
||||
"Executed command: /bin/echo override-ok\nExit code: 0\nWall time: 0.1 seconds\nOutput:\noverride-ok\n"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -118,6 +118,22 @@ fn allows_network_approval_flow(policy: AskForApproval) -> bool {
|
||||
!matches!(policy, AskForApproval::Never)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn pending_decision_for_network_review(
|
||||
review_decision: &ReviewDecision,
|
||||
) -> Option<PendingApprovalDecision> {
|
||||
match review_decision {
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
|
||||
Some(PendingApprovalDecision::AllowOnce)
|
||||
}
|
||||
ReviewDecision::ApprovedForSession => Some(PendingApprovalDecision::AllowForSession),
|
||||
ReviewDecision::ApprovedOverrideCommand { .. }
|
||||
| ReviewDecision::Denied
|
||||
| ReviewDecision::Abort => Some(PendingApprovalDecision::Deny),
|
||||
ReviewDecision::NetworkPolicyAmendment { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
impl PendingApprovalDecision {
|
||||
fn to_network_decision(self) -> NetworkDecision {
|
||||
match self {
|
||||
@@ -465,7 +481,9 @@ impl NetworkApprovalService {
|
||||
PendingApprovalDecision::Deny
|
||||
}
|
||||
},
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
ReviewDecision::Denied
|
||||
| ReviewDecision::Abort
|
||||
| ReviewDecision::ApprovedOverrideCommand { .. } => {
|
||||
if routes_approval_to_guardian(&turn_context) {
|
||||
if let Some(owner_call) = owner_call.as_ref() {
|
||||
self.record_call_outcome(
|
||||
|
||||
@@ -1,7 +1,47 @@
|
||||
use super::*;
|
||||
use crate::codex::make_session_and_context_with_rx;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::TurnAbortReason;
|
||||
use crate::state::TaskKind;
|
||||
use crate::tasks::SessionTask;
|
||||
use crate::tasks::SessionTaskContext;
|
||||
use async_trait::async_trait;
|
||||
use codex_network_proxy::BlockedRequestArgs;
|
||||
use codex_network_proxy::NetworkPolicyRequest;
|
||||
use codex_network_proxy::NetworkPolicyRequestArgs;
|
||||
use codex_network_proxy::NetworkProtocol;
|
||||
use codex_protocol::approvals::ExecPolicyAmendment;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::time::Duration;
|
||||
use tokio::time::timeout;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
struct WaitForCancellationTask;
|
||||
|
||||
#[async_trait]
|
||||
impl SessionTask for WaitForCancellationTask {
|
||||
fn kind(&self) -> TaskKind {
|
||||
TaskKind::Regular
|
||||
}
|
||||
|
||||
fn span_name(&self) -> &'static str {
|
||||
"network-approval-test"
|
||||
}
|
||||
|
||||
async fn run(
|
||||
self: Arc<Self>,
|
||||
_session: Arc<SessionTaskContext>,
|
||||
_ctx: Arc<crate::codex::TurnContext>,
|
||||
_input: Vec<UserInput>,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> Option<String> {
|
||||
cancellation_token.cancelled().await;
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn pending_approvals_are_deduped_per_host_protocol_and_port() {
|
||||
@@ -137,6 +177,100 @@ fn only_never_policy_disables_network_approval_flow() {
|
||||
assert!(allows_network_approval_flow(AskForApproval::UnlessTrusted));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn network_review_rejects_command_override_but_allows_execpolicy_amendment() {
|
||||
assert_eq!(
|
||||
pending_decision_for_network_review(&ReviewDecision::ApprovedOverrideCommand {
|
||||
command: vec!["echo".to_string(), "override".to_string()],
|
||||
}),
|
||||
Some(PendingApprovalDecision::Deny)
|
||||
);
|
||||
assert_eq!(
|
||||
pending_decision_for_network_review(&ReviewDecision::ApprovedExecpolicyAmendment {
|
||||
proposed_execpolicy_amendment: ExecPolicyAmendment::new(vec![
|
||||
"echo".to_string(),
|
||||
"override".to_string(),
|
||||
]),
|
||||
}),
|
||||
Some(PendingApprovalDecision::AllowOnce)
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn inline_network_review_rejects_command_override_at_runtime() {
|
||||
let service = Arc::new(NetworkApprovalService::default());
|
||||
let (session, turn_context, rx) = make_session_and_context_with_rx().await;
|
||||
service
|
||||
.register_call("registration-1".to_string(), "turn-1".to_string())
|
||||
.await;
|
||||
session
|
||||
.spawn_task(
|
||||
Arc::clone(&turn_context),
|
||||
vec![UserInput::Text {
|
||||
text: "need network".to_string(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
WaitForCancellationTask,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = NetworkPolicyRequest::new(NetworkPolicyRequestArgs {
|
||||
protocol: NetworkProtocol::Http,
|
||||
host: "example.com".to_string(),
|
||||
port: 80,
|
||||
client_addr: None,
|
||||
method: Some("GET".to_string()),
|
||||
command: None,
|
||||
exec_policy_hint: None,
|
||||
});
|
||||
|
||||
let handle = tokio::spawn({
|
||||
let service = Arc::clone(&service);
|
||||
let session = Arc::clone(&session);
|
||||
async move { service.handle_inline_policy_request(session, request).await }
|
||||
});
|
||||
|
||||
let approval_request = loop {
|
||||
let event = timeout(Duration::from_secs(5), rx.recv())
|
||||
.await
|
||||
.expect("timed out waiting for approval request")
|
||||
.expect("approval request missing");
|
||||
if let EventMsg::ExecApprovalRequest(request) = event.msg {
|
||||
break request;
|
||||
}
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
approval_request.command,
|
||||
vec![
|
||||
"network-access".to_string(),
|
||||
"http://example.com:80".to_string(),
|
||||
]
|
||||
);
|
||||
|
||||
session
|
||||
.notify_approval(
|
||||
&approval_request.call_id,
|
||||
ReviewDecision::ApprovedOverrideCommand {
|
||||
command: vec!["echo".to_string(), "override".to_string()],
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let decision = timeout(Duration::from_secs(5), handle)
|
||||
.await
|
||||
.expect("timed out waiting for inline network decision")
|
||||
.expect("inline network decision join error");
|
||||
|
||||
assert_eq!(decision, NetworkDecision::deny("not_allowed"));
|
||||
assert_eq!(
|
||||
service.take_call_outcome("registration-1").await,
|
||||
Some(NetworkApprovalOutcome::DeniedByUser)
|
||||
);
|
||||
|
||||
session.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
}
|
||||
|
||||
fn denied_blocked_request(host: &str) -> BlockedRequest {
|
||||
BlockedRequest::new(BlockedRequestArgs {
|
||||
host: host.to_string(),
|
||||
|
||||
@@ -65,13 +65,7 @@ impl ToolOrchestrator {
|
||||
)
|
||||
.await;
|
||||
|
||||
let attempt_tool_ctx = ToolCtx {
|
||||
session: tool_ctx.session.clone(),
|
||||
turn: tool_ctx.turn.clone(),
|
||||
call_id: tool_ctx.call_id.clone(),
|
||||
tool_name: tool_ctx.tool_name.clone(),
|
||||
};
|
||||
let run_result = tool.run(req, attempt, &attempt_tool_ctx).await;
|
||||
let run_result = tool.run(req, attempt, tool_ctx).await;
|
||||
|
||||
let Some(network_approval) = network_approval else {
|
||||
return (run_result, None);
|
||||
@@ -113,6 +107,14 @@ impl ToolOrchestrator {
|
||||
let otel_ci = &tool_ctx.call_id;
|
||||
let otel_user = ToolDecisionSource::User;
|
||||
let otel_cfg = ToolDecisionSource::Config;
|
||||
let mut effective_tool_ctx = ToolCtx {
|
||||
session: tool_ctx.session.clone(),
|
||||
turn: tool_ctx.turn.clone(),
|
||||
call_id: tool_ctx.call_id.clone(),
|
||||
tool_name: tool_ctx.tool_name.clone(),
|
||||
command_override: tool_ctx.command_override.clone(),
|
||||
effective_command: tool_ctx.effective_command.clone(),
|
||||
};
|
||||
|
||||
// 1) Approval
|
||||
let mut already_approved = false;
|
||||
@@ -129,15 +131,22 @@ impl ToolOrchestrator {
|
||||
}
|
||||
ExecApprovalRequirement::NeedsApproval { reason, .. } => {
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: &tool_ctx.session,
|
||||
turn: &tool_ctx.turn,
|
||||
call_id: &tool_ctx.call_id,
|
||||
session: &effective_tool_ctx.session,
|
||||
turn: &effective_tool_ctx.turn,
|
||||
call_id: &effective_tool_ctx.call_id,
|
||||
retry_reason: reason,
|
||||
network_approval_context: None,
|
||||
command_override: effective_tool_ctx.command_override.clone(),
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, &decision, otel_user.clone());
|
||||
if let Some(command) = decision.override_command() {
|
||||
effective_tool_ctx.command_override = Some(command.to_vec());
|
||||
if let Some(effective_command) = &effective_tool_ctx.effective_command {
|
||||
*effective_command.lock().await = command.to_vec();
|
||||
}
|
||||
}
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
@@ -149,6 +158,7 @@ impl ToolOrchestrator {
|
||||
return Err(ToolError::Rejected(reason));
|
||||
}
|
||||
ReviewDecision::Approved
|
||||
| ReviewDecision::ApprovedOverrideCommand { .. }
|
||||
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
|
||||
| ReviewDecision::ApprovedForSession => {}
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
@@ -205,7 +215,7 @@ impl ToolOrchestrator {
|
||||
let (first_result, first_deferred_network_approval) = Self::run_attempt(
|
||||
tool,
|
||||
req,
|
||||
tool_ctx,
|
||||
&effective_tool_ctx,
|
||||
&initial_attempt,
|
||||
has_managed_network_requirements,
|
||||
)
|
||||
@@ -276,17 +286,26 @@ impl ToolOrchestrator {
|
||||
let bypass_retry_approval = tool
|
||||
.should_bypass_approval(approval_policy, already_approved)
|
||||
&& network_approval_context.is_none();
|
||||
let mut command_overridden_on_retry = false;
|
||||
if !bypass_retry_approval {
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: &tool_ctx.session,
|
||||
turn: &tool_ctx.turn,
|
||||
call_id: &tool_ctx.call_id,
|
||||
session: &effective_tool_ctx.session,
|
||||
turn: &effective_tool_ctx.turn,
|
||||
call_id: &effective_tool_ctx.call_id,
|
||||
retry_reason: Some(retry_reason),
|
||||
network_approval_context: network_approval_context.clone(),
|
||||
command_override: effective_tool_ctx.command_override.clone(),
|
||||
};
|
||||
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
otel.tool_decision(otel_tn, otel_ci, &decision, otel_user);
|
||||
if let Some(command) = decision.override_command() {
|
||||
command_overridden_on_retry = true;
|
||||
effective_tool_ctx.command_override = Some(command.to_vec());
|
||||
if let Some(effective_command) = &effective_tool_ctx.effective_command {
|
||||
*effective_command.lock().await = command.to_vec();
|
||||
}
|
||||
}
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
@@ -298,6 +317,7 @@ impl ToolOrchestrator {
|
||||
return Err(ToolError::Rejected(reason));
|
||||
}
|
||||
ReviewDecision::Approved
|
||||
| ReviewDecision::ApprovedOverrideCommand { .. }
|
||||
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
|
||||
| ReviewDecision::ApprovedForSession => {}
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
@@ -311,15 +331,37 @@ impl ToolOrchestrator {
|
||||
}
|
||||
}
|
||||
|
||||
let escalated_attempt = SandboxAttempt {
|
||||
sandbox: crate::exec::SandboxType::None,
|
||||
let retry_sandbox = if command_overridden_on_retry {
|
||||
match tool.sandbox_mode_for_first_attempt(req) {
|
||||
SandboxOverride::BypassSandboxFirstAttempt => {
|
||||
crate::exec::SandboxType::None
|
||||
}
|
||||
SandboxOverride::NoOverride => self.sandbox.select_initial(
|
||||
&turn_ctx.file_system_sandbox_policy,
|
||||
turn_ctx.network_sandbox_policy,
|
||||
tool.sandbox_preference(),
|
||||
turn_ctx.windows_sandbox_level,
|
||||
has_managed_network_requirements,
|
||||
),
|
||||
}
|
||||
} else {
|
||||
crate::exec::SandboxType::None
|
||||
};
|
||||
let retry_codex_linux_sandbox_exe = if command_overridden_on_retry {
|
||||
turn_ctx.codex_linux_sandbox_exe.as_ref()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let retry_attempt = SandboxAttempt {
|
||||
sandbox: retry_sandbox,
|
||||
policy: &turn_ctx.sandbox_policy,
|
||||
file_system_policy: &turn_ctx.file_system_sandbox_policy,
|
||||
network_policy: turn_ctx.network_sandbox_policy,
|
||||
enforce_managed_network: has_managed_network_requirements,
|
||||
manager: &self.sandbox,
|
||||
sandbox_cwd: &turn_ctx.cwd,
|
||||
codex_linux_sandbox_exe: None,
|
||||
codex_linux_sandbox_exe: retry_codex_linux_sandbox_exe,
|
||||
use_legacy_landlock,
|
||||
windows_sandbox_level: turn_ctx.windows_sandbox_level,
|
||||
windows_sandbox_private_desktop: turn_ctx
|
||||
@@ -332,8 +374,8 @@ impl ToolOrchestrator {
|
||||
let (retry_result, retry_deferred_network_approval) = Self::run_attempt(
|
||||
tool,
|
||||
req,
|
||||
tool_ctx,
|
||||
&escalated_attempt,
|
||||
&effective_tool_ctx,
|
||||
&retry_attempt,
|
||||
has_managed_network_requirements,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -144,7 +144,10 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision> {
|
||||
let keys = self.approval_keys(req);
|
||||
let command = req.command.clone();
|
||||
let command = ctx
|
||||
.command_override
|
||||
.clone()
|
||||
.unwrap_or_else(|| req.command.clone());
|
||||
let cwd = req.cwd.clone();
|
||||
let retry_reason = ctx.retry_reason.clone();
|
||||
let reason = retry_reason.clone().or_else(|| req.justification.clone());
|
||||
@@ -221,8 +224,9 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
|
||||
ctx: &ToolCtx,
|
||||
) -> Result<ExecToolCallOutput, ToolError> {
|
||||
let session_shell = ctx.session.user_shell();
|
||||
let base_command = ctx.command_override.as_ref().unwrap_or(&req.command);
|
||||
let command = maybe_wrap_shell_lc_with_snapshot(
|
||||
&req.command,
|
||||
base_command,
|
||||
session_shell.as_ref(),
|
||||
&req.cwd,
|
||||
&req.explicit_env_overrides,
|
||||
|
||||
@@ -547,6 +547,12 @@ impl CoreShellActionProvider {
|
||||
EscalationDecision::run()
|
||||
}
|
||||
}
|
||||
ReviewDecision::ApprovedOverrideCommand { .. } => {
|
||||
EscalationDecision::deny(Some(
|
||||
"Command override is not supported for execve approvals"
|
||||
.to_string(),
|
||||
))
|
||||
}
|
||||
ReviewDecision::ApprovedForSession => {
|
||||
// Currently, we only add session approvals for
|
||||
// skill scripts because we are storing only the
|
||||
|
||||
@@ -115,7 +115,10 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
let session = ctx.session;
|
||||
let turn = ctx.turn;
|
||||
let call_id = ctx.call_id.to_string();
|
||||
let command = req.command.clone();
|
||||
let command = ctx
|
||||
.command_override
|
||||
.clone()
|
||||
.unwrap_or_else(|| req.command.clone());
|
||||
let cwd = req.cwd.clone();
|
||||
let retry_reason = ctx.retry_reason.clone();
|
||||
let reason = retry_reason.clone().or_else(|| req.justification.clone());
|
||||
@@ -192,7 +195,7 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecProcess> for UnifiedExecRunt
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx,
|
||||
) -> Result<UnifiedExecProcess, ToolError> {
|
||||
let base_command = &req.command;
|
||||
let base_command = ctx.command_override.as_ref().unwrap_or(&req.command);
|
||||
let session_shell = ctx.session.user_shell();
|
||||
let command = maybe_wrap_shell_lc_with_snapshot(
|
||||
base_command,
|
||||
|
||||
@@ -118,6 +118,7 @@ pub(crate) struct ApprovalCtx<'a> {
|
||||
pub call_id: &'a str,
|
||||
pub retry_reason: Option<String>,
|
||||
pub network_approval_context: Option<NetworkApprovalContext>,
|
||||
pub command_override: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
// Specifies what tool orchestrator should do with a given tool call.
|
||||
@@ -301,6 +302,8 @@ pub(crate) struct ToolCtx {
|
||||
pub turn: Arc<TurnContext>,
|
||||
pub call_id: String,
|
||||
pub tool_name: String,
|
||||
pub command_override: Option<Vec<String>>,
|
||||
pub effective_command: Option<Arc<tokio::sync::Mutex<Vec<String>>>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
||||
@@ -165,10 +165,12 @@ impl UnifiedExecProcessManager {
|
||||
.open_session_with_sandbox(&request, cwd.clone(), context)
|
||||
.await;
|
||||
|
||||
let (process, mut deferred_network_approval) = match process {
|
||||
Ok((process, deferred_network_approval)) => {
|
||||
(Arc::new(process), deferred_network_approval)
|
||||
}
|
||||
let (process, mut deferred_network_approval, effective_command) = match process {
|
||||
Ok((process, deferred_network_approval, effective_command)) => (
|
||||
Arc::new(process),
|
||||
deferred_network_approval,
|
||||
effective_command,
|
||||
),
|
||||
Err(err) => {
|
||||
self.release_process_id(request.process_id).await;
|
||||
return Err(err);
|
||||
@@ -183,7 +185,7 @@ impl UnifiedExecProcessManager {
|
||||
None,
|
||||
);
|
||||
let emitter = ToolEmitter::unified_exec(
|
||||
&request.command,
|
||||
&effective_command,
|
||||
cwd.clone(),
|
||||
ExecCommandSource::UnifiedExecStartup,
|
||||
Some(request.process_id.to_string()),
|
||||
@@ -236,7 +238,7 @@ impl UnifiedExecProcessManager {
|
||||
Arc::clone(&context.session),
|
||||
Arc::clone(&context.turn),
|
||||
context.call_id.clone(),
|
||||
request.command.clone(),
|
||||
effective_command.clone(),
|
||||
cwd.clone(),
|
||||
Some(process_id.to_string()),
|
||||
Arc::clone(&transcript),
|
||||
@@ -264,7 +266,7 @@ impl UnifiedExecProcessManager {
|
||||
self.store_process(
|
||||
Arc::clone(&process),
|
||||
context,
|
||||
&request.command,
|
||||
&effective_command,
|
||||
cwd.clone(),
|
||||
start,
|
||||
process_id,
|
||||
@@ -289,7 +291,7 @@ impl UnifiedExecProcessManager {
|
||||
},
|
||||
exit_code,
|
||||
original_token_count: Some(original_token_count),
|
||||
session_command: Some(request.command.clone()),
|
||||
session_command: Some(effective_command.clone()),
|
||||
};
|
||||
|
||||
Ok(response)
|
||||
@@ -572,7 +574,14 @@ impl UnifiedExecProcessManager {
|
||||
request: &ExecCommandRequest,
|
||||
cwd: PathBuf,
|
||||
context: &UnifiedExecContext,
|
||||
) -> Result<(UnifiedExecProcess, Option<DeferredNetworkApproval>), UnifiedExecError> {
|
||||
) -> Result<
|
||||
(
|
||||
UnifiedExecProcess,
|
||||
Option<DeferredNetworkApproval>,
|
||||
Vec<String>,
|
||||
),
|
||||
UnifiedExecError,
|
||||
> {
|
||||
let env = apply_unified_exec_env(create_env(
|
||||
&context.turn.shell_environment_policy,
|
||||
Some(context.session.conversation_id),
|
||||
@@ -613,13 +622,17 @@ impl UnifiedExecProcessManager {
|
||||
justification: request.justification.clone(),
|
||||
exec_approval_requirement,
|
||||
};
|
||||
let effective_command =
|
||||
std::sync::Arc::new(tokio::sync::Mutex::new(request.command.clone()));
|
||||
let tool_ctx = ToolCtx {
|
||||
session: context.session.clone(),
|
||||
turn: context.turn.clone(),
|
||||
call_id: context.call_id.clone(),
|
||||
tool_name: "exec_command".to_string(),
|
||||
command_override: None,
|
||||
effective_command: Some(std::sync::Arc::clone(&effective_command)),
|
||||
};
|
||||
orchestrator
|
||||
let result = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
@@ -628,8 +641,13 @@ impl UnifiedExecProcessManager {
|
||||
context.turn.approval_policy.value(),
|
||||
)
|
||||
.await
|
||||
.map(|result| (result.output, result.deferred_network_approval))
|
||||
.map_err(|e| UnifiedExecError::create_process(format!("{e:?}")))
|
||||
.map_err(|e| UnifiedExecError::create_process(format!("{e:?}")))?;
|
||||
let effective_command = effective_command.lock().await.clone();
|
||||
Ok((
|
||||
result.output,
|
||||
result.deferred_network_approval,
|
||||
effective_command,
|
||||
))
|
||||
}
|
||||
|
||||
pub(super) async fn collect_output_until_deadline(
|
||||
|
||||
@@ -34,6 +34,7 @@ use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use core_test_support::wait_for_event_with_timeout;
|
||||
use core_test_support::zsh_fork::build_zsh_fork_test;
|
||||
use core_test_support::zsh_fork::restrictive_workspace_write_policy;
|
||||
@@ -1815,6 +1816,102 @@ async fn approving_apply_patch_for_session_skips_future_prompts_for_same_file()
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "current_thread")]
|
||||
#[cfg(unix)]
|
||||
async fn approved_override_command_updates_exec_events() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
||||
let sandbox_policy_for_config = sandbox_policy.clone();
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
||||
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "override-command-events";
|
||||
let original_command = r#"cat < "hello" > /var/test.txt"#;
|
||||
let override_payload = "printf 'override-output'";
|
||||
let (event, expected_command) = ActionKind::RunCommand {
|
||||
command: original_command,
|
||||
}
|
||||
.prepare(
|
||||
&test,
|
||||
&server,
|
||||
call_id,
|
||||
SandboxPermissions::RequireEscalated,
|
||||
)
|
||||
.await?;
|
||||
let expected_command =
|
||||
expected_command.expect("override command scenario should produce a shell command");
|
||||
|
||||
let _ = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-override-command-1"),
|
||||
event,
|
||||
ev_completed("resp-override-command-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let results_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-override-command-1", "done"),
|
||||
ev_completed("resp-override-command-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"override-command-events",
|
||||
approval_policy,
|
||||
sandbox_policy.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let approval = expect_exec_approval(&test, expected_command.as_str()).await;
|
||||
let mut override_command = approval.command.clone();
|
||||
*override_command
|
||||
.last_mut()
|
||||
.expect("shell approval command should include the shell payload") =
|
||||
override_payload.to_string();
|
||||
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval.effective_approval_id(),
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::ApprovedOverrideCommand {
|
||||
command: override_command.clone(),
|
||||
},
|
||||
})
|
||||
.await?;
|
||||
|
||||
let begin_event = wait_for_event_match(&test.codex, |event| match event {
|
||||
EventMsg::ExecCommandBegin(ev) if ev.call_id == call_id => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let end_event = wait_for_event_match(&test.codex, |event| match event {
|
||||
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
wait_for_completion(&test).await;
|
||||
|
||||
assert_eq!(begin_event.command, override_command);
|
||||
assert_eq!(end_event.command, override_command);
|
||||
assert_eq!(end_event.stdout, "override-output");
|
||||
|
||||
let output_item = results_mock.single_request().function_call_output(call_id);
|
||||
let result = parse_result(&output_item);
|
||||
assert_eq!(result.stdout, "override-output");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "current_thread")]
|
||||
#[cfg(unix)]
|
||||
async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts() -> Result<()> {
|
||||
|
||||
@@ -208,6 +208,7 @@ impl ExecApprovalRequestEvent {
|
||||
match &self.available_decisions {
|
||||
Some(decisions) => decisions.clone(),
|
||||
None => Self::default_available_decisions(
|
||||
&self.command,
|
||||
self.network_approval_context.as_ref(),
|
||||
self.proposed_execpolicy_amendment.as_ref(),
|
||||
self.proposed_network_policy_amendments.as_deref(),
|
||||
@@ -217,6 +218,7 @@ impl ExecApprovalRequestEvent {
|
||||
}
|
||||
|
||||
pub fn default_available_decisions(
|
||||
command: &[String],
|
||||
network_approval_context: Option<&NetworkApprovalContext>,
|
||||
proposed_execpolicy_amendment: Option<&ExecPolicyAmendment>,
|
||||
proposed_network_policy_amendments: Option<&[NetworkPolicyAmendment]>,
|
||||
@@ -238,10 +240,21 @@ impl ExecApprovalRequestEvent {
|
||||
}
|
||||
|
||||
if additional_permissions.is_some() {
|
||||
return vec![ReviewDecision::Approved, ReviewDecision::Abort];
|
||||
return vec![
|
||||
ReviewDecision::Approved,
|
||||
ReviewDecision::ApprovedOverrideCommand {
|
||||
command: command.to_vec(),
|
||||
},
|
||||
ReviewDecision::Abort,
|
||||
];
|
||||
}
|
||||
|
||||
let mut decisions = vec![ReviewDecision::Approved];
|
||||
let mut decisions = vec![
|
||||
ReviewDecision::Approved,
|
||||
ReviewDecision::ApprovedOverrideCommand {
|
||||
command: command.to_vec(),
|
||||
},
|
||||
];
|
||||
if let Some(prefix) = proposed_execpolicy_amendment {
|
||||
decisions.push(ReviewDecision::ApprovedExecpolicyAmendment {
|
||||
proposed_execpolicy_amendment: prefix.clone(),
|
||||
@@ -317,3 +330,66 @@ pub struct ApplyPatchApprovalRequestEvent {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub grant_root: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn command_prompts_advertise_override_decision() {
|
||||
let command = vec!["echo".to_string(), "hi".to_string()];
|
||||
|
||||
let decisions =
|
||||
ExecApprovalRequestEvent::default_available_decisions(&command, None, None, None, None);
|
||||
|
||||
assert_eq!(
|
||||
decisions,
|
||||
vec![
|
||||
ReviewDecision::Approved,
|
||||
ReviewDecision::ApprovedOverrideCommand {
|
||||
command: command.clone(),
|
||||
},
|
||||
ReviewDecision::Abort,
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn network_only_prompts_do_not_advertise_override_decision() {
|
||||
let decisions = ExecApprovalRequestEvent::default_available_decisions(
|
||||
&["echo".to_string(), "hi".to_string()],
|
||||
Some(&NetworkApprovalContext {
|
||||
host: "example.com".to_string(),
|
||||
protocol: NetworkApprovalProtocol::Https,
|
||||
}),
|
||||
None,
|
||||
Some(&[
|
||||
NetworkPolicyAmendment {
|
||||
host: "example.com".to_string(),
|
||||
action: NetworkPolicyRuleAction::Allow,
|
||||
},
|
||||
NetworkPolicyAmendment {
|
||||
host: "example.com".to_string(),
|
||||
action: NetworkPolicyRuleAction::Deny,
|
||||
},
|
||||
]),
|
||||
None,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
decisions,
|
||||
vec![
|
||||
ReviewDecision::Approved,
|
||||
ReviewDecision::ApprovedForSession,
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
network_policy_amendment: NetworkPolicyAmendment {
|
||||
host: "example.com".to_string(),
|
||||
action: NetworkPolicyRuleAction::Allow,
|
||||
},
|
||||
},
|
||||
ReviewDecision::Abort,
|
||||
]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3105,6 +3105,10 @@ pub enum ReviewDecision {
|
||||
/// User has approved this command and the agent should execute it.
|
||||
Approved,
|
||||
|
||||
/// User has approved this command and wants to execute a different argv
|
||||
/// vector instead of the original one-time request.
|
||||
ApprovedOverrideCommand { command: Vec<String> },
|
||||
|
||||
/// User has approved this command and wants to apply the proposed execpolicy
|
||||
/// amendment so future matching commands are permitted.
|
||||
ApprovedExecpolicyAmendment {
|
||||
@@ -3133,11 +3137,19 @@ pub enum ReviewDecision {
|
||||
}
|
||||
|
||||
impl ReviewDecision {
|
||||
pub fn override_command(&self) -> Option<&[String]> {
|
||||
match self {
|
||||
ReviewDecision::ApprovedOverrideCommand { command } => Some(command.as_slice()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an opaque version of the decision without PII. We can't use an ignored flag
|
||||
/// on `serde` because the serialization is required by some surfaces.
|
||||
pub fn to_opaque_string(&self) -> &'static str {
|
||||
match self {
|
||||
ReviewDecision::Approved => "approved",
|
||||
ReviewDecision::ApprovedOverrideCommand { .. } => "approved_with_command_override",
|
||||
ReviewDecision::ApprovedExecpolicyAmendment { .. } => "approved_with_amendment",
|
||||
ReviewDecision::ApprovedForSession => "approved_for_session",
|
||||
ReviewDecision::NetworkPolicyAmendment {
|
||||
|
||||
Reference in New Issue
Block a user