Compare commits

...

12 Commits

Author SHA1 Message Date
Qiyao Qin
ad2eb4ca5a fix run out of sandbox issue 2026-03-14 17:44:09 -07:00
Qiyao Qin
e3772c7eca reject mcp approval override 2026-03-14 16:52:28 -07:00
Qiyao Qin
d347bc8761 add test coverage 2026-03-14 16:32:01 -07:00
Qiyao Qin
08bf18a078 deny override for network policy 2026-03-14 16:31:57 -07:00
Qiyao Qin
a9a7cc1e16 use separate emitter 2026-03-14 16:22:35 -07:00
Qiyao Qin
c65b7e1df6 move the event emission back 2026-03-14 16:22:35 -07:00
Qiyao Qin
39e83fee71 add tool override marker to tool output 2026-03-14 16:22:35 -07:00
Qiyao Qin
6ac7664d4f add unit tests 2026-03-14 16:22:35 -07:00
Qiyao Qin
b082630ec1 disallow command override if it's a network only approval 2026-03-14 16:22:35 -07:00
Qiyao Qin
8ba4ae3bf3 add AcceptWithOverrideCommand in availableDecisions 2026-03-14 16:22:35 -07:00
Qiyao Qin
4c5d2f4f07 make event emits with the effective command 2026-03-14 16:22:35 -07:00
Qiyao Qin
bccfefd43c Add AcceptWithOverrideCommand for approval decision 2026-03-14 16:22:35 -07:00
35 changed files with 1353 additions and 92 deletions

View File

@@ -33,6 +33,31 @@
],
"type": "string"
},
{
"additionalProperties": false,
"description": "User has approved this command and wants to execute a different argv vector instead of the original one-time request.",
"properties": {
"approved_override_command": {
"properties": {
"command": {
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"command"
],
"type": "object"
}
},
"required": [
"approved_override_command"
],
"title": "ApprovedOverrideCommandReviewDecision",
"type": "object"
},
{
"additionalProperties": false,
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",

View File

@@ -228,6 +228,31 @@
],
"type": "string"
},
{
"additionalProperties": false,
"description": "User approved the command, but wants to execute a different argv vector.",
"properties": {
"acceptWithOverrideCommand": {
"properties": {
"command": {
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"command"
],
"type": "object"
}
},
"required": [
"acceptWithOverrideCommand"
],
"title": "AcceptWithOverrideCommandCommandExecutionApprovalDecision",
"type": "object"
},
{
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
"enum": [

View File

@@ -10,6 +10,31 @@
],
"type": "string"
},
{
"additionalProperties": false,
"description": "User approved the command, but wants to execute a different argv vector.",
"properties": {
"acceptWithOverrideCommand": {
"properties": {
"command": {
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"command"
],
"type": "object"
}
},
"required": [
"acceptWithOverrideCommand"
],
"title": "AcceptWithOverrideCommandCommandExecutionApprovalDecision",
"type": "object"
},
{
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
"enum": [

View File

@@ -33,6 +33,31 @@
],
"type": "string"
},
{
"additionalProperties": false,
"description": "User has approved this command and wants to execute a different argv vector instead of the original one-time request.",
"properties": {
"approved_override_command": {
"properties": {
"command": {
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"command"
],
"type": "object"
}
},
"required": [
"approved_override_command"
],
"title": "ApprovedOverrideCommandReviewDecision",
"type": "object"
},
{
"additionalProperties": false,
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",

View File

@@ -294,6 +294,31 @@
],
"type": "string"
},
{
"additionalProperties": false,
"description": "User approved the command, but wants to execute a different argv vector.",
"properties": {
"acceptWithOverrideCommand": {
"properties": {
"command": {
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"command"
],
"type": "object"
}
},
"required": [
"acceptWithOverrideCommand"
],
"title": "AcceptWithOverrideCommandCommandExecutionApprovalDecision",
"type": "object"
},
{
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
"enum": [

View File

@@ -1639,6 +1639,31 @@
],
"type": "string"
},
{
"additionalProperties": false,
"description": "User approved the command, but wants to execute a different argv vector.",
"properties": {
"acceptWithOverrideCommand": {
"properties": {
"command": {
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"command"
],
"type": "object"
}
},
"required": [
"acceptWithOverrideCommand"
],
"title": "AcceptWithOverrideCommandCommandExecutionApprovalDecision",
"type": "object"
},
{
"description": "User approved the command and future prompts in the same session-scoped approval cache should run without prompting.",
"enum": [
@@ -3392,6 +3417,31 @@
],
"type": "string"
},
{
"additionalProperties": false,
"description": "User has approved this command and wants to execute a different argv vector instead of the original one-time request.",
"properties": {
"approved_override_command": {
"properties": {
"command": {
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"command"
],
"type": "object"
}
},
"required": [
"approved_override_command"
],
"title": "ApprovedOverrideCommandReviewDecision",
"type": "object"
},
{
"additionalProperties": false,
"description": "User has approved this command and wants to apply the proposed execpolicy amendment so future matching commands are permitted.",

View File

@@ -7,4 +7,4 @@ import type { NetworkPolicyAmendment } from "./NetworkPolicyAmendment";
/**
* User's decision in response to an ExecApprovalRequest.
*/
export type ReviewDecision = "approved" | { "approved_execpolicy_amendment": { proposed_execpolicy_amendment: ExecPolicyAmendment, } } | "approved_for_session" | { "network_policy_amendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "denied" | "abort";
export type ReviewDecision = "approved" | { "approved_override_command": { command: Array<string>, } } | { "approved_execpolicy_amendment": { proposed_execpolicy_amendment: ExecPolicyAmendment, } } | "approved_for_session" | { "network_policy_amendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "denied" | "abort";

View File

@@ -4,4 +4,4 @@
import type { ExecPolicyAmendment } from "./ExecPolicyAmendment";
import type { NetworkPolicyAmendment } from "./NetworkPolicyAmendment";
export type CommandExecutionApprovalDecision = "accept" | "acceptForSession" | { "acceptWithExecpolicyAmendment": { execpolicy_amendment: ExecPolicyAmendment, } } | { "applyNetworkPolicyAmendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "decline" | "cancel";
export type CommandExecutionApprovalDecision = "accept" | { "acceptWithOverrideCommand": { command: Array<string>, } } | "acceptForSession" | { "acceptWithExecpolicyAmendment": { execpolicy_amendment: ExecPolicyAmendment, } } | { "applyNetworkPolicyAmendment": { network_policy_amendment: NetworkPolicyAmendment, } } | "decline" | "cancel";

View File

@@ -959,6 +959,8 @@ pub struct ConfigEdit {
pub enum CommandExecutionApprovalDecision {
/// User approved the command.
Accept,
/// User approved the command, but wants to execute a different argv vector.
AcceptWithOverrideCommand { command: Vec<String> },
/// User approved the command and future prompts in the same session-scoped
/// approval cache should run without prompting.
AcceptForSession,
@@ -981,6 +983,9 @@ impl From<CoreReviewDecision> for CommandExecutionApprovalDecision {
fn from(value: CoreReviewDecision) -> Self {
match value {
CoreReviewDecision::Approved => Self::Accept,
CoreReviewDecision::ApprovedOverrideCommand { command } => {
Self::AcceptWithOverrideCommand { command }
}
CoreReviewDecision::ApprovedExecpolicyAmendment {
proposed_execpolicy_amendment,
} => Self::AcceptWithExecpolicyAmendment {
@@ -6033,6 +6038,27 @@ mod tests {
);
}
#[test]
fn command_execution_request_approval_response_accepts_override_command() {
let response = serde_json::from_value::<CommandExecutionRequestApprovalResponse>(json!({
"decision": {
"acceptWithOverrideCommand": {
"command": ["echo", "hi"]
}
}
}))
.expect("override command response should deserialize");
assert_eq!(
response,
CommandExecutionRequestApprovalResponse {
decision: CommandExecutionApprovalDecision::AcceptWithOverrideCommand {
command: vec!["echo".to_string(), "hi".to_string()],
},
}
);
}
#[test]
fn permissions_request_approval_response_accepts_partial_macos_grants() {
let cases = vec![

View File

@@ -897,7 +897,7 @@ When an upstream HTTP status is available (for example, from the Responses API o
Certain actions (shell commands or modifying files) may require explicit user approval depending on the user's config. When `turn/start` is used, the app-server drives an approval flow by sending a server-initiated JSON-RPC request to the client. The client must respond to tell Codex whether to proceed. UIs should present these requests inline with the active turn so users can review the proposed command or diff before choosing.
- Requests include `threadId` and `turnId`—use them to scope UI state to the active conversation.
- Respond with a single `{ "decision": ... }` payload. Command approvals support `accept`, `acceptForSession`, `acceptWithExecpolicyAmendment`, `applyNetworkPolicyAmendment`, `decline`, or `cancel`. The server resumes or declines the work and ends the item with `item/completed`.
- Respond with a single `{ "decision": ... }` payload. Command approvals support `accept`, `acceptWithOverrideCommand`, `acceptForSession`, `acceptWithExecpolicyAmendment`, `applyNetworkPolicyAmendment`, `decline`, or `cancel`. The server resumes or declines the work and ends the item with `item/completed`.
### Command execution approvals
@@ -905,7 +905,7 @@ Order of messages:
1. `item/started` — shows the pending `commandExecution` item with `command`, `cwd`, and other fields so you can render the proposed action.
2. `item/commandExecution/requestApproval` (request) — carries the same `itemId`, `threadId`, `turnId`, optionally `approvalId` (for subcommand callbacks), and `reason`. For normal command approvals, it also includes `command`, `cwd`, and `commandActions` for friendly display. When `initialize.params.capabilities.experimentalApi = true`, it may also include experimental `additionalPermissions` describing requested per-command sandbox access; any filesystem paths in that payload are absolute on the wire, and network access is represented as `additionalPermissions.network.enabled`. For network-only approvals, those command fields may be omitted and `networkApprovalContext` is provided instead. Optional persistence hints may also be included via `proposedExecpolicyAmendment` and `proposedNetworkPolicyAmendments`. Clients can prefer `availableDecisions` when present to render the exact set of choices the server wants to expose, while still falling back to the older heuristics if it is omitted.
3. Client response — for example `{ "decision": "accept" }`, `{ "decision": "acceptForSession" }`, `{ "decision": { "acceptWithExecpolicyAmendment": { "execpolicy_amendment": [...] } } }`, `{ "decision": { "applyNetworkPolicyAmendment": { "network_policy_amendment": { "host": "example.com", "action": "allow" } } } }`, `{ "decision": "decline" }`, or `{ "decision": "cancel" }`.
3. Client response — for example `{ "decision": "accept" }`, `{ "decision": { "acceptWithOverrideCommand": { "command": ["echo", "hi"] } } }`, `{ "decision": "acceptForSession" }`, `{ "decision": { "acceptWithExecpolicyAmendment": { "execpolicy_amendment": [...] } } }`, `{ "decision": { "applyNetworkPolicyAmendment": { "network_policy_amendment": { "host": "example.com", "action": "allow" } } } }`, `{ "decision": "decline" }`, or `{ "decision": "cancel" }`.
4. `serverRequest/resolved``{ threadId, requestId }` confirms the pending request has been resolved or cleared, including lifecycle cleanup on turn start/complete/interrupt.
5. `item/completed` — final `commandExecution` item with `status: "completed" | "failed" | "declined"` and execution output. Render this as the authoritative result.

View File

@@ -2408,6 +2408,69 @@ fn map_file_change_approval_decision(
}
}
fn map_command_execution_approval_decision(
decision: CommandExecutionApprovalDecision,
allow_override_command: bool,
) -> (ReviewDecision, Option<CommandExecutionStatus>) {
match decision {
CommandExecutionApprovalDecision::Accept => (ReviewDecision::Approved, None),
CommandExecutionApprovalDecision::AcceptWithOverrideCommand { command } => {
if !allow_override_command {
error!(
"failed to deserialize CommandExecutionRequestApprovalResponse: override command is not allowed for network-only approvals"
);
(
ReviewDecision::Denied,
Some(CommandExecutionStatus::Declined),
)
} else if command.is_empty() {
error!(
"failed to deserialize CommandExecutionRequestApprovalResponse: override command cannot be empty"
);
(
ReviewDecision::Denied,
Some(CommandExecutionStatus::Declined),
)
} else {
(ReviewDecision::ApprovedOverrideCommand { command }, None)
}
}
CommandExecutionApprovalDecision::AcceptForSession => {
(ReviewDecision::ApprovedForSession, None)
}
CommandExecutionApprovalDecision::AcceptWithExecpolicyAmendment {
execpolicy_amendment,
} => (
ReviewDecision::ApprovedExecpolicyAmendment {
proposed_execpolicy_amendment: execpolicy_amendment.into_core(),
},
None,
),
CommandExecutionApprovalDecision::ApplyNetworkPolicyAmendment {
network_policy_amendment,
} => {
let completion_status = match network_policy_amendment.action {
V2NetworkPolicyRuleAction::Allow => None,
V2NetworkPolicyRuleAction::Deny => Some(CommandExecutionStatus::Declined),
};
(
ReviewDecision::NetworkPolicyAmendment {
network_policy_amendment: network_policy_amendment.into_core(),
},
completion_status,
)
}
CommandExecutionApprovalDecision::Decline => (
ReviewDecision::Denied,
Some(CommandExecutionStatus::Declined),
),
CommandExecutionApprovalDecision::Cancel => (
ReviewDecision::Abort,
Some(CommandExecutionStatus::Declined),
),
}
}
#[allow(clippy::too_many_arguments)]
async fn on_file_change_request_approval_response(
event_turn_id: String,
@@ -2502,44 +2565,12 @@ async fn on_command_execution_request_approval_response(
}
});
let decision = response.decision;
let (decision, completion_status) = match decision {
CommandExecutionApprovalDecision::Accept => (ReviewDecision::Approved, None),
CommandExecutionApprovalDecision::AcceptForSession => {
(ReviewDecision::ApprovedForSession, None)
}
CommandExecutionApprovalDecision::AcceptWithExecpolicyAmendment {
execpolicy_amendment,
} => (
ReviewDecision::ApprovedExecpolicyAmendment {
proposed_execpolicy_amendment: execpolicy_amendment.into_core(),
},
None,
),
CommandExecutionApprovalDecision::ApplyNetworkPolicyAmendment {
network_policy_amendment,
} => {
let completion_status = match network_policy_amendment.action {
V2NetworkPolicyRuleAction::Allow => None,
V2NetworkPolicyRuleAction::Deny => Some(CommandExecutionStatus::Declined),
};
(
ReviewDecision::NetworkPolicyAmendment {
network_policy_amendment: network_policy_amendment.into_core(),
},
completion_status,
)
}
CommandExecutionApprovalDecision::Decline => (
ReviewDecision::Denied,
Some(CommandExecutionStatus::Declined),
),
CommandExecutionApprovalDecision::Cancel => (
ReviewDecision::Abort,
Some(CommandExecutionStatus::Declined),
),
};
let (decision, completion_status) = map_command_execution_approval_decision(
response.decision,
// Network-only prompts omit the completion item, and they
// must not allow command override responses.
completion_item.is_some(),
);
(decision, completion_status)
}
Ok(Err(err)) if is_turn_transition_server_request_error(&err) => return,
@@ -2884,6 +2915,19 @@ mod tests {
assert_eq!(completion_status, None);
}
#[test]
fn command_execution_override_is_rejected_for_network_only_prompts() {
let (decision, completion_status) = map_command_execution_approval_decision(
CommandExecutionApprovalDecision::AcceptWithOverrideCommand {
command: vec!["echo".to_string(), "hi".to_string()],
},
false,
);
assert_eq!(decision, ReviewDecision::Denied);
assert_eq!(completion_status, Some(CommandExecutionStatus::Declined));
}
#[test]
fn mcp_server_elicitation_turn_transition_error_maps_to_cancel() {
let error = JSONRPCErrorError {

View File

@@ -111,6 +111,7 @@ fn is_shell_tool_name(name: &str) -> bool {
struct ExecOutputJson {
output: String,
metadata: ExecOutputMetadataJson,
executed_command: Option<Vec<String>>,
}
#[derive(Deserialize)]
@@ -131,6 +132,9 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String {
"Wall time: {} seconds",
parsed.metadata.duration_seconds
));
if let Some(executed_command) = parsed.executed_command.as_ref() {
sections.push(format!("Executed command: {}", executed_command.join(" ")));
}
let mut output = parsed.output.clone();
if let Some((stripped, total_lines)) = strip_total_output_header(&parsed.output) {

View File

@@ -196,6 +196,44 @@ fn reserializes_shell_outputs_for_function_and_custom_tool_calls() {
);
}
#[test]
fn reserializes_shell_outputs_preserve_executed_command_when_present() {
let raw_output = r#"{"output":"override-ok","metadata":{"exit_code":0,"duration_seconds":0.5},"executed_command":["/bin/echo","override-ok"]}"#;
let expected_output = "Exit code: 0\nWall time: 0.5 seconds\nExecuted command: /bin/echo override-ok\nOutput:\noverride-ok";
let mut items = vec![
ResponseItem::FunctionCall {
id: None,
name: "shell".to_string(),
namespace: None,
arguments: "{}".to_string(),
call_id: "call-1".to_string(),
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_text(raw_output.to_string()),
},
];
reserialize_shell_outputs(&mut items);
assert_eq!(
items,
vec![
ResponseItem::FunctionCall {
id: None,
name: "shell".to_string(),
namespace: None,
arguments: "{}".to_string(),
call_id: "call-1".to_string(),
},
ResponseItem::FunctionCallOutput {
call_id: "call-1".to_string(),
output: FunctionCallOutputPayload::from_text(expected_output.to_string()),
},
]
);
}
#[test]
fn tool_search_output_namespace_serializes_with_deferred_child_tools() {
let namespace = tools::ToolSearchOutputTool::Namespace(tools::ResponsesApiNamespace {

View File

@@ -2877,6 +2877,7 @@ impl Session {
});
let available_decisions = available_decisions.unwrap_or_else(|| {
ExecApprovalRequestEvent::default_available_decisions(
&command,
network_approval_context.as_ref(),
proposed_execpolicy_amendment.as_ref(),
proposed_network_policy_amendments.as_deref(),

View File

@@ -700,9 +700,9 @@ async fn maybe_auto_review_mcp_request_user_input(
ReviewDecision::Approved
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
| ReviewDecision::NetworkPolicyAmendment { .. } => MCP_TOOL_APPROVAL_ACCEPT.to_string(),
ReviewDecision::Denied | ReviewDecision::Abort => {
MCP_TOOL_APPROVAL_DECLINE_SYNTHETIC.to_string()
}
ReviewDecision::ApprovedOverrideCommand { .. }
| ReviewDecision::Denied
| ReviewDecision::Abort => MCP_TOOL_APPROVAL_DECLINE_SYNTHETIC.to_string(),
};
Some(RequestUserInputResponse {
answers: HashMap::from([(

View File

@@ -690,7 +690,9 @@ fn mcp_tool_approval_decision_from_guardian(decision: ReviewDecision) -> McpTool
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
| ReviewDecision::NetworkPolicyAmendment { .. } => McpToolApprovalDecision::Accept,
ReviewDecision::ApprovedForSession => McpToolApprovalDecision::AcceptForSession,
ReviewDecision::Denied | ReviewDecision::Abort => McpToolApprovalDecision::Decline,
ReviewDecision::ApprovedOverrideCommand { .. }
| ReviewDecision::Denied
| ReviewDecision::Abort => McpToolApprovalDecision::Decline,
}
}

View File

@@ -602,6 +602,12 @@ fn guardian_review_decision_maps_to_mcp_tool_decision() {
mcp_tool_approval_decision_from_guardian(ReviewDecision::Approved),
McpToolApprovalDecision::Accept
);
assert_eq!(
mcp_tool_approval_decision_from_guardian(ReviewDecision::ApprovedOverrideCommand {
command: vec!["echo".to_string(), "override".to_string()],
}),
McpToolApprovalDecision::Decline
);
assert_eq!(
mcp_tool_approval_decision_from_guardian(ReviewDecision::Denied),
McpToolApprovalDecision::Decline

View File

@@ -90,6 +90,7 @@ pub(crate) async fn emit_exec_command_begin(
pub(crate) enum ToolEmitter {
Shell {
command: Vec<String>,
is_command_overridden: bool,
cwd: PathBuf,
source: ExecCommandSource,
parsed_cmd: Vec<ParsedCommand>,
@@ -111,6 +112,7 @@ pub(crate) enum ToolEmitter {
impl ToolEmitter {
pub fn shell(
command: Vec<String>,
is_command_overridden: bool,
cwd: PathBuf,
source: ExecCommandSource,
freeform: bool,
@@ -118,6 +120,7 @@ impl ToolEmitter {
let parsed_cmd = parse_command(&command);
Self::Shell {
command,
is_command_overridden,
cwd,
source,
parsed_cmd,
@@ -153,6 +156,7 @@ impl ToolEmitter {
(
Self::Shell {
command,
is_command_overridden: _,
cwd,
source,
parsed_cmd,
@@ -289,10 +293,30 @@ impl ToolEmitter {
ctx: ToolEventCtx<'_>,
) -> String {
match self {
Self::Shell { freeform: true, .. } => {
super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
}
_ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
Self::Shell {
freeform: true,
command,
is_command_overridden,
..
} => super::format_exec_output_for_model_freeform(
output,
ctx.turn.truncation_policy,
is_command_overridden.then_some(command.as_slice()),
),
Self::Shell {
command,
is_command_overridden,
..
} => super::format_exec_output_for_model_structured(
output,
ctx.turn.truncation_policy,
is_command_overridden.then_some(command.as_slice()),
),
_ => super::format_exec_output_for_model_structured(
output,
ctx.turn.truncation_policy,
None,
),
}
}

View File

@@ -215,6 +215,8 @@ impl ToolHandler for ApplyPatchHandler {
turn: turn.clone(),
call_id: call_id.clone(),
tool_name: tool_name.to_string(),
command_override: None,
effective_command: None,
};
let out = orchestrator
.run(
@@ -319,6 +321,8 @@ pub(crate) async fn intercept_apply_patch(
turn: turn.clone(),
call_id: call_id.to_string(),
tool_name: tool_name.to_string(),
command_override: None,
effective_command: None,
};
let out = orchestrator
.run(

View File

@@ -223,6 +223,7 @@ fn default_runtime_manager(codex_home: std::path::PathBuf) -> ArtifactRuntimeMan
async fn emit_exec_begin(session: &Session, turn: &TurnContext, call_id: &str) {
let emitter = ToolEmitter::shell(
vec![ARTIFACTS_TOOL_NAME.to_string()],
false,
turn.cwd.clone(),
ExecCommandSource::Agent,
true,
@@ -249,6 +250,7 @@ async fn emit_exec_end(
};
let emitter = ToolEmitter::shell(
vec![ARTIFACTS_TOOL_NAME.to_string()],
false,
turn.cwd.clone(),
ExecCommandSource::Agent,
true,

View File

@@ -61,6 +61,7 @@ async fn emit_js_repl_exec_begin(
) {
let emitter = ToolEmitter::shell(
vec!["js_repl".to_string()],
false,
turn.cwd.clone(),
ExecCommandSource::Agent,
false,
@@ -80,6 +81,7 @@ async fn emit_js_repl_exec_end(
let exec_output = build_js_repl_exec_output(output, error, duration);
let emitter = ToolEmitter::shell(
vec!["js_repl".to_string()],
false,
turn.cwd.clone(),
ExecCommandSource::Agent,
false,

View File

@@ -411,14 +411,15 @@ impl ShellHandler {
}
let source = ExecCommandSource::Agent;
let emitter = ToolEmitter::shell(
let begin_emitter = ToolEmitter::shell(
exec_params.command.clone(),
false,
exec_params.cwd.clone(),
source,
freeform,
);
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
emitter.begin(event_ctx).await;
begin_emitter.begin(event_ctx).await;
let exec_approval_requirement = session
.services
@@ -462,11 +463,15 @@ impl ShellHandler {
}
}
};
let effective_command =
std::sync::Arc::new(tokio::sync::Mutex::new(exec_params.command.clone()));
let tool_ctx = ToolCtx {
session: session.clone(),
turn: turn.clone(),
call_id: call_id.clone(),
tool_name,
command_override: None,
effective_command: Some(std::sync::Arc::clone(&effective_command)),
};
let out = orchestrator
.run(
@@ -478,8 +483,16 @@ impl ShellHandler {
)
.await
.map(|result| result.output);
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
let content = emitter.finish(event_ctx, out).await?;
let effective_command = effective_command.lock().await.clone();
let is_command_overridden = effective_command != exec_params.command;
let finish_emitter = ToolEmitter::shell(
effective_command,
is_command_overridden,
exec_params.cwd.clone(),
source,
freeform,
);
let content = finish_emitter.finish(event_ctx, out).await?;
Ok(FunctionToolOutput::from_text(content, Some(true)))
}
}

View File

@@ -1,10 +1,17 @@
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use codex_protocol::models::ShellCommandToolCallParams;
use codex_protocol::models::function_call_output_content_items_to_text;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ExecCommandStatus;
use codex_protocol::protocol::ReviewDecision;
use pretty_assertions::assert_eq;
use crate::codex::make_session_and_context;
use crate::codex::make_session_and_context_with_rx;
use crate::exec_env::create_env;
use crate::is_safe_command::is_known_safe_command;
use crate::powershell::try_find_powershell_executable_blocking;
@@ -13,8 +20,15 @@ use crate::sandboxing::SandboxPermissions;
use crate::shell::Shell;
use crate::shell::ShellType;
use crate::shell_snapshot::ShellSnapshot;
use crate::state::ActiveTurn;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
use crate::tools::handlers::ShellCommandHandler;
use crate::tools::handlers::ShellHandler;
use crate::tools::registry::ToolHandler;
use crate::turn_diff_tracker::TurnDiffTracker;
use tokio::sync::watch;
use tokio::time::timeout;
/// The logic for is_known_safe_command() has heuristics for known shells,
/// so we must ensure the commands generated by [ShellCommandHandler] can be
@@ -63,6 +77,10 @@ fn assert_safe(shell: &Shell, command: &str) {
));
}
fn tool_output_text(output: &crate::tools::context::FunctionToolOutput) -> String {
function_call_output_content_items_to_text(&output.body).expect("tool output text")
}
#[tokio::test]
async fn shell_command_handler_to_exec_params_uses_session_shell_and_turn_context() {
let (session, turn_context) = make_session_and_context().await;
@@ -178,3 +196,437 @@ fn shell_command_handler_rejects_login_when_disallowed() {
"unexpected error: {err}"
);
}
#[tokio::test]
async fn shell_handler_retry_emits_single_begin_before_approval_request() {
let (session, mut turn_context, rx) = make_session_and_context_with_rx().await;
*session.active_turn.lock().await = Some(ActiveTurn::default());
Arc::get_mut(&mut turn_context)
.expect("single turn context ref")
.approval_policy
.set(AskForApproval::OnFailure)
.expect("test setup should allow updating approval policy");
let temp_dir = tempfile::tempdir_in(
turn_context
.cwd
.parent()
.expect("workspace cwd should have a parent"),
)
.expect("create temp dir outside workspace");
let file_path = temp_dir.path().join("outside-sandbox.txt");
let expected_stdout = "hello from outside sandbox\n".to_string();
std::fs::write(&file_path, expected_stdout.as_bytes()).expect("write readable file");
let command = if cfg!(windows) {
vec![
"cmd.exe".to_string(),
"/C".to_string(),
format!("type \"{}\"", file_path.display()),
]
} else {
vec!["/bin/cat".to_string(), file_path.display().to_string()]
};
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
let call_id = "call-1".to_string();
let handler = ShellHandler;
let handle: tokio::task::JoinHandle<
Result<crate::tools::context::FunctionToolOutput, crate::function_tool::FunctionCallError>,
> = tokio::spawn({
let session = Arc::clone(&session);
let turn_context = Arc::clone(&turn_context);
let tracker = Arc::clone(&tracker);
let command = command.clone();
let call_id = call_id.clone();
async move {
handler
.handle(ToolInvocation {
session,
turn: turn_context.clone(),
tracker,
call_id: call_id.clone(),
tool_name: "shell".to_string(),
tool_namespace: None,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": command,
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": 1_000u64,
"sandbox_permissions": SandboxPermissions::UseDefault,
})
.to_string(),
},
})
.await
}
});
let mut begin_count = 0usize;
let mut begin_idx = None;
let mut event_idx = 0usize;
let approval_idx = loop {
let event = timeout(Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for approval flow event")
.expect("approval flow event missing");
match event.msg {
EventMsg::ExecCommandBegin(begin) if begin.call_id == call_id => {
begin_count += 1;
if begin_idx.is_none() {
begin_idx = Some(event_idx);
}
}
EventMsg::ExecApprovalRequest(request) if request.call_id == call_id => {
break event_idx;
}
_ => {}
}
event_idx += 1;
};
assert_eq!(begin_count, 1);
assert_eq!(begin_idx, Some(0));
assert!(
approval_idx > begin_idx.expect("begin event should exist"),
"approval request should follow the begin event"
);
session
.notify_approval(&call_id, ReviewDecision::Approved)
.await;
let end_event = loop {
let event = timeout(Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for exec end event")
.expect("exec end event missing");
match event.msg {
EventMsg::ExecCommandBegin(begin) if begin.call_id == call_id => {
begin_count += 1;
}
EventMsg::ExecCommandEnd(end) if end.call_id == call_id => {
break end;
}
_ => {}
}
};
let _response = timeout(Duration::from_secs(5), handle)
.await
.expect("timed out waiting for shell handler")
.expect("shell handler join error")
.expect("shell handler should succeed");
assert_eq!(begin_count, 1);
assert_eq!(end_event.status, ExecCommandStatus::Completed);
assert_eq!(end_event.command, command);
assert_eq!(end_event.stdout, expected_stdout);
assert_eq!(end_event.exit_code, 0);
}
#[tokio::test]
async fn shell_handler_retry_override_keeps_single_begin_and_reports_override() {
let (session, mut turn_context, rx) = make_session_and_context_with_rx().await;
*session.active_turn.lock().await = Some(ActiveTurn::default());
Arc::get_mut(&mut turn_context)
.expect("single turn context ref")
.approval_policy
.set(AskForApproval::OnFailure)
.expect("test setup should allow updating approval policy");
let temp_dir = tempfile::tempdir_in(
turn_context
.cwd
.parent()
.expect("workspace cwd should have a parent"),
)
.expect("create temp dir outside workspace");
let file_path = temp_dir.path().join("outside-sandbox.txt");
std::fs::write(&file_path, b"should not run\n").expect("write readable file");
let requested_command = if cfg!(windows) {
vec![
"cmd.exe".to_string(),
"/C".to_string(),
format!("type \"{}\"", file_path.display()),
]
} else {
vec!["/bin/cat".to_string(), file_path.display().to_string()]
};
let override_command = if cfg!(windows) {
vec![
"cmd.exe".to_string(),
"/C".to_string(),
"echo override-after-retry".to_string(),
]
} else {
vec!["/bin/echo".to_string(), "override-after-retry".to_string()]
};
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
let call_id = "call-retry-override".to_string();
let handler = ShellHandler;
let handle: tokio::task::JoinHandle<
Result<crate::tools::context::FunctionToolOutput, crate::function_tool::FunctionCallError>,
> = tokio::spawn({
let session = Arc::clone(&session);
let turn_context = Arc::clone(&turn_context);
let tracker = Arc::clone(&tracker);
let requested_command = requested_command.clone();
let call_id = call_id.clone();
async move {
handler
.handle(ToolInvocation {
session,
turn: turn_context.clone(),
tracker,
call_id: call_id.clone(),
tool_name: "shell".to_string(),
tool_namespace: None,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": requested_command,
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": 1_000u64,
"sandbox_permissions": SandboxPermissions::UseDefault,
})
.to_string(),
},
})
.await
}
});
let mut begin_count = 0usize;
let begin_event = loop {
let event = timeout(Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for begin event")
.expect("begin event missing");
if let EventMsg::ExecCommandBegin(begin) = event.msg
&& begin.call_id == call_id
{
begin_count += 1;
break begin;
}
};
let approval_event = loop {
let event = timeout(Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for retry approval request")
.expect("retry approval request missing");
match event.msg {
EventMsg::ExecCommandBegin(begin) if begin.call_id == call_id => {
begin_count += 1;
}
EventMsg::ExecApprovalRequest(request) if request.call_id == call_id => {
break request;
}
_ => {}
}
};
assert_eq!(begin_count, 1);
assert_eq!(begin_event.command, requested_command);
assert_eq!(approval_event.command, requested_command);
session
.notify_approval(
&call_id,
ReviewDecision::ApprovedOverrideCommand {
command: override_command.clone(),
},
)
.await;
let end_event = loop {
let event = timeout(Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for exec end event")
.expect("exec end event missing");
match event.msg {
EventMsg::ExecCommandBegin(begin) if begin.call_id == call_id => {
begin_count += 1;
}
EventMsg::ExecCommandEnd(end) if end.call_id == call_id => {
break end;
}
_ => {}
}
};
let response = timeout(Duration::from_secs(5), handle)
.await
.expect("timed out waiting for shell handler")
.expect("shell handler join error");
let duration_seconds = (end_event.duration.as_secs_f32() * 10.0).round() / 10.0;
let expected_body = serde_json::json!({
"output": end_event.aggregated_output,
"metadata": {
"exit_code": end_event.exit_code,
"duration_seconds": duration_seconds,
},
"executed_command": override_command,
});
let tool_output = match response {
Ok(output) => tool_output_text(&output),
Err(crate::function_tool::FunctionCallError::RespondToModel(message)) => message,
Err(err) => panic!("unexpected shell handler error: {err:?}"),
};
assert_eq!(begin_count, 1);
assert_eq!(
end_event.command,
expected_body["executed_command"]
.as_array()
.expect("array")
.iter()
.map(|value| value.as_str().expect("string").to_string())
.collect::<Vec<_>>()
);
assert_eq!(
serde_json::from_str::<serde_json::Value>(&tool_output).expect("valid json"),
expected_body
);
}
#[tokio::test]
async fn shell_handler_retry_override_reapplies_sandbox_selection() {
let (session, mut turn_context, rx) = make_session_and_context_with_rx().await;
*session.active_turn.lock().await = Some(ActiveTurn::default());
Arc::get_mut(&mut turn_context)
.expect("single turn context ref")
.approval_policy
.set(AskForApproval::OnFailure)
.expect("test setup should allow updating approval policy");
let temp_dir = tempfile::tempdir_in(
turn_context
.cwd
.parent()
.expect("workspace cwd should have a parent"),
)
.expect("create temp dir outside workspace");
let requested_path = temp_dir.path().join("requested-outside-sandbox.txt");
let override_path = temp_dir.path().join("override-outside-sandbox.txt");
std::fs::write(&requested_path, b"requested should stay sandboxed\n")
.expect("write requested file");
std::fs::write(&override_path, b"override should stay sandboxed\n")
.expect("write override file");
let requested_command = if cfg!(windows) {
vec![
"cmd.exe".to_string(),
"/C".to_string(),
format!("type \"{}\"", requested_path.display()),
]
} else {
vec!["/bin/cat".to_string(), requested_path.display().to_string()]
};
let override_command = if cfg!(windows) {
vec![
"cmd.exe".to_string(),
"/C".to_string(),
format!("type \"{}\"", override_path.display()),
]
} else {
vec!["/bin/cat".to_string(), override_path.display().to_string()]
};
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
let call_id = "call-retry-override-sandboxed".to_string();
let handler = ShellHandler;
let handle: tokio::task::JoinHandle<
Result<crate::tools::context::FunctionToolOutput, crate::function_tool::FunctionCallError>,
> = tokio::spawn({
let session = Arc::clone(&session);
let turn_context = Arc::clone(&turn_context);
let tracker = Arc::clone(&tracker);
let requested_command = requested_command.clone();
let call_id = call_id.clone();
async move {
handler
.handle(ToolInvocation {
session,
turn: turn_context.clone(),
tracker,
call_id: call_id.clone(),
tool_name: "shell".to_string(),
tool_namespace: None,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": requested_command,
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": 1_000u64,
"sandbox_permissions": SandboxPermissions::UseDefault,
})
.to_string(),
},
})
.await
}
});
let begin_event = loop {
let event = timeout(Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for begin event")
.expect("begin event missing");
if let EventMsg::ExecCommandBegin(begin) = event.msg
&& begin.call_id == call_id
{
break begin;
}
};
let approval_event = loop {
let event = timeout(Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for retry approval request")
.expect("retry approval request missing");
if let EventMsg::ExecApprovalRequest(request) = event.msg
&& request.call_id == call_id
{
break request;
}
};
assert_eq!(begin_event.command, requested_command);
assert_eq!(approval_event.command, requested_command);
session
.notify_approval(
&call_id,
ReviewDecision::ApprovedOverrideCommand {
command: override_command.clone(),
},
)
.await;
let end_event = loop {
let event = timeout(Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for exec end event")
.expect("exec end event missing");
if let EventMsg::ExecCommandEnd(end) = event.msg
&& end.call_id == call_id
{
break end;
}
};
let response = timeout(Duration::from_secs(5), handle)
.await
.expect("timed out waiting for shell handler")
.expect("shell handler join error");
assert!(
response.is_err(),
"shell handler should keep override command sandboxed"
);
assert_eq!(end_event.status, ExecCommandStatus::Failed);
assert_eq!(end_event.command, override_command);
}

View File

@@ -18,6 +18,7 @@ use crate::exec::ExecToolCallOutput;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text;
use crate::truncate::truncate_text;
use codex_shell_command::parse_command::shlex_join;
pub use router::ToolRouter;
use serde::Serialize;
@@ -32,6 +33,7 @@ pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
pub fn format_exec_output_for_model_structured(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
executed_command: Option<&[String]>,
) -> String {
let ExecToolCallOutput {
exit_code,
@@ -49,6 +51,8 @@ pub fn format_exec_output_for_model_structured(
struct ExecOutput<'a> {
output: &'a str,
metadata: ExecMetadata,
#[serde(skip_serializing_if = "Option::is_none")]
executed_command: Option<&'a [String]>,
}
// round to 1 decimal place
@@ -62,6 +66,7 @@ pub fn format_exec_output_for_model_structured(
exit_code: *exit_code,
duration_seconds,
},
executed_command,
};
#[expect(clippy::expect_used)]
@@ -71,6 +76,7 @@ pub fn format_exec_output_for_model_structured(
pub fn format_exec_output_for_model_freeform(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
executed_command: Option<&[String]>,
) -> String {
// round to 1 decimal place
let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0;
@@ -83,6 +89,9 @@ pub fn format_exec_output_for_model_freeform(
let mut sections = Vec::new();
if let Some(command) = executed_command {
sections.push(format!("Executed command: {}", shlex_join(command)));
}
sections.push(format!("Exit code: {}", exec_output.exit_code));
sections.push(format!("Wall time: {duration_seconds} seconds"));
if total_lines != formatted_output.lines().count() {
@@ -117,3 +126,49 @@ fn build_content_with_timeout(exec_output: &ExecToolCallOutput) -> String {
exec_output.aggregated_output.text.clone()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::time::Duration;
#[test]
fn format_exec_output_for_model_structured_includes_executed_command_when_present() {
let exec_output = ExecToolCallOutput {
aggregated_output: crate::exec::StreamOutput::new("override-ok\n".to_string()),
duration: Duration::from_millis(100),
..ExecToolCallOutput::default()
};
let formatted = format_exec_output_for_model_structured(
&exec_output,
TruncationPolicy::Bytes(1024),
Some(&["/bin/echo".to_string(), "override-ok".to_string()]),
);
assert_eq!(
formatted,
r#"{"output":"override-ok\n","metadata":{"exit_code":0,"duration_seconds":0.1},"executed_command":["/bin/echo","override-ok"]}"#
);
}
#[test]
fn format_exec_output_for_model_freeform_includes_executed_command_when_present() {
let exec_output = ExecToolCallOutput {
aggregated_output: crate::exec::StreamOutput::new("override-ok\n".to_string()),
duration: Duration::from_millis(100),
..ExecToolCallOutput::default()
};
let formatted = format_exec_output_for_model_freeform(
&exec_output,
TruncationPolicy::Bytes(1024),
Some(&["/bin/echo".to_string(), "override-ok".to_string()]),
);
assert_eq!(
formatted,
"Executed command: /bin/echo override-ok\nExit code: 0\nWall time: 0.1 seconds\nOutput:\noverride-ok\n"
);
}
}

View File

@@ -118,6 +118,22 @@ fn allows_network_approval_flow(policy: AskForApproval) -> bool {
!matches!(policy, AskForApproval::Never)
}
#[cfg(test)]
fn pending_decision_for_network_review(
review_decision: &ReviewDecision,
) -> Option<PendingApprovalDecision> {
match review_decision {
ReviewDecision::Approved | ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
Some(PendingApprovalDecision::AllowOnce)
}
ReviewDecision::ApprovedForSession => Some(PendingApprovalDecision::AllowForSession),
ReviewDecision::ApprovedOverrideCommand { .. }
| ReviewDecision::Denied
| ReviewDecision::Abort => Some(PendingApprovalDecision::Deny),
ReviewDecision::NetworkPolicyAmendment { .. } => None,
}
}
impl PendingApprovalDecision {
fn to_network_decision(self) -> NetworkDecision {
match self {
@@ -465,7 +481,9 @@ impl NetworkApprovalService {
PendingApprovalDecision::Deny
}
},
ReviewDecision::Denied | ReviewDecision::Abort => {
ReviewDecision::Denied
| ReviewDecision::Abort
| ReviewDecision::ApprovedOverrideCommand { .. } => {
if routes_approval_to_guardian(&turn_context) {
if let Some(owner_call) = owner_call.as_ref() {
self.record_call_outcome(

View File

@@ -1,7 +1,47 @@
use super::*;
use crate::codex::make_session_and_context_with_rx;
use crate::protocol::EventMsg;
use crate::protocol::TurnAbortReason;
use crate::state::TaskKind;
use crate::tasks::SessionTask;
use crate::tasks::SessionTaskContext;
use async_trait::async_trait;
use codex_network_proxy::BlockedRequestArgs;
use codex_network_proxy::NetworkPolicyRequest;
use codex_network_proxy::NetworkPolicyRequestArgs;
use codex_network_proxy::NetworkProtocol;
use codex_protocol::approvals::ExecPolicyAmendment;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::user_input::UserInput;
use pretty_assertions::assert_eq;
use std::time::Duration;
use tokio::time::timeout;
use tokio_util::sync::CancellationToken;
struct WaitForCancellationTask;
#[async_trait]
impl SessionTask for WaitForCancellationTask {
fn kind(&self) -> TaskKind {
TaskKind::Regular
}
fn span_name(&self) -> &'static str {
"network-approval-test"
}
async fn run(
self: Arc<Self>,
_session: Arc<SessionTaskContext>,
_ctx: Arc<crate::codex::TurnContext>,
_input: Vec<UserInput>,
cancellation_token: CancellationToken,
) -> Option<String> {
cancellation_token.cancelled().await;
None
}
}
#[tokio::test]
async fn pending_approvals_are_deduped_per_host_protocol_and_port() {
@@ -137,6 +177,100 @@ fn only_never_policy_disables_network_approval_flow() {
assert!(allows_network_approval_flow(AskForApproval::UnlessTrusted));
}
#[test]
fn network_review_rejects_command_override_but_allows_execpolicy_amendment() {
assert_eq!(
pending_decision_for_network_review(&ReviewDecision::ApprovedOverrideCommand {
command: vec!["echo".to_string(), "override".to_string()],
}),
Some(PendingApprovalDecision::Deny)
);
assert_eq!(
pending_decision_for_network_review(&ReviewDecision::ApprovedExecpolicyAmendment {
proposed_execpolicy_amendment: ExecPolicyAmendment::new(vec![
"echo".to_string(),
"override".to_string(),
]),
}),
Some(PendingApprovalDecision::AllowOnce)
);
}
#[tokio::test]
async fn inline_network_review_rejects_command_override_at_runtime() {
let service = Arc::new(NetworkApprovalService::default());
let (session, turn_context, rx) = make_session_and_context_with_rx().await;
service
.register_call("registration-1".to_string(), "turn-1".to_string())
.await;
session
.spawn_task(
Arc::clone(&turn_context),
vec![UserInput::Text {
text: "need network".to_string(),
text_elements: Vec::new(),
}],
WaitForCancellationTask,
)
.await;
let request = NetworkPolicyRequest::new(NetworkPolicyRequestArgs {
protocol: NetworkProtocol::Http,
host: "example.com".to_string(),
port: 80,
client_addr: None,
method: Some("GET".to_string()),
command: None,
exec_policy_hint: None,
});
let handle = tokio::spawn({
let service = Arc::clone(&service);
let session = Arc::clone(&session);
async move { service.handle_inline_policy_request(session, request).await }
});
let approval_request = loop {
let event = timeout(Duration::from_secs(5), rx.recv())
.await
.expect("timed out waiting for approval request")
.expect("approval request missing");
if let EventMsg::ExecApprovalRequest(request) = event.msg {
break request;
}
};
assert_eq!(
approval_request.command,
vec![
"network-access".to_string(),
"http://example.com:80".to_string(),
]
);
session
.notify_approval(
&approval_request.call_id,
ReviewDecision::ApprovedOverrideCommand {
command: vec!["echo".to_string(), "override".to_string()],
},
)
.await;
let decision = timeout(Duration::from_secs(5), handle)
.await
.expect("timed out waiting for inline network decision")
.expect("inline network decision join error");
assert_eq!(decision, NetworkDecision::deny("not_allowed"));
assert_eq!(
service.take_call_outcome("registration-1").await,
Some(NetworkApprovalOutcome::DeniedByUser)
);
session.abort_all_tasks(TurnAbortReason::Interrupted).await;
}
fn denied_blocked_request(host: &str) -> BlockedRequest {
BlockedRequest::new(BlockedRequestArgs {
host: host.to_string(),

View File

@@ -65,13 +65,7 @@ impl ToolOrchestrator {
)
.await;
let attempt_tool_ctx = ToolCtx {
session: tool_ctx.session.clone(),
turn: tool_ctx.turn.clone(),
call_id: tool_ctx.call_id.clone(),
tool_name: tool_ctx.tool_name.clone(),
};
let run_result = tool.run(req, attempt, &attempt_tool_ctx).await;
let run_result = tool.run(req, attempt, tool_ctx).await;
let Some(network_approval) = network_approval else {
return (run_result, None);
@@ -113,6 +107,14 @@ impl ToolOrchestrator {
let otel_ci = &tool_ctx.call_id;
let otel_user = ToolDecisionSource::User;
let otel_cfg = ToolDecisionSource::Config;
let mut effective_tool_ctx = ToolCtx {
session: tool_ctx.session.clone(),
turn: tool_ctx.turn.clone(),
call_id: tool_ctx.call_id.clone(),
tool_name: tool_ctx.tool_name.clone(),
command_override: tool_ctx.command_override.clone(),
effective_command: tool_ctx.effective_command.clone(),
};
// 1) Approval
let mut already_approved = false;
@@ -129,15 +131,22 @@ impl ToolOrchestrator {
}
ExecApprovalRequirement::NeedsApproval { reason, .. } => {
let approval_ctx = ApprovalCtx {
session: &tool_ctx.session,
turn: &tool_ctx.turn,
call_id: &tool_ctx.call_id,
session: &effective_tool_ctx.session,
turn: &effective_tool_ctx.turn,
call_id: &effective_tool_ctx.call_id,
retry_reason: reason,
network_approval_context: None,
command_override: effective_tool_ctx.command_override.clone(),
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, &decision, otel_user.clone());
if let Some(command) = decision.override_command() {
effective_tool_ctx.command_override = Some(command.to_vec());
if let Some(effective_command) = &effective_tool_ctx.effective_command {
*effective_command.lock().await = command.to_vec();
}
}
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
@@ -149,6 +158,7 @@ impl ToolOrchestrator {
return Err(ToolError::Rejected(reason));
}
ReviewDecision::Approved
| ReviewDecision::ApprovedOverrideCommand { .. }
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
| ReviewDecision::ApprovedForSession => {}
ReviewDecision::NetworkPolicyAmendment {
@@ -205,7 +215,7 @@ impl ToolOrchestrator {
let (first_result, first_deferred_network_approval) = Self::run_attempt(
tool,
req,
tool_ctx,
&effective_tool_ctx,
&initial_attempt,
has_managed_network_requirements,
)
@@ -276,17 +286,26 @@ impl ToolOrchestrator {
let bypass_retry_approval = tool
.should_bypass_approval(approval_policy, already_approved)
&& network_approval_context.is_none();
let mut command_overridden_on_retry = false;
if !bypass_retry_approval {
let approval_ctx = ApprovalCtx {
session: &tool_ctx.session,
turn: &tool_ctx.turn,
call_id: &tool_ctx.call_id,
session: &effective_tool_ctx.session,
turn: &effective_tool_ctx.turn,
call_id: &effective_tool_ctx.call_id,
retry_reason: Some(retry_reason),
network_approval_context: network_approval_context.clone(),
command_override: effective_tool_ctx.command_override.clone(),
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, &decision, otel_user);
if let Some(command) = decision.override_command() {
command_overridden_on_retry = true;
effective_tool_ctx.command_override = Some(command.to_vec());
if let Some(effective_command) = &effective_tool_ctx.effective_command {
*effective_command.lock().await = command.to_vec();
}
}
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
@@ -298,6 +317,7 @@ impl ToolOrchestrator {
return Err(ToolError::Rejected(reason));
}
ReviewDecision::Approved
| ReviewDecision::ApprovedOverrideCommand { .. }
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
| ReviewDecision::ApprovedForSession => {}
ReviewDecision::NetworkPolicyAmendment {
@@ -311,15 +331,37 @@ impl ToolOrchestrator {
}
}
let escalated_attempt = SandboxAttempt {
sandbox: crate::exec::SandboxType::None,
let retry_sandbox = if command_overridden_on_retry {
match tool.sandbox_mode_for_first_attempt(req) {
SandboxOverride::BypassSandboxFirstAttempt => {
crate::exec::SandboxType::None
}
SandboxOverride::NoOverride => self.sandbox.select_initial(
&turn_ctx.file_system_sandbox_policy,
turn_ctx.network_sandbox_policy,
tool.sandbox_preference(),
turn_ctx.windows_sandbox_level,
has_managed_network_requirements,
),
}
} else {
crate::exec::SandboxType::None
};
let retry_codex_linux_sandbox_exe = if command_overridden_on_retry {
turn_ctx.codex_linux_sandbox_exe.as_ref()
} else {
None
};
let retry_attempt = SandboxAttempt {
sandbox: retry_sandbox,
policy: &turn_ctx.sandbox_policy,
file_system_policy: &turn_ctx.file_system_sandbox_policy,
network_policy: turn_ctx.network_sandbox_policy,
enforce_managed_network: has_managed_network_requirements,
manager: &self.sandbox,
sandbox_cwd: &turn_ctx.cwd,
codex_linux_sandbox_exe: None,
codex_linux_sandbox_exe: retry_codex_linux_sandbox_exe,
use_legacy_landlock,
windows_sandbox_level: turn_ctx.windows_sandbox_level,
windows_sandbox_private_desktop: turn_ctx
@@ -332,8 +374,8 @@ impl ToolOrchestrator {
let (retry_result, retry_deferred_network_approval) = Self::run_attempt(
tool,
req,
tool_ctx,
&escalated_attempt,
&effective_tool_ctx,
&retry_attempt,
has_managed_network_requirements,
)
.await;

View File

@@ -144,7 +144,10 @@ impl Approvable<ShellRequest> for ShellRuntime {
ctx: ApprovalCtx<'a>,
) -> BoxFuture<'a, ReviewDecision> {
let keys = self.approval_keys(req);
let command = req.command.clone();
let command = ctx
.command_override
.clone()
.unwrap_or_else(|| req.command.clone());
let cwd = req.cwd.clone();
let retry_reason = ctx.retry_reason.clone();
let reason = retry_reason.clone().or_else(|| req.justification.clone());
@@ -221,8 +224,9 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
ctx: &ToolCtx,
) -> Result<ExecToolCallOutput, ToolError> {
let session_shell = ctx.session.user_shell();
let base_command = ctx.command_override.as_ref().unwrap_or(&req.command);
let command = maybe_wrap_shell_lc_with_snapshot(
&req.command,
base_command,
session_shell.as_ref(),
&req.cwd,
&req.explicit_env_overrides,

View File

@@ -547,6 +547,12 @@ impl CoreShellActionProvider {
EscalationDecision::run()
}
}
ReviewDecision::ApprovedOverrideCommand { .. } => {
EscalationDecision::deny(Some(
"Command override is not supported for execve approvals"
.to_string(),
))
}
ReviewDecision::ApprovedForSession => {
// Currently, we only add session approvals for
// skill scripts because we are storing only the

View File

@@ -115,7 +115,10 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
let session = ctx.session;
let turn = ctx.turn;
let call_id = ctx.call_id.to_string();
let command = req.command.clone();
let command = ctx
.command_override
.clone()
.unwrap_or_else(|| req.command.clone());
let cwd = req.cwd.clone();
let retry_reason = ctx.retry_reason.clone();
let reason = retry_reason.clone().or_else(|| req.justification.clone());
@@ -192,7 +195,7 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecProcess> for UnifiedExecRunt
attempt: &SandboxAttempt<'_>,
ctx: &ToolCtx,
) -> Result<UnifiedExecProcess, ToolError> {
let base_command = &req.command;
let base_command = ctx.command_override.as_ref().unwrap_or(&req.command);
let session_shell = ctx.session.user_shell();
let command = maybe_wrap_shell_lc_with_snapshot(
base_command,

View File

@@ -118,6 +118,7 @@ pub(crate) struct ApprovalCtx<'a> {
pub call_id: &'a str,
pub retry_reason: Option<String>,
pub network_approval_context: Option<NetworkApprovalContext>,
pub command_override: Option<Vec<String>>,
}
// Specifies what tool orchestrator should do with a given tool call.
@@ -301,6 +302,8 @@ pub(crate) struct ToolCtx {
pub turn: Arc<TurnContext>,
pub call_id: String,
pub tool_name: String,
pub command_override: Option<Vec<String>>,
pub effective_command: Option<Arc<tokio::sync::Mutex<Vec<String>>>>,
}
#[derive(Debug)]

View File

@@ -165,10 +165,12 @@ impl UnifiedExecProcessManager {
.open_session_with_sandbox(&request, cwd.clone(), context)
.await;
let (process, mut deferred_network_approval) = match process {
Ok((process, deferred_network_approval)) => {
(Arc::new(process), deferred_network_approval)
}
let (process, mut deferred_network_approval, effective_command) = match process {
Ok((process, deferred_network_approval, effective_command)) => (
Arc::new(process),
deferred_network_approval,
effective_command,
),
Err(err) => {
self.release_process_id(request.process_id).await;
return Err(err);
@@ -183,7 +185,7 @@ impl UnifiedExecProcessManager {
None,
);
let emitter = ToolEmitter::unified_exec(
&request.command,
&effective_command,
cwd.clone(),
ExecCommandSource::UnifiedExecStartup,
Some(request.process_id.to_string()),
@@ -236,7 +238,7 @@ impl UnifiedExecProcessManager {
Arc::clone(&context.session),
Arc::clone(&context.turn),
context.call_id.clone(),
request.command.clone(),
effective_command.clone(),
cwd.clone(),
Some(process_id.to_string()),
Arc::clone(&transcript),
@@ -264,7 +266,7 @@ impl UnifiedExecProcessManager {
self.store_process(
Arc::clone(&process),
context,
&request.command,
&effective_command,
cwd.clone(),
start,
process_id,
@@ -289,7 +291,7 @@ impl UnifiedExecProcessManager {
},
exit_code,
original_token_count: Some(original_token_count),
session_command: Some(request.command.clone()),
session_command: Some(effective_command.clone()),
};
Ok(response)
@@ -572,7 +574,14 @@ impl UnifiedExecProcessManager {
request: &ExecCommandRequest,
cwd: PathBuf,
context: &UnifiedExecContext,
) -> Result<(UnifiedExecProcess, Option<DeferredNetworkApproval>), UnifiedExecError> {
) -> Result<
(
UnifiedExecProcess,
Option<DeferredNetworkApproval>,
Vec<String>,
),
UnifiedExecError,
> {
let env = apply_unified_exec_env(create_env(
&context.turn.shell_environment_policy,
Some(context.session.conversation_id),
@@ -613,13 +622,17 @@ impl UnifiedExecProcessManager {
justification: request.justification.clone(),
exec_approval_requirement,
};
let effective_command =
std::sync::Arc::new(tokio::sync::Mutex::new(request.command.clone()));
let tool_ctx = ToolCtx {
session: context.session.clone(),
turn: context.turn.clone(),
call_id: context.call_id.clone(),
tool_name: "exec_command".to_string(),
command_override: None,
effective_command: Some(std::sync::Arc::clone(&effective_command)),
};
orchestrator
let result = orchestrator
.run(
&mut runtime,
&req,
@@ -628,8 +641,13 @@ impl UnifiedExecProcessManager {
context.turn.approval_policy.value(),
)
.await
.map(|result| (result.output, result.deferred_network_approval))
.map_err(|e| UnifiedExecError::create_process(format!("{e:?}")))
.map_err(|e| UnifiedExecError::create_process(format!("{e:?}")))?;
let effective_command = effective_command.lock().await.clone();
Ok((
result.output,
result.deferred_network_approval,
effective_command,
))
}
pub(super) async fn collect_output_until_deadline(

View File

@@ -34,6 +34,7 @@ use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use core_test_support::wait_for_event_with_timeout;
use core_test_support::zsh_fork::build_zsh_fork_test;
use core_test_support::zsh_fork::restrictive_workspace_write_policy;
@@ -1815,6 +1816,102 @@ async fn approving_apply_patch_for_session_skips_future_prompts_for_same_file()
Ok(())
}
#[tokio::test(flavor = "current_thread")]
#[cfg(unix)]
async fn approved_override_command_updates_exec_events() -> Result<()> {
let server = start_mock_server().await;
let approval_policy = AskForApproval::OnRequest;
let sandbox_policy = SandboxPolicy::new_read_only_policy();
let sandbox_policy_for_config = sandbox_policy.clone();
let mut builder = test_codex().with_config(move |config| {
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
});
let test = builder.build(&server).await?;
let call_id = "override-command-events";
let original_command = r#"cat < "hello" > /var/test.txt"#;
let override_payload = "printf 'override-output'";
let (event, expected_command) = ActionKind::RunCommand {
command: original_command,
}
.prepare(
&test,
&server,
call_id,
SandboxPermissions::RequireEscalated,
)
.await?;
let expected_command =
expected_command.expect("override command scenario should produce a shell command");
let _ = mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-override-command-1"),
event,
ev_completed("resp-override-command-1"),
]),
)
.await;
let results_mock = mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-override-command-1", "done"),
ev_completed("resp-override-command-2"),
]),
)
.await;
submit_turn(
&test,
"override-command-events",
approval_policy,
sandbox_policy.clone(),
)
.await?;
let approval = expect_exec_approval(&test, expected_command.as_str()).await;
let mut override_command = approval.command.clone();
*override_command
.last_mut()
.expect("shell approval command should include the shell payload") =
override_payload.to_string();
test.codex
.submit(Op::ExecApproval {
id: approval.effective_approval_id(),
turn_id: None,
decision: ReviewDecision::ApprovedOverrideCommand {
command: override_command.clone(),
},
})
.await?;
let begin_event = wait_for_event_match(&test.codex, |event| match event {
EventMsg::ExecCommandBegin(ev) if ev.call_id == call_id => Some(ev.clone()),
_ => None,
})
.await;
let end_event = wait_for_event_match(&test.codex, |event| match event {
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
_ => None,
})
.await;
wait_for_completion(&test).await;
assert_eq!(begin_event.command, override_command);
assert_eq!(end_event.command, override_command);
assert_eq!(end_event.stdout, "override-output");
let output_item = results_mock.single_request().function_call_output(call_id);
let result = parse_result(&output_item);
assert_eq!(result.stdout, "override-output");
Ok(())
}
#[tokio::test(flavor = "current_thread")]
#[cfg(unix)]
async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts() -> Result<()> {

View File

@@ -208,6 +208,7 @@ impl ExecApprovalRequestEvent {
match &self.available_decisions {
Some(decisions) => decisions.clone(),
None => Self::default_available_decisions(
&self.command,
self.network_approval_context.as_ref(),
self.proposed_execpolicy_amendment.as_ref(),
self.proposed_network_policy_amendments.as_deref(),
@@ -217,6 +218,7 @@ impl ExecApprovalRequestEvent {
}
pub fn default_available_decisions(
command: &[String],
network_approval_context: Option<&NetworkApprovalContext>,
proposed_execpolicy_amendment: Option<&ExecPolicyAmendment>,
proposed_network_policy_amendments: Option<&[NetworkPolicyAmendment]>,
@@ -238,10 +240,21 @@ impl ExecApprovalRequestEvent {
}
if additional_permissions.is_some() {
return vec![ReviewDecision::Approved, ReviewDecision::Abort];
return vec![
ReviewDecision::Approved,
ReviewDecision::ApprovedOverrideCommand {
command: command.to_vec(),
},
ReviewDecision::Abort,
];
}
let mut decisions = vec![ReviewDecision::Approved];
let mut decisions = vec![
ReviewDecision::Approved,
ReviewDecision::ApprovedOverrideCommand {
command: command.to_vec(),
},
];
if let Some(prefix) = proposed_execpolicy_amendment {
decisions.push(ReviewDecision::ApprovedExecpolicyAmendment {
proposed_execpolicy_amendment: prefix.clone(),
@@ -317,3 +330,66 @@ pub struct ApplyPatchApprovalRequestEvent {
#[serde(skip_serializing_if = "Option::is_none")]
pub grant_root: Option<PathBuf>,
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn command_prompts_advertise_override_decision() {
let command = vec!["echo".to_string(), "hi".to_string()];
let decisions =
ExecApprovalRequestEvent::default_available_decisions(&command, None, None, None, None);
assert_eq!(
decisions,
vec![
ReviewDecision::Approved,
ReviewDecision::ApprovedOverrideCommand {
command: command.clone(),
},
ReviewDecision::Abort,
]
);
}
#[test]
fn network_only_prompts_do_not_advertise_override_decision() {
let decisions = ExecApprovalRequestEvent::default_available_decisions(
&["echo".to_string(), "hi".to_string()],
Some(&NetworkApprovalContext {
host: "example.com".to_string(),
protocol: NetworkApprovalProtocol::Https,
}),
None,
Some(&[
NetworkPolicyAmendment {
host: "example.com".to_string(),
action: NetworkPolicyRuleAction::Allow,
},
NetworkPolicyAmendment {
host: "example.com".to_string(),
action: NetworkPolicyRuleAction::Deny,
},
]),
None,
);
assert_eq!(
decisions,
vec![
ReviewDecision::Approved,
ReviewDecision::ApprovedForSession,
ReviewDecision::NetworkPolicyAmendment {
network_policy_amendment: NetworkPolicyAmendment {
host: "example.com".to_string(),
action: NetworkPolicyRuleAction::Allow,
},
},
ReviewDecision::Abort,
]
);
}
}

View File

@@ -3105,6 +3105,10 @@ pub enum ReviewDecision {
/// User has approved this command and the agent should execute it.
Approved,
/// User has approved this command and wants to execute a different argv
/// vector instead of the original one-time request.
ApprovedOverrideCommand { command: Vec<String> },
/// User has approved this command and wants to apply the proposed execpolicy
/// amendment so future matching commands are permitted.
ApprovedExecpolicyAmendment {
@@ -3133,11 +3137,19 @@ pub enum ReviewDecision {
}
impl ReviewDecision {
pub fn override_command(&self) -> Option<&[String]> {
match self {
ReviewDecision::ApprovedOverrideCommand { command } => Some(command.as_slice()),
_ => None,
}
}
/// Returns an opaque version of the decision without PII. We can't use an ignored flag
/// on `serde` because the serialization is required by some surfaces.
pub fn to_opaque_string(&self) -> &'static str {
match self {
ReviewDecision::Approved => "approved",
ReviewDecision::ApprovedOverrideCommand { .. } => "approved_with_command_override",
ReviewDecision::ApprovedExecpolicyAmendment { .. } => "approved_with_amendment",
ReviewDecision::ApprovedForSession => "approved_for_session",
ReviewDecision::NetworkPolicyAmendment {