Compare commits

...

3 Commits

Author SHA1 Message Date
ashwinnathan-openai
3013b516cf fix app-server test import
Co-authored-by: Codex <noreply@openai.com>
2026-03-16 20:42:57 -07:00
ashwinnathan-openai
80721b465f route manual write_stdin approvals through guardian flow
Co-authored-by: Codex <noreply@openai.com>
2026-03-16 19:12:11 -07:00
ashwinnathan-openai
5cc05f66aa add manual tool execution mode
Co-authored-by: Codex <noreply@openai.com>
2026-03-16 18:13:35 -07:00
26 changed files with 535 additions and 44 deletions

View File

@@ -13640,8 +13640,25 @@
],
"type": "object"
},
"ToolExecutionMode": {
"enum": [
"auto",
"manual"
],
"type": "string"
},
"ToolsV2": {
"properties": {
"execution_mode": {
"anyOf": [
{
"$ref": "#/definitions/v2/ToolExecutionMode"
},
{
"type": "null"
}
]
},
"view_image": {
"type": [
"boolean",

View File

@@ -11400,8 +11400,25 @@
],
"type": "object"
},
"ToolExecutionMode": {
"enum": [
"auto",
"manual"
],
"type": "string"
},
"ToolsV2": {
"properties": {
"execution_mode": {
"anyOf": [
{
"$ref": "#/definitions/ToolExecutionMode"
},
{
"type": "null"
}
]
},
"view_image": {
"type": [
"boolean",

View File

@@ -760,8 +760,25 @@
],
"type": "string"
},
"ToolExecutionMode": {
"enum": [
"auto",
"manual"
],
"type": "string"
},
"ToolsV2": {
"properties": {
"execution_mode": {
"anyOf": [
{
"$ref": "#/definitions/ToolExecutionMode"
},
{
"type": "null"
}
]
},
"view_image": {
"type": [
"boolean",

View File

@@ -0,0 +1,5 @@
// GENERATED CODE! DO NOT MODIFY BY HAND!
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
export type ToolExecutionMode = "auto" | "manual";

View File

@@ -2,5 +2,6 @@
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
import type { WebSearchToolConfig } from "../WebSearchToolConfig";
import type { ToolExecutionMode } from "./ToolExecutionMode";
export type ToolsV2 = { web_search: WebSearchToolConfig | null, view_image: boolean | null, };
export type ToolsV2 = { web_search: WebSearchToolConfig | null, view_image: boolean | null, execution_mode: ToolExecutionMode | null, };

View File

@@ -303,6 +303,7 @@ export type { ThreadUnsubscribeParams } from "./ThreadUnsubscribeParams";
export type { ThreadUnsubscribeResponse } from "./ThreadUnsubscribeResponse";
export type { ThreadUnsubscribeStatus } from "./ThreadUnsubscribeStatus";
export type { TokenUsageBreakdown } from "./TokenUsageBreakdown";
export type { ToolExecutionMode } from "./ToolExecutionMode";
export type { ToolRequestUserInputAnswer } from "./ToolRequestUserInputAnswer";
export type { ToolRequestUserInputOption } from "./ToolRequestUserInputOption";
export type { ToolRequestUserInputParams } from "./ToolRequestUserInputParams";

View File

@@ -528,12 +528,21 @@ pub struct SandboxWorkspaceWrite {
pub exclude_slash_tmp: bool,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(export_to = "v2/")]
pub enum ToolExecutionMode {
Auto,
Manual,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(export_to = "v2/")]
pub struct ToolsV2 {
pub web_search: Option<WebSearchToolConfig>,
pub view_image: Option<bool>,
pub execution_mode: Option<ToolExecutionMode>,
}
#[derive(Serialize, Debug, Clone, PartialEq, JsonSchema, TS)]

View File

@@ -19,6 +19,7 @@ use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::MergeStrategy;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::SandboxMode;
use codex_app_server_protocol::ToolExecutionMode;
use codex_app_server_protocol::ToolsV2;
use codex_app_server_protocol::WriteStatus;
use codex_core::config::set_project_trust_level;
@@ -102,6 +103,7 @@ allowed_domains = ["example.com"]
[tools]
view_image = false
execution_mode = "manual"
"#,
)?;
let codex_home_path = codex_home.path().canonicalize()?;
@@ -137,6 +139,7 @@ view_image = false
location: None,
}),
view_image: Some(false),
execution_mode: Some(ToolExecutionMode::Manual),
}
);
assert_eq!(
@@ -163,6 +166,12 @@ view_image = false
file: user_file.clone(),
}
);
assert_eq!(
origins.get("tools.execution_mode").expect("origin").name,
ConfigLayerSource::User {
file: user_file.clone(),
}
);
let layers = layers.expect("layers present");
assert_layers_user_then_optional_system(&layers, user_file)?;

View File

@@ -22,6 +22,7 @@ use codex_protocol::openai_models::ReasoningEffort;
use pretty_assertions::assert_eq;
use serde_json::Value;
use serde_json::json;
use std::collections::HashMap;
use std::path::Path;
use tempfile::TempDir;
use tokio::time::timeout;
@@ -191,6 +192,39 @@ model_reasoning_effort = "high"
Ok(())
}
#[tokio::test]
async fn thread_start_accepts_manual_tool_execution_mode_override() -> Result<()> {
let server = create_mock_responses_server_repeating_assistant("Done").await;
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), &server.uri())?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let req_id = mcp
.send_thread_start_request(ThreadStartParams {
config: Some(HashMap::from([(
"tools".to_string(),
json!({
"execution_mode": "manual"
}),
)])),
..Default::default()
})
.await?;
let resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(req_id)),
)
.await??;
let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(resp)?;
assert!(!thread.id.is_empty(), "thread id should not be empty");
Ok(())
}
#[tokio::test]
async fn thread_start_accepts_flex_service_tier() -> Result<()> {
let server = create_mock_responses_server_repeating_assistant("Done").await;

View File

@@ -1543,9 +1543,25 @@
},
"type": "object"
},
"ToolExecutionMode": {
"enum": [
"auto",
"manual"
],
"type": "string"
},
"ToolsToml": {
"additionalProperties": false,
"properties": {
"execution_mode": {
"allOf": [
{
"$ref": "#/definitions/ToolExecutionMode"
}
],
"default": null,
"description": "Execution policy for approval-capable tools."
},
"view_image": {
"default": null,
"description": "Enable the `view_image` tool that lets the agent attach local images.",

View File

@@ -38,6 +38,8 @@ pub(crate) async fn apply_patch(
file_system_sandbox_policy: &FileSystemSandboxPolicy,
action: ApplyPatchAction,
) -> InternalApplyPatchInvocation {
let manual_tool_approval_required = turn_context.tools_config.requires_manual_tool_approval();
match assess_patch_safety(
&action,
turn_context.approval_policy.value(),
@@ -52,9 +54,17 @@ pub(crate) async fn apply_patch(
} => InternalApplyPatchInvocation::DelegateToExec(ApplyPatchExec {
action,
auto_approved: !user_explicitly_approved,
exec_approval_requirement: ExecApprovalRequirement::Skip {
bypass_sandbox: false,
proposed_execpolicy_amendment: None,
exec_approval_requirement: if manual_tool_approval_required {
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: None,
}
} else {
ExecApprovalRequirement::Skip {
bypass_sandbox: false,
proposed_execpolicy_amendment: None,
}
},
}),
SafetyCheck::AskUser => {
@@ -65,6 +75,7 @@ pub(crate) async fn apply_patch(
action,
auto_approved: false,
exec_approval_requirement: ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: None,
},

View File

@@ -872,6 +872,7 @@ impl TurnContext {
sandbox_policy: self.sandbox_policy.get(),
windows_sandbox_level: self.windows_sandbox_level,
})
.with_execution_mode(config.tool_execution_mode)
.with_unified_exec_shell_mode(self.tools_config.unified_exec_shell_mode.clone())
.with_web_search_config(self.tools_config.web_search_config.clone())
.with_allow_login_shell(self.tools_config.allow_login_shell)
@@ -1306,6 +1307,7 @@ impl Session {
sandbox_policy: session_configuration.sandbox_policy.get(),
windows_sandbox_level: session_configuration.windows_sandbox_level,
})
.with_execution_mode(per_turn_config.tool_execution_mode)
.with_unified_exec_shell_mode_for_session(
user_shell,
shell_zsh_path,
@@ -5287,6 +5289,7 @@ async fn spawn_review_thread(
sandbox_policy: parent_turn_context.sandbox_policy.get(),
windows_sandbox_level: parent_turn_context.windows_sandbox_level,
})
.with_execution_mode(config.tool_execution_mode)
.with_unified_exec_shell_mode_for_session(
sess.services.user_shell.as_ref(),
sess.services.shell_zsh_path.as_ref(),

View File

@@ -193,6 +193,7 @@ web_search = true
Some(ToolsToml {
web_search: None,
view_image: None,
execution_mode: None,
})
);
}
@@ -212,6 +213,27 @@ web_search = false
Some(ToolsToml {
web_search: None,
view_image: None,
execution_mode: None,
})
);
}
#[test]
fn tools_execution_mode_deserializes() {
let cfg: ConfigToml = toml::from_str(
r#"
[tools]
execution_mode = "manual"
"#,
)
.expect("TOML deserialization should succeed");
assert_eq!(
cfg.tools,
Some(ToolsToml {
web_search: None,
view_image: None,
execution_mode: Some(ToolExecutionMode::Manual),
})
);
}
@@ -4261,6 +4283,7 @@ fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> {
include_apply_patch_tool: false,
web_search_mode: Constrained::allow_any(WebSearchMode::Cached),
web_search_config: None,
tool_execution_mode: ToolExecutionMode::Auto,
use_experimental_unified_exec_tool: !cfg!(windows),
background_terminal_max_timeout: DEFAULT_MAX_BACKGROUND_TERMINAL_TIMEOUT_MS,
ghost_snapshot: GhostSnapshotConfig::default(),
@@ -4400,6 +4423,7 @@ fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> {
include_apply_patch_tool: false,
web_search_mode: Constrained::allow_any(WebSearchMode::Cached),
web_search_config: None,
tool_execution_mode: ToolExecutionMode::Auto,
use_experimental_unified_exec_tool: !cfg!(windows),
background_terminal_max_timeout: DEFAULT_MAX_BACKGROUND_TERMINAL_TIMEOUT_MS,
ghost_snapshot: GhostSnapshotConfig::default(),
@@ -4537,6 +4561,7 @@ fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> {
include_apply_patch_tool: false,
web_search_mode: Constrained::allow_any(WebSearchMode::Cached),
web_search_config: None,
tool_execution_mode: ToolExecutionMode::Auto,
use_experimental_unified_exec_tool: !cfg!(windows),
background_terminal_max_timeout: DEFAULT_MAX_BACKGROUND_TERMINAL_TIMEOUT_MS,
ghost_snapshot: GhostSnapshotConfig::default(),
@@ -4660,6 +4685,7 @@ fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> {
include_apply_patch_tool: false,
web_search_mode: Constrained::allow_any(WebSearchMode::Cached),
web_search_config: None,
tool_execution_mode: ToolExecutionMode::Auto,
use_experimental_unified_exec_tool: !cfg!(windows),
background_terminal_max_timeout: DEFAULT_MAX_BACKGROUND_TERMINAL_TIMEOUT_MS,
ghost_snapshot: GhostSnapshotConfig::default(),

View File

@@ -512,6 +512,9 @@ pub struct Config {
/// Additional parameters for the web search tool when it is enabled.
pub web_search_config: Option<WebSearchConfig>,
/// Execution policy for approval-capable tools.
pub tool_execution_mode: ToolExecutionMode,
/// If set to `true`, used only the experimental unified exec tool.
pub use_experimental_unified_exec_tool: bool,
@@ -1578,6 +1581,17 @@ pub struct ToolsToml {
/// Enable the `view_image` tool that lets the agent attach local images.
#[serde(default)]
pub view_image: Option<bool>,
/// Execution policy for approval-capable tools.
#[serde(default)]
pub execution_mode: Option<ToolExecutionMode>,
}
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema)]
#[serde(rename_all = "snake_case")]
pub enum ToolExecutionMode {
Auto,
Manual,
}
#[derive(Deserialize)]
@@ -2008,6 +2022,23 @@ fn resolve_web_search_config(
}
}
fn resolve_tool_execution_mode(
config_toml: &ConfigToml,
config_profile: &ConfigProfile,
) -> ToolExecutionMode {
config_profile
.tools
.as_ref()
.and_then(|tools| tools.execution_mode)
.or_else(|| {
config_toml
.tools
.as_ref()
.and_then(|tools| tools.execution_mode)
})
.unwrap_or(ToolExecutionMode::Auto)
}
pub(crate) fn resolve_web_search_mode_for_turn(
web_search_mode: &Constrained<WebSearchMode>,
sandbox_policy: &SandboxPolicy,
@@ -2302,6 +2333,7 @@ impl Config {
let web_search_mode = resolve_web_search_mode(&cfg, &config_profile, &features)
.unwrap_or(WebSearchMode::Cached);
let web_search_config = resolve_web_search_config(&cfg, &config_profile);
let tool_execution_mode = resolve_tool_execution_mode(&cfg, &config_profile);
let agent_roles =
agent_roles::load_agent_roles(&cfg, &config_layer_stack, &mut startup_warnings)?;
@@ -2717,6 +2749,7 @@ impl Config {
include_apply_patch_tool: include_apply_patch_tool_flag,
web_search_mode: constrained_web_search_mode.value,
web_search_config,
tool_execution_mode,
use_experimental_unified_exec_tool,
background_terminal_max_timeout,
ghost_snapshot,

View File

@@ -260,6 +260,7 @@ impl ExecPolicyManager {
reason: reason.to_string(),
},
None => ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: derive_prompt_reason(command, &evaluation),
proposed_execpolicy_amendment: requested_amendment.or_else(|| {
if auto_amendment_allowed {

View File

@@ -518,6 +518,7 @@ async fn omits_auto_amendment_for_heredoc_fallback_prompts() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: None,
}
@@ -547,6 +548,7 @@ async fn drops_requested_amendment_for_heredoc_fallback_prompts_when_it_wont_mat
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: None,
}
@@ -617,6 +619,7 @@ async fn exec_approval_requirement_prefers_execpolicy_match() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: Some("`rm` requires approval by policy".to_string()),
proposed_execpolicy_amendment: None,
}
@@ -726,6 +729,7 @@ async fn requested_prefix_rule_can_approve_absolute_path_commands() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"cargo".to_string(),
@@ -931,6 +935,7 @@ async fn exec_approval_requirement_falls_back_to_heuristics() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command))
}
@@ -956,6 +961,7 @@ async fn empty_bash_lc_script_falls_back_to_original_command() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)),
}
@@ -985,6 +991,7 @@ async fn whitespace_bash_lc_script_falls_back_to_original_command() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)),
}
@@ -1014,6 +1021,7 @@ async fn request_rule_uses_prefix_rule() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"cargo".to_string(),
@@ -1046,6 +1054,7 @@ async fn request_rule_falls_back_when_prefix_rule_does_not_approve_all_commands(
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"rm".to_string(),
@@ -1082,6 +1091,7 @@ async fn heuristics_apply_when_other_commands_match_policy() {
})
.await,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"orange".to_string()
@@ -1160,6 +1170,7 @@ async fn proposed_execpolicy_amendment_is_present_for_single_command_without_pol
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command))
}
@@ -1191,6 +1202,7 @@ async fn proposed_execpolicy_amendment_is_omitted_when_policy_prompts() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: Some("`rm` requires approval by policy".to_string()),
proposed_execpolicy_amendment: None,
}
@@ -1219,6 +1231,7 @@ async fn proposed_execpolicy_amendment_is_present_for_multi_command_scripts() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"cargo".to_string(),
@@ -1255,6 +1268,7 @@ async fn proposed_execpolicy_amendment_uses_first_no_match_in_multi_command_scri
})
.await,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"apple".to_string()
@@ -1481,6 +1495,7 @@ async fn dangerous_rm_rf_requires_approval_in_danger_full_access() {
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)),
}
@@ -1519,6 +1534,7 @@ async fn verify_approval_requirement_for_unsafe_powershell_command() {
that no sandbox is present, so anything that is not "provably
safe" should require approval."#,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: expected_amendment.clone(),
},
@@ -1551,6 +1567,7 @@ async fn verify_approval_requirement_for_unsafe_powershell_command() {
let dangerous_command = vec_str(&["rm", "-rf", "/important/data"]);
assert_eq!(
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec_str(&[
"rm",

View File

@@ -441,6 +441,11 @@ impl ShellHandler {
prefix_rule,
})
.await;
let exec_approval_requirement = if turn.tools_config.requires_manual_tool_approval() {
exec_approval_requirement.force_manual_approval()
} else {
exec_approval_requirement
};
let req = ShellRequest {
command: exec_params.command.clone(),

View File

@@ -295,6 +295,7 @@ impl ToolHandler for UnifiedExecHandler {
input: &args.chars,
yield_time_ms: args.yield_time_ms,
max_output_tokens: args.max_output_tokens,
context: Some(&context),
})
.await
.map_err(|err| {

View File

@@ -45,6 +45,7 @@ fn guardian_review_request_includes_patch_context() {
},
)]),
exec_approval_requirement: ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: None,
},

View File

@@ -38,11 +38,14 @@ use crate::unified_exec::UnifiedExecError;
use crate::unified_exec::UnifiedExecProcess;
use crate::unified_exec::UnifiedExecProcessManager;
use codex_network_proxy::NetworkProxy;
use codex_protocol::approvals::ExecPolicyAmendment;
use codex_protocol::approvals::NetworkApprovalContext;
use codex_protocol::models::PermissionProfile;
use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
#[derive(Clone, Debug)]
pub struct UnifiedExecRequest {
@@ -83,6 +86,68 @@ impl<'a> UnifiedExecRuntime<'a> {
}
}
#[allow(clippy::too_many_arguments)]
pub(crate) async fn request_unified_exec_approval(
session: &Arc<crate::codex::Session>,
turn: &Arc<crate::codex::TurnContext>,
call_id: String,
command: Vec<String>,
cwd: PathBuf,
sandbox_permissions: SandboxPermissions,
additional_permissions: Option<PermissionProfile>,
justification: Option<String>,
tty: bool,
retry_reason: Option<String>,
network_approval_context: Option<NetworkApprovalContext>,
proposed_execpolicy_amendment: Option<ExecPolicyAmendment>,
approval_keys: Option<Vec<UnifiedExecApprovalKey>>,
) -> ReviewDecision {
let reason = retry_reason.clone().or_else(|| justification.clone());
if routes_approval_to_guardian(turn) {
return review_approval_request(
session,
turn,
GuardianApprovalRequest::ExecCommand {
id: call_id,
command,
cwd,
sandbox_permissions,
additional_permissions,
justification,
tty,
},
retry_reason,
)
.await;
}
let request_approval = || async {
let available_decisions = None;
session
.request_command_approval(
turn,
call_id,
/*approval_id*/ None,
command,
cwd,
reason,
network_approval_context,
proposed_execpolicy_amendment,
additional_permissions,
/*skill_metadata*/ None,
available_decisions,
)
.await
};
if let Some(keys) = approval_keys {
with_cached_approval(&session.services, "unified_exec", keys, request_approval).await
} else {
request_approval().await
}
}
impl Sandboxable for UnifiedExecRuntime<'_> {
fn sandbox_preference(&self) -> SandboxablePreference {
SandboxablePreference::Auto
@@ -118,45 +183,24 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
let command = req.command.clone();
let cwd = req.cwd.clone();
let retry_reason = ctx.retry_reason.clone();
let reason = retry_reason.clone().or_else(|| req.justification.clone());
Box::pin(async move {
if routes_approval_to_guardian(turn) {
return review_approval_request(
session,
turn,
GuardianApprovalRequest::ExecCommand {
id: call_id,
command,
cwd,
sandbox_permissions: req.sandbox_permissions,
additional_permissions: req.additional_permissions.clone(),
justification: req.justification.clone(),
tty: req.tty,
},
retry_reason,
)
.await;
}
with_cached_approval(&session.services, "unified_exec", keys, || async move {
let available_decisions = None;
session
.request_command_approval(
turn,
call_id,
/*approval_id*/ None,
command,
cwd,
reason,
ctx.network_approval_context.clone(),
req.exec_approval_requirement
.proposed_execpolicy_amendment()
.cloned(),
req.additional_permissions.clone(),
/*skill_metadata*/ None,
available_decisions,
)
.await
})
request_unified_exec_approval(
session,
turn,
call_id,
command,
cwd,
req.sandbox_permissions,
req.additional_permissions.clone(),
req.justification.clone(),
req.tty,
retry_reason,
ctx.network_approval_context.clone(),
req.exec_approval_requirement
.proposed_execpolicy_amendment()
.cloned(),
Some(keys),
)
.await
})
}

View File

@@ -134,6 +134,8 @@ pub(crate) enum ExecApprovalRequirement {
},
/// Approval required for this tool call.
NeedsApproval {
/// The first attempt should skip sandboxing after approval.
bypass_sandbox: bool,
reason: Option<String>,
/// Proposed execpolicy amendment to skip future approvals for similar commands
/// See core/src/exec_policy.rs for more details on how proposed_execpolicy_amendment is determined.
@@ -157,6 +159,21 @@ impl ExecApprovalRequirement {
_ => None,
}
}
pub fn force_manual_approval(self) -> Self {
match self {
Self::Skip {
bypass_sandbox,
proposed_execpolicy_amendment,
..
} => Self::NeedsApproval {
bypass_sandbox,
reason: None,
proposed_execpolicy_amendment,
},
other => other,
}
}
}
/// - Never, OnFailure: do not ask
@@ -191,6 +208,7 @@ pub(crate) fn default_exec_approval_requirement(
}
} else if needs_approval {
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: None,
}
@@ -217,7 +235,10 @@ pub(crate) fn sandbox_override_for_first_attempt(
if sandbox_permissions.requires_escalated_permissions()
|| matches!(
exec_approval_requirement,
ExecApprovalRequirement::Skip {
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: true,
..
} | ExecApprovalRequirement::Skip {
bypass_sandbox: true,
..
}

View File

@@ -30,6 +30,7 @@ fn restricted_sandbox_requires_exec_approval_on_request() {
&FileSystemSandboxPolicy::from(&sandbox_policy)
),
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: None,
}
@@ -75,6 +76,7 @@ fn default_exec_approval_requirement_keeps_prompt_when_granular_allows_sandbox_a
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: None,
}
@@ -108,3 +110,66 @@ fn guardian_bypasses_sandbox_for_explicit_escalation_on_first_attempt() {
SandboxOverride::BypassSandboxFirstAttempt
);
}
#[test]
fn force_manual_approval_converts_skip_to_needs_approval() {
let requirement = ExecApprovalRequirement::Skip {
bypass_sandbox: false,
proposed_execpolicy_amendment: None,
};
assert_eq!(
requirement.force_manual_approval(),
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: false,
reason: None,
proposed_execpolicy_amendment: None,
}
);
}
#[test]
fn force_manual_approval_preserves_bypass_sandbox() {
let requirement = ExecApprovalRequirement::Skip {
bypass_sandbox: true,
proposed_execpolicy_amendment: None,
};
assert_eq!(
requirement.force_manual_approval(),
ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: true,
reason: None,
proposed_execpolicy_amendment: None,
}
);
}
#[test]
fn needs_approval_can_bypass_sandbox_on_first_attempt() {
assert_eq!(
sandbox_override_for_first_attempt(
SandboxPermissions::UseDefault,
&ExecApprovalRequirement::NeedsApproval {
bypass_sandbox: true,
reason: None,
proposed_execpolicy_amendment: None,
},
),
SandboxOverride::BypassSandboxFirstAttempt
);
}
#[test]
fn force_manual_approval_preserves_forbidden() {
let requirement = ExecApprovalRequirement::Forbidden {
reason: "denied".to_string(),
};
assert_eq!(
requirement.force_manual_approval(),
ExecApprovalRequirement::Forbidden {
reason: "denied".to_string(),
}
);
}

View File

@@ -3,6 +3,7 @@ use crate::client_common::tools::FreeformToolFormat;
use crate::client_common::tools::ResponsesApiTool;
use crate::client_common::tools::ToolSpec;
use crate::config::AgentRoleConfig;
use crate::config::ToolExecutionMode;
use crate::features::Feature;
use crate::features::Features;
use crate::mcp::CODEX_APPS_MCP_SERVER_NAME;
@@ -257,6 +258,7 @@ pub(crate) struct ToolsConfig {
shell_command_backend: ShellCommandBackendConfig,
pub unified_exec_shell_mode: UnifiedExecShellMode,
pub allow_login_shell: bool,
pub execution_mode: ToolExecutionMode,
pub apply_patch_tool_type: Option<ApplyPatchToolType>,
pub web_search_mode: Option<WebSearchMode>,
pub web_search_config: Option<WebSearchConfig>,
@@ -389,6 +391,7 @@ impl ToolsConfig {
shell_command_backend,
unified_exec_shell_mode: UnifiedExecShellMode::Direct,
allow_login_shell: true,
execution_mode: ToolExecutionMode::Auto,
apply_patch_tool_type,
web_search_mode: *web_search_mode,
web_search_config: None,
@@ -424,6 +427,11 @@ impl ToolsConfig {
self
}
pub fn with_execution_mode(mut self, execution_mode: ToolExecutionMode) -> Self {
self.execution_mode = execution_mode;
self
}
pub fn with_unified_exec_shell_mode(
mut self,
unified_exec_shell_mode: UnifiedExecShellMode,
@@ -458,6 +466,10 @@ impl ToolsConfig {
nested.code_mode_only_enabled = false;
nested
}
pub fn requires_manual_tool_approval(&self) -> bool {
matches!(self.execution_mode, ToolExecutionMode::Manual)
}
}
fn supports_image_generation(model_info: &ModelInfo) -> bool {

View File

@@ -99,12 +99,12 @@ pub(crate) struct ExecCommandRequest {
pub prefix_rule: Option<Vec<String>>,
}
#[derive(Debug)]
pub(crate) struct WriteStdinRequest<'a> {
pub process_id: i32,
pub input: &'a str,
pub yield_time_ms: u64,
pub max_output_tokens: Option<usize>,
pub context: Option<&'a UnifiedExecContext>,
}
#[derive(Default)]
@@ -146,6 +146,7 @@ struct ProcessEntry {
call_id: String,
process_id: i32,
command: Vec<String>,
cwd: PathBuf,
tty: bool,
network_approval_id: Option<String>,
session: Weak<Session>,

View File

@@ -3,11 +3,16 @@ use super::*;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::codex::make_session_and_context;
use crate::codex::make_session_and_context_with_rx;
use crate::config::ToolExecutionMode;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
use crate::state::ActiveTurn;
use crate::tools::context::ExecCommandToolOutput;
use crate::unified_exec::ExecCommandRequest;
use crate::unified_exec::WriteStdinRequest;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ReviewDecision;
use core_test_support::skip_if_sandbox;
use std::sync::Arc;
use tokio::time::Duration;
@@ -78,6 +83,7 @@ async fn write_stdin(
input,
yield_time_ms,
max_output_tokens: None,
context: None,
})
.await
}
@@ -147,6 +153,76 @@ async fn unified_exec_persists_across_requests() -> anyhow::Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn unified_exec_write_stdin_requires_manual_approval() -> anyhow::Result<()> {
skip_if_sandbox!(Ok(()));
let (session, turn, rx) = make_session_and_context_with_rx().await;
*session.active_turn.lock().await = Some(ActiveTurn::default());
let mut turn = Arc::try_unwrap(turn).expect("single turn context ref");
turn.sandbox_policy
.set(SandboxPolicy::DangerFullAccess)
.expect("test setup should allow updating sandbox policy");
turn.file_system_sandbox_policy =
codex_protocol::permissions::FileSystemSandboxPolicy::from(turn.sandbox_policy.get());
turn.network_sandbox_policy =
codex_protocol::permissions::NetworkSandboxPolicy::from(turn.sandbox_policy.get());
turn.tools_config.execution_mode = ToolExecutionMode::Manual;
let turn = Arc::new(turn);
let approval_task = tokio::spawn({
let session = Arc::clone(&session);
async move {
let mut approvals = Vec::new();
while approvals.len() < 2 {
let event = rx.recv().await.expect("approval event");
if let EventMsg::ExecApprovalRequest(request) = event.msg {
approvals.push(request.clone());
let approval_id = request
.approval_id
.as_deref()
.unwrap_or(request.call_id.as_str());
session
.notify_approval(approval_id, ReviewDecision::Approved)
.await;
}
}
approvals
}
});
let open_shell = exec_command(&session, &turn, "bash -i", 2_500).await?;
let process_id = open_shell.process_id.expect("expected process_id");
let context = UnifiedExecContext::new(Arc::clone(&session), Arc::clone(&turn), "call".into());
let output = session
.services
.unified_exec_manager
.write_stdin(WriteStdinRequest {
process_id,
input: "printf codex-manual-approval\n",
yield_time_ms: 2_500,
max_output_tokens: None,
context: Some(&context),
})
.await?;
assert!(
output.truncated_output().contains("codex-manual-approval"),
"interactive input should run after approval"
);
let approvals = approval_task.await?;
assert_eq!(approvals.len(), 2);
assert_eq!(
approvals[1].reason.as_deref(),
Some("interactive terminal input: printf codex-manual-approval\\n")
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn multi_unified_exec_sessions() -> anyhow::Result<()> {
skip_if_sandbox!(Ok(()));

View File

@@ -17,6 +17,7 @@ use crate::exec_env::create_env;
use crate::exec_policy::ExecApprovalRequest;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::ExecRequest;
use crate::sandboxing::SandboxPermissions;
use crate::tools::context::ExecCommandToolOutput;
use crate::tools::events::ToolEmitter;
use crate::tools::events::ToolEventCtx;
@@ -26,6 +27,7 @@ use crate::tools::network_approval::finish_deferred_network_approval;
use crate::tools::orchestrator::ToolOrchestrator;
use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
use crate::tools::runtimes::unified_exec::request_unified_exec_approval;
use crate::tools::sandboxing::ToolCtx;
use crate::truncate::approx_token_count;
use crate::unified_exec::ExecCommandRequest;
@@ -98,6 +100,7 @@ struct PreparedProcessHandles {
cancellation_token: CancellationToken,
pause_state: Option<watch::Receiver<bool>>,
command: Vec<String>,
cwd: PathBuf,
process_id: i32,
tty: bool,
}
@@ -320,6 +323,7 @@ impl UnifiedExecProcessManager {
cancellation_token,
pause_state,
command: session_command,
cwd,
process_id,
tty,
..
@@ -329,6 +333,42 @@ impl UnifiedExecProcessManager {
if !tty {
return Err(UnifiedExecError::StdinClosed);
}
if let Some(context) = request.context
&& context.turn.tools_config.requires_manual_tool_approval()
{
let input = request.input.escape_default().to_string();
let reason = if input.len() > 200 {
Some(format!("interactive terminal input: {}...", &input[..200]))
} else {
Some(format!("interactive terminal input: {input}"))
};
let decision = request_unified_exec_approval(
&context.session,
&context.turn,
context.call_id.clone(),
session_command.clone(),
cwd.clone(),
SandboxPermissions::UseDefault,
/*additional_permissions*/ None,
reason,
tty,
/*retry_reason*/ None,
/*network_approval_context*/ None,
/*proposed_execpolicy_amendment*/ None,
/*approval_keys*/ None,
)
.await;
if !matches!(
decision,
codex_protocol::protocol::ReviewDecision::Approved
| codex_protocol::protocol::ReviewDecision::ApprovedExecpolicyAmendment { .. }
| codex_protocol::protocol::ReviewDecision::ApprovedForSession
) {
return Err(UnifiedExecError::create_process(
"rejected by user".to_string(),
));
}
}
Self::send_input(&writer_tx, request.input.as_bytes()).await?;
// Give the remote process a brief window to react so that we are
// more likely to capture its output in the poll below.
@@ -463,6 +503,7 @@ impl UnifiedExecProcessManager {
cancellation_token,
pause_state,
command: entry.command.clone(),
cwd: entry.cwd.clone(),
process_id: entry.process_id,
tty: entry.tty,
})
@@ -496,6 +537,7 @@ impl UnifiedExecProcessManager {
call_id: context.call_id.clone(),
process_id,
command: command.to_vec(),
cwd: cwd.clone(),
tty,
network_approval_id,
session: Arc::downgrade(&context.session),
@@ -609,6 +651,12 @@ impl UnifiedExecProcessManager {
prefix_rule: request.prefix_rule.clone(),
})
.await;
let exec_approval_requirement = if context.turn.tools_config.requires_manual_tool_approval()
{
exec_approval_requirement.force_manual_approval()
} else {
exec_approval_requirement
};
let req = UnifiedExecToolRequest {
command: request.command.clone(),
cwd,