Compare commits

...

3 Commits

Author SHA1 Message Date
Ahmed Ibrahim
da2e1d2ba3 tests 2025-11-20 20:52:17 -08:00
Ahmed Ibrahim
528e7fde9d merge 2025-11-20 19:37:47 -08:00
Ahmed Ibrahim
2e44082a30 shell 2025-11-19 16:44:45 -08:00
22 changed files with 249 additions and 44 deletions

View File

@@ -1175,6 +1175,8 @@ impl CodexMessageProcessor {
with_escalated_permissions: None,
justification: None,
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};
let effective_policy = params

View File

@@ -3072,6 +3072,8 @@ mod tests {
with_escalated_permissions: Some(true),
justification: Some("test".to_string()),
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};
let params2 = ExecParams {
@@ -3082,6 +3084,8 @@ mod tests {
env: HashMap::new(),
justification: params.justification.clone(),
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};
let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));

View File

@@ -6,6 +6,7 @@ use crate::truncate::truncate_function_output_items_with_policy;
use crate::truncate::truncate_text;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseItem;
use codex_protocol::models::ShellToolCallParams;
use codex_protocol::protocol::TokenUsage;
use codex_protocol::protocol::TokenUsageInfo;
use std::ops::Deref;
@@ -130,6 +131,47 @@ impl ContextManager {
normalize::remove_orphan_outputs(&mut self.items);
}
/// Derives a per-call truncation policy override for the output of `call_id`.
///
/// The function call recorded under `call_id` is looked up in the transcript
/// and its JSON arguments are decoded as `ShellToolCallParams`; the decoded
/// `max_output_tokens` / `max_output_chars` are then turned into a policy.
///
/// Returns `None` when no matching call exists, when the arguments do not
/// deserialize as `ShellToolCallParams`, or when neither limit was supplied.
fn get_shell_truncation_policy(&self, call_id: &str) -> Option<TruncationPolicy> {
    let call = self.get_call_for_call_id(call_id)?;
    let ResponseItem::FunctionCall { arguments, .. } = call else {
        return None;
    };
    // Deserialization failures are deliberately swallowed: an unparsable call
    // simply means "no override".
    let params: ShellToolCallParams = serde_json::from_str(&arguments).ok()?;
    Self::create_truncation_policy(params.max_output_tokens, params.max_output_chars)
}
/// Builds a truncation policy from the optional per-call output limits.
///
/// A token limit takes precedence and yields `TruncationPolicy::Tokens`;
/// otherwise a character limit, if present, is wrapped in
/// `TruncationPolicy::Bytes`. Returns `None` when neither limit is set.
fn create_truncation_policy(
    max_output_tokens: Option<usize>,
    max_output_chars: Option<usize>,
) -> Option<TruncationPolicy> {
    max_output_tokens
        .map(TruncationPolicy::Tokens)
        .or_else(|| max_output_chars.map(TruncationPolicy::Bytes))
}
/// Returns a clone of the first `ResponseItem::FunctionCall` in the transcript
/// whose `call_id` equals the given one, or `None` if there is no such item.
///
/// Items of any other variant are ignored.
fn get_call_for_call_id(&self, call_id: &str) -> Option<ResponseItem> {
    self.items
        .iter()
        .find(|item| {
            matches!(
                item,
                ResponseItem::FunctionCall { call_id: existing, .. } if existing == call_id
            )
        })
        .cloned()
}
/// Returns a clone of the contents in the transcript.
fn contents(&self) -> Vec<ResponseItem> {
self.items.clone()
@@ -143,13 +185,12 @@ impl ContextManager {
let policy_with_serialization_budget = policy.mul(1.2);
match item {
ResponseItem::FunctionCallOutput { call_id, output } => {
let truncated =
truncate_text(output.content.as_str(), policy_with_serialization_budget);
let truncation_policy_override = self.get_shell_truncation_policy(call_id);
let truncation_policy =
truncation_policy_override.unwrap_or(policy_with_serialization_budget);
let truncated = truncate_text(output.content.as_str(), truncation_policy);
let truncated_items = output.content_items.as_ref().map(|items| {
truncate_function_output_items_with_policy(
items,
policy_with_serialization_budget,
)
truncate_function_output_items_with_policy(items, truncation_policy)
});
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),

View File

@@ -57,6 +57,8 @@ pub struct ExecParams {
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub arg0: Option<String>,
pub max_output_tokens: Option<usize>,
pub max_output_chars: Option<usize>,
}
/// Mechanism to terminate an exec invocation before it finishes naturally.
@@ -141,6 +143,8 @@ pub async fn process_exec_tool_call(
with_escalated_permissions,
justification,
arg0: _,
max_output_tokens,
max_output_chars,
} = params;
let (program, args) = command.split_first().ok_or_else(|| {
@@ -158,6 +162,8 @@ pub async fn process_exec_tool_call(
expiration,
with_escalated_permissions,
justification,
max_output_tokens,
max_output_chars,
};
let manager = SandboxManager::new();
@@ -189,6 +195,8 @@ pub(crate) async fn execute_exec_env(
with_escalated_permissions,
justification,
arg0,
max_output_tokens,
max_output_chars,
} = env;
let params = ExecParams {
@@ -199,6 +207,8 @@ pub(crate) async fn execute_exec_env(
with_escalated_permissions,
justification,
arg0,
max_output_tokens,
max_output_chars,
};
let start = Instant::now();
@@ -841,6 +851,8 @@ mod tests {
with_escalated_permissions: None,
justification: None,
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};
let output = exec(params, SandboxType::None, &SandboxPolicy::ReadOnly, None).await?;
@@ -886,6 +898,8 @@ mod tests {
with_escalated_permissions: None,
justification: None,
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};
tokio::spawn(async move {
tokio::time::sleep(Duration::from_millis(1_000)).await;

View File

@@ -76,6 +76,7 @@ macro_rules! model_family {
(
$slug:expr, $family:expr $(, $key:ident : $value:expr )* $(,)?
) => {{
let truncation_policy = TruncationPolicy::Bytes(10_000);
// defaults
#[allow(unused_mut)]
let mut mf = ModelFamily {
@@ -90,10 +91,10 @@ macro_rules! model_family {
experimental_supported_tools: Vec::new(),
effective_context_window_percent: 95,
support_verbosity: false,
shell_type: ConfigShellToolType::Default,
shell_type: ConfigShellToolType::Default(truncation_policy),
default_verbosity: None,
default_reasoning_effort: None,
truncation_policy: TruncationPolicy::Bytes(10_000),
truncation_policy,
};
// apply overrides
@@ -138,6 +139,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
} else if slug.starts_with("gpt-3.5") {
model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
} else if slug.starts_with("test-gpt-5") {
let truncation_policy = TruncationPolicy::Tokens(10_000);
model_family!(
slug, slug,
supports_reasoning_summaries: true,
@@ -150,13 +152,13 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
"test_sync_tool".to_string(),
],
supports_parallel_tool_calls: true,
shell_type: ConfigShellToolType::ShellCommand,
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
support_verbosity: true,
truncation_policy: TruncationPolicy::Tokens(10_000),
)
// Internal models.
} else if slug.starts_with("codex-exp-") {
let truncation_policy = TruncationPolicy::Tokens(10_000);
model_family!(
slug, slug,
supports_reasoning_summaries: true,
@@ -168,41 +170,44 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
"list_dir".to_string(),
"read_file".to_string(),
],
shell_type: ConfigShellToolType::ShellCommand,
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
supports_parallel_tool_calls: true,
support_verbosity: true,
truncation_policy: TruncationPolicy::Tokens(10_000),
truncation_policy: truncation_policy,
)
// Production models.
} else if slug.starts_with("gpt-5.1-codex-max") {
let truncation_policy = TruncationPolicy::Tokens(10_000);
model_family!(
slug, slug,
supports_reasoning_summaries: true,
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(),
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
shell_type: ConfigShellToolType::ShellCommand,
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
supports_parallel_tool_calls: true,
support_verbosity: false,
truncation_policy: TruncationPolicy::Tokens(10_000),
truncation_policy: truncation_policy,
)
} else if slug.starts_with("gpt-5-codex")
|| slug.starts_with("gpt-5.1-codex")
|| slug.starts_with("codex-")
{
let truncation_policy = TruncationPolicy::Tokens(10_000);
model_family!(
slug, slug,
supports_reasoning_summaries: true,
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
shell_type: ConfigShellToolType::ShellCommand,
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
supports_parallel_tool_calls: true,
support_verbosity: false,
truncation_policy: TruncationPolicy::Tokens(10_000),
truncation_policy: truncation_policy,
)
} else if slug.starts_with("gpt-5.1") {
let truncation_policy = TruncationPolicy::Tokens(10_000);
model_family!(
slug, "gpt-5.1",
supports_reasoning_summaries: true,
@@ -212,7 +217,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
default_reasoning_effort: Some(ReasoningEffort::Medium),
truncation_policy: TruncationPolicy::Bytes(10_000),
shell_type: ConfigShellToolType::ShellCommand,
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
supports_parallel_tool_calls: true,
)
} else if slug.starts_with("gpt-5") {
@@ -220,7 +225,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
slug, "gpt-5",
supports_reasoning_summaries: true,
needs_special_apply_patch_instructions: true,
shell_type: ConfigShellToolType::Default,
shell_type: ConfigShellToolType::Default(TruncationPolicy::Bytes(10_000)),
support_verbosity: true,
truncation_policy: TruncationPolicy::Bytes(10_000),
)
@@ -230,6 +235,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
}
pub fn derive_default_model_family(model: &str) -> ModelFamily {
let truncation_policy = TruncationPolicy::Bytes(10_000);
ModelFamily {
slug: model.to_string(),
family: model.to_string(),
@@ -242,9 +248,9 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
experimental_supported_tools: Vec::new(),
effective_context_window_percent: 95,
support_verbosity: false,
shell_type: ConfigShellToolType::Default,
shell_type: ConfigShellToolType::Default(truncation_policy),
default_verbosity: None,
default_reasoning_effort: None,
truncation_policy: TruncationPolicy::Bytes(10_000),
truncation_policy,
}
}

View File

@@ -58,6 +58,8 @@ pub struct CommandSpec {
pub expiration: ExecExpiration,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub max_output_tokens: Option<usize>,
pub max_output_chars: Option<usize>,
}
#[derive(Debug)]
@@ -70,6 +72,8 @@ pub struct ExecEnv {
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub arg0: Option<String>,
pub max_output_tokens: Option<usize>,
pub max_output_chars: Option<usize>,
}
pub enum SandboxPreference {
@@ -184,6 +188,8 @@ impl SandboxManager {
with_escalated_permissions: spec.with_escalated_permissions,
justification: spec.justification,
arg0: arg0_override,
max_output_tokens: spec.max_output_tokens,
max_output_chars: spec.max_output_chars,
})
}

View File

@@ -102,6 +102,8 @@ impl SessionTask for UserShellCommandTask {
with_escalated_permissions: None,
justification: None,
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};
let stdout_stream = Some(StdoutStream {

View File

@@ -15,6 +15,8 @@ use crate::protocol::PatchApplyEndEvent;
use crate::protocol::TurnDiffEvent;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::sandboxing::ToolError;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text;
use codex_protocol::parse_command::ParsedCommand;
use std::collections::HashMap;
use std::path::Path;
@@ -29,6 +31,7 @@ pub(crate) struct ToolEventCtx<'a> {
pub turn: &'a TurnContext,
pub call_id: &'a str,
pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
pub override_truncation_policy: Option<&'a TruncationPolicy>,
}
impl<'a> ToolEventCtx<'a> {
@@ -37,12 +40,14 @@ impl<'a> ToolEventCtx<'a> {
turn: &'a TurnContext,
call_id: &'a str,
turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
override_truncation_policy: Option<&'a TruncationPolicy>,
) -> Self {
Self {
session,
turn,
call_id,
turn_diff_tracker,
override_truncation_policy,
}
}
}
@@ -255,13 +260,13 @@ impl ToolEmitter {
fn format_exec_output_for_model(
&self,
output: &ExecToolCallOutput,
ctx: ToolEventCtx<'_>,
truncation_policy: &TruncationPolicy,
) -> String {
match self {
Self::Shell { freeform: true, .. } => {
super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
super::format_exec_output_for_model_freeform(output, *truncation_policy)
}
_ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
_ => super::format_exec_output_for_model_structured(output, *truncation_policy),
}
}
@@ -270,9 +275,12 @@ impl ToolEmitter {
ctx: ToolEventCtx<'_>,
out: Result<ExecToolCallOutput, ToolError>,
) -> Result<String, FunctionCallError> {
let truncation_policy = ctx
.override_truncation_policy
.unwrap_or(&ctx.turn.truncation_policy);
let (event, result) = match out {
Ok(output) => {
let content = self.format_exec_output_for_model(&output, ctx);
let content = self.format_exec_output_for_model(&output, truncation_policy);
let exit_code = output.exit_code;
let event = ToolEventStage::Success(output);
let result = if exit_code == 0 {
@@ -284,24 +292,26 @@ impl ToolEmitter {
}
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
let response = self.format_exec_output_for_model(&output, ctx);
let response = self.format_exec_output_for_model(&output, truncation_policy);
let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
let result = Err(FunctionCallError::RespondToModel(response));
(event, result)
}
Err(ToolError::Codex(err)) => {
let message = format!("execution error: {err:?}");
let event = ToolEventStage::Failure(ToolEventFailure::Message(message.clone()));
let result = Err(FunctionCallError::RespondToModel(message));
let formatted_error = formatted_truncate_text(&err.to_string(), *truncation_policy);
let message = format!("execution error: {formatted_error}");
let event = ToolEventStage::Failure(ToolEventFailure::Message(message));
let result = Err(FunctionCallError::RespondToModel(formatted_error));
(event, result)
}
Err(ToolError::Rejected(msg)) => {
let formatted_msg = formatted_truncate_text(&msg, *truncation_policy);
// Normalize common rejection messages for exec tools so tests and
// users see a clear, consistent phrase.
let normalized = if msg == "rejected by user" {
let normalized = if formatted_msg == "rejected by user" {
"exec command rejected by user".to_string()
} else {
msg
formatted_msg
};
let event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
let result = Err(FunctionCallError::RespondToModel(normalized));

View File

@@ -100,6 +100,7 @@ impl ToolHandler for ApplyPatchHandler {
turn.as_ref(),
&call_id,
Some(&tracker),
None,
);
emitter.begin(event_ctx).await;
@@ -127,6 +128,7 @@ impl ToolHandler for ApplyPatchHandler {
turn.as_ref(),
&call_id,
Some(&tracker),
None,
);
let content = emitter.finish(event_ctx, out).await?;
Ok(ToolOutput::Function {

View File

@@ -27,6 +27,7 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
use crate::tools::runtimes::shell::ShellRequest;
use crate::tools::runtimes::shell::ShellRuntime;
use crate::tools::sandboxing::ToolCtx;
use crate::truncate::TruncationPolicy;
pub struct ShellHandler;
@@ -42,6 +43,8 @@ impl ShellHandler {
with_escalated_permissions: params.with_escalated_permissions,
justification: params.justification,
arg0: None,
max_output_tokens: params.max_output_tokens,
max_output_chars: params.max_output_chars,
}
}
}
@@ -64,6 +67,8 @@ impl ShellCommandHandler {
with_escalated_permissions: params.with_escalated_permissions,
justification: params.justification,
arg0: None,
max_output_tokens: params.max_output_tokens,
max_output_chars: params.max_output_chars,
}
}
}
@@ -209,6 +214,9 @@ impl ShellHandler {
)));
}
let override_truncation_policy =
create_truncation_policy(exec_params.max_output_tokens, exec_params.max_output_chars);
// Intercept apply_patch if present.
match codex_apply_patch::maybe_parse_apply_patch_verified(
&exec_params.command,
@@ -237,6 +245,7 @@ impl ShellHandler {
turn.as_ref(),
&call_id,
Some(&tracker),
override_truncation_policy.as_ref(),
);
emitter.begin(event_ctx).await;
@@ -263,6 +272,7 @@ impl ShellHandler {
turn.as_ref(),
&call_id,
Some(&tracker),
override_truncation_policy.as_ref(),
);
let content = emitter.finish(event_ctx, out).await?;
return Ok(ToolOutput::Function {
@@ -294,7 +304,13 @@ impl ShellHandler {
source,
freeform,
);
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
let event_ctx = ToolEventCtx::new(
session.as_ref(),
turn.as_ref(),
&call_id,
None,
override_truncation_policy.as_ref(),
);
emitter.begin(event_ctx).await;
let req = ShellRequest {
@@ -304,6 +320,8 @@ impl ShellHandler {
env: exec_params.env.clone(),
with_escalated_permissions: exec_params.with_escalated_permissions,
justification: exec_params.justification.clone(),
max_output_tokens: exec_params.max_output_tokens,
max_output_chars: exec_params.max_output_chars,
approval_requirement: create_approval_requirement_for_command(
&turn.exec_policy,
&exec_params.command,
@@ -323,7 +341,13 @@ impl ShellHandler {
let out = orchestrator
.run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
.await;
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
let event_ctx = ToolEventCtx::new(
session.as_ref(),
turn.as_ref(),
&call_id,
None,
override_truncation_policy.as_ref(),
);
let content = emitter.finish(event_ctx, out).await?;
Ok(ToolOutput::Function {
content,
@@ -333,6 +357,16 @@ impl ShellHandler {
}
}
/// Converts the optional output limits carried by a shell call into a
/// truncation policy override.
///
/// `max_output_tokens` wins when both limits are present; `max_output_chars`
/// maps to `TruncationPolicy::Bytes`. Returns `None` when neither is provided,
/// meaning no override applies.
fn create_truncation_policy(
    max_output_tokens: Option<usize>,
    max_output_chars: Option<usize>,
) -> Option<TruncationPolicy> {
    match (max_output_tokens, max_output_chars) {
        (Some(tokens), _) => Some(TruncationPolicy::Tokens(tokens)),
        (None, Some(chars)) => Some(TruncationPolicy::Bytes(chars)),
        (None, None) => None,
    }
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;

View File

@@ -162,6 +162,7 @@ impl ToolHandler for UnifiedExecHandler {
context.turn.as_ref(),
&context.call_id,
None,
None,
);
let emitter = ToolEmitter::unified_exec(
&command,

View File

@@ -116,6 +116,8 @@ impl ToolRouter {
timeout_ms: exec.timeout_ms,
with_escalated_permissions: None,
justification: None,
max_output_tokens: None,
max_output_chars: None,
};
Ok(Some(ToolCall {
tool_name: "local_shell".to_string(),

View File

@@ -72,6 +72,8 @@ impl ApplyPatchRuntime {
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
max_output_tokens: None,
max_output_chars: None,
})
}

View File

@@ -16,6 +16,7 @@ pub mod unified_exec;
/// Shared helper to construct a CommandSpec from a tokenized command line.
/// Validates that at least a program is present.
#[allow(clippy::too_many_arguments)]
pub(crate) fn build_command_spec(
command: &[String],
cwd: &Path,
@@ -23,6 +24,8 @@ pub(crate) fn build_command_spec(
expiration: ExecExpiration,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
max_output_tokens: Option<usize>,
max_output_chars: Option<usize>,
) -> Result<CommandSpec, ToolError> {
let (program, args) = command
.split_first()
@@ -35,5 +38,7 @@ pub(crate) fn build_command_spec(
expiration,
with_escalated_permissions,
justification,
max_output_tokens,
max_output_chars,
})
}

View File

@@ -31,6 +31,8 @@ pub struct ShellRequest {
pub env: std::collections::HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub max_output_tokens: Option<usize>,
pub max_output_chars: Option<usize>,
pub approval_requirement: ApprovalRequirement,
}
@@ -136,6 +138,8 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
req.timeout_ms.into(),
req.with_escalated_permissions,
req.justification.clone(),
req.max_output_tokens,
req.max_output_chars,
)?;
let env = attempt
.env_for(spec)

View File

@@ -35,6 +35,8 @@ pub struct UnifiedExecRequest {
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub max_output_tokens: Option<usize>,
pub max_output_chars: Option<usize>,
pub approval_requirement: ApprovalRequirement,
}
@@ -73,6 +75,8 @@ impl UnifiedExecRequest {
env,
with_escalated_permissions,
justification,
max_output_tokens: None,
max_output_chars: None,
approval_requirement,
}
}
@@ -154,6 +158,8 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
ExecExpiration::DefaultTimeout,
req.with_escalated_permissions,
req.justification.clone(),
req.max_output_tokens,
req.max_output_chars,
)
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
let exec_env = attempt

View File

@@ -8,6 +8,7 @@ use crate::tools::handlers::apply_patch::ApplyPatchToolType;
use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool;
use crate::tools::handlers::apply_patch::create_apply_patch_json_tool;
use crate::tools::registry::ToolRegistryBuilder;
use crate::truncate::TruncationPolicy;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value as JsonValue;
@@ -17,7 +18,7 @@ use std::collections::HashMap;
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ConfigShellToolType {
Default,
Default(TruncationPolicy),
Local,
UnifiedExec,
/// Do not include a shell tool by default. Useful when using Codex
@@ -26,7 +27,7 @@ pub enum ConfigShellToolType {
/// to customize agent behavior.
Disabled,
/// Takes a command as a single string to be run in the user's default shell.
ShellCommand,
ShellCommand(TruncationPolicy),
}
#[derive(Debug, Clone)]
@@ -264,7 +265,7 @@ fn create_write_stdin_tool() -> ToolSpec {
})
}
fn create_shell_tool() -> ToolSpec {
fn create_shell_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
let mut properties = BTreeMap::new();
properties.insert(
"command".to_string(),
@@ -298,6 +299,24 @@ fn create_shell_tool() -> ToolSpec {
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
},
);
match truncation_policy {
TruncationPolicy::Tokens(_) => {
properties.insert(
"max_output_tokens".to_string(),
JsonSchema::Number {
description: Some("Maximum number of tokens to return from stdout/stderr. Excess tokens will be truncated".to_string()),
},
);
}
TruncationPolicy::Bytes(_) => {
properties.insert(
"max_output_chars".to_string(),
JsonSchema::Number {
description: Some("Maximum number of characters to return from stdout/stderr. Excess characters will be truncated".to_string()),
},
);
}
}
let description = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
@@ -328,7 +347,7 @@ Examples of valid command strings:
})
}
fn create_shell_command_tool() -> ToolSpec {
fn create_shell_command_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
let mut properties = BTreeMap::new();
properties.insert(
"command".to_string(),
@@ -362,6 +381,30 @@ fn create_shell_command_tool() -> ToolSpec {
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
},
);
// Advertise exactly one output-limit parameter, named after the unit of the
// truncation policy this tool was configured with.
match truncation_policy {
    TruncationPolicy::Tokens(_) => {
        properties.insert(
            "max_output_tokens".to_string(),
            JsonSchema::Number {
                description: Some(
                    "Maximum number of tokens to return. Excess output will be truncated."
                        .to_string(),
                ),
            },
        );
    }
    TruncationPolicy::Bytes(_) => {
        properties.insert(
            "max_output_chars".to_string(),
            JsonSchema::Number {
                // The description must name characters, not tokens, so the
                // model is told the correct unit for this parameter.
                description: Some(
                    "Maximum number of characters to return. Excess output will be truncated."
                        .to_string(),
                ),
            },
        );
    }
}
let description = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output.
@@ -999,8 +1042,8 @@ pub(crate) fn build_specs(
let shell_command_handler = Arc::new(ShellCommandHandler);
match &config.shell_type {
ConfigShellToolType::Default => {
builder.push_spec(create_shell_tool());
ConfigShellToolType::Default(truncation_policy) => {
builder.push_spec(create_shell_tool(*truncation_policy));
}
ConfigShellToolType::Local => {
builder.push_spec(ToolSpec::LocalShell {});
@@ -1014,8 +1057,8 @@ pub(crate) fn build_specs(
ConfigShellToolType::Disabled => {
// Do nothing.
}
ConfigShellToolType::ShellCommand => {
builder.push_spec(create_shell_command_tool());
ConfigShellToolType::ShellCommand(truncation_policy) => {
builder.push_spec(create_shell_command_tool(*truncation_policy));
}
}
@@ -1158,11 +1201,11 @@ mod tests {
fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> {
match config.shell_type {
ConfigShellToolType::Default => Some("shell"),
ConfigShellToolType::Default(_) => Some("shell"),
ConfigShellToolType::Local => Some("local_shell"),
ConfigShellToolType::UnifiedExec => None,
ConfigShellToolType::Disabled => None,
ConfigShellToolType::ShellCommand => Some("shell_command"),
ConfigShellToolType::ShellCommand(_) => Some("shell_command"),
}
}
@@ -1908,7 +1951,7 @@ mod tests {
#[test]
fn test_shell_tool() {
let tool = super::create_shell_tool();
let tool = super::create_shell_tool(TruncationPolicy::Bytes(10_000));
let ToolSpec::Function(ResponsesApiTool {
description, name, ..
}) = &tool
@@ -1938,7 +1981,7 @@ Examples of valid command strings:
#[test]
fn test_shell_command_tool() {
let tool = super::create_shell_command_tool();
let tool = super::create_shell_command_tool(TruncationPolicy::Tokens(10_000));
let ToolSpec::Function(ResponsesApiTool {
description, name, ..
}) = &tool

View File

@@ -174,6 +174,7 @@ impl UnifiedExecSessionManager {
turn_ref.as_ref(),
request.call_id,
None,
None,
)
};
interaction_emitter
@@ -369,6 +370,7 @@ impl UnifiedExecSessionManager {
entry.turn_ref.as_ref(),
&entry.call_id,
None,
None,
);
let emitter = ToolEmitter::unified_exec(
&entry.command,
@@ -402,6 +404,7 @@ impl UnifiedExecSessionManager {
context.turn.as_ref(),
&context.call_id,
None,
None,
);
let emitter =
ToolEmitter::unified_exec(command, cwd, ExecCommandSource::UnifiedExecStartup, None);

View File

@@ -37,6 +37,8 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
with_escalated_permissions: None,
justification: None,
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};
let policy = SandboxPolicy::new_read_only_policy();

View File

@@ -86,6 +86,8 @@ impl EscalateServer {
with_escalated_permissions: None,
justification: None,
arg0: None,
max_output_tokens: None,
max_output_chars: None,
},
get_platform_sandbox().unwrap_or(SandboxType::None),
&sandbox_policy,

View File

@@ -45,6 +45,8 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
with_escalated_permissions: None,
justification: None,
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
@@ -148,6 +150,8 @@ async fn assert_network_blocked(cmd: &[&str]) {
with_escalated_permissions: None,
justification: None,
arg0: None,
max_output_tokens: None,
max_output_chars: None,
};
let sandbox_policy = SandboxPolicy::new_read_only_policy();

View File

@@ -322,6 +322,10 @@ pub struct ShellToolCallParams {
pub with_escalated_permissions: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub justification: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_tokens: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_chars: Option<usize>,
}
/// If the `name` of a `ResponseItem::FunctionCall` is `shell_command`, the
@@ -338,6 +342,10 @@ pub struct ShellCommandToolCallParams {
pub with_escalated_permissions: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub justification: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_tokens: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_chars: Option<usize>,
}
/// Responses API compatible content items that can be returned by a tool call.
@@ -650,6 +658,8 @@ mod tests {
timeout_ms: Some(1000),
with_escalated_permissions: None,
justification: None,
max_output_tokens: None,
max_output_chars: None,
},
params
);