mirror of
https://github.com/openai/codex.git
synced 2026-02-02 15:03:38 +00:00
Compare commits
3 Commits
fix/otlp-l
...
shell-tool
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
da2e1d2ba3 | ||
|
|
528e7fde9d | ||
|
|
2e44082a30 |
@@ -1175,6 +1175,8 @@ impl CodexMessageProcessor {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let effective_policy = params
|
||||
|
||||
@@ -3072,6 +3072,8 @@ mod tests {
|
||||
with_escalated_permissions: Some(true),
|
||||
justification: Some("test".to_string()),
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let params2 = ExecParams {
|
||||
@@ -3082,6 +3084,8 @@ mod tests {
|
||||
env: HashMap::new(),
|
||||
justification: params.justification.clone(),
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
|
||||
@@ -6,6 +6,7 @@ use crate::truncate::truncate_function_output_items_with_policy;
|
||||
use crate::truncate::truncate_text;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::models::ShellToolCallParams;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use codex_protocol::protocol::TokenUsageInfo;
|
||||
use std::ops::Deref;
|
||||
@@ -130,6 +131,47 @@ impl ContextManager {
|
||||
normalize::remove_orphan_outputs(&mut self.items);
|
||||
}
|
||||
|
||||
fn get_shell_truncation_policy(&self, call_id: &str) -> Option<TruncationPolicy> {
|
||||
let call = self.get_call_for_call_id(call_id)?;
|
||||
match call {
|
||||
ResponseItem::FunctionCall { arguments, .. } => {
|
||||
let shell_tool_call_params =
|
||||
serde_json::from_str::<ShellToolCallParams>(&arguments).ok()?;
|
||||
Self::create_truncation_policy(
|
||||
shell_tool_call_params.max_output_tokens,
|
||||
shell_tool_call_params.max_output_chars,
|
||||
)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn create_truncation_policy(
|
||||
max_output_tokens: Option<usize>,
|
||||
max_output_chars: Option<usize>,
|
||||
) -> Option<TruncationPolicy> {
|
||||
if let Some(max_output_tokens) = max_output_tokens {
|
||||
Some(TruncationPolicy::Tokens(max_output_tokens))
|
||||
} else {
|
||||
max_output_chars.map(TruncationPolicy::Bytes)
|
||||
}
|
||||
}
|
||||
|
||||
fn get_call_for_call_id(&self, call_id: &str) -> Option<ResponseItem> {
|
||||
self.items.iter().find_map(|item| match item {
|
||||
ResponseItem::FunctionCall {
|
||||
call_id: existing, ..
|
||||
} => {
|
||||
if existing == call_id {
|
||||
Some(item.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns a clone of the contents in the transcript.
|
||||
fn contents(&self) -> Vec<ResponseItem> {
|
||||
self.items.clone()
|
||||
@@ -143,13 +185,12 @@ impl ContextManager {
|
||||
let policy_with_serialization_budget = policy.mul(1.2);
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { call_id, output } => {
|
||||
let truncated =
|
||||
truncate_text(output.content.as_str(), policy_with_serialization_budget);
|
||||
let truncation_policy_override = self.get_shell_truncation_policy(call_id);
|
||||
let truncation_policy =
|
||||
truncation_policy_override.unwrap_or(policy_with_serialization_budget);
|
||||
let truncated = truncate_text(output.content.as_str(), truncation_policy);
|
||||
let truncated_items = output.content_items.as_ref().map(|items| {
|
||||
truncate_function_output_items_with_policy(
|
||||
items,
|
||||
policy_with_serialization_budget,
|
||||
)
|
||||
truncate_function_output_items_with_policy(items, truncation_policy)
|
||||
});
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
|
||||
@@ -57,6 +57,8 @@ pub struct ExecParams {
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub arg0: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
/// Mechanism to terminate an exec invocation before it finishes naturally.
|
||||
@@ -141,6 +143,8 @@ pub async fn process_exec_tool_call(
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0: _,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
} = params;
|
||||
|
||||
let (program, args) = command.split_first().ok_or_else(|| {
|
||||
@@ -158,6 +162,8 @@ pub async fn process_exec_tool_call(
|
||||
expiration,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
};
|
||||
|
||||
let manager = SandboxManager::new();
|
||||
@@ -189,6 +195,8 @@ pub(crate) async fn execute_exec_env(
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
} = env;
|
||||
|
||||
let params = ExecParams {
|
||||
@@ -199,6 +207,8 @@ pub(crate) async fn execute_exec_env(
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
@@ -841,6 +851,8 @@ mod tests {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let output = exec(params, SandboxType::None, &SandboxPolicy::ReadOnly, None).await?;
|
||||
@@ -886,6 +898,8 @@ mod tests {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
tokio::spawn(async move {
|
||||
tokio::time::sleep(Duration::from_millis(1_000)).await;
|
||||
|
||||
@@ -76,6 +76,7 @@ macro_rules! model_family {
|
||||
(
|
||||
$slug:expr, $family:expr $(, $key:ident : $value:expr )* $(,)?
|
||||
) => {{
|
||||
let truncation_policy = TruncationPolicy::Bytes(10_000);
|
||||
// defaults
|
||||
#[allow(unused_mut)]
|
||||
let mut mf = ModelFamily {
|
||||
@@ -90,10 +91,10 @@ macro_rules! model_family {
|
||||
experimental_supported_tools: Vec::new(),
|
||||
effective_context_window_percent: 95,
|
||||
support_verbosity: false,
|
||||
shell_type: ConfigShellToolType::Default,
|
||||
shell_type: ConfigShellToolType::Default(truncation_policy),
|
||||
default_verbosity: None,
|
||||
default_reasoning_effort: None,
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
truncation_policy,
|
||||
};
|
||||
|
||||
// apply overrides
|
||||
@@ -138,6 +139,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
} else if slug.starts_with("gpt-3.5") {
|
||||
model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
|
||||
} else if slug.starts_with("test-gpt-5") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
@@ -150,13 +152,13 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
"test_sync_tool".to_string(),
|
||||
],
|
||||
supports_parallel_tool_calls: true,
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
support_verbosity: true,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
)
|
||||
|
||||
// Internal models.
|
||||
} else if slug.starts_with("codex-exp-") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
@@ -168,41 +170,44 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
"list_dir".to_string(),
|
||||
"read_file".to_string(),
|
||||
],
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
supports_parallel_tool_calls: true,
|
||||
support_verbosity: true,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
truncation_policy: truncation_policy,
|
||||
)
|
||||
|
||||
// Production models.
|
||||
} else if slug.starts_with("gpt-5.1-codex-max") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
|
||||
base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(),
|
||||
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
supports_parallel_tool_calls: true,
|
||||
support_verbosity: false,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
truncation_policy: truncation_policy,
|
||||
)
|
||||
} else if slug.starts_with("gpt-5-codex")
|
||||
|| slug.starts_with("gpt-5.1-codex")
|
||||
|| slug.starts_with("codex-")
|
||||
{
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, slug,
|
||||
supports_reasoning_summaries: true,
|
||||
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
|
||||
base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
|
||||
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
supports_parallel_tool_calls: true,
|
||||
support_verbosity: false,
|
||||
truncation_policy: TruncationPolicy::Tokens(10_000),
|
||||
truncation_policy: truncation_policy,
|
||||
)
|
||||
} else if slug.starts_with("gpt-5.1") {
|
||||
let truncation_policy = TruncationPolicy::Tokens(10_000);
|
||||
model_family!(
|
||||
slug, "gpt-5.1",
|
||||
supports_reasoning_summaries: true,
|
||||
@@ -212,7 +217,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
|
||||
default_reasoning_effort: Some(ReasoningEffort::Medium),
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
shell_type: ConfigShellToolType::ShellCommand(truncation_policy),
|
||||
supports_parallel_tool_calls: true,
|
||||
)
|
||||
} else if slug.starts_with("gpt-5") {
|
||||
@@ -220,7 +225,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
slug, "gpt-5",
|
||||
supports_reasoning_summaries: true,
|
||||
needs_special_apply_patch_instructions: true,
|
||||
shell_type: ConfigShellToolType::Default,
|
||||
shell_type: ConfigShellToolType::Default(TruncationPolicy::Bytes(10_000)),
|
||||
support_verbosity: true,
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
)
|
||||
@@ -230,6 +235,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
|
||||
}
|
||||
|
||||
pub fn derive_default_model_family(model: &str) -> ModelFamily {
|
||||
let truncation_policy = TruncationPolicy::Bytes(10_000);
|
||||
ModelFamily {
|
||||
slug: model.to_string(),
|
||||
family: model.to_string(),
|
||||
@@ -242,9 +248,9 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
|
||||
experimental_supported_tools: Vec::new(),
|
||||
effective_context_window_percent: 95,
|
||||
support_verbosity: false,
|
||||
shell_type: ConfigShellToolType::Default,
|
||||
shell_type: ConfigShellToolType::Default(truncation_policy),
|
||||
default_verbosity: None,
|
||||
default_reasoning_effort: None,
|
||||
truncation_policy: TruncationPolicy::Bytes(10_000),
|
||||
truncation_policy,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -58,6 +58,8 @@ pub struct CommandSpec {
|
||||
pub expiration: ExecExpiration,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -70,6 +72,8 @@ pub struct ExecEnv {
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub arg0: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
pub enum SandboxPreference {
|
||||
@@ -184,6 +188,8 @@ impl SandboxManager {
|
||||
with_escalated_permissions: spec.with_escalated_permissions,
|
||||
justification: spec.justification,
|
||||
arg0: arg0_override,
|
||||
max_output_tokens: spec.max_output_tokens,
|
||||
max_output_chars: spec.max_output_chars,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -102,6 +102,8 @@ impl SessionTask for UserShellCommandTask {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let stdout_stream = Some(StdoutStream {
|
||||
|
||||
@@ -15,6 +15,8 @@ use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use crate::truncate::formatted_truncate_text;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
@@ -29,6 +31,7 @@ pub(crate) struct ToolEventCtx<'a> {
|
||||
pub turn: &'a TurnContext,
|
||||
pub call_id: &'a str,
|
||||
pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
pub override_truncation_policy: Option<&'a TruncationPolicy>,
|
||||
}
|
||||
|
||||
impl<'a> ToolEventCtx<'a> {
|
||||
@@ -37,12 +40,14 @@ impl<'a> ToolEventCtx<'a> {
|
||||
turn: &'a TurnContext,
|
||||
call_id: &'a str,
|
||||
turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
override_truncation_policy: Option<&'a TruncationPolicy>,
|
||||
) -> Self {
|
||||
Self {
|
||||
session,
|
||||
turn,
|
||||
call_id,
|
||||
turn_diff_tracker,
|
||||
override_truncation_policy,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -255,13 +260,13 @@ impl ToolEmitter {
|
||||
fn format_exec_output_for_model(
|
||||
&self,
|
||||
output: &ExecToolCallOutput,
|
||||
ctx: ToolEventCtx<'_>,
|
||||
truncation_policy: &TruncationPolicy,
|
||||
) -> String {
|
||||
match self {
|
||||
Self::Shell { freeform: true, .. } => {
|
||||
super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
|
||||
super::format_exec_output_for_model_freeform(output, *truncation_policy)
|
||||
}
|
||||
_ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
|
||||
_ => super::format_exec_output_for_model_structured(output, *truncation_policy),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -270,9 +275,12 @@ impl ToolEmitter {
|
||||
ctx: ToolEventCtx<'_>,
|
||||
out: Result<ExecToolCallOutput, ToolError>,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let truncation_policy = ctx
|
||||
.override_truncation_policy
|
||||
.unwrap_or(&ctx.turn.truncation_policy);
|
||||
let (event, result) = match out {
|
||||
Ok(output) => {
|
||||
let content = self.format_exec_output_for_model(&output, ctx);
|
||||
let content = self.format_exec_output_for_model(&output, truncation_policy);
|
||||
let exit_code = output.exit_code;
|
||||
let event = ToolEventStage::Success(output);
|
||||
let result = if exit_code == 0 {
|
||||
@@ -284,24 +292,26 @@ impl ToolEmitter {
|
||||
}
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
|
||||
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
let response = self.format_exec_output_for_model(&output, ctx);
|
||||
let response = self.format_exec_output_for_model(&output, truncation_policy);
|
||||
let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
|
||||
let result = Err(FunctionCallError::RespondToModel(response));
|
||||
(event, result)
|
||||
}
|
||||
Err(ToolError::Codex(err)) => {
|
||||
let message = format!("execution error: {err:?}");
|
||||
let event = ToolEventStage::Failure(ToolEventFailure::Message(message.clone()));
|
||||
let result = Err(FunctionCallError::RespondToModel(message));
|
||||
let formatted_error = formatted_truncate_text(&err.to_string(), *truncation_policy);
|
||||
let message = format!("execution error: {formatted_error}");
|
||||
let event = ToolEventStage::Failure(ToolEventFailure::Message(message));
|
||||
let result = Err(FunctionCallError::RespondToModel(formatted_error));
|
||||
(event, result)
|
||||
}
|
||||
Err(ToolError::Rejected(msg)) => {
|
||||
let formatted_msg = formatted_truncate_text(&msg, *truncation_policy);
|
||||
// Normalize common rejection messages for exec tools so tests and
|
||||
// users see a clear, consistent phrase.
|
||||
let normalized = if msg == "rejected by user" {
|
||||
let normalized = if formatted_msg == "rejected by user" {
|
||||
"exec command rejected by user".to_string()
|
||||
} else {
|
||||
msg
|
||||
formatted_msg
|
||||
};
|
||||
let event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
|
||||
let result = Err(FunctionCallError::RespondToModel(normalized));
|
||||
|
||||
@@ -100,6 +100,7 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
None,
|
||||
);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
@@ -127,6 +128,7 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
None,
|
||||
);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
Ok(ToolOutput::Function {
|
||||
|
||||
@@ -27,6 +27,7 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
|
||||
pub struct ShellHandler;
|
||||
|
||||
@@ -42,6 +43,8 @@ impl ShellHandler {
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
arg0: None,
|
||||
max_output_tokens: params.max_output_tokens,
|
||||
max_output_chars: params.max_output_chars,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -64,6 +67,8 @@ impl ShellCommandHandler {
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
arg0: None,
|
||||
max_output_tokens: params.max_output_tokens,
|
||||
max_output_chars: params.max_output_chars,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -209,6 +214,9 @@ impl ShellHandler {
|
||||
)));
|
||||
}
|
||||
|
||||
let override_truncation_policy =
|
||||
create_truncation_policy(exec_params.max_output_tokens, exec_params.max_output_chars);
|
||||
|
||||
// Intercept apply_patch if present.
|
||||
match codex_apply_patch::maybe_parse_apply_patch_verified(
|
||||
&exec_params.command,
|
||||
@@ -237,6 +245,7 @@ impl ShellHandler {
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
override_truncation_policy.as_ref(),
|
||||
);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
@@ -263,6 +272,7 @@ impl ShellHandler {
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
Some(&tracker),
|
||||
override_truncation_policy.as_ref(),
|
||||
);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
return Ok(ToolOutput::Function {
|
||||
@@ -294,7 +304,13 @@ impl ShellHandler {
|
||||
source,
|
||||
freeform,
|
||||
);
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
None,
|
||||
override_truncation_policy.as_ref(),
|
||||
);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
let req = ShellRequest {
|
||||
@@ -304,6 +320,8 @@ impl ShellHandler {
|
||||
env: exec_params.env.clone(),
|
||||
with_escalated_permissions: exec_params.with_escalated_permissions,
|
||||
justification: exec_params.justification.clone(),
|
||||
max_output_tokens: exec_params.max_output_tokens,
|
||||
max_output_chars: exec_params.max_output_chars,
|
||||
approval_requirement: create_approval_requirement_for_command(
|
||||
&turn.exec_policy,
|
||||
&exec_params.command,
|
||||
@@ -323,7 +341,13 @@ impl ShellHandler {
|
||||
let out = orchestrator
|
||||
.run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
|
||||
.await;
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
|
||||
let event_ctx = ToolEventCtx::new(
|
||||
session.as_ref(),
|
||||
turn.as_ref(),
|
||||
&call_id,
|
||||
None,
|
||||
override_truncation_policy.as_ref(),
|
||||
);
|
||||
let content = emitter.finish(event_ctx, out).await?;
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
@@ -333,6 +357,16 @@ impl ShellHandler {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_truncation_policy(
|
||||
max_output_tokens: Option<usize>,
|
||||
max_output_chars: Option<usize>,
|
||||
) -> Option<TruncationPolicy> {
|
||||
if let Some(max_output_tokens) = max_output_tokens {
|
||||
Some(TruncationPolicy::Tokens(max_output_tokens))
|
||||
} else {
|
||||
max_output_chars.map(TruncationPolicy::Bytes)
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::PathBuf;
|
||||
|
||||
@@ -162,6 +162,7 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
context.turn.as_ref(),
|
||||
&context.call_id,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
let emitter = ToolEmitter::unified_exec(
|
||||
&command,
|
||||
|
||||
@@ -116,6 +116,8 @@ impl ToolRouter {
|
||||
timeout_ms: exec.timeout_ms,
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
Ok(Some(ToolCall {
|
||||
tool_name: "local_shell".to_string(),
|
||||
|
||||
@@ -72,6 +72,8 @@ impl ApplyPatchRuntime {
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ pub mod unified_exec;
|
||||
|
||||
/// Shared helper to construct a CommandSpec from a tokenized command line.
|
||||
/// Validates that at least a program is present.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) fn build_command_spec(
|
||||
command: &[String],
|
||||
cwd: &Path,
|
||||
@@ -23,6 +24,8 @@ pub(crate) fn build_command_spec(
|
||||
expiration: ExecExpiration,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
max_output_tokens: Option<usize>,
|
||||
max_output_chars: Option<usize>,
|
||||
) -> Result<CommandSpec, ToolError> {
|
||||
let (program, args) = command
|
||||
.split_first()
|
||||
@@ -35,5 +38,7 @@ pub(crate) fn build_command_spec(
|
||||
expiration,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
max_output_tokens,
|
||||
max_output_chars,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -31,6 +31,8 @@ pub struct ShellRequest {
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
@@ -136,6 +138,8 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
|
||||
req.timeout_ms.into(),
|
||||
req.with_escalated_permissions,
|
||||
req.justification.clone(),
|
||||
req.max_output_tokens,
|
||||
req.max_output_chars,
|
||||
)?;
|
||||
let env = attempt
|
||||
.env_for(spec)
|
||||
|
||||
@@ -35,6 +35,8 @@ pub struct UnifiedExecRequest {
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub max_output_chars: Option<usize>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
@@ -73,6 +75,8 @@ impl UnifiedExecRequest {
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
approval_requirement,
|
||||
}
|
||||
}
|
||||
@@ -154,6 +158,8 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
|
||||
ExecExpiration::DefaultTimeout,
|
||||
req.with_escalated_permissions,
|
||||
req.justification.clone(),
|
||||
req.max_output_tokens,
|
||||
req.max_output_chars,
|
||||
)
|
||||
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
|
||||
let exec_env = attempt
|
||||
|
||||
@@ -8,6 +8,7 @@ use crate::tools::handlers::apply_patch::ApplyPatchToolType;
|
||||
use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool;
|
||||
use crate::tools::handlers::apply_patch::create_apply_patch_json_tool;
|
||||
use crate::tools::registry::ToolRegistryBuilder;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value as JsonValue;
|
||||
@@ -17,7 +18,7 @@ use std::collections::HashMap;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ConfigShellToolType {
|
||||
Default,
|
||||
Default(TruncationPolicy),
|
||||
Local,
|
||||
UnifiedExec,
|
||||
/// Do not include a shell tool by default. Useful when using Codex
|
||||
@@ -26,7 +27,7 @@ pub enum ConfigShellToolType {
|
||||
/// to customize agent behavior.
|
||||
Disabled,
|
||||
/// Takes a command as a single string to be run in the user's default shell.
|
||||
ShellCommand,
|
||||
ShellCommand(TruncationPolicy),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -264,7 +265,7 @@ fn create_write_stdin_tool() -> ToolSpec {
|
||||
})
|
||||
}
|
||||
|
||||
fn create_shell_tool() -> ToolSpec {
|
||||
fn create_shell_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
@@ -298,6 +299,24 @@ fn create_shell_tool() -> ToolSpec {
|
||||
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
||||
},
|
||||
);
|
||||
match truncation_policy {
|
||||
TruncationPolicy::Tokens(_) => {
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("Maximum number of tokens to return from stdout/stderr. Excess tokens will be truncated".to_string()),
|
||||
},
|
||||
);
|
||||
}
|
||||
TruncationPolicy::Bytes(_) => {
|
||||
properties.insert(
|
||||
"max_output_chars".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("Maximum number of characters to return from stdout/stderr. Excess characters will be truncated".to_string()),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
|
||||
@@ -328,7 +347,7 @@ Examples of valid command strings:
|
||||
})
|
||||
}
|
||||
|
||||
fn create_shell_command_tool() -> ToolSpec {
|
||||
fn create_shell_command_tool(truncation_policy: TruncationPolicy) -> ToolSpec {
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"command".to_string(),
|
||||
@@ -362,6 +381,30 @@ fn create_shell_command_tool() -> ToolSpec {
|
||||
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
|
||||
},
|
||||
);
|
||||
match truncation_policy {
|
||||
TruncationPolicy::Tokens(_) => {
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"Maximum number of tokens to return. Excess output will be truncated."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
}
|
||||
TruncationPolicy::Bytes(_) => {
|
||||
properties.insert(
|
||||
"max_output_chars".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"Maximum number of tokens to return. Excess output will be truncated."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output.
|
||||
@@ -999,8 +1042,8 @@ pub(crate) fn build_specs(
|
||||
let shell_command_handler = Arc::new(ShellCommandHandler);
|
||||
|
||||
match &config.shell_type {
|
||||
ConfigShellToolType::Default => {
|
||||
builder.push_spec(create_shell_tool());
|
||||
ConfigShellToolType::Default(truncation_policy) => {
|
||||
builder.push_spec(create_shell_tool(*truncation_policy));
|
||||
}
|
||||
ConfigShellToolType::Local => {
|
||||
builder.push_spec(ToolSpec::LocalShell {});
|
||||
@@ -1014,8 +1057,8 @@ pub(crate) fn build_specs(
|
||||
ConfigShellToolType::Disabled => {
|
||||
// Do nothing.
|
||||
}
|
||||
ConfigShellToolType::ShellCommand => {
|
||||
builder.push_spec(create_shell_command_tool());
|
||||
ConfigShellToolType::ShellCommand(truncation_policy) => {
|
||||
builder.push_spec(create_shell_command_tool(*truncation_policy));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1158,11 +1201,11 @@ mod tests {
|
||||
|
||||
fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> {
|
||||
match config.shell_type {
|
||||
ConfigShellToolType::Default => Some("shell"),
|
||||
ConfigShellToolType::Default(_) => Some("shell"),
|
||||
ConfigShellToolType::Local => Some("local_shell"),
|
||||
ConfigShellToolType::UnifiedExec => None,
|
||||
ConfigShellToolType::Disabled => None,
|
||||
ConfigShellToolType::ShellCommand => Some("shell_command"),
|
||||
ConfigShellToolType::ShellCommand(_) => Some("shell_command"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1908,7 +1951,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_shell_tool() {
|
||||
let tool = super::create_shell_tool();
|
||||
let tool = super::create_shell_tool(TruncationPolicy::Bytes(10_000));
|
||||
let ToolSpec::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
@@ -1938,7 +1981,7 @@ Examples of valid command strings:
|
||||
|
||||
#[test]
|
||||
fn test_shell_command_tool() {
|
||||
let tool = super::create_shell_command_tool();
|
||||
let tool = super::create_shell_command_tool(TruncationPolicy::Tokens(10_000));
|
||||
let ToolSpec::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
|
||||
@@ -174,6 +174,7 @@ impl UnifiedExecSessionManager {
|
||||
turn_ref.as_ref(),
|
||||
request.call_id,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
};
|
||||
interaction_emitter
|
||||
@@ -369,6 +370,7 @@ impl UnifiedExecSessionManager {
|
||||
entry.turn_ref.as_ref(),
|
||||
&entry.call_id,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
let emitter = ToolEmitter::unified_exec(
|
||||
&entry.command,
|
||||
@@ -402,6 +404,7 @@ impl UnifiedExecSessionManager {
|
||||
context.turn.as_ref(),
|
||||
&context.call_id,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
let emitter =
|
||||
ToolEmitter::unified_exec(command, cwd, ExecCommandSource::UnifiedExecStartup, None);
|
||||
|
||||
@@ -37,6 +37,8 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let policy = SandboxPolicy::new_read_only_policy();
|
||||
|
||||
@@ -86,6 +86,8 @@ impl EscalateServer {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
},
|
||||
get_platform_sandbox().unwrap_or(SandboxType::None),
|
||||
&sandbox_policy,
|
||||
|
||||
@@ -45,6 +45,8 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
|
||||
@@ -148,6 +150,8 @@ async fn assert_network_blocked(cmd: &[&str]) {
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
};
|
||||
|
||||
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
||||
|
||||
@@ -322,6 +322,10 @@ pub struct ShellToolCallParams {
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub justification: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_tokens: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
/// If the `name` of a `ResponseItem::FunctionCall` is `shell_command`, the
|
||||
@@ -338,6 +342,10 @@ pub struct ShellCommandToolCallParams {
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub justification: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_tokens: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_chars: Option<usize>,
|
||||
}
|
||||
|
||||
/// Responses API compatible content items that can be returned by a tool call.
|
||||
@@ -650,6 +658,8 @@ mod tests {
|
||||
timeout_ms: Some(1000),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
max_output_tokens: None,
|
||||
max_output_chars: None,
|
||||
},
|
||||
params
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user