mirror of
https://github.com/openai/codex.git
synced 2026-02-05 16:33:42 +00:00
Compare commits
2 Commits
centralize
...
pr5657
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4a62376e6b | ||
|
|
f8af4f5c8d |
@@ -17,6 +17,7 @@ use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_protocol::protocol::RateLimitSnapshot;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::TurnAbortReason;
|
||||
use paste::paste;
|
||||
@@ -847,6 +848,8 @@ pub struct ExecCommandApprovalParams {
|
||||
pub cwd: PathBuf,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
@@ -1063,6 +1066,7 @@ mod tests {
|
||||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
cwd: PathBuf::from("/tmp"),
|
||||
reason: Some("because tests".to_string()),
|
||||
risk: None,
|
||||
parsed_cmd: vec![ParsedCommand::Unknown {
|
||||
cmd: "echo hello".to_string(),
|
||||
}],
|
||||
|
||||
@@ -1447,6 +1447,7 @@ async fn apply_bespoke_event_handling(
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
}) => {
|
||||
let params = ExecCommandApprovalParams {
|
||||
@@ -1455,6 +1456,7 @@ async fn apply_bespoke_event_handling(
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
};
|
||||
let rx = outgoing
|
||||
@@ -1523,6 +1525,7 @@ async fn derive_config_from_params(
|
||||
include_view_image_tool: None,
|
||||
show_raw_agent_reasoning: None,
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: Vec::new(),
|
||||
};
|
||||
|
||||
|
||||
@@ -311,6 +311,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
|
||||
],
|
||||
cwd: working_directory.clone(),
|
||||
reason: None,
|
||||
risk: None,
|
||||
parsed_cmd: vec![ParsedCommand::Unknown {
|
||||
cmd: "python3 -c 'print(42)'".to_string()
|
||||
}],
|
||||
|
||||
@@ -134,6 +134,14 @@ impl ModelClient {
|
||||
self.stream_with_task_kind(prompt, TaskKind::Regular).await
|
||||
}
|
||||
|
||||
pub fn config(&self) -> Arc<Config> {
|
||||
Arc::clone(&self.config)
|
||||
}
|
||||
|
||||
pub fn provider(&self) -> &ModelProviderInfo {
|
||||
&self.provider
|
||||
}
|
||||
|
||||
pub(crate) async fn stream_with_task_kind(
|
||||
&self,
|
||||
prompt: &Prompt,
|
||||
|
||||
@@ -59,7 +59,7 @@ use crate::client_common::ResponseEvent;
|
||||
use crate::config::Config;
|
||||
use crate::config_types::McpServerTransportConfig;
|
||||
use crate::config_types::ShellEnvironmentPolicy;
|
||||
use crate::context_manager::ContextManager;
|
||||
use crate::conversation_history::ConversationHistory;
|
||||
use crate::environment_context::EnvironmentContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result as CodexResult;
|
||||
@@ -88,6 +88,7 @@ use crate::protocol::Op;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::ReviewOutputEvent;
|
||||
use crate::protocol::SandboxCommandAssessment;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::protocol::SessionConfiguredEvent;
|
||||
use crate::protocol::StreamErrorEvent;
|
||||
@@ -755,6 +756,32 @@ impl Session {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn assess_sandbox_command(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
call_id: &str,
|
||||
command: &[String],
|
||||
failure_message: Option<&str>,
|
||||
) -> Option<SandboxCommandAssessment> {
|
||||
let config = turn_context.client.config();
|
||||
let provider = turn_context.client.provider().clone();
|
||||
let auth_manager = Arc::clone(&self.services.auth_manager);
|
||||
let otel = self.services.otel_event_manager.clone();
|
||||
crate::sandboxing::assessment::assess_command(
|
||||
config,
|
||||
provider,
|
||||
auth_manager,
|
||||
&otel,
|
||||
self.conversation_id,
|
||||
call_id,
|
||||
command,
|
||||
&turn_context.sandbox_policy,
|
||||
&turn_context.cwd,
|
||||
failure_message,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Emit an exec approval request event and await the user's decision.
|
||||
///
|
||||
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
|
||||
@@ -767,6 +794,7 @@ impl Session {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
reason: Option<String>,
|
||||
risk: Option<SandboxCommandAssessment>,
|
||||
) -> ReviewDecision {
|
||||
let sub_id = turn_context.sub_id.clone();
|
||||
// Add the tx_approve callback to the map before sending the request.
|
||||
@@ -792,6 +820,7 @@ impl Session {
|
||||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
});
|
||||
self.send_event(turn_context, event).await;
|
||||
@@ -867,7 +896,7 @@ impl Session {
|
||||
turn_context: &TurnContext,
|
||||
rollout_items: &[RolloutItem],
|
||||
) -> Vec<ResponseItem> {
|
||||
let mut history = ContextManager::new();
|
||||
let mut history = ConversationHistory::new();
|
||||
for item in rollout_items {
|
||||
match item {
|
||||
RolloutItem::ResponseItem(response_item) => {
|
||||
@@ -941,7 +970,7 @@ impl Session {
|
||||
state.history_snapshot()
|
||||
}
|
||||
|
||||
pub(crate) async fn clone_history(&self) -> ContextManager {
|
||||
pub(crate) async fn clone_history(&self) -> ConversationHistory {
|
||||
let state = self.state.lock().await;
|
||||
state.clone_history()
|
||||
}
|
||||
@@ -1524,7 +1553,7 @@ pub(crate) async fn run_task(
|
||||
// For normal turns, continue recording to the session history as before.
|
||||
let is_review_mode = turn_context.is_review_mode;
|
||||
|
||||
let mut review_thread_history: ContextManager = ContextManager::new();
|
||||
let mut review_thread_history: ConversationHistory = ConversationHistory::new();
|
||||
if is_review_mode {
|
||||
// Seed review threads with environment context so the model knows the working directory.
|
||||
review_thread_history
|
||||
@@ -2843,7 +2872,7 @@ mod tests {
|
||||
turn_context: &TurnContext,
|
||||
) -> (Vec<RolloutItem>, Vec<ResponseItem>) {
|
||||
let mut rollout_items = Vec::new();
|
||||
let mut live_history = ContextManager::new();
|
||||
let mut live_history = ConversationHistory::new();
|
||||
|
||||
let initial_context = session.build_initial_context(turn_context);
|
||||
for item in &initial_context {
|
||||
|
||||
@@ -223,6 +223,9 @@ pub struct Config {
|
||||
|
||||
pub tools_web_search_request: bool,
|
||||
|
||||
/// When `true`, run a model-based assessment for commands denied by the sandbox.
|
||||
pub experimental_sandbox_command_assessment: bool,
|
||||
|
||||
pub use_experimental_streamable_shell_tool: bool,
|
||||
|
||||
/// If set to `true`, used only the experimental unified exec tool.
|
||||
@@ -958,6 +961,7 @@ pub struct ConfigToml {
|
||||
pub experimental_use_unified_exec_tool: Option<bool>,
|
||||
pub experimental_use_rmcp_client: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
}
|
||||
|
||||
impl From<ConfigToml> for UserSavedConfig {
|
||||
@@ -1118,6 +1122,7 @@ pub struct ConfigOverrides {
|
||||
pub include_view_image_tool: Option<bool>,
|
||||
pub show_raw_agent_reasoning: Option<bool>,
|
||||
pub tools_web_search_request: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
/// Additional directories that should be treated as writable roots for this session.
|
||||
pub additional_writable_roots: Vec<PathBuf>,
|
||||
}
|
||||
@@ -1147,6 +1152,7 @@ impl Config {
|
||||
include_view_image_tool: include_view_image_tool_override,
|
||||
show_raw_agent_reasoning,
|
||||
tools_web_search_request: override_tools_web_search_request,
|
||||
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
|
||||
additional_writable_roots,
|
||||
} = overrides;
|
||||
|
||||
@@ -1172,6 +1178,7 @@ impl Config {
|
||||
include_apply_patch_tool: include_apply_patch_tool_override,
|
||||
include_view_image_tool: include_view_image_tool_override,
|
||||
web_search_request: override_tools_web_search_request,
|
||||
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
|
||||
};
|
||||
|
||||
let features = Features::from_config(&cfg, &config_profile, feature_overrides);
|
||||
@@ -1269,6 +1276,8 @@ impl Config {
|
||||
let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
|
||||
let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
|
||||
let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
|
||||
let experimental_sandbox_command_assessment =
|
||||
features.enabled(Feature::SandboxCommandAssessment);
|
||||
|
||||
let forced_chatgpt_workspace_id =
|
||||
cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
|
||||
@@ -1390,6 +1399,7 @@ impl Config {
|
||||
forced_login_method,
|
||||
include_apply_patch_tool: include_apply_patch_tool_flag,
|
||||
tools_web_search_request,
|
||||
experimental_sandbox_command_assessment,
|
||||
use_experimental_streamable_shell_tool,
|
||||
use_experimental_unified_exec_tool,
|
||||
use_experimental_use_rmcp_client,
|
||||
@@ -2873,6 +2883,7 @@ model_verbosity = "high"
|
||||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
@@ -2941,6 +2952,7 @@ model_verbosity = "high"
|
||||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
@@ -3024,6 +3036,7 @@ model_verbosity = "high"
|
||||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
@@ -3093,6 +3106,7 @@ model_verbosity = "high"
|
||||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
|
||||
@@ -26,6 +26,7 @@ pub struct ConfigProfile {
|
||||
pub experimental_use_exec_command_tool: Option<bool>,
|
||||
pub experimental_use_rmcp_client: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
pub tools_web_search: Option<bool>,
|
||||
pub tools_view_image: Option<bool>,
|
||||
/// Optional feature toggles scoped to this profile.
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
mod manager;
|
||||
pub(crate) use manager::ContextManager;
|
||||
pub mod truncation;
|
||||
@@ -1,159 +0,0 @@
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct TruncationConfig {
|
||||
pub max_bytes: usize,
|
||||
pub max_lines: usize,
|
||||
pub truncation_notice: &'static str,
|
||||
}
|
||||
|
||||
// Telemetry preview limits: keep log events smaller than model budgets.
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
|
||||
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
|
||||
"[... telemetry preview truncated ...]";
|
||||
|
||||
pub(crate) const CONTEXT_OUTPUT_TRUNCATION: TruncationConfig = TruncationConfig {
|
||||
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
|
||||
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
|
||||
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
|
||||
};
|
||||
|
||||
pub(crate) fn truncate_with_config(content: &str, config: TruncationConfig) -> String {
|
||||
let TruncationConfig {
|
||||
max_bytes,
|
||||
max_lines,
|
||||
truncation_notice,
|
||||
} = config;
|
||||
|
||||
let truncated_slice = take_bytes_at_char_boundary(content, max_bytes);
|
||||
let truncated_by_bytes = truncated_slice.len() < content.len();
|
||||
|
||||
let mut preview = String::new();
|
||||
let mut lines_iter = truncated_slice.lines();
|
||||
for idx in 0..max_lines {
|
||||
match lines_iter.next() {
|
||||
Some(line) => {
|
||||
if idx > 0 {
|
||||
preview.push('\n');
|
||||
}
|
||||
preview.push_str(line);
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
let truncated_by_lines = lines_iter.next().is_some();
|
||||
|
||||
if !truncated_by_bytes && !truncated_by_lines {
|
||||
return content.to_string();
|
||||
}
|
||||
|
||||
if preview.len() < truncated_slice.len()
|
||||
&& truncated_slice
|
||||
.as_bytes()
|
||||
.get(preview.len())
|
||||
.is_some_and(|byte| *byte == b'\n')
|
||||
{
|
||||
preview.push('\n');
|
||||
}
|
||||
|
||||
if !preview.is_empty() && !preview.ends_with('\n') {
|
||||
preview.push('\n');
|
||||
}
|
||||
|
||||
preview.push_str(truncation_notice);
|
||||
preview
|
||||
}
|
||||
|
||||
pub(crate) fn truncate_context_output(content: &str) -> String {
|
||||
truncate_with_config(content, CONTEXT_OUTPUT_TRUNCATION)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn truncate_with_config_returns_original_within_limits() {
|
||||
let content = "short output";
|
||||
let config = TruncationConfig {
|
||||
max_bytes: 64,
|
||||
max_lines: 5,
|
||||
truncation_notice: "[notice]",
|
||||
};
|
||||
assert_eq!(truncate_with_config(content, config), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_with_config_truncates_by_bytes() {
|
||||
let config = TruncationConfig {
|
||||
max_bytes: 16,
|
||||
max_lines: 10,
|
||||
truncation_notice: "[notice]",
|
||||
};
|
||||
let content = "abcdefghijklmnopqrstuvwxyz";
|
||||
let truncated = truncate_with_config(content, config);
|
||||
assert!(truncated.contains("[notice]"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_with_config_truncates_by_lines() {
|
||||
let config = TruncationConfig {
|
||||
max_bytes: 1024,
|
||||
max_lines: 2,
|
||||
truncation_notice: "[notice]",
|
||||
};
|
||||
let content = "l1\nl2\nl3\nl4";
|
||||
let truncated = truncate_with_config(content, config);
|
||||
assert!(truncated.lines().count() <= 3);
|
||||
assert!(truncated.contains("[notice]"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_returns_original_within_limits() {
|
||||
let content = "short output";
|
||||
let config = TruncationConfig {
|
||||
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
|
||||
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
|
||||
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
|
||||
};
|
||||
assert_eq!(truncate_with_config(content, config), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_bytes() {
|
||||
let config = TruncationConfig {
|
||||
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
|
||||
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
|
||||
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
|
||||
};
|
||||
let content = "x".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 8);
|
||||
let preview = truncate_with_config(&content, config);
|
||||
|
||||
assert!(preview.contains(TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
assert!(
|
||||
preview.len()
|
||||
<= TELEMETRY_PREVIEW_MAX_BYTES + TELEMETRY_PREVIEW_TRUNCATION_NOTICE.len() + 1
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_lines() {
|
||||
let config = TruncationConfig {
|
||||
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
|
||||
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
|
||||
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
|
||||
};
|
||||
let content = (0..(TELEMETRY_PREVIEW_MAX_LINES + 5))
|
||||
.map(|idx| format!("line {idx}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let preview = truncate_with_config(&content, config);
|
||||
let lines: Vec<&str> = preview.lines().collect();
|
||||
|
||||
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
|
||||
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::context_manager::truncation::truncate_context_output;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
@@ -7,13 +6,13 @@ use tracing::error;
|
||||
|
||||
/// Transcript of conversation history
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct ContextManager {
|
||||
pub(crate) struct ConversationHistory {
|
||||
/// The oldest items are at the beginning of the vector.
|
||||
items: Vec<ResponseItem>,
|
||||
token_info: Option<TokenUsageInfo>,
|
||||
}
|
||||
|
||||
impl ContextManager {
|
||||
impl ConversationHistory {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
items: Vec::new(),
|
||||
@@ -45,8 +44,7 @@ impl ContextManager {
|
||||
continue;
|
||||
}
|
||||
|
||||
let processed = Self::process_item(&item);
|
||||
self.items.push(processed);
|
||||
self.items.push(item.clone());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,29 +76,6 @@ impl ContextManager {
|
||||
self.remove_orphan_outputs();
|
||||
}
|
||||
|
||||
fn process_item(item: &ResponseItem) -> ResponseItem {
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { call_id, output } => {
|
||||
let truncated_content = truncate_context_output(output.content.as_str());
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: truncated_content,
|
||||
success: output.success,
|
||||
},
|
||||
}
|
||||
}
|
||||
ResponseItem::CustomToolCallOutput { call_id, output } => {
|
||||
let truncated = truncate_context_output(output);
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: truncated,
|
||||
}
|
||||
}
|
||||
_ => item.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a clone of the contents in the transcript.
|
||||
fn contents(&self) -> Vec<ResponseItem> {
|
||||
self.items.clone()
|
||||
@@ -131,7 +106,7 @@ impl ContextManager {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: truncate_context_output("aborted"),
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
@@ -154,7 +129,7 @@ impl ContextManager {
|
||||
idx,
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: truncate_context_output("aborted"),
|
||||
output: "aborted".to_string(),
|
||||
},
|
||||
));
|
||||
}
|
||||
@@ -178,7 +153,7 @@ impl ContextManager {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: truncate_context_output("aborted"),
|
||||
content: "aborted".to_string(),
|
||||
success: None,
|
||||
},
|
||||
},
|
||||
@@ -274,10 +249,7 @@ impl ContextManager {
|
||||
}
|
||||
|
||||
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
|
||||
self.items = items
|
||||
.into_iter()
|
||||
.map(|item| Self::process_item(&item))
|
||||
.collect();
|
||||
self.items = items;
|
||||
}
|
||||
|
||||
/// Removes the corresponding paired item for the provided `item`, if any.
|
||||
@@ -390,8 +362,6 @@ fn is_api_message(message: &ResponseItem) -> bool {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::context_manager::truncation::TELEMETRY_PREVIEW_MAX_BYTES;
|
||||
use crate::context_manager::truncation::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::LocalShellAction;
|
||||
@@ -409,8 +379,8 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_history_with_items(items: Vec<ResponseItem>) -> ContextManager {
|
||||
let mut h = ContextManager::new();
|
||||
fn create_history_with_items(items: Vec<ResponseItem>) -> ConversationHistory {
|
||||
let mut h = ConversationHistory::new();
|
||||
h.record_items(items.iter());
|
||||
h
|
||||
}
|
||||
@@ -427,7 +397,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn filters_non_api_messages() {
|
||||
let mut h = ContextManager::default();
|
||||
let mut h = ConversationHistory::default();
|
||||
// System message is not an API message; Other is ignored.
|
||||
let system = ResponseItem::Message {
|
||||
id: None,
|
||||
@@ -465,61 +435,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn record_items_truncates_function_call_output() {
|
||||
let mut h = ContextManager::new();
|
||||
let long_content = "a".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 32);
|
||||
let item = ResponseItem::FunctionCallOutput {
|
||||
call_id: "call-long".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: long_content.clone(),
|
||||
success: Some(true),
|
||||
},
|
||||
};
|
||||
|
||||
h.record_items([&item]);
|
||||
|
||||
let stored = h.contents();
|
||||
let ResponseItem::FunctionCallOutput { output, .. } = &stored[0] else {
|
||||
panic!("expected FunctionCallOutput variant");
|
||||
};
|
||||
assert!(
|
||||
output
|
||||
.content
|
||||
.ends_with(TELEMETRY_PREVIEW_TRUNCATION_NOTICE),
|
||||
"truncated content should end with notice"
|
||||
);
|
||||
assert!(
|
||||
output.content.len() < long_content.len(),
|
||||
"content should shrink after truncation"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn record_items_truncates_custom_tool_output() {
|
||||
let mut h = ContextManager::new();
|
||||
let long_content = "b".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 64);
|
||||
let item = ResponseItem::CustomToolCallOutput {
|
||||
call_id: "custom-long".to_string(),
|
||||
output: long_content.clone(),
|
||||
};
|
||||
|
||||
h.record_items([&item]);
|
||||
|
||||
let stored = h.contents();
|
||||
let ResponseItem::CustomToolCallOutput { output, .. } = &stored[0] else {
|
||||
panic!("expected CustomToolCallOutput variant");
|
||||
};
|
||||
assert!(
|
||||
output.ends_with(TELEMETRY_PREVIEW_TRUNCATION_NOTICE),
|
||||
"truncated output should end with notice"
|
||||
);
|
||||
assert!(
|
||||
output.len() < long_content.len(),
|
||||
"output should shrink after truncation"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_first_item_removes_matching_output_for_function_call() {
|
||||
let items = vec![
|
||||
@@ -32,8 +32,34 @@ pub struct NewConversation {
|
||||
|
||||
/// [`ConversationManager`] is responsible for creating conversations and
|
||||
/// maintaining them in memory.
|
||||
#[derive(Clone)]
|
||||
struct ConversationEntry {
|
||||
conversation: Arc<CodexConversation>,
|
||||
session_configured: SessionConfiguredEvent,
|
||||
}
|
||||
|
||||
impl ConversationEntry {
|
||||
fn new(
|
||||
conversation: Arc<CodexConversation>,
|
||||
session_configured: SessionConfiguredEvent,
|
||||
) -> Self {
|
||||
Self {
|
||||
conversation,
|
||||
session_configured,
|
||||
}
|
||||
}
|
||||
|
||||
fn to_new_conversation(&self, conversation_id: ConversationId) -> NewConversation {
|
||||
NewConversation {
|
||||
conversation_id,
|
||||
conversation: self.conversation.clone(),
|
||||
session_configured: self.session_configured.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ConversationManager {
|
||||
conversations: Arc<RwLock<HashMap<ConversationId, Arc<CodexConversation>>>>,
|
||||
conversations: Arc<RwLock<HashMap<ConversationId, ConversationEntry>>>,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
session_source: SessionSource,
|
||||
}
|
||||
@@ -99,10 +125,11 @@ impl ConversationManager {
|
||||
};
|
||||
|
||||
let conversation = Arc::new(CodexConversation::new(codex));
|
||||
let entry = ConversationEntry::new(conversation.clone(), session_configured.clone());
|
||||
self.conversations
|
||||
.write()
|
||||
.await
|
||||
.insert(conversation_id, conversation.clone());
|
||||
.insert(conversation_id, entry);
|
||||
|
||||
Ok(NewConversation {
|
||||
conversation_id,
|
||||
@@ -118,7 +145,7 @@ impl ConversationManager {
|
||||
let conversations = self.conversations.read().await;
|
||||
conversations
|
||||
.get(&conversation_id)
|
||||
.cloned()
|
||||
.map(|entry| entry.conversation.clone())
|
||||
.ok_or_else(|| CodexErr::ConversationNotFound(conversation_id))
|
||||
}
|
||||
|
||||
@@ -129,11 +156,22 @@ impl ConversationManager {
|
||||
auth_manager: Arc<AuthManager>,
|
||||
) -> CodexResult<NewConversation> {
|
||||
let initial_history = RolloutRecorder::get_rollout_history(&rollout_path).await?;
|
||||
let CodexSpawnOk {
|
||||
codex,
|
||||
conversation_id,
|
||||
} = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
|
||||
self.finalize_spawn(codex, conversation_id).await
|
||||
if let InitialHistory::Resumed(resumed) = &initial_history
|
||||
&& let Some(existing) = self
|
||||
.conversations
|
||||
.read()
|
||||
.await
|
||||
.get(&resumed.conversation_id)
|
||||
.cloned()
|
||||
{
|
||||
Ok(existing.to_new_conversation(resumed.conversation_id))
|
||||
} else {
|
||||
let CodexSpawnOk {
|
||||
codex,
|
||||
conversation_id,
|
||||
} = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
|
||||
self.finalize_spawn(codex, conversation_id).await
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes the conversation from the manager's internal map, though the
|
||||
@@ -144,7 +182,11 @@ impl ConversationManager {
|
||||
&self,
|
||||
conversation_id: &ConversationId,
|
||||
) -> Option<Arc<CodexConversation>> {
|
||||
self.conversations.write().await.remove(conversation_id)
|
||||
self.conversations
|
||||
.write()
|
||||
.await
|
||||
.remove(conversation_id)
|
||||
.map(|entry| entry.conversation)
|
||||
}
|
||||
|
||||
/// Fork an existing conversation by taking messages up to the given position
|
||||
|
||||
@@ -39,6 +39,8 @@ pub enum Feature {
|
||||
ViewImageTool,
|
||||
/// Allow the model to request web searches.
|
||||
WebSearchRequest,
|
||||
/// Enable the model-based risk assessments for sandboxed commands.
|
||||
SandboxCommandAssessment,
|
||||
}
|
||||
|
||||
impl Feature {
|
||||
@@ -73,6 +75,7 @@ pub struct FeatureOverrides {
|
||||
pub include_apply_patch_tool: Option<bool>,
|
||||
pub include_view_image_tool: Option<bool>,
|
||||
pub web_search_request: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
}
|
||||
|
||||
impl FeatureOverrides {
|
||||
@@ -137,6 +140,7 @@ impl Features {
|
||||
let mut features = Features::with_defaults();
|
||||
|
||||
let base_legacy = LegacyFeatureToggles {
|
||||
experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
|
||||
experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
|
||||
experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool,
|
||||
experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
|
||||
@@ -154,6 +158,8 @@ impl Features {
|
||||
let profile_legacy = LegacyFeatureToggles {
|
||||
include_apply_patch_tool: config_profile.include_apply_patch_tool,
|
||||
include_view_image_tool: config_profile.include_view_image_tool,
|
||||
experimental_sandbox_command_assessment: config_profile
|
||||
.experimental_sandbox_command_assessment,
|
||||
experimental_use_freeform_apply_patch: config_profile
|
||||
.experimental_use_freeform_apply_patch,
|
||||
experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool,
|
||||
@@ -236,4 +242,10 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Stable,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::SandboxCommandAssessment,
|
||||
key: "experimental_sandbox_command_assessment",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
];
|
||||
|
||||
@@ -9,6 +9,10 @@ struct Alias {
|
||||
}
|
||||
|
||||
const ALIASES: &[Alias] = &[
|
||||
Alias {
|
||||
legacy_key: "experimental_sandbox_command_assessment",
|
||||
feature: Feature::SandboxCommandAssessment,
|
||||
},
|
||||
Alias {
|
||||
legacy_key: "experimental_use_unified_exec_tool",
|
||||
feature: Feature::UnifiedExec,
|
||||
@@ -53,6 +57,7 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
|
||||
pub struct LegacyFeatureToggles {
|
||||
pub include_apply_patch_tool: Option<bool>,
|
||||
pub include_view_image_tool: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_use_exec_command_tool: Option<bool>,
|
||||
pub experimental_use_unified_exec_tool: Option<bool>,
|
||||
@@ -69,6 +74,12 @@ impl LegacyFeatureToggles {
|
||||
self.include_apply_patch_tool,
|
||||
"include_apply_patch_tool",
|
||||
);
|
||||
set_if_some(
|
||||
features,
|
||||
Feature::SandboxCommandAssessment,
|
||||
self.experimental_sandbox_command_assessment,
|
||||
"experimental_sandbox_command_assessment",
|
||||
);
|
||||
set_if_some(
|
||||
features,
|
||||
Feature::ApplyPatchFreeform,
|
||||
|
||||
@@ -20,7 +20,7 @@ pub mod config_edit;
|
||||
pub mod config_loader;
|
||||
pub mod config_profile;
|
||||
pub mod config_types;
|
||||
mod context_manager;
|
||||
mod conversation_history;
|
||||
pub mod custom_prompts;
|
||||
mod environment_context;
|
||||
pub mod error;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::codex::Session;
|
||||
use crate::context_manager::ContextManager;
|
||||
use crate::conversation_history::ConversationHistory;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
@@ -11,7 +11,7 @@ use tracing::warn;
|
||||
pub(crate) async fn process_items(
|
||||
processed_items: Vec<crate::codex::ProcessedResponseItem>,
|
||||
is_review_mode: bool,
|
||||
review_thread_history: &mut ContextManager,
|
||||
review_thread_history: &mut ConversationHistory,
|
||||
sess: &Session,
|
||||
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
|
||||
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
||||
|
||||
275
codex-rs/core/src/sandboxing/assessment.rs
Normal file
275
codex-rs/core/src/sandboxing/assessment.rs
Normal file
@@ -0,0 +1,275 @@
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use crate::AuthManager;
|
||||
use crate::ModelProviderInfo;
|
||||
use crate::client::ModelClient;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::config::Config;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use askama::Template;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use futures::StreamExt;
|
||||
use serde_json::json;
|
||||
use tokio::time::timeout;
|
||||
use tracing::warn;
|
||||
|
||||
const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[
|
||||
"data_deletion",
|
||||
"data_exfiltration",
|
||||
"privilege_escalation",
|
||||
"system_modification",
|
||||
"network_access",
|
||||
"resource_exhaustion",
|
||||
"compliance",
|
||||
];
|
||||
|
||||
#[derive(Template)]
|
||||
#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
|
||||
struct SandboxAssessmentPromptTemplate<'a> {
|
||||
platform: &'a str,
|
||||
sandbox_policy: &'a str,
|
||||
filesystem_roots: Option<&'a str>,
|
||||
working_directory: &'a str,
|
||||
command_argv: &'a str,
|
||||
command_joined: &'a str,
|
||||
sandbox_failure_message: Option<&'a str>,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn assess_command(
|
||||
config: Arc<Config>,
|
||||
provider: ModelProviderInfo,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
parent_otel: &OtelEventManager,
|
||||
conversation_id: ConversationId,
|
||||
call_id: &str,
|
||||
command: &[String],
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
failure_message: Option<&str>,
|
||||
) -> Option<SandboxCommandAssessment> {
|
||||
if !config.experimental_sandbox_command_assessment || command.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string());
|
||||
let command_joined =
|
||||
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
|
||||
let failure = failure_message
|
||||
.map(str::trim)
|
||||
.filter(|msg| !msg.is_empty())
|
||||
.map(str::to_string);
|
||||
|
||||
let cwd_str = cwd.to_string_lossy().to_string();
|
||||
let sandbox_summary = summarize_sandbox_policy(sandbox_policy);
|
||||
let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd);
|
||||
roots.sort();
|
||||
roots.dedup();
|
||||
|
||||
let platform = std::env::consts::OS;
|
||||
let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string());
|
||||
let filesystem_roots = match roots_formatted.collect::<Vec<_>>() {
|
||||
collected if collected.is_empty() => None,
|
||||
collected => Some(collected.join(", ")),
|
||||
};
|
||||
|
||||
let prompt_template = SandboxAssessmentPromptTemplate {
|
||||
platform,
|
||||
sandbox_policy: sandbox_summary.as_str(),
|
||||
filesystem_roots: filesystem_roots.as_deref(),
|
||||
working_directory: cwd_str.as_str(),
|
||||
command_argv: command_json.as_str(),
|
||||
command_joined: command_joined.as_str(),
|
||||
sandbox_failure_message: failure.as_deref(),
|
||||
};
|
||||
let rendered_prompt = match prompt_template.render() {
|
||||
Ok(rendered) => rendered,
|
||||
Err(err) => {
|
||||
warn!("failed to render sandbox assessment prompt: {err}");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") {
|
||||
Some(split) => split,
|
||||
None => {
|
||||
warn!("rendered sandbox assessment prompt missing separator");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let system_prompt = system_prompt_section
|
||||
.strip_prefix("System Prompt:\n")
|
||||
.unwrap_or(system_prompt_section)
|
||||
.trim()
|
||||
.to_string();
|
||||
let user_prompt = user_prompt_section
|
||||
.strip_prefix("User Prompt:\n")
|
||||
.unwrap_or(user_prompt_section)
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
let prompt = Prompt {
|
||||
input: vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: user_prompt }],
|
||||
}],
|
||||
tools: Vec::new(),
|
||||
parallel_tool_calls: false,
|
||||
base_instructions_override: Some(system_prompt),
|
||||
output_schema: Some(sandbox_assessment_schema()),
|
||||
};
|
||||
|
||||
let child_otel =
|
||||
parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str());
|
||||
|
||||
let client = ModelClient::new(
|
||||
Arc::clone(&config),
|
||||
Some(auth_manager),
|
||||
child_otel,
|
||||
provider,
|
||||
config.model_reasoning_effort,
|
||||
config.model_reasoning_summary,
|
||||
conversation_id,
|
||||
);
|
||||
|
||||
let start = Instant::now();
|
||||
let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move {
|
||||
let mut stream = client.stream(&prompt).await?;
|
||||
let mut last_json: Option<String> = None;
|
||||
while let Some(event) = stream.next().await {
|
||||
match event {
|
||||
Ok(ResponseEvent::OutputItemDone(item)) => {
|
||||
if let Some(text) = response_item_text(&item) {
|
||||
last_json = Some(text);
|
||||
}
|
||||
}
|
||||
Ok(ResponseEvent::RateLimits(_)) => {}
|
||||
Ok(ResponseEvent::Completed { .. }) => break,
|
||||
Ok(_) => continue,
|
||||
Err(err) => return Err(err),
|
||||
}
|
||||
}
|
||||
Ok(last_json)
|
||||
})
|
||||
.await;
|
||||
let duration = start.elapsed();
|
||||
parent_otel.sandbox_assessment_latency(call_id, duration);
|
||||
|
||||
match assessment_result {
|
||||
Ok(Ok(Some(raw))) => match serde_json::from_str::<SandboxCommandAssessment>(raw.trim()) {
|
||||
Ok(assessment) => {
|
||||
parent_otel.sandbox_assessment(
|
||||
call_id,
|
||||
"success",
|
||||
Some(assessment.risk_level),
|
||||
&assessment.risk_categories,
|
||||
duration,
|
||||
);
|
||||
return Some(assessment);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("failed to parse sandbox assessment JSON: {err}");
|
||||
parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration);
|
||||
}
|
||||
},
|
||||
Ok(Ok(None)) => {
|
||||
warn!("sandbox assessment response did not include any message");
|
||||
parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration);
|
||||
}
|
||||
Ok(Err(err)) => {
|
||||
warn!("sandbox assessment failed: {err}");
|
||||
parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration);
|
||||
}
|
||||
Err(_) => {
|
||||
warn!("sandbox assessment timed out");
|
||||
parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String {
|
||||
match policy {
|
||||
SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
|
||||
SandboxPolicy::ReadOnly => "read-only".to_string(),
|
||||
SandboxPolicy::WorkspaceWrite { network_access, .. } => {
|
||||
let network = if *network_access {
|
||||
"network"
|
||||
} else {
|
||||
"no-network"
|
||||
};
|
||||
format!("workspace-write (network_access={network})")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf> {
|
||||
let mut roots = vec![cwd.to_path_buf()];
|
||||
if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = policy {
|
||||
roots.extend(writable_roots.iter().cloned());
|
||||
}
|
||||
roots
|
||||
}
|
||||
|
||||
fn sandbox_assessment_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"required": ["description", "risk_level", "risk_categories"],
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 500
|
||||
},
|
||||
"risk_level": {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"]
|
||||
},
|
||||
"risk_categories": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": SANDBOX_RISK_CATEGORY_VALUES
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
})
|
||||
}
|
||||
|
||||
fn response_item_text(item: &ResponseItem) -> Option<String> {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
let mut buffers: Vec<&str> = Vec::new();
|
||||
for segment in content {
|
||||
match segment {
|
||||
ContentItem::InputText { text } | ContentItem::OutputText { text } => {
|
||||
if !text.is_empty() {
|
||||
buffers.push(text);
|
||||
}
|
||||
}
|
||||
ContentItem::InputImage { .. } => {}
|
||||
}
|
||||
}
|
||||
if buffers.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(buffers.join("\n"))
|
||||
}
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,9 @@ Build platform wrappers and produce ExecEnv for execution. Owns low‑level
|
||||
sandbox placement and transformation of portable CommandSpec into a
|
||||
ready‑to‑spawn environment.
|
||||
*/
|
||||
|
||||
pub mod assessment;
|
||||
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::StdoutStream;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use codex_protocol::models::ResponseItem;
|
||||
|
||||
use crate::codex::SessionConfiguration;
|
||||
use crate::context_manager::ContextManager;
|
||||
use crate::conversation_history::ConversationHistory;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::TokenUsage;
|
||||
use crate::protocol::TokenUsageInfo;
|
||||
@@ -11,7 +11,7 @@ use crate::protocol::TokenUsageInfo;
|
||||
/// Persistent, session-scoped state previously stored directly on `Session`.
|
||||
pub(crate) struct SessionState {
|
||||
pub(crate) session_configuration: SessionConfiguration,
|
||||
pub(crate) history: ContextManager,
|
||||
pub(crate) history: ConversationHistory,
|
||||
pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ impl SessionState {
|
||||
pub(crate) fn new(session_configuration: SessionConfiguration) -> Self {
|
||||
Self {
|
||||
session_configuration,
|
||||
history: ContextManager::new(),
|
||||
history: ConversationHistory::new(),
|
||||
latest_rate_limits: None,
|
||||
}
|
||||
}
|
||||
@@ -38,7 +38,7 @@ impl SessionState {
|
||||
self.history.get_history()
|
||||
}
|
||||
|
||||
pub(crate) fn clone_history(&self) -> ContextManager {
|
||||
pub(crate) fn clone_history(&self) -> ConversationHistory {
|
||||
self.history.clone()
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::tools::TELEMETRY_PREVIEW_MAX_BYTES;
|
||||
use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
|
||||
use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::models::FunctionCallOutputPayload;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ShellToolCallParams;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use mcp_types::CallToolResult;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
@@ -72,7 +76,7 @@ pub enum ToolOutput {
|
||||
impl ToolOutput {
|
||||
pub fn log_preview(&self) -> String {
|
||||
match self {
|
||||
ToolOutput::Function { content, .. } => content.clone(),
|
||||
ToolOutput::Function { content, .. } => telemetry_preview(content),
|
||||
ToolOutput::Mcp { result } => format!("{result:?}"),
|
||||
}
|
||||
}
|
||||
@@ -107,6 +111,46 @@ impl ToolOutput {
|
||||
}
|
||||
}
|
||||
|
||||
fn telemetry_preview(content: &str) -> String {
|
||||
let truncated_slice = take_bytes_at_char_boundary(content, TELEMETRY_PREVIEW_MAX_BYTES);
|
||||
let truncated_by_bytes = truncated_slice.len() < content.len();
|
||||
|
||||
let mut preview = String::new();
|
||||
let mut lines_iter = truncated_slice.lines();
|
||||
for idx in 0..TELEMETRY_PREVIEW_MAX_LINES {
|
||||
match lines_iter.next() {
|
||||
Some(line) => {
|
||||
if idx > 0 {
|
||||
preview.push('\n');
|
||||
}
|
||||
preview.push_str(line);
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
let truncated_by_lines = lines_iter.next().is_some();
|
||||
|
||||
if !truncated_by_bytes && !truncated_by_lines {
|
||||
return content.to_string();
|
||||
}
|
||||
|
||||
if preview.len() < truncated_slice.len()
|
||||
&& truncated_slice
|
||||
.as_bytes()
|
||||
.get(preview.len())
|
||||
.is_some_and(|byte| *byte == b'\n')
|
||||
{
|
||||
preview.push('\n');
|
||||
}
|
||||
|
||||
if !preview.is_empty() && !preview.ends_with('\n') {
|
||||
preview.push('\n');
|
||||
}
|
||||
preview.push_str(TELEMETRY_PREVIEW_TRUNCATION_NOTICE);
|
||||
|
||||
preview
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -152,6 +196,38 @@ mod tests {
|
||||
other => panic!("expected FunctionCallOutput, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_returns_original_within_limits() {
|
||||
let content = "short output";
|
||||
assert_eq!(telemetry_preview(content), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_bytes() {
|
||||
let content = "x".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 8);
|
||||
let preview = telemetry_preview(&content);
|
||||
|
||||
assert!(preview.contains(TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
assert!(
|
||||
preview.len()
|
||||
<= TELEMETRY_PREVIEW_MAX_BYTES + TELEMETRY_PREVIEW_TRUNCATION_NOTICE.len() + 1
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telemetry_preview_truncates_by_lines() {
|
||||
let content = (0..(TELEMETRY_PREVIEW_MAX_LINES + 5))
|
||||
.map(|idx| format!("line {idx}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
let preview = telemetry_preview(&content);
|
||||
let lines: Vec<&str> = preview.lines().collect();
|
||||
|
||||
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
|
||||
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
|
||||
@@ -22,6 +22,12 @@ pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
|
||||
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
|
||||
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
|
||||
|
||||
// Telemetry preview limits: keep log events smaller than model budgets.
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
|
||||
pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
|
||||
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
|
||||
"[... telemetry preview truncated ...]";
|
||||
|
||||
/// Format the combined exec output for sending back to the model.
|
||||
/// Includes exit code and duration metadata; truncates large bodies safely.
|
||||
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
|
||||
|
||||
@@ -7,9 +7,11 @@ retry without sandbox on denial (no re‑approval thanks to caching).
|
||||
*/
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
@@ -38,6 +40,7 @@ impl ToolOrchestrator {
|
||||
) -> Result<Out, ToolError>
|
||||
where
|
||||
T: ToolRuntime<Rq, Out>,
|
||||
Rq: ProvidesSandboxRetryData,
|
||||
{
|
||||
let otel = turn_ctx.client.get_otel_event_manager();
|
||||
let otel_tn = &tool_ctx.tool_name;
|
||||
@@ -56,6 +59,7 @@ impl ToolOrchestrator {
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
risk: None,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
@@ -107,12 +111,33 @@ impl ToolOrchestrator {
|
||||
|
||||
// Ask for approval before retrying without sandbox.
|
||||
if !tool.should_bypass_approval(approval_policy, already_approved) {
|
||||
let mut risk = None;
|
||||
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
let err = SandboxErr::Denied {
|
||||
output: output.clone(),
|
||||
};
|
||||
let friendly = get_error_message_ui(&CodexErr::Sandbox(err));
|
||||
let failure_summary = format!("failed in sandbox: {friendly}");
|
||||
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(
|
||||
turn_ctx,
|
||||
&tool_ctx.call_id,
|
||||
&metadata.command,
|
||||
Some(failure_summary.as_str()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let reason_msg = build_denial_reason_from_output(output.as_ref());
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: Some(reason_msg),
|
||||
risk,
|
||||
};
|
||||
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
@@ -10,7 +10,9 @@ use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
@@ -32,6 +34,12 @@ pub struct ApplyPatchRequest {
|
||||
pub codex_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ApplyPatchRequest {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ApplyPatchRuntime;
|
||||
|
||||
@@ -106,9 +114,10 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
||||
let call_id = ctx.call_id.to_string();
|
||||
let cwd = req.cwd.clone();
|
||||
let retry_reason = ctx.retry_reason.clone();
|
||||
let risk = ctx.risk.clone();
|
||||
let user_explicitly_approved = req.user_explicitly_approved;
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
with_cached_approval(&session.services, key, move || async move {
|
||||
if let Some(reason) = retry_reason {
|
||||
session
|
||||
.request_command_approval(
|
||||
@@ -117,6 +126,7 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
||||
vec!["apply_patch".to_string()],
|
||||
cwd,
|
||||
Some(reason),
|
||||
risk,
|
||||
)
|
||||
.await
|
||||
} else if user_explicitly_approved {
|
||||
|
||||
@@ -12,7 +12,9 @@ use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
@@ -34,6 +36,15 @@ pub struct ShellRequest {
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ShellRequest {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
|
||||
Some(SandboxRetryData {
|
||||
command: self.command.clone(),
|
||||
cwd: self.cwd.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ShellRuntime;
|
||||
|
||||
@@ -90,13 +101,14 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
||||
.retry_reason
|
||||
.clone()
|
||||
.or_else(|| req.justification.clone());
|
||||
let risk = ctx.risk.clone();
|
||||
let session = ctx.session;
|
||||
let turn = ctx.turn;
|
||||
let call_id = ctx.call_id.to_string();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
with_cached_approval(&session.services, key, move || async move {
|
||||
session
|
||||
.request_command_approval(turn, call_id, command, cwd, reason)
|
||||
.request_command_approval(turn, call_id, command, cwd, reason, risk)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -9,7 +9,9 @@ use crate::error::SandboxErr;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
@@ -31,6 +33,15 @@ pub struct UnifiedExecRequest {
|
||||
pub env: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for UnifiedExecRequest {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
|
||||
Some(SandboxRetryData {
|
||||
command: self.command.clone(),
|
||||
cwd: self.cwd.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub struct UnifiedExecApprovalKey {
|
||||
pub command: Vec<String>,
|
||||
@@ -85,10 +96,11 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
let command = req.command.clone();
|
||||
let cwd = req.cwd.clone();
|
||||
let reason = ctx.retry_reason.clone();
|
||||
let risk = ctx.risk.clone();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
session
|
||||
.request_command_approval(turn, call_id, command, cwd, reason)
|
||||
.request_command_approval(turn, call_id, command, cwd, reason, risk)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::protocol::SandboxCommandAssessment;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
@@ -18,6 +19,7 @@ use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use futures::Future;
|
||||
use futures::future::BoxFuture;
|
||||
@@ -81,6 +83,7 @@ pub(crate) struct ApprovalCtx<'a> {
|
||||
pub turn: &'a TurnContext,
|
||||
pub call_id: &'a str,
|
||||
pub retry_reason: Option<String>,
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
pub(crate) trait Approvable<Req> {
|
||||
@@ -156,6 +159,17 @@ pub(crate) struct ToolCtx<'a> {
|
||||
pub tool_name: String,
|
||||
}
|
||||
|
||||
/// Captures the command metadata needed to re-run a tool request without sandboxing.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct SandboxRetryData {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
}
|
||||
|
||||
pub(crate) trait ProvidesSandboxRetryData {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum ToolError {
|
||||
Rejected(String),
|
||||
|
||||
27
codex-rs/core/templates/sandboxing/assessment_prompt.md
Normal file
27
codex-rs/core/templates/sandboxing/assessment_prompt.md
Normal file
@@ -0,0 +1,27 @@
|
||||
You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys:
|
||||
- description (concise summary, at most two sentences)
|
||||
- risk_level ("low", "medium", or "high")
|
||||
- risk_categories (optional array of zero or more category strings)
|
||||
Risk level examples:
|
||||
- low: read-only inspections, listing files, printing configuration
|
||||
- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources
|
||||
- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
|
||||
Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance.
|
||||
Use multiple categories when appropriate.
|
||||
If information is insufficient, choose the most cautious risk level supported by the evidence.
|
||||
Respond with JSON only, without markdown code fences or extra commentary.
|
||||
|
||||
---
|
||||
|
||||
Command metadata:
|
||||
Platform: {{ platform }}
|
||||
Sandbox policy: {{ sandbox_policy }}
|
||||
{% if let Some(roots) = filesystem_roots %}
|
||||
Filesystem roots: {{ roots }}
|
||||
{% endif %}
|
||||
Working directory: {{ working_directory }}
|
||||
Command argv: {{ command_argv }}
|
||||
Command (joined): {{ command_joined }}
|
||||
{% if let Some(message) = sandbox_failure_message %}
|
||||
Sandbox failure message: {{ message }}
|
||||
{% endif %}
|
||||
@@ -247,7 +247,11 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
|
||||
session_configured,
|
||||
..
|
||||
} = conversation_manager
|
||||
.resume_conversation_from_rollout(config, session_path.clone(), auth_manager)
|
||||
.resume_conversation_from_rollout(
|
||||
config.clone(),
|
||||
session_path.clone(),
|
||||
auth_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.expect("resume conversation");
|
||||
|
||||
@@ -260,6 +264,23 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
|
||||
let expected_initial_json = json!([]);
|
||||
assert_eq!(initial_json, expected_initial_json);
|
||||
|
||||
let NewConversation {
|
||||
conversation: codex_again,
|
||||
session_configured: session_configured_again,
|
||||
..
|
||||
} = conversation_manager
|
||||
.resume_conversation_from_rollout(
|
||||
config.clone(),
|
||||
session_path.clone(),
|
||||
auth_manager.clone(),
|
||||
)
|
||||
.await
|
||||
.expect("resume existing conversation");
|
||||
assert!(Arc::ptr_eq(&codex, &codex_again));
|
||||
let session_configured_json = serde_json::to_value(&session_configured).unwrap();
|
||||
let session_configured_again_json = serde_json::to_value(&session_configured_again).unwrap();
|
||||
assert_eq!(session_configured_json, session_configured_again_json);
|
||||
|
||||
// 2) Submit new input; the request body must include the prior item followed by the new user input.
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
|
||||
@@ -179,6 +179,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
include_view_image_tool: None,
|
||||
show_raw_agent_reasoning: oss.then_some(true),
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: Vec::new(),
|
||||
};
|
||||
// Parse `-c` overrides.
|
||||
|
||||
@@ -158,6 +158,7 @@ impl CodexToolCallParam {
|
||||
include_view_image_tool: None,
|
||||
show_raw_agent_reasoning: None,
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: Vec::new(),
|
||||
};
|
||||
|
||||
|
||||
@@ -178,6 +178,7 @@ async fn run_codex_tool_session_inner(
|
||||
cwd,
|
||||
call_id,
|
||||
reason: _,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
}) => {
|
||||
handle_exec_approval_request(
|
||||
@@ -190,6 +191,7 @@ async fn run_codex_tool_session_inner(
|
||||
event.id.clone(),
|
||||
call_id,
|
||||
parsed_cmd,
|
||||
risk,
|
||||
)
|
||||
.await;
|
||||
continue;
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::sync::Arc;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_core::protocol::SandboxCommandAssessment;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use mcp_types::ElicitRequest;
|
||||
use mcp_types::ElicitRequestParamsRequestedSchema;
|
||||
@@ -37,6 +38,8 @@ pub struct ExecApprovalElicitRequestParams {
|
||||
pub codex_command: Vec<String>,
|
||||
pub codex_cwd: PathBuf,
|
||||
pub codex_parsed_cmd: Vec<ParsedCommand>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub codex_risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
// TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See:
|
||||
@@ -59,6 +62,7 @@ pub(crate) async fn handle_exec_approval_request(
|
||||
event_id: String,
|
||||
call_id: String,
|
||||
codex_parsed_cmd: Vec<ParsedCommand>,
|
||||
codex_risk: Option<SandboxCommandAssessment>,
|
||||
) {
|
||||
let escaped_command =
|
||||
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
|
||||
@@ -81,6 +85,7 @@ pub(crate) async fn handle_exec_approval_request(
|
||||
codex_command: command,
|
||||
codex_cwd: cwd,
|
||||
codex_parsed_cmd,
|
||||
codex_risk,
|
||||
};
|
||||
let params_json = match serde_json::to_value(¶ms) {
|
||||
Ok(value) => value,
|
||||
|
||||
@@ -196,6 +196,7 @@ fn create_expected_elicitation_request(
|
||||
codex_cwd: workdir.to_path_buf(),
|
||||
codex_call_id: "call1234".to_string(),
|
||||
codex_parsed_cmd,
|
||||
codex_risk: None,
|
||||
})?),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -8,6 +8,8 @@ use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SandboxRiskCategory;
|
||||
use codex_protocol::protocol::SandboxRiskLevel;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use eventsource_stream::Event as StreamEvent;
|
||||
use eventsource_stream::EventStreamError as StreamError;
|
||||
@@ -366,6 +368,63 @@ impl OtelEventManager {
|
||||
);
|
||||
}
|
||||
|
||||
pub fn sandbox_assessment(
|
||||
&self,
|
||||
call_id: &str,
|
||||
status: &str,
|
||||
risk_level: Option<SandboxRiskLevel>,
|
||||
risk_categories: &[SandboxRiskCategory],
|
||||
duration: Duration,
|
||||
) {
|
||||
let level = risk_level.map(|level| level.as_str());
|
||||
let categories = if risk_categories.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
risk_categories
|
||||
.iter()
|
||||
.map(SandboxRiskCategory::as_str)
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
};
|
||||
|
||||
tracing::event!(
|
||||
tracing::Level::INFO,
|
||||
event.name = "codex.sandbox_assessment",
|
||||
event.timestamp = %timestamp(),
|
||||
conversation.id = %self.metadata.conversation_id,
|
||||
app.version = %self.metadata.app_version,
|
||||
auth_mode = self.metadata.auth_mode,
|
||||
user.account_id = self.metadata.account_id,
|
||||
user.email = self.metadata.account_email,
|
||||
terminal.type = %self.metadata.terminal_type,
|
||||
model = %self.metadata.model,
|
||||
slug = %self.metadata.slug,
|
||||
call_id = %call_id,
|
||||
status = %status,
|
||||
risk_level = level,
|
||||
risk_categories = categories,
|
||||
duration_ms = %duration.as_millis(),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn sandbox_assessment_latency(&self, call_id: &str, duration: Duration) {
|
||||
tracing::event!(
|
||||
tracing::Level::INFO,
|
||||
event.name = "codex.sandbox_assessment_latency",
|
||||
event.timestamp = %timestamp(),
|
||||
conversation.id = %self.metadata.conversation_id,
|
||||
app.version = %self.metadata.app_version,
|
||||
auth_mode = self.metadata.auth_mode,
|
||||
user.account_id = self.metadata.account_id,
|
||||
user.email = self.metadata.account_email,
|
||||
terminal.type = %self.metadata.terminal_type,
|
||||
model = %self.metadata.model,
|
||||
slug = %self.metadata.slug,
|
||||
call_id = %call_id,
|
||||
duration_ms = %duration.as_millis(),
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn log_tool_result<F, Fut, E>(
|
||||
&self,
|
||||
tool_name: &str,
|
||||
|
||||
91
codex-rs/protocol/src/approvals.rs
Normal file
91
codex-rs/protocol/src/approvals.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::parse_command::ParsedCommand;
|
||||
use crate::protocol::FileChange;
|
||||
use schemars::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use ts_rs::TS;
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SandboxRiskLevel {
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SandboxRiskCategory {
|
||||
DataDeletion,
|
||||
DataExfiltration,
|
||||
PrivilegeEscalation,
|
||||
SystemModification,
|
||||
NetworkAccess,
|
||||
ResourceExhaustion,
|
||||
Compliance,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
pub struct SandboxCommandAssessment {
|
||||
pub description: String,
|
||||
pub risk_level: SandboxRiskLevel,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub risk_categories: Vec<SandboxRiskCategory>,
|
||||
}
|
||||
|
||||
impl SandboxRiskLevel {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Low => "low",
|
||||
Self::Medium => "medium",
|
||||
Self::High => "high",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SandboxRiskCategory {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::DataDeletion => "data_deletion",
|
||||
Self::DataExfiltration => "data_exfiltration",
|
||||
Self::PrivilegeEscalation => "privilege_escalation",
|
||||
Self::SystemModification => "system_modification",
|
||||
Self::NetworkAccess => "network_access",
|
||||
Self::ResourceExhaustion => "resource_exhaustion",
|
||||
Self::Compliance => "compliance",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ExecApprovalRequestEvent {
|
||||
/// Identifier for the associated exec call, if available.
|
||||
pub call_id: String,
|
||||
/// The command to be executed.
|
||||
pub command: Vec<String>,
|
||||
/// The command's working directory.
|
||||
pub cwd: PathBuf,
|
||||
/// Optional human-readable reason for the approval (e.g. retry without sandbox).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
/// Optional model-provided risk assessment describing the blocked command.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ApplyPatchApprovalRequestEvent {
|
||||
/// Responses API call id for the associated patch apply call, if available.
|
||||
pub call_id: String,
|
||||
pub changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
/// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub grant_root: Option<PathBuf>,
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
pub mod account;
|
||||
mod conversation_id;
|
||||
pub use conversation_id::ConversationId;
|
||||
pub mod approvals;
|
||||
pub mod config_types;
|
||||
pub mod custom_prompts;
|
||||
pub mod items;
|
||||
|
||||
@@ -34,6 +34,12 @@ use serde_with::serde_as;
|
||||
use strum_macros::Display;
|
||||
use ts_rs::TS;
|
||||
|
||||
pub use crate::approvals::ApplyPatchApprovalRequestEvent;
|
||||
pub use crate::approvals::ExecApprovalRequestEvent;
|
||||
pub use crate::approvals::SandboxCommandAssessment;
|
||||
pub use crate::approvals::SandboxRiskCategory;
|
||||
pub use crate::approvals::SandboxRiskLevel;
|
||||
|
||||
/// Open/close tags for special user-input blocks. Used across crates to avoid
|
||||
/// duplicated hardcoded strings.
|
||||
pub const USER_INSTRUCTIONS_OPEN_TAG: &str = "<user_instructions>";
|
||||
@@ -1126,33 +1132,6 @@ pub struct ExecCommandOutputDeltaEvent {
|
||||
pub chunk: Vec<u8>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ExecApprovalRequestEvent {
|
||||
/// Identifier for the associated exec call, if available.
|
||||
pub call_id: String,
|
||||
/// The command to be executed.
|
||||
pub command: Vec<String>,
|
||||
/// The command's working directory.
|
||||
pub cwd: PathBuf,
|
||||
/// Optional human-readable reason for the approval (e.g. retry without sandbox).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ApplyPatchApprovalRequestEvent {
|
||||
/// Responses API call id for the associated patch apply call, if available.
|
||||
pub call_id: String,
|
||||
pub changes: HashMap<PathBuf, FileChange>,
|
||||
/// Optional explanatory reason (e.g. request for extra write access).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
/// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub grant_root: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct BackgroundEventEvent {
|
||||
pub message: String,
|
||||
|
||||
@@ -19,6 +19,9 @@ use crate::render::renderable::Renderable;
|
||||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_core::protocol::SandboxCommandAssessment;
|
||||
use codex_core::protocol::SandboxRiskCategory;
|
||||
use codex_core::protocol::SandboxRiskLevel;
|
||||
use crossterm::event::KeyCode;
|
||||
use crossterm::event::KeyEvent;
|
||||
use crossterm::event::KeyEventKind;
|
||||
@@ -38,6 +41,7 @@ pub(crate) enum ApprovalRequest {
|
||||
id: String,
|
||||
command: Vec<String>,
|
||||
reason: Option<String>,
|
||||
risk: Option<SandboxCommandAssessment>,
|
||||
},
|
||||
ApplyPatch {
|
||||
id: String,
|
||||
@@ -285,12 +289,17 @@ impl From<ApprovalRequest> for ApprovalRequestState {
|
||||
id,
|
||||
command,
|
||||
reason,
|
||||
risk,
|
||||
} => {
|
||||
let reason = reason.filter(|item| !item.is_empty());
|
||||
let has_reason = reason.is_some();
|
||||
let mut header: Vec<Line<'static>> = Vec::new();
|
||||
if let Some(reason) = reason
|
||||
&& !reason.is_empty()
|
||||
{
|
||||
if let Some(reason) = reason {
|
||||
header.push(Line::from(vec!["Reason: ".into(), reason.italic()]));
|
||||
}
|
||||
if let Some(risk) = risk.as_ref() {
|
||||
header.extend(render_risk_lines(risk));
|
||||
} else if has_reason {
|
||||
header.push(Line::from(""));
|
||||
}
|
||||
let full_cmd = strip_bash_lc_and_escape(&command);
|
||||
@@ -330,6 +339,52 @@ impl From<ApprovalRequest> for ApprovalRequestState {
|
||||
}
|
||||
}
|
||||
|
||||
fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec<Line<'static>> {
|
||||
let level_span = match risk.risk_level {
|
||||
SandboxRiskLevel::Low => "LOW".green().bold(),
|
||||
SandboxRiskLevel::Medium => "MEDIUM".cyan().bold(),
|
||||
SandboxRiskLevel::High => "HIGH".red().bold(),
|
||||
};
|
||||
|
||||
let mut lines = Vec::new();
|
||||
|
||||
let description = risk.description.trim();
|
||||
if !description.is_empty() {
|
||||
lines.push(Line::from(vec![
|
||||
"Summary: ".into(),
|
||||
description.to_string().into(),
|
||||
]));
|
||||
}
|
||||
|
||||
let mut spans: Vec<Span<'static>> = vec!["Risk: ".into(), level_span];
|
||||
if !risk.risk_categories.is_empty() {
|
||||
spans.push(" (".into());
|
||||
for (idx, category) in risk.risk_categories.iter().enumerate() {
|
||||
if idx > 0 {
|
||||
spans.push(", ".into());
|
||||
}
|
||||
spans.push(risk_category_label(*category).into());
|
||||
}
|
||||
spans.push(")".into());
|
||||
}
|
||||
|
||||
lines.push(Line::from(spans));
|
||||
lines.push(Line::from(""));
|
||||
lines
|
||||
}
|
||||
|
||||
fn risk_category_label(category: SandboxRiskCategory) -> &'static str {
|
||||
match category {
|
||||
SandboxRiskCategory::DataDeletion => "data deletion",
|
||||
SandboxRiskCategory::DataExfiltration => "data exfiltration",
|
||||
SandboxRiskCategory::PrivilegeEscalation => "privilege escalation",
|
||||
SandboxRiskCategory::SystemModification => "system modification",
|
||||
SandboxRiskCategory::NetworkAccess => "network access",
|
||||
SandboxRiskCategory::ResourceExhaustion => "resource exhaustion",
|
||||
SandboxRiskCategory::Compliance => "compliance",
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum ApprovalVariant {
|
||||
Exec { id: String, command: Vec<String> },
|
||||
@@ -404,6 +459,7 @@ mod tests {
|
||||
id: "test".to_string(),
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
reason: Some("reason".to_string()),
|
||||
risk: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -445,6 +501,7 @@ mod tests {
|
||||
id: "test".into(),
|
||||
command,
|
||||
reason: None,
|
||||
risk: None,
|
||||
};
|
||||
|
||||
let view = ApprovalOverlay::new(exec_request, tx);
|
||||
|
||||
@@ -557,6 +557,7 @@ mod tests {
|
||||
id: "1".to_string(),
|
||||
command: vec!["echo".into(), "ok".into()],
|
||||
reason: None,
|
||||
risk: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -777,6 +777,7 @@ impl ChatWidget {
|
||||
id,
|
||||
command: ev.command,
|
||||
reason: ev.reason,
|
||||
risk: ev.risk,
|
||||
};
|
||||
self.bottom_pane.push_approval_request(request);
|
||||
self.request_redraw();
|
||||
|
||||
@@ -402,6 +402,7 @@ fn exec_approval_emits_proposed_command_and_decision_history() {
|
||||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -444,6 +445,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
|
||||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -492,6 +494,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
|
||||
command: vec!["bash".into(), "-lc".into(), long],
|
||||
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
|
||||
reason: None,
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -1421,6 +1424,7 @@ fn approval_modal_exec_snapshot() {
|
||||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -1465,6 +1469,7 @@ fn approval_modal_exec_without_reason_snapshot() {
|
||||
command: vec!["bash".into(), "-lc".into(), "echo hello world".into()],
|
||||
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
|
||||
reason: None,
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
@@ -1675,6 +1680,7 @@ fn status_widget_and_approval_modal_snapshot() {
|
||||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
chat.handle_codex_event(Event {
|
||||
|
||||
@@ -148,6 +148,7 @@ pub async fn run_main(
|
||||
include_view_image_tool: None,
|
||||
show_raw_agent_reasoning: cli.oss.then_some(true),
|
||||
tools_web_search_request: cli.web_search.then_some(true),
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: additional_dirs,
|
||||
};
|
||||
let raw_overrides = cli.config_overrides.raw_overrides.clone();
|
||||
|
||||
Reference in New Issue
Block a user