Compare commits

..

3 Commits

Author SHA1 Message Date
Ahmed Ibrahim
377b251d59 tests 2025-10-24 13:32:54 -07:00
Ahmed Ibrahim
acd62cc610 centralized_processing 2025-10-24 13:15:00 -07:00
Ahmed Ibrahim
0e30347746 centralized_processing 2025-10-24 13:14:44 -07:00
41 changed files with 314 additions and 879 deletions

View File

@@ -17,7 +17,6 @@ use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::FileChange;
use codex_protocol::protocol::RateLimitSnapshot;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxCommandAssessment;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::TurnAbortReason;
use paste::paste;
@@ -848,8 +847,6 @@ pub struct ExecCommandApprovalParams {
pub cwd: PathBuf,
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub risk: Option<SandboxCommandAssessment>,
pub parsed_cmd: Vec<ParsedCommand>,
}
@@ -1066,7 +1063,6 @@ mod tests {
command: vec!["echo".to_string(), "hello".to_string()],
cwd: PathBuf::from("/tmp"),
reason: Some("because tests".to_string()),
risk: None,
parsed_cmd: vec![ParsedCommand::Unknown {
cmd: "echo hello".to_string(),
}],

View File

@@ -1447,7 +1447,6 @@ async fn apply_bespoke_event_handling(
command,
cwd,
reason,
risk,
parsed_cmd,
}) => {
let params = ExecCommandApprovalParams {
@@ -1456,7 +1455,6 @@ async fn apply_bespoke_event_handling(
command,
cwd,
reason,
risk,
parsed_cmd,
};
let rx = outgoing
@@ -1525,7 +1523,6 @@ async fn derive_config_from_params(
include_view_image_tool: None,
show_raw_agent_reasoning: None,
tools_web_search_request: None,
experimental_sandbox_command_assessment: None,
additional_writable_roots: Vec::new(),
};

View File

@@ -311,7 +311,6 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
],
cwd: working_directory.clone(),
reason: None,
risk: None,
parsed_cmd: vec![ParsedCommand::Unknown {
cmd: "python3 -c 'print(42)'".to_string()
}],

View File

@@ -134,14 +134,6 @@ impl ModelClient {
self.stream_with_task_kind(prompt, TaskKind::Regular).await
}
pub fn config(&self) -> Arc<Config> {
Arc::clone(&self.config)
}
pub fn provider(&self) -> &ModelProviderInfo {
&self.provider
}
pub(crate) async fn stream_with_task_kind(
&self,
prompt: &Prompt,

View File

@@ -59,7 +59,7 @@ use crate::client_common::ResponseEvent;
use crate::config::Config;
use crate::config_types::McpServerTransportConfig;
use crate::config_types::ShellEnvironmentPolicy;
use crate::conversation_history::ConversationHistory;
use crate::context_manager::ContextManager;
use crate::environment_context::EnvironmentContext;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
@@ -88,7 +88,6 @@ use crate::protocol::Op;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::ReviewDecision;
use crate::protocol::ReviewOutputEvent;
use crate::protocol::SandboxCommandAssessment;
use crate::protocol::SandboxPolicy;
use crate::protocol::SessionConfiguredEvent;
use crate::protocol::StreamErrorEvent;
@@ -756,32 +755,6 @@ impl Session {
}
}
pub(crate) async fn assess_sandbox_command(
&self,
turn_context: &TurnContext,
call_id: &str,
command: &[String],
failure_message: Option<&str>,
) -> Option<SandboxCommandAssessment> {
let config = turn_context.client.config();
let provider = turn_context.client.provider().clone();
let auth_manager = Arc::clone(&self.services.auth_manager);
let otel = self.services.otel_event_manager.clone();
crate::sandboxing::assessment::assess_command(
config,
provider,
auth_manager,
&otel,
self.conversation_id,
call_id,
command,
&turn_context.sandbox_policy,
&turn_context.cwd,
failure_message,
)
.await
}
/// Emit an exec approval request event and await the user's decision.
///
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
@@ -794,7 +767,6 @@ impl Session {
command: Vec<String>,
cwd: PathBuf,
reason: Option<String>,
risk: Option<SandboxCommandAssessment>,
) -> ReviewDecision {
let sub_id = turn_context.sub_id.clone();
// Add the tx_approve callback to the map before sending the request.
@@ -820,7 +792,6 @@ impl Session {
command,
cwd,
reason,
risk,
parsed_cmd,
});
self.send_event(turn_context, event).await;
@@ -896,7 +867,7 @@ impl Session {
turn_context: &TurnContext,
rollout_items: &[RolloutItem],
) -> Vec<ResponseItem> {
let mut history = ConversationHistory::new();
let mut history = ContextManager::new();
for item in rollout_items {
match item {
RolloutItem::ResponseItem(response_item) => {
@@ -970,7 +941,7 @@ impl Session {
state.history_snapshot()
}
pub(crate) async fn clone_history(&self) -> ConversationHistory {
pub(crate) async fn clone_history(&self) -> ContextManager {
let state = self.state.lock().await;
state.clone_history()
}
@@ -1553,7 +1524,7 @@ pub(crate) async fn run_task(
// For normal turns, continue recording to the session history as before.
let is_review_mode = turn_context.is_review_mode;
let mut review_thread_history: ConversationHistory = ConversationHistory::new();
let mut review_thread_history: ContextManager = ContextManager::new();
if is_review_mode {
// Seed review threads with environment context so the model knows the working directory.
review_thread_history
@@ -2872,7 +2843,7 @@ mod tests {
turn_context: &TurnContext,
) -> (Vec<RolloutItem>, Vec<ResponseItem>) {
let mut rollout_items = Vec::new();
let mut live_history = ConversationHistory::new();
let mut live_history = ContextManager::new();
let initial_context = session.build_initial_context(turn_context);
for item in &initial_context {

View File

@@ -223,9 +223,6 @@ pub struct Config {
pub tools_web_search_request: bool,
/// When `true`, run a model-based assessment for commands denied by the sandbox.
pub experimental_sandbox_command_assessment: bool,
pub use_experimental_streamable_shell_tool: bool,
/// If set to `true`, used only the experimental unified exec tool.
@@ -961,7 +958,6 @@ pub struct ConfigToml {
pub experimental_use_unified_exec_tool: Option<bool>,
pub experimental_use_rmcp_client: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
}
impl From<ConfigToml> for UserSavedConfig {
@@ -1122,7 +1118,6 @@ pub struct ConfigOverrides {
pub include_view_image_tool: Option<bool>,
pub show_raw_agent_reasoning: Option<bool>,
pub tools_web_search_request: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
/// Additional directories that should be treated as writable roots for this session.
pub additional_writable_roots: Vec<PathBuf>,
}
@@ -1152,7 +1147,6 @@ impl Config {
include_view_image_tool: include_view_image_tool_override,
show_raw_agent_reasoning,
tools_web_search_request: override_tools_web_search_request,
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
additional_writable_roots,
} = overrides;
@@ -1178,7 +1172,6 @@ impl Config {
include_apply_patch_tool: include_apply_patch_tool_override,
include_view_image_tool: include_view_image_tool_override,
web_search_request: override_tools_web_search_request,
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
};
let features = Features::from_config(&cfg, &config_profile, feature_overrides);
@@ -1276,8 +1269,6 @@ impl Config {
let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
let experimental_sandbox_command_assessment =
features.enabled(Feature::SandboxCommandAssessment);
let forced_chatgpt_workspace_id =
cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
@@ -1399,7 +1390,6 @@ impl Config {
forced_login_method,
include_apply_patch_tool: include_apply_patch_tool_flag,
tools_web_search_request,
experimental_sandbox_command_assessment,
use_experimental_streamable_shell_tool,
use_experimental_unified_exec_tool,
use_experimental_use_rmcp_client,
@@ -2883,7 +2873,6 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
@@ -2952,7 +2941,6 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
@@ -3036,7 +3024,6 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
@@ -3106,7 +3093,6 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_streamable_shell_tool: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,

View File

@@ -26,7 +26,6 @@ pub struct ConfigProfile {
pub experimental_use_exec_command_tool: Option<bool>,
pub experimental_use_rmcp_client: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
pub tools_web_search: Option<bool>,
pub tools_view_image: Option<bool>,
/// Optional feature toggles scoped to this profile.

View File

@@ -1,3 +1,4 @@
use crate::context_manager::truncation::truncate_context_output;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::TokenUsage;
@@ -6,13 +7,13 @@ use tracing::error;
/// Transcript of conversation history
#[derive(Debug, Clone, Default)]
pub(crate) struct ConversationHistory {
pub(crate) struct ContextManager {
/// The oldest items are at the beginning of the vector.
items: Vec<ResponseItem>,
token_info: Option<TokenUsageInfo>,
}
impl ConversationHistory {
impl ContextManager {
pub(crate) fn new() -> Self {
Self {
items: Vec::new(),
@@ -44,7 +45,8 @@ impl ConversationHistory {
continue;
}
self.items.push(item.clone());
let processed = Self::process_item(&item);
self.items.push(processed);
}
}
@@ -76,6 +78,29 @@ impl ConversationHistory {
self.remove_orphan_outputs();
}
fn process_item(item: &ResponseItem) -> ResponseItem {
match item {
ResponseItem::FunctionCallOutput { call_id, output } => {
let truncated_content = truncate_context_output(output.content.as_str());
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content: truncated_content,
success: output.success,
},
}
}
ResponseItem::CustomToolCallOutput { call_id, output } => {
let truncated = truncate_context_output(output);
ResponseItem::CustomToolCallOutput {
call_id: call_id.clone(),
output: truncated,
}
}
_ => item.clone(),
}
}
/// Returns a clone of the contents in the transcript.
fn contents(&self) -> Vec<ResponseItem> {
self.items.clone()
@@ -106,7 +131,7 @@ impl ConversationHistory {
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content: "aborted".to_string(),
content: truncate_context_output("aborted"),
success: None,
},
},
@@ -129,7 +154,7 @@ impl ConversationHistory {
idx,
ResponseItem::CustomToolCallOutput {
call_id: call_id.clone(),
output: "aborted".to_string(),
output: truncate_context_output("aborted"),
},
));
}
@@ -153,7 +178,7 @@ impl ConversationHistory {
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content: "aborted".to_string(),
content: truncate_context_output("aborted"),
success: None,
},
},
@@ -249,7 +274,10 @@ impl ConversationHistory {
}
pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
self.items = items;
self.items = items
.into_iter()
.map(|item| Self::process_item(&item))
.collect();
}
/// Removes the corresponding paired item for the provided `item`, if any.
@@ -362,6 +390,8 @@ fn is_api_message(message: &ResponseItem) -> bool {
#[cfg(test)]
mod tests {
use super::*;
use crate::context_manager::truncation::TELEMETRY_PREVIEW_MAX_BYTES;
use crate::context_manager::truncation::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::LocalShellAction;
@@ -379,8 +409,8 @@ mod tests {
}
}
fn create_history_with_items(items: Vec<ResponseItem>) -> ConversationHistory {
let mut h = ConversationHistory::new();
fn create_history_with_items(items: Vec<ResponseItem>) -> ContextManager {
let mut h = ContextManager::new();
h.record_items(items.iter());
h
}
@@ -397,7 +427,7 @@ mod tests {
#[test]
fn filters_non_api_messages() {
let mut h = ConversationHistory::default();
let mut h = ContextManager::default();
// System message is not an API message; Other is ignored.
let system = ResponseItem::Message {
id: None,
@@ -435,6 +465,61 @@ mod tests {
);
}
#[test]
fn record_items_truncates_function_call_output() {
let mut h = ContextManager::new();
let long_content = "a".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 32);
let item = ResponseItem::FunctionCallOutput {
call_id: "call-long".to_string(),
output: FunctionCallOutputPayload {
content: long_content.clone(),
success: Some(true),
},
};
h.record_items([&item]);
let stored = h.contents();
let ResponseItem::FunctionCallOutput { output, .. } = &stored[0] else {
panic!("expected FunctionCallOutput variant");
};
assert!(
output
.content
.ends_with(TELEMETRY_PREVIEW_TRUNCATION_NOTICE),
"truncated content should end with notice"
);
assert!(
output.content.len() < long_content.len(),
"content should shrink after truncation"
);
}
#[test]
fn record_items_truncates_custom_tool_output() {
let mut h = ContextManager::new();
let long_content = "b".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 64);
let item = ResponseItem::CustomToolCallOutput {
call_id: "custom-long".to_string(),
output: long_content.clone(),
};
h.record_items([&item]);
let stored = h.contents();
let ResponseItem::CustomToolCallOutput { output, .. } = &stored[0] else {
panic!("expected CustomToolCallOutput variant");
};
assert!(
output.ends_with(TELEMETRY_PREVIEW_TRUNCATION_NOTICE),
"truncated output should end with notice"
);
assert!(
output.len() < long_content.len(),
"output should shrink after truncation"
);
}
#[test]
fn remove_first_item_removes_matching_output_for_function_call() {
let items = vec![

View File

@@ -0,0 +1,3 @@
mod manager;
pub(crate) use manager::ContextManager;
pub mod truncation;

View File

@@ -0,0 +1,159 @@
use codex_utils_string::take_bytes_at_char_boundary;
#[derive(Clone, Copy)]
pub(crate) struct TruncationConfig {
pub max_bytes: usize,
pub max_lines: usize,
pub truncation_notice: &'static str,
}
// Telemetry preview limits: keep log events smaller than model budgets.
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
"[... telemetry preview truncated ...]";
pub(crate) const CONTEXT_OUTPUT_TRUNCATION: TruncationConfig = TruncationConfig {
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
};
pub(crate) fn truncate_with_config(content: &str, config: TruncationConfig) -> String {
let TruncationConfig {
max_bytes,
max_lines,
truncation_notice,
} = config;
let truncated_slice = take_bytes_at_char_boundary(content, max_bytes);
let truncated_by_bytes = truncated_slice.len() < content.len();
let mut preview = String::new();
let mut lines_iter = truncated_slice.lines();
for idx in 0..max_lines {
match lines_iter.next() {
Some(line) => {
if idx > 0 {
preview.push('\n');
}
preview.push_str(line);
}
None => break,
}
}
let truncated_by_lines = lines_iter.next().is_some();
if !truncated_by_bytes && !truncated_by_lines {
return content.to_string();
}
if preview.len() < truncated_slice.len()
&& truncated_slice
.as_bytes()
.get(preview.len())
.is_some_and(|byte| *byte == b'\n')
{
preview.push('\n');
}
if !preview.is_empty() && !preview.ends_with('\n') {
preview.push('\n');
}
preview.push_str(truncation_notice);
preview
}
pub(crate) fn truncate_context_output(content: &str) -> String {
truncate_with_config(content, CONTEXT_OUTPUT_TRUNCATION)
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn truncate_with_config_returns_original_within_limits() {
let content = "short output";
let config = TruncationConfig {
max_bytes: 64,
max_lines: 5,
truncation_notice: "[notice]",
};
assert_eq!(truncate_with_config(content, config), content);
}
#[test]
fn truncate_with_config_truncates_by_bytes() {
let config = TruncationConfig {
max_bytes: 16,
max_lines: 10,
truncation_notice: "[notice]",
};
let content = "abcdefghijklmnopqrstuvwxyz";
let truncated = truncate_with_config(content, config);
assert!(truncated.contains("[notice]"));
}
#[test]
fn truncate_with_config_truncates_by_lines() {
let config = TruncationConfig {
max_bytes: 1024,
max_lines: 2,
truncation_notice: "[notice]",
};
let content = "l1\nl2\nl3\nl4";
let truncated = truncate_with_config(content, config);
assert!(truncated.lines().count() <= 3);
assert!(truncated.contains("[notice]"));
}
#[test]
fn telemetry_preview_returns_original_within_limits() {
let content = "short output";
let config = TruncationConfig {
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
};
assert_eq!(truncate_with_config(content, config), content);
}
#[test]
fn telemetry_preview_truncates_by_bytes() {
let config = TruncationConfig {
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
};
let content = "x".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 8);
let preview = truncate_with_config(&content, config);
assert!(preview.contains(TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
assert!(
preview.len()
<= TELEMETRY_PREVIEW_MAX_BYTES + TELEMETRY_PREVIEW_TRUNCATION_NOTICE.len() + 1
);
}
#[test]
fn telemetry_preview_truncates_by_lines() {
let config = TruncationConfig {
max_bytes: TELEMETRY_PREVIEW_MAX_BYTES,
max_lines: TELEMETRY_PREVIEW_MAX_LINES,
truncation_notice: TELEMETRY_PREVIEW_TRUNCATION_NOTICE,
};
let content = (0..(TELEMETRY_PREVIEW_MAX_LINES + 5))
.map(|idx| format!("line {idx}"))
.collect::<Vec<_>>()
.join("\n");
let preview = truncate_with_config(&content, config);
let lines: Vec<&str> = preview.lines().collect();
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
}
}

View File

@@ -32,34 +32,8 @@ pub struct NewConversation {
/// [`ConversationManager`] is responsible for creating conversations and
/// maintaining them in memory.
#[derive(Clone)]
struct ConversationEntry {
conversation: Arc<CodexConversation>,
session_configured: SessionConfiguredEvent,
}
impl ConversationEntry {
fn new(
conversation: Arc<CodexConversation>,
session_configured: SessionConfiguredEvent,
) -> Self {
Self {
conversation,
session_configured,
}
}
fn to_new_conversation(&self, conversation_id: ConversationId) -> NewConversation {
NewConversation {
conversation_id,
conversation: self.conversation.clone(),
session_configured: self.session_configured.clone(),
}
}
}
pub struct ConversationManager {
conversations: Arc<RwLock<HashMap<ConversationId, ConversationEntry>>>,
conversations: Arc<RwLock<HashMap<ConversationId, Arc<CodexConversation>>>>,
auth_manager: Arc<AuthManager>,
session_source: SessionSource,
}
@@ -125,11 +99,10 @@ impl ConversationManager {
};
let conversation = Arc::new(CodexConversation::new(codex));
let entry = ConversationEntry::new(conversation.clone(), session_configured.clone());
self.conversations
.write()
.await
.insert(conversation_id, entry);
.insert(conversation_id, conversation.clone());
Ok(NewConversation {
conversation_id,
@@ -145,7 +118,7 @@ impl ConversationManager {
let conversations = self.conversations.read().await;
conversations
.get(&conversation_id)
.map(|entry| entry.conversation.clone())
.cloned()
.ok_or_else(|| CodexErr::ConversationNotFound(conversation_id))
}
@@ -156,22 +129,11 @@ impl ConversationManager {
auth_manager: Arc<AuthManager>,
) -> CodexResult<NewConversation> {
let initial_history = RolloutRecorder::get_rollout_history(&rollout_path).await?;
if let InitialHistory::Resumed(resumed) = &initial_history
&& let Some(existing) = self
.conversations
.read()
.await
.get(&resumed.conversation_id)
.cloned()
{
Ok(existing.to_new_conversation(resumed.conversation_id))
} else {
let CodexSpawnOk {
codex,
conversation_id,
} = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
self.finalize_spawn(codex, conversation_id).await
}
let CodexSpawnOk {
codex,
conversation_id,
} = Codex::spawn(config, auth_manager, initial_history, self.session_source).await?;
self.finalize_spawn(codex, conversation_id).await
}
/// Removes the conversation from the manager's internal map, though the
@@ -182,11 +144,7 @@ impl ConversationManager {
&self,
conversation_id: &ConversationId,
) -> Option<Arc<CodexConversation>> {
self.conversations
.write()
.await
.remove(conversation_id)
.map(|entry| entry.conversation)
self.conversations.write().await.remove(conversation_id)
}
/// Fork an existing conversation by taking messages up to the given position

View File

@@ -39,8 +39,6 @@ pub enum Feature {
ViewImageTool,
/// Allow the model to request web searches.
WebSearchRequest,
/// Enable the model-based risk assessments for sandboxed commands.
SandboxCommandAssessment,
}
impl Feature {
@@ -75,7 +73,6 @@ pub struct FeatureOverrides {
pub include_apply_patch_tool: Option<bool>,
pub include_view_image_tool: Option<bool>,
pub web_search_request: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
}
impl FeatureOverrides {
@@ -140,7 +137,6 @@ impl Features {
let mut features = Features::with_defaults();
let base_legacy = LegacyFeatureToggles {
experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool,
experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
@@ -158,8 +154,6 @@ impl Features {
let profile_legacy = LegacyFeatureToggles {
include_apply_patch_tool: config_profile.include_apply_patch_tool,
include_view_image_tool: config_profile.include_view_image_tool,
experimental_sandbox_command_assessment: config_profile
.experimental_sandbox_command_assessment,
experimental_use_freeform_apply_patch: config_profile
.experimental_use_freeform_apply_patch,
experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool,
@@ -242,10 +236,4 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: false,
},
FeatureSpec {
id: Feature::SandboxCommandAssessment,
key: "experimental_sandbox_command_assessment",
stage: Stage::Experimental,
default_enabled: false,
},
];

View File

@@ -9,10 +9,6 @@ struct Alias {
}
const ALIASES: &[Alias] = &[
Alias {
legacy_key: "experimental_sandbox_command_assessment",
feature: Feature::SandboxCommandAssessment,
},
Alias {
legacy_key: "experimental_use_unified_exec_tool",
feature: Feature::UnifiedExec,
@@ -57,7 +53,6 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
pub struct LegacyFeatureToggles {
pub include_apply_patch_tool: Option<bool>,
pub include_view_image_tool: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_use_exec_command_tool: Option<bool>,
pub experimental_use_unified_exec_tool: Option<bool>,
@@ -74,12 +69,6 @@ impl LegacyFeatureToggles {
self.include_apply_patch_tool,
"include_apply_patch_tool",
);
set_if_some(
features,
Feature::SandboxCommandAssessment,
self.experimental_sandbox_command_assessment,
"experimental_sandbox_command_assessment",
);
set_if_some(
features,
Feature::ApplyPatchFreeform,

View File

@@ -20,7 +20,7 @@ pub mod config_edit;
pub mod config_loader;
pub mod config_profile;
pub mod config_types;
mod conversation_history;
mod context_manager;
pub mod custom_prompts;
mod environment_context;
pub mod error;

View File

@@ -1,5 +1,5 @@
use crate::codex::Session;
use crate::conversation_history::ConversationHistory;
use crate::context_manager::ContextManager;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
@@ -11,7 +11,7 @@ use tracing::warn;
pub(crate) async fn process_items(
processed_items: Vec<crate::codex::ProcessedResponseItem>,
is_review_mode: bool,
review_thread_history: &mut ConversationHistory,
review_thread_history: &mut ContextManager,
sess: &Session,
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();

View File

@@ -1,275 +0,0 @@
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
use crate::AuthManager;
use crate::ModelProviderInfo;
use crate::client::ModelClient;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::config::Config;
use crate::protocol::SandboxPolicy;
use askama::Template;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::SandboxCommandAssessment;
use futures::StreamExt;
use serde_json::json;
use tokio::time::timeout;
use tracing::warn;
const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5);
const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[
"data_deletion",
"data_exfiltration",
"privilege_escalation",
"system_modification",
"network_access",
"resource_exhaustion",
"compliance",
];
#[derive(Template)]
#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
struct SandboxAssessmentPromptTemplate<'a> {
platform: &'a str,
sandbox_policy: &'a str,
filesystem_roots: Option<&'a str>,
working_directory: &'a str,
command_argv: &'a str,
command_joined: &'a str,
sandbox_failure_message: Option<&'a str>,
}
#[allow(clippy::too_many_arguments)]
pub(crate) async fn assess_command(
config: Arc<Config>,
provider: ModelProviderInfo,
auth_manager: Arc<AuthManager>,
parent_otel: &OtelEventManager,
conversation_id: ConversationId,
call_id: &str,
command: &[String],
sandbox_policy: &SandboxPolicy,
cwd: &Path,
failure_message: Option<&str>,
) -> Option<SandboxCommandAssessment> {
if !config.experimental_sandbox_command_assessment || command.is_empty() {
return None;
}
let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string());
let command_joined =
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
let failure = failure_message
.map(str::trim)
.filter(|msg| !msg.is_empty())
.map(str::to_string);
let cwd_str = cwd.to_string_lossy().to_string();
let sandbox_summary = summarize_sandbox_policy(sandbox_policy);
let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd);
roots.sort();
roots.dedup();
let platform = std::env::consts::OS;
let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string());
let filesystem_roots = match roots_formatted.collect::<Vec<_>>() {
collected if collected.is_empty() => None,
collected => Some(collected.join(", ")),
};
let prompt_template = SandboxAssessmentPromptTemplate {
platform,
sandbox_policy: sandbox_summary.as_str(),
filesystem_roots: filesystem_roots.as_deref(),
working_directory: cwd_str.as_str(),
command_argv: command_json.as_str(),
command_joined: command_joined.as_str(),
sandbox_failure_message: failure.as_deref(),
};
let rendered_prompt = match prompt_template.render() {
Ok(rendered) => rendered,
Err(err) => {
warn!("failed to render sandbox assessment prompt: {err}");
return None;
}
};
let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") {
Some(split) => split,
None => {
warn!("rendered sandbox assessment prompt missing separator");
return None;
}
};
let system_prompt = system_prompt_section
.strip_prefix("System Prompt:\n")
.unwrap_or(system_prompt_section)
.trim()
.to_string();
let user_prompt = user_prompt_section
.strip_prefix("User Prompt:\n")
.unwrap_or(user_prompt_section)
.trim()
.to_string();
let prompt = Prompt {
input: vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText { text: user_prompt }],
}],
tools: Vec::new(),
parallel_tool_calls: false,
base_instructions_override: Some(system_prompt),
output_schema: Some(sandbox_assessment_schema()),
};
let child_otel =
parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str());
let client = ModelClient::new(
Arc::clone(&config),
Some(auth_manager),
child_otel,
provider,
config.model_reasoning_effort,
config.model_reasoning_summary,
conversation_id,
);
let start = Instant::now();
let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move {
let mut stream = client.stream(&prompt).await?;
let mut last_json: Option<String> = None;
while let Some(event) = stream.next().await {
match event {
Ok(ResponseEvent::OutputItemDone(item)) => {
if let Some(text) = response_item_text(&item) {
last_json = Some(text);
}
}
Ok(ResponseEvent::RateLimits(_)) => {}
Ok(ResponseEvent::Completed { .. }) => break,
Ok(_) => continue,
Err(err) => return Err(err),
}
}
Ok(last_json)
})
.await;
let duration = start.elapsed();
parent_otel.sandbox_assessment_latency(call_id, duration);
match assessment_result {
Ok(Ok(Some(raw))) => match serde_json::from_str::<SandboxCommandAssessment>(raw.trim()) {
Ok(assessment) => {
parent_otel.sandbox_assessment(
call_id,
"success",
Some(assessment.risk_level),
&assessment.risk_categories,
duration,
);
return Some(assessment);
}
Err(err) => {
warn!("failed to parse sandbox assessment JSON: {err}");
parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration);
}
},
Ok(Ok(None)) => {
warn!("sandbox assessment response did not include any message");
parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration);
}
Ok(Err(err)) => {
warn!("sandbox assessment failed: {err}");
parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration);
}
Err(_) => {
warn!("sandbox assessment timed out");
parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration);
}
}
None
}
fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String {
match policy {
SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
SandboxPolicy::ReadOnly => "read-only".to_string(),
SandboxPolicy::WorkspaceWrite { network_access, .. } => {
let network = if *network_access {
"network"
} else {
"no-network"
};
format!("workspace-write (network_access={network})")
}
}
}
fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf> {
let mut roots = vec![cwd.to_path_buf()];
if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = policy {
roots.extend(writable_roots.iter().cloned());
}
roots
}
fn sandbox_assessment_schema() -> serde_json::Value {
json!({
"type": "object",
"required": ["description", "risk_level", "risk_categories"],
"properties": {
"description": {
"type": "string",
"minLength": 1,
"maxLength": 500
},
"risk_level": {
"type": "string",
"enum": ["low", "medium", "high"]
},
"risk_categories": {
"type": "array",
"items": {
"type": "string",
"enum": SANDBOX_RISK_CATEGORY_VALUES
}
}
},
"additionalProperties": false
})
}
fn response_item_text(item: &ResponseItem) -> Option<String> {
match item {
ResponseItem::Message { content, .. } => {
let mut buffers: Vec<&str> = Vec::new();
for segment in content {
match segment {
ContentItem::InputText { text } | ContentItem::OutputText { text } => {
if !text.is_empty() {
buffers.push(text);
}
}
ContentItem::InputImage { .. } => {}
}
}
if buffers.is_empty() {
None
} else {
Some(buffers.join("\n"))
}
}
ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()),
_ => None,
}
}

View File

@@ -5,9 +5,6 @@ Build platform wrappers and produce ExecEnv for execution. Owns lowlevel
sandbox placement and transformation of portable CommandSpec into a
readytospawn environment.
*/
pub mod assessment;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;

View File

@@ -3,7 +3,7 @@
use codex_protocol::models::ResponseItem;
use crate::codex::SessionConfiguration;
use crate::conversation_history::ConversationHistory;
use crate::context_manager::ContextManager;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::TokenUsage;
use crate::protocol::TokenUsageInfo;
@@ -11,7 +11,7 @@ use crate::protocol::TokenUsageInfo;
/// Persistent, session-scoped state previously stored directly on `Session`.
pub(crate) struct SessionState {
pub(crate) session_configuration: SessionConfiguration,
pub(crate) history: ConversationHistory,
pub(crate) history: ContextManager,
pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
}
@@ -20,7 +20,7 @@ impl SessionState {
pub(crate) fn new(session_configuration: SessionConfiguration) -> Self {
Self {
session_configuration,
history: ConversationHistory::new(),
history: ContextManager::new(),
latest_rate_limits: None,
}
}
@@ -38,7 +38,7 @@ impl SessionState {
self.history.get_history()
}
pub(crate) fn clone_history(&self) -> ConversationHistory {
pub(crate) fn clone_history(&self) -> ContextManager {
self.history.clone()
}

View File

@@ -1,15 +1,11 @@
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::tools::TELEMETRY_PREVIEW_MAX_BYTES;
use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
use crate::turn_diff_tracker::TurnDiffTracker;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ShellToolCallParams;
use codex_protocol::protocol::FileChange;
use codex_utils_string::take_bytes_at_char_boundary;
use mcp_types::CallToolResult;
use std::borrow::Cow;
use std::collections::HashMap;
@@ -76,7 +72,7 @@ pub enum ToolOutput {
impl ToolOutput {
pub fn log_preview(&self) -> String {
match self {
ToolOutput::Function { content, .. } => telemetry_preview(content),
ToolOutput::Function { content, .. } => content.clone(),
ToolOutput::Mcp { result } => format!("{result:?}"),
}
}
@@ -111,46 +107,6 @@ impl ToolOutput {
}
}
fn telemetry_preview(content: &str) -> String {
let truncated_slice = take_bytes_at_char_boundary(content, TELEMETRY_PREVIEW_MAX_BYTES);
let truncated_by_bytes = truncated_slice.len() < content.len();
let mut preview = String::new();
let mut lines_iter = truncated_slice.lines();
for idx in 0..TELEMETRY_PREVIEW_MAX_LINES {
match lines_iter.next() {
Some(line) => {
if idx > 0 {
preview.push('\n');
}
preview.push_str(line);
}
None => break,
}
}
let truncated_by_lines = lines_iter.next().is_some();
if !truncated_by_bytes && !truncated_by_lines {
return content.to_string();
}
if preview.len() < truncated_slice.len()
&& truncated_slice
.as_bytes()
.get(preview.len())
.is_some_and(|byte| *byte == b'\n')
{
preview.push('\n');
}
if !preview.is_empty() && !preview.ends_with('\n') {
preview.push('\n');
}
preview.push_str(TELEMETRY_PREVIEW_TRUNCATION_NOTICE);
preview
}
#[cfg(test)]
mod tests {
use super::*;
@@ -196,38 +152,6 @@ mod tests {
other => panic!("expected FunctionCallOutput, got {other:?}"),
}
}
#[test]
fn telemetry_preview_returns_original_within_limits() {
let content = "short output";
assert_eq!(telemetry_preview(content), content);
}
#[test]
fn telemetry_preview_truncates_by_bytes() {
let content = "x".repeat(TELEMETRY_PREVIEW_MAX_BYTES + 8);
let preview = telemetry_preview(&content);
assert!(preview.contains(TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
assert!(
preview.len()
<= TELEMETRY_PREVIEW_MAX_BYTES + TELEMETRY_PREVIEW_TRUNCATION_NOTICE.len() + 1
);
}
#[test]
fn telemetry_preview_truncates_by_lines() {
let content = (0..(TELEMETRY_PREVIEW_MAX_LINES + 5))
.map(|idx| format!("line {idx}"))
.collect::<Vec<_>>()
.join("\n");
let preview = telemetry_preview(&content);
let lines: Vec<&str> = preview.lines().collect();
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
}
}
#[derive(Clone, Debug)]

View File

@@ -22,12 +22,6 @@ pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
// Telemetry preview limits: keep log events smaller than model budgets.
pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
"[... telemetry preview truncated ...]";
/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {

View File

@@ -7,11 +7,9 @@ retry without sandbox on denial (no reapproval thanks to caching).
*/
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::error::get_error_message_ui;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxManager;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
@@ -40,7 +38,6 @@ impl ToolOrchestrator {
) -> Result<Out, ToolError>
where
T: ToolRuntime<Rq, Out>,
Rq: ProvidesSandboxRetryData,
{
let otel = turn_ctx.client.get_otel_event_manager();
let otel_tn = &tool_ctx.tool_name;
@@ -59,7 +56,6 @@ impl ToolOrchestrator {
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: None,
risk: None,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
@@ -111,33 +107,12 @@ impl ToolOrchestrator {
// Ask for approval before retrying without sandbox.
if !tool.should_bypass_approval(approval_policy, already_approved) {
let mut risk = None;
if let Some(metadata) = req.sandbox_retry_data() {
let err = SandboxErr::Denied {
output: output.clone(),
};
let friendly = get_error_message_ui(&CodexErr::Sandbox(err));
let failure_summary = format!("failed in sandbox: {friendly}");
risk = tool_ctx
.session
.assess_sandbox_command(
turn_ctx,
&tool_ctx.call_id,
&metadata.command,
Some(failure_summary.as_str()),
)
.await;
}
let reason_msg = build_denial_reason_from_output(output.as_ref());
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: Some(reason_msg),
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;

View File

@@ -10,9 +10,7 @@ use crate::sandboxing::CommandSpec;
use crate::sandboxing::execute_env;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
@@ -34,12 +32,6 @@ pub struct ApplyPatchRequest {
pub codex_exe: Option<PathBuf>,
}
impl ProvidesSandboxRetryData for ApplyPatchRequest {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
None
}
}
#[derive(Default)]
pub struct ApplyPatchRuntime;
@@ -114,10 +106,9 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
let call_id = ctx.call_id.to_string();
let cwd = req.cwd.clone();
let retry_reason = ctx.retry_reason.clone();
let risk = ctx.risk.clone();
let user_explicitly_approved = req.user_explicitly_approved;
Box::pin(async move {
with_cached_approval(&session.services, key, move || async move {
with_cached_approval(&session.services, key, || async move {
if let Some(reason) = retry_reason {
session
.request_command_approval(
@@ -126,7 +117,6 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
vec!["apply_patch".to_string()],
cwd,
Some(reason),
risk,
)
.await
} else if user_explicitly_approved {

View File

@@ -12,9 +12,7 @@ use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
@@ -36,15 +34,6 @@ pub struct ShellRequest {
pub justification: Option<String>,
}
impl ProvidesSandboxRetryData for ShellRequest {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
Some(SandboxRetryData {
command: self.command.clone(),
cwd: self.cwd.clone(),
})
}
}
#[derive(Default)]
pub struct ShellRuntime;
@@ -101,14 +90,13 @@ impl Approvable<ShellRequest> for ShellRuntime {
.retry_reason
.clone()
.or_else(|| req.justification.clone());
let risk = ctx.risk.clone();
let session = ctx.session;
let turn = ctx.turn;
let call_id = ctx.call_id.to_string();
Box::pin(async move {
with_cached_approval(&session.services, key, move || async move {
with_cached_approval(&session.services, key, || async move {
session
.request_command_approval(turn, call_id, command, cwd, reason, risk)
.request_command_approval(turn, call_id, command, cwd, reason)
.await
})
.await

View File

@@ -9,9 +9,7 @@ use crate::error::SandboxErr;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
@@ -33,15 +31,6 @@ pub struct UnifiedExecRequest {
pub env: HashMap<String, String>,
}
impl ProvidesSandboxRetryData for UnifiedExecRequest {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
Some(SandboxRetryData {
command: self.command.clone(),
cwd: self.cwd.clone(),
})
}
}
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
pub struct UnifiedExecApprovalKey {
pub command: Vec<String>,
@@ -96,11 +85,10 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
let command = req.command.clone();
let cwd = req.cwd.clone();
let reason = ctx.retry_reason.clone();
let risk = ctx.risk.clone();
Box::pin(async move {
with_cached_approval(&session.services, key, || async move {
session
.request_command_approval(turn, call_id, command, cwd, reason, risk)
.request_command_approval(turn, call_id, command, cwd, reason)
.await
})
.await

View File

@@ -7,7 +7,6 @@
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::error::CodexErr;
use crate::protocol::SandboxCommandAssessment;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::SandboxManager;
@@ -19,7 +18,6 @@ use std::collections::HashMap;
use std::fmt::Debug;
use std::hash::Hash;
use std::path::Path;
use std::path::PathBuf;
use futures::Future;
use futures::future::BoxFuture;
@@ -83,7 +81,6 @@ pub(crate) struct ApprovalCtx<'a> {
pub turn: &'a TurnContext,
pub call_id: &'a str,
pub retry_reason: Option<String>,
pub risk: Option<SandboxCommandAssessment>,
}
pub(crate) trait Approvable<Req> {
@@ -159,17 +156,6 @@ pub(crate) struct ToolCtx<'a> {
pub tool_name: String,
}
/// Captures the command metadata needed to re-run a tool request without sandboxing.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SandboxRetryData {
pub command: Vec<String>,
pub cwd: PathBuf,
}
pub(crate) trait ProvidesSandboxRetryData {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData>;
}
#[derive(Debug)]
pub(crate) enum ToolError {
Rejected(String),

View File

@@ -1,27 +0,0 @@
You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys:
- description (concise summary, at most two sentences)
- risk_level ("low", "medium", or "high")
- risk_categories (optional array of zero or more category strings)
Risk level examples:
- low: read-only inspections, listing files, printing configuration
- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources
- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance.
Use multiple categories when appropriate.
If information is insufficient, choose the most cautious risk level supported by the evidence.
Respond with JSON only, without markdown code fences or extra commentary.
---
Command metadata:
Platform: {{ platform }}
Sandbox policy: {{ sandbox_policy }}
{% if let Some(roots) = filesystem_roots %}
Filesystem roots: {{ roots }}
{% endif %}
Working directory: {{ working_directory }}
Command argv: {{ command_argv }}
Command (joined): {{ command_joined }}
{% if let Some(message) = sandbox_failure_message %}
Sandbox failure message: {{ message }}
{% endif %}

View File

@@ -247,11 +247,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
session_configured,
..
} = conversation_manager
.resume_conversation_from_rollout(
config.clone(),
session_path.clone(),
auth_manager.clone(),
)
.resume_conversation_from_rollout(config, session_path.clone(), auth_manager)
.await
.expect("resume conversation");
@@ -264,23 +260,6 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
let expected_initial_json = json!([]);
assert_eq!(initial_json, expected_initial_json);
let NewConversation {
conversation: codex_again,
session_configured: session_configured_again,
..
} = conversation_manager
.resume_conversation_from_rollout(
config.clone(),
session_path.clone(),
auth_manager.clone(),
)
.await
.expect("resume existing conversation");
assert!(Arc::ptr_eq(&codex, &codex_again));
let session_configured_json = serde_json::to_value(&session_configured).unwrap();
let session_configured_again_json = serde_json::to_value(&session_configured_again).unwrap();
assert_eq!(session_configured_json, session_configured_again_json);
// 2) Submit new input; the request body must include the prior item followed by the new user input.
codex
.submit(Op::UserInput {

View File

@@ -179,7 +179,6 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
include_view_image_tool: None,
show_raw_agent_reasoning: oss.then_some(true),
tools_web_search_request: None,
experimental_sandbox_command_assessment: None,
additional_writable_roots: Vec::new(),
};
// Parse `-c` overrides.

View File

@@ -158,7 +158,6 @@ impl CodexToolCallParam {
include_view_image_tool: None,
show_raw_agent_reasoning: None,
tools_web_search_request: None,
experimental_sandbox_command_assessment: None,
additional_writable_roots: Vec::new(),
};

View File

@@ -178,7 +178,6 @@ async fn run_codex_tool_session_inner(
cwd,
call_id,
reason: _,
risk,
parsed_cmd,
}) => {
handle_exec_approval_request(
@@ -191,7 +190,6 @@ async fn run_codex_tool_session_inner(
event.id.clone(),
call_id,
parsed_cmd,
risk,
)
.await;
continue;

View File

@@ -4,7 +4,6 @@ use std::sync::Arc;
use codex_core::CodexConversation;
use codex_core::protocol::Op;
use codex_core::protocol::ReviewDecision;
use codex_core::protocol::SandboxCommandAssessment;
use codex_protocol::parse_command::ParsedCommand;
use mcp_types::ElicitRequest;
use mcp_types::ElicitRequestParamsRequestedSchema;
@@ -38,8 +37,6 @@ pub struct ExecApprovalElicitRequestParams {
pub codex_command: Vec<String>,
pub codex_cwd: PathBuf,
pub codex_parsed_cmd: Vec<ParsedCommand>,
#[serde(skip_serializing_if = "Option::is_none")]
pub codex_risk: Option<SandboxCommandAssessment>,
}
// TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See:
@@ -62,7 +59,6 @@ pub(crate) async fn handle_exec_approval_request(
event_id: String,
call_id: String,
codex_parsed_cmd: Vec<ParsedCommand>,
codex_risk: Option<SandboxCommandAssessment>,
) {
let escaped_command =
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
@@ -85,7 +81,6 @@ pub(crate) async fn handle_exec_approval_request(
codex_command: command,
codex_cwd: cwd,
codex_parsed_cmd,
codex_risk,
};
let params_json = match serde_json::to_value(&params) {
Ok(value) => value,

View File

@@ -196,7 +196,6 @@ fn create_expected_elicitation_request(
codex_cwd: workdir.to_path_buf(),
codex_call_id: "call1234".to_string(),
codex_parsed_cmd,
codex_risk: None,
})?),
})
}

View File

@@ -8,8 +8,6 @@ use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SandboxRiskCategory;
use codex_protocol::protocol::SandboxRiskLevel;
use codex_protocol::user_input::UserInput;
use eventsource_stream::Event as StreamEvent;
use eventsource_stream::EventStreamError as StreamError;
@@ -368,63 +366,6 @@ impl OtelEventManager {
);
}
pub fn sandbox_assessment(
&self,
call_id: &str,
status: &str,
risk_level: Option<SandboxRiskLevel>,
risk_categories: &[SandboxRiskCategory],
duration: Duration,
) {
let level = risk_level.map(|level| level.as_str());
let categories = if risk_categories.is_empty() {
String::new()
} else {
risk_categories
.iter()
.map(SandboxRiskCategory::as_str)
.collect::<Vec<_>>()
.join(", ")
};
tracing::event!(
tracing::Level::INFO,
event.name = "codex.sandbox_assessment",
event.timestamp = %timestamp(),
conversation.id = %self.metadata.conversation_id,
app.version = %self.metadata.app_version,
auth_mode = self.metadata.auth_mode,
user.account_id = self.metadata.account_id,
user.email = self.metadata.account_email,
terminal.type = %self.metadata.terminal_type,
model = %self.metadata.model,
slug = %self.metadata.slug,
call_id = %call_id,
status = %status,
risk_level = level,
risk_categories = categories,
duration_ms = %duration.as_millis(),
);
}
pub fn sandbox_assessment_latency(&self, call_id: &str, duration: Duration) {
tracing::event!(
tracing::Level::INFO,
event.name = "codex.sandbox_assessment_latency",
event.timestamp = %timestamp(),
conversation.id = %self.metadata.conversation_id,
app.version = %self.metadata.app_version,
auth_mode = self.metadata.auth_mode,
user.account_id = self.metadata.account_id,
user.email = self.metadata.account_email,
terminal.type = %self.metadata.terminal_type,
model = %self.metadata.model,
slug = %self.metadata.slug,
call_id = %call_id,
duration_ms = %duration.as_millis(),
);
}
pub async fn log_tool_result<F, Fut, E>(
&self,
tool_name: &str,

View File

@@ -1,91 +0,0 @@
use std::collections::HashMap;
use std::path::PathBuf;
use crate::parse_command::ParsedCommand;
use crate::protocol::FileChange;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use ts_rs::TS;
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum SandboxRiskLevel {
Low,
Medium,
High,
}
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum SandboxRiskCategory {
DataDeletion,
DataExfiltration,
PrivilegeEscalation,
SystemModification,
NetworkAccess,
ResourceExhaustion,
Compliance,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
pub struct SandboxCommandAssessment {
pub description: String,
pub risk_level: SandboxRiskLevel,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub risk_categories: Vec<SandboxRiskCategory>,
}
impl SandboxRiskLevel {
pub fn as_str(&self) -> &'static str {
match self {
Self::Low => "low",
Self::Medium => "medium",
Self::High => "high",
}
}
}
impl SandboxRiskCategory {
pub fn as_str(&self) -> &'static str {
match self {
Self::DataDeletion => "data_deletion",
Self::DataExfiltration => "data_exfiltration",
Self::PrivilegeEscalation => "privilege_escalation",
Self::SystemModification => "system_modification",
Self::NetworkAccess => "network_access",
Self::ResourceExhaustion => "resource_exhaustion",
Self::Compliance => "compliance",
}
}
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ExecApprovalRequestEvent {
/// Identifier for the associated exec call, if available.
pub call_id: String,
/// The command to be executed.
pub command: Vec<String>,
/// The command's working directory.
pub cwd: PathBuf,
/// Optional human-readable reason for the approval (e.g. retry without sandbox).
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
/// Optional model-provided risk assessment describing the blocked command.
#[serde(skip_serializing_if = "Option::is_none")]
pub risk: Option<SandboxCommandAssessment>,
pub parsed_cmd: Vec<ParsedCommand>,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ApplyPatchApprovalRequestEvent {
/// Responses API call id for the associated patch apply call, if available.
pub call_id: String,
pub changes: HashMap<PathBuf, FileChange>,
/// Optional explanatory reason (e.g. request for extra write access).
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
/// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
#[serde(skip_serializing_if = "Option::is_none")]
pub grant_root: Option<PathBuf>,
}

View File

@@ -1,7 +1,6 @@
pub mod account;
mod conversation_id;
pub use conversation_id::ConversationId;
pub mod approvals;
pub mod config_types;
pub mod custom_prompts;
pub mod items;

View File

@@ -34,12 +34,6 @@ use serde_with::serde_as;
use strum_macros::Display;
use ts_rs::TS;
pub use crate::approvals::ApplyPatchApprovalRequestEvent;
pub use crate::approvals::ExecApprovalRequestEvent;
pub use crate::approvals::SandboxCommandAssessment;
pub use crate::approvals::SandboxRiskCategory;
pub use crate::approvals::SandboxRiskLevel;
/// Open/close tags for special user-input blocks. Used across crates to avoid
/// duplicated hardcoded strings.
pub const USER_INSTRUCTIONS_OPEN_TAG: &str = "<user_instructions>";
@@ -1132,6 +1126,33 @@ pub struct ExecCommandOutputDeltaEvent {
pub chunk: Vec<u8>,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ExecApprovalRequestEvent {
/// Identifier for the associated exec call, if available.
pub call_id: String,
/// The command to be executed.
pub command: Vec<String>,
/// The command's working directory.
pub cwd: PathBuf,
/// Optional human-readable reason for the approval (e.g. retry without sandbox).
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
pub parsed_cmd: Vec<ParsedCommand>,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ApplyPatchApprovalRequestEvent {
/// Responses API call id for the associated patch apply call, if available.
pub call_id: String,
pub changes: HashMap<PathBuf, FileChange>,
/// Optional explanatory reason (e.g. request for extra write access).
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
/// When set, the agent is asking the user to allow writes under this root for the remainder of the session.
#[serde(skip_serializing_if = "Option::is_none")]
pub grant_root: Option<PathBuf>,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct BackgroundEventEvent {
pub message: String,

View File

@@ -19,9 +19,6 @@ use crate::render::renderable::Renderable;
use codex_core::protocol::FileChange;
use codex_core::protocol::Op;
use codex_core::protocol::ReviewDecision;
use codex_core::protocol::SandboxCommandAssessment;
use codex_core::protocol::SandboxRiskCategory;
use codex_core::protocol::SandboxRiskLevel;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
use crossterm::event::KeyEventKind;
@@ -41,7 +38,6 @@ pub(crate) enum ApprovalRequest {
id: String,
command: Vec<String>,
reason: Option<String>,
risk: Option<SandboxCommandAssessment>,
},
ApplyPatch {
id: String,
@@ -289,17 +285,12 @@ impl From<ApprovalRequest> for ApprovalRequestState {
id,
command,
reason,
risk,
} => {
let reason = reason.filter(|item| !item.is_empty());
let has_reason = reason.is_some();
let mut header: Vec<Line<'static>> = Vec::new();
if let Some(reason) = reason {
if let Some(reason) = reason
&& !reason.is_empty()
{
header.push(Line::from(vec!["Reason: ".into(), reason.italic()]));
}
if let Some(risk) = risk.as_ref() {
header.extend(render_risk_lines(risk));
} else if has_reason {
header.push(Line::from(""));
}
let full_cmd = strip_bash_lc_and_escape(&command);
@@ -339,52 +330,6 @@ impl From<ApprovalRequest> for ApprovalRequestState {
}
}
fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec<Line<'static>> {
let level_span = match risk.risk_level {
SandboxRiskLevel::Low => "LOW".green().bold(),
SandboxRiskLevel::Medium => "MEDIUM".cyan().bold(),
SandboxRiskLevel::High => "HIGH".red().bold(),
};
let mut lines = Vec::new();
let description = risk.description.trim();
if !description.is_empty() {
lines.push(Line::from(vec![
"Summary: ".into(),
description.to_string().into(),
]));
}
let mut spans: Vec<Span<'static>> = vec!["Risk: ".into(), level_span];
if !risk.risk_categories.is_empty() {
spans.push(" (".into());
for (idx, category) in risk.risk_categories.iter().enumerate() {
if idx > 0 {
spans.push(", ".into());
}
spans.push(risk_category_label(*category).into());
}
spans.push(")".into());
}
lines.push(Line::from(spans));
lines.push(Line::from(""));
lines
}
fn risk_category_label(category: SandboxRiskCategory) -> &'static str {
match category {
SandboxRiskCategory::DataDeletion => "data deletion",
SandboxRiskCategory::DataExfiltration => "data exfiltration",
SandboxRiskCategory::PrivilegeEscalation => "privilege escalation",
SandboxRiskCategory::SystemModification => "system modification",
SandboxRiskCategory::NetworkAccess => "network access",
SandboxRiskCategory::ResourceExhaustion => "resource exhaustion",
SandboxRiskCategory::Compliance => "compliance",
}
}
#[derive(Clone)]
enum ApprovalVariant {
Exec { id: String, command: Vec<String> },
@@ -459,7 +404,6 @@ mod tests {
id: "test".to_string(),
command: vec!["echo".to_string(), "hi".to_string()],
reason: Some("reason".to_string()),
risk: None,
}
}
@@ -501,7 +445,6 @@ mod tests {
id: "test".into(),
command,
reason: None,
risk: None,
};
let view = ApprovalOverlay::new(exec_request, tx);

View File

@@ -557,7 +557,6 @@ mod tests {
id: "1".to_string(),
command: vec!["echo".into(), "ok".into()],
reason: None,
risk: None,
}
}

View File

@@ -777,7 +777,6 @@ impl ChatWidget {
id,
command: ev.command,
reason: ev.reason,
risk: ev.risk,
};
self.bottom_pane.push_approval_request(request);
self.request_redraw();

View File

@@ -402,7 +402,6 @@ fn exec_approval_emits_proposed_command_and_decision_history() {
reason: Some(
"this is a test reason such as one that would be produced by the model".into(),
),
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -445,7 +444,6 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
reason: Some(
"this is a test reason such as one that would be produced by the model".into(),
),
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -494,7 +492,6 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
command: vec!["bash".into(), "-lc".into(), long],
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
reason: None,
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -1424,7 +1421,6 @@ fn approval_modal_exec_snapshot() {
reason: Some(
"this is a test reason such as one that would be produced by the model".into(),
),
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -1469,7 +1465,6 @@ fn approval_modal_exec_without_reason_snapshot() {
command: vec!["bash".into(), "-lc".into(), "echo hello world".into()],
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
reason: None,
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {
@@ -1680,7 +1675,6 @@ fn status_widget_and_approval_modal_snapshot() {
reason: Some(
"this is a test reason such as one that would be produced by the model".into(),
),
risk: None,
parsed_cmd: vec![],
};
chat.handle_codex_event(Event {

View File

@@ -148,7 +148,6 @@ pub async fn run_main(
include_view_image_tool: None,
show_raw_agent_reasoning: cli.oss.then_some(true),
tools_web_search_request: cli.web_search.then_some(true),
experimental_sandbox_command_assessment: None,
additional_writable_roots: additional_dirs,
};
let raw_overrides = cli.config_overrides.raw_overrides.clone();