Compare commits

...

3 Commits

Author SHA1 Message Date
rhan-oai
0a500b4e20 [codex-analytics] guardian review truncation 2026-04-16 10:30:05 -07:00
rhan-oai
37c2befb6e [codex-analytics] guardian review analytics events emission 2026-04-16 10:03:51 -07:00
rhan-oai
d59234e830 [codex-analytics] guardian review analytics schema polishing 2026-04-15 15:27:01 -07:00
12 changed files with 920 additions and 186 deletions

View File

@@ -8,6 +8,12 @@ use crate::events::CodexPluginEventRequest;
use crate::events::CodexPluginUsedEventRequest;
use crate::events::CodexRuntimeMetadata;
use crate::events::CodexTurnEventRequest;
use crate::events::GuardianApprovalRequestSource;
use crate::events::GuardianReviewDecision;
use crate::events::GuardianReviewEventParams;
use crate::events::GuardianReviewFailureReason;
use crate::events::GuardianReviewTerminalStatus;
use crate::events::GuardianReviewedAction;
use crate::events::ThreadInitializedEvent;
use crate::events::ThreadInitializedEventParams;
use crate::events::TrackEventRequest;
@@ -78,6 +84,7 @@ use codex_plugin::AppConnectorId;
use codex_plugin::PluginCapabilitySummary;
use codex_plugin::PluginId;
use codex_plugin::PluginTelemetryMetadata;
use codex_protocol::approvals::NetworkApprovalProtocol;
use codex_protocol::config_types::ApprovalsReviewer;
use codex_protocol::config_types::ModeKind;
use codex_protocol::protocol::AskForApproval;
@@ -1043,6 +1050,135 @@ async fn compaction_event_ingests_custom_fact() {
assert_eq!(payload[0]["event_params"]["status"], "failed");
}
/// Ingests a guardian review custom fact with `target_item_id: None` after an
/// initialize fact and a thread-start response, then checks the single
/// serialized `codex_guardian_review` event: identifiers, the reduced
/// `network_access` action shape, absence of `retry_reason`/`rationale`, and the
/// timeout status fields.
#[tokio::test]
async fn guardian_review_event_ingests_custom_fact_with_optional_target_item() {
    let mut reducer = AnalyticsReducer::default();
    let mut events = Vec::new();

    // Register connection 7 so later facts can be attributed to a client.
    let initialize_fact = AnalyticsFact::Initialize {
        connection_id: 7,
        params: InitializeParams {
            client_info: ClientInfo {
                name: "codex-tui".to_string(),
                title: None,
                version: "1.0.0".to_string(),
            },
            capabilities: Some(InitializeCapabilities {
                experimental_api: false,
                opt_out_notification_methods: None,
            }),
        },
        product_client_id: DEFAULT_ORIGINATOR.to_string(),
        runtime: sample_runtime_metadata(),
        rpc_transport: AppServerRpcTransport::Websocket,
    };
    reducer.ingest(initialize_fact, &mut events).await;

    // Start the thread the guardian review refers to.
    let thread_start_fact = AnalyticsFact::Response {
        connection_id: 7,
        response: Box::new(sample_thread_start_response(
            "thread-guardian",
            /*ephemeral*/ false,
            "gpt-5",
        )),
    };
    reducer.ingest(thread_start_fact, &mut events).await;

    // Drop everything emitted so far; only the guardian review event is under test.
    events.clear();

    let review_params = GuardianReviewEventParams {
        thread_id: "thread-guardian".to_string(),
        turn_id: "turn-guardian".to_string(),
        review_id: "review-guardian".to_string(),
        target_item_id: None,
        approval_request_source: GuardianApprovalRequestSource::DelegatedSubagent,
        reviewed_action: GuardianReviewedAction::NetworkAccess {
            protocol: NetworkApprovalProtocol::Https,
            port: 443,
        },
        reviewed_action_truncated: false,
        decision: GuardianReviewDecision::Denied,
        terminal_status: GuardianReviewTerminalStatus::TimedOut,
        failure_reason: Some(GuardianReviewFailureReason::Timeout),
        risk_level: None,
        user_authorization: None,
        outcome: None,
        guardian_thread_id: None,
        guardian_session_kind: None,
        guardian_model: None,
        guardian_reasoning_effort: None,
        had_prior_review_context: None,
        review_timeout_ms: 90_000,
        tool_call_count: None,
        time_to_first_token_ms: None,
        completion_latency_ms: Some(90_000),
        started_at: 100,
        completed_at: Some(190),
        input_tokens: None,
        cached_input_tokens: None,
        output_tokens: None,
        reasoning_output_tokens: None,
        total_tokens: None,
    };
    let review_fact =
        AnalyticsFact::Custom(CustomAnalyticsFact::GuardianReview(Box::new(review_params)));
    reducer.ingest(review_fact, &mut events).await;

    let payload = serde_json::to_value(&events).expect("serialize events");
    assert_eq!(payload.as_array().expect("events array").len(), 1);

    let event = &payload[0];
    let params = &event["event_params"];
    let action = &params["reviewed_action"];

    assert_eq!(event["event_type"], "codex_guardian_review");
    assert_eq!(params["thread_id"], "thread-guardian");
    assert_eq!(params["turn_id"], "turn-guardian");
    assert_eq!(params["review_id"], "review-guardian");
    assert_eq!(params["target_item_id"], json!(null));
    assert_eq!(params["approval_request_source"], "delegated_subagent");
    assert_eq!(
        params["app_server_client"]["product_client_id"],
        DEFAULT_ORIGINATOR
    );
    assert_eq!(params["runtime"]["codex_rs_version"], "0.1.0");

    // The reviewed action carries only type, protocol and port.
    assert_eq!(action["type"], "network_access");
    assert_eq!(action["protocol"], "https");
    assert_eq!(action["port"], 443);
    assert!(params.get("retry_reason").is_none());
    assert!(params.get("rationale").is_none());
    assert!(action.get("target").is_none());
    assert!(action.get("host").is_none());

    assert_eq!(params["terminal_status"], "timed_out");
    assert_eq!(params["failure_reason"], "timeout");
    assert_eq!(params["review_timeout_ms"], 90_000);
}
#[test]
fn subagent_thread_started_review_serializes_expected_shape() {
let event = TrackEventRequest::ThreadInitialized(subagent_thread_started_event_request(

View File

@@ -1,5 +1,11 @@
use crate::facts::AppInvocation;
use crate::facts::CodexCompactionEvent;
use crate::facts::CompactionImplementation;
use crate::facts::CompactionPhase;
use crate::facts::CompactionReason;
use crate::facts::CompactionStatus;
use crate::facts::CompactionStrategy;
use crate::facts::CompactionTrigger;
use crate::facts::InvocationType;
use crate::facts::PluginState;
use crate::facts::SubAgentThreadStartedInput;
@@ -15,6 +21,10 @@ use codex_plugin::PluginTelemetryMetadata;
use codex_protocol::approvals::NetworkApprovalProtocol;
use codex_protocol::models::PermissionProfile;
use codex_protocol::models::SandboxPermissions;
use codex_protocol::protocol::GuardianAssessmentOutcome;
use codex_protocol::protocol::GuardianCommandSource;
use codex_protocol::protocol::GuardianRiskLevel;
use codex_protocol::protocol::GuardianUserAuthorization;
use codex_protocol::protocol::SubAgentSource;
use serde::Serialize;
@@ -146,31 +156,6 @@ pub enum GuardianReviewSessionKind {
EphemeralForked,
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum GuardianReviewRiskLevel {
Low,
Medium,
High,
Critical,
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum GuardianReviewUserAuthorization {
Unknown,
Low,
Medium,
High,
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "lowercase")]
pub enum GuardianReviewOutcome {
Allow,
Deny,
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum GuardianApprovalRequestSource {
@@ -185,36 +170,21 @@ pub enum GuardianApprovalRequestSource {
#[serde(tag = "type", rename_all = "snake_case")]
pub enum GuardianReviewedAction {
Shell {
command: Vec<String>,
command_display: String,
cwd: String,
sandbox_permissions: SandboxPermissions,
additional_permissions: Option<PermissionProfile>,
justification: Option<String>,
},
UnifiedExec {
command: Vec<String>,
command_display: String,
cwd: String,
sandbox_permissions: SandboxPermissions,
additional_permissions: Option<PermissionProfile>,
justification: Option<String>,
tty: bool,
},
Execve {
source: GuardianCommandSource,
program: String,
argv: Vec<String>,
cwd: String,
additional_permissions: Option<PermissionProfile>,
},
ApplyPatch {
cwd: String,
files: Vec<String>,
},
ApplyPatch {},
NetworkAccess {
target: String,
host: String,
protocol: NetworkApprovalProtocol,
port: u16,
},
@@ -227,37 +197,28 @@ pub enum GuardianReviewedAction {
},
}
#[derive(Clone, Copy, Debug, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum GuardianCommandSource {
Shell,
UnifiedExec,
}
#[derive(Clone, Serialize)]
pub struct GuardianReviewEventParams {
pub thread_id: String,
pub turn_id: String,
pub review_id: String,
pub target_item_id: String,
pub retry_reason: Option<String>,
pub target_item_id: Option<String>,
pub approval_request_source: GuardianApprovalRequestSource,
pub reviewed_action: GuardianReviewedAction,
pub reviewed_action_truncated: bool,
pub decision: GuardianReviewDecision,
pub terminal_status: GuardianReviewTerminalStatus,
pub failure_reason: Option<GuardianReviewFailureReason>,
pub risk_level: Option<GuardianReviewRiskLevel>,
pub user_authorization: Option<GuardianReviewUserAuthorization>,
pub outcome: Option<GuardianReviewOutcome>,
pub rationale: Option<String>,
pub risk_level: Option<GuardianRiskLevel>,
pub user_authorization: Option<GuardianUserAuthorization>,
pub outcome: Option<GuardianAssessmentOutcome>,
pub guardian_thread_id: Option<String>,
pub guardian_session_kind: Option<GuardianReviewSessionKind>,
pub guardian_model: Option<String>,
pub guardian_reasoning_effort: Option<String>,
pub had_prior_review_context: Option<bool>,
pub review_timeout_ms: u64,
pub tool_call_count: u64,
pub tool_call_count: Option<u64>,
pub time_to_first_token_ms: Option<u64>,
pub completion_latency_ms: Option<u64>,
pub started_at: u64,
@@ -309,12 +270,12 @@ pub(crate) struct CodexCompactionEventParams {
pub(crate) thread_source: Option<&'static str>,
pub(crate) subagent_source: Option<String>,
pub(crate) parent_thread_id: Option<String>,
pub(crate) trigger: crate::facts::CompactionTrigger,
pub(crate) reason: crate::facts::CompactionReason,
pub(crate) implementation: crate::facts::CompactionImplementation,
pub(crate) phase: crate::facts::CompactionPhase,
pub(crate) strategy: crate::facts::CompactionStrategy,
pub(crate) status: crate::facts::CompactionStatus,
pub(crate) trigger: CompactionTrigger,
pub(crate) reason: CompactionReason,
pub(crate) implementation: CompactionImplementation,
pub(crate) phase: CompactionPhase,
pub(crate) strategy: CompactionStrategy,
pub(crate) status: CompactionStatus,
pub(crate) error: Option<String>,
pub(crate) active_context_tokens_before: i64,
pub(crate) active_context_tokens_after: i64,

View File

@@ -9,15 +9,11 @@ use std::time::UNIX_EPOCH;
pub use client::AnalyticsEventsClient;
pub use events::AppServerRpcTransport;
pub use events::GuardianApprovalRequestSource;
pub use events::GuardianCommandSource;
pub use events::GuardianReviewDecision;
pub use events::GuardianReviewEventParams;
pub use events::GuardianReviewFailureReason;
pub use events::GuardianReviewOutcome;
pub use events::GuardianReviewRiskLevel;
pub use events::GuardianReviewSessionKind;
pub use events::GuardianReviewTerminalStatus;
pub use events::GuardianReviewUserAuthorization;
pub use events::GuardianReviewedAction;
pub use facts::AnalyticsJsonRpcError;
pub use facts::AppInvocation;

View File

@@ -3,6 +3,7 @@ use std::sync::Arc;
use async_channel::Receiver;
use async_channel::Sender;
use codex_analytics::GuardianApprovalRequestSource;
use codex_async_utils::OrCancelExt;
use codex_exec_server::EnvironmentManager;
use codex_protocol::protocol::ApplyPatchApprovalRequestEvent;
@@ -753,6 +754,7 @@ fn spawn_guardian_review(
review_id,
request,
retry_reason,
GuardianApprovalRequestSource::DelegatedSubagent,
cancel_token,
));
let _ = tx.send(decision);

View File

@@ -167,32 +167,49 @@ fn guardian_command_source_tool_name(source: GuardianCommandSource) -> &'static
}
}
fn truncate_guardian_action_value(value: Value) -> Value {
fn truncate_guardian_action_value(value: Value) -> (Value, bool) {
match value {
Value::String(text) => Value::String(guardian_truncate_text(
&text,
GUARDIAN_MAX_ACTION_STRING_TOKENS,
)),
Value::Array(values) => Value::Array(
values
Value::String(text) => {
let (text, truncated) =
guardian_truncate_text(&text, GUARDIAN_MAX_ACTION_STRING_TOKENS);
(Value::String(text), truncated)
}
Value::Array(values) => {
let mut truncated = false;
let values = values
.into_iter()
.map(truncate_guardian_action_value)
.collect::<Vec<_>>(),
),
.map(|value| {
let (value, value_truncated) = truncate_guardian_action_value(value);
truncated |= value_truncated;
value
})
.collect::<Vec<_>>();
(Value::Array(values), truncated)
}
Value::Object(values) => {
let mut entries = values.into_iter().collect::<Vec<_>>();
entries.sort_by(|(left, _), (right, _)| left.cmp(right));
Value::Object(
entries
.into_iter()
.map(|(key, value)| (key, truncate_guardian_action_value(value)))
.collect(),
)
let mut truncated = false;
let values = entries
.into_iter()
.map(|(key, value)| {
let (value, value_truncated) = truncate_guardian_action_value(value);
truncated |= value_truncated;
(key, value)
})
.collect();
(Value::Object(values), truncated)
}
other => other,
other => (other, false),
}
}
/// Pretty-printed guardian action JSON paired with a flag that records whether
/// any value inside the action was truncated while it was being formatted.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct FormattedGuardianAction {
/// Pretty-printed JSON text of the action after truncation has been applied.
pub(crate) text: String,
/// `true` when truncation shortened at least one value in the action.
pub(crate) truncated: bool,
}
pub(crate) fn guardian_approval_request_to_json(
action: &GuardianApprovalRequest,
) -> serde_json::Result<Value> {
@@ -382,10 +399,13 @@ pub(crate) fn guardian_request_turn_id<'a>(
}
}
pub(crate) fn format_guardian_action_pretty(
pub(crate) fn format_guardian_action_pretty_with_truncation(
action: &GuardianApprovalRequest,
) -> serde_json::Result<String> {
let mut value = guardian_approval_request_to_json(action)?;
value = truncate_guardian_action_value(value);
serde_json::to_string_pretty(&value)
) -> serde_json::Result<FormattedGuardianAction> {
let value = guardian_approval_request_to_json(action)?;
let (value, truncated) = truncate_guardian_action_value(value);
Ok(FormattedGuardianAction {
text: serde_json::to_string_pretty(&value)?,
truncated,
})
}

View File

@@ -19,6 +19,7 @@ mod review_session;
use std::time::Duration;
use codex_protocol::protocol::GuardianAssessmentDecisionSource;
use codex_protocol::protocol::GuardianAssessmentOutcome;
use serde::Deserialize;
use serde::Serialize;
@@ -45,14 +46,6 @@ const GUARDIAN_MAX_ACTION_STRING_TOKENS: usize = 16_000;
const GUARDIAN_RECENT_ENTRY_LIMIT: usize = 40;
const TRUNCATION_TAG: &str = "truncated";
/// Final allow/deny outcome returned by the guardian reviewer.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub(crate) enum GuardianAssessmentOutcome {
Allow,
Deny,
}
/// Structured output contract that the guardian reviewer must satisfy.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub(crate) struct GuardianAssessment {
@@ -69,7 +62,7 @@ pub(crate) struct GuardianRejection {
}
#[cfg(test)]
use approval_request::format_guardian_action_pretty;
use approval_request::format_guardian_action_pretty_with_truncation;
#[cfg(test)]
use approval_request::guardian_assessment_action;
#[cfg(test)]
@@ -101,6 +94,8 @@ use prompt::render_guardian_transcript_entries;
#[cfg(test)]
use review::GuardianReviewOutcome;
#[cfg(test)]
use review::GuardianReviewSessionResult;
#[cfg(test)]
use review::run_guardian_review_session as run_guardian_review_session_for_test;
#[cfg(test)]
use review_session::build_guardian_review_session_config as build_guardian_review_session_config_for_test;

View File

@@ -19,7 +19,7 @@ use super::GUARDIAN_RECENT_ENTRY_LIMIT;
use super::GuardianApprovalRequest;
use super::GuardianAssessment;
use super::TRUNCATION_TAG;
use super::approval_request::format_guardian_action_pretty;
use super::approval_request::format_guardian_action_pretty_with_truncation;
/// Transcript entry retained for guardian review after filtering.
#[derive(Debug, PartialEq, Eq)]
@@ -56,6 +56,7 @@ impl GuardianTranscriptEntryKind {
pub(crate) struct GuardianPromptItems {
pub(crate) items: Vec<UserInput>,
pub(crate) transcript_cursor: GuardianTranscriptCursor,
pub(crate) reviewed_action_truncated: bool,
}
/// Points to the end of the transcript that the guardian has already reviewed.
@@ -91,7 +92,7 @@ pub(crate) async fn build_guardian_prompt_items(
parent_history_version: history.history_version(),
transcript_entry_count: transcript_entries.len(),
};
let planned_action_json = format_guardian_action_pretty(&request)?;
let planned_action = format_guardian_action_pretty_with_truncation(&request)?;
let prompt_shape = match mode {
GuardianPromptMode::Full => GuardianPromptShape::Full,
@@ -176,11 +177,12 @@ pub(crate) async fn build_guardian_prompt_items(
.to_string(),
);
push_text("Planned action JSON:\n".to_string());
push_text(format!("{planned_action_json}\n"));
push_text(format!("{}\n", planned_action.text));
push_text(">>> APPROVAL REQUEST END\n".to_string());
Ok(GuardianPromptItems {
items,
transcript_cursor,
reviewed_action_truncated: planned_action.truncated,
})
}
@@ -240,7 +242,7 @@ fn render_guardian_transcript_entries_with_offset(
} else {
GUARDIAN_MAX_MESSAGE_ENTRY_TOKENS
};
let text = guardian_truncate_text(&entry.text, token_cap);
let (text, _) = guardian_truncate_text(&entry.text, token_cap);
let rendered = format!(
"[{}] {}: {}",
index + entry_number_offset + 1,
@@ -420,20 +422,20 @@ pub(crate) fn collect_guardian_transcript_entries(
entries
}
pub(crate) fn guardian_truncate_text(content: &str, token_cap: usize) -> String {
pub(crate) fn guardian_truncate_text(content: &str, token_cap: usize) -> (String, bool) {
if content.is_empty() {
return String::new();
return (String::new(), false);
}
let max_bytes = approx_bytes_for_tokens(token_cap);
if content.len() <= max_bytes {
return content.to_string();
return (content.to_string(), false);
}
let omitted_tokens = approx_tokens_from_byte_count(content.len().saturating_sub(max_bytes));
let marker = format!("<{TRUNCATION_TAG} omitted_approx_tokens=\"{omitted_tokens}\" />");
if max_bytes <= marker.len() {
return marker;
return (marker, true);
}
let available_bytes = max_bytes.saturating_sub(marker.len());
@@ -441,7 +443,7 @@ pub(crate) fn guardian_truncate_text(content: &str, token_cap: usize) -> String
let suffix_budget = available_bytes.saturating_sub(prefix_budget);
let (prefix, suffix) = split_guardian_truncation_bounds(content, prefix_budget, suffix_budget);
format!("{prefix}{marker}{suffix}")
(format!("{prefix}{marker}{suffix}"), true)
}
fn split_guardian_truncation_bounds(

View File

@@ -1,5 +1,14 @@
use std::sync::Arc;
use std::time::Instant;
use codex_analytics::GuardianApprovalRequestSource;
use codex_analytics::GuardianReviewDecision;
use codex_analytics::GuardianReviewFailureReason;
use codex_analytics::GuardianReviewSessionKind;
use codex_analytics::GuardianReviewTerminalStatus;
use codex_analytics::GuardianReviewedAction;
use codex_analytics::now_unix_seconds;
use codex_features::Feature;
use codex_protocol::config_types::ApprovalsReviewer;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
@@ -10,12 +19,14 @@ use codex_protocol::protocol::GuardianRiskLevel;
use codex_protocol::protocol::GuardianUserAuthorization;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::TokenUsage;
use codex_protocol::protocol::WarningEvent;
use tokio_util::sync::CancellationToken;
use crate::codex::Session;
use crate::codex::TurnContext;
use super::GUARDIAN_REVIEW_TIMEOUT;
use super::GUARDIAN_REVIEWER_NAME;
use super::GuardianApprovalRequest;
use super::GuardianAssessment;
@@ -26,6 +37,7 @@ use super::approval_request::guardian_request_target_item_id;
use super::approval_request::guardian_request_turn_id;
use super::prompt::guardian_output_schema;
use super::prompt::parse_guardian_assessment;
use super::review_session::GuardianReviewSessionMetadata;
use super::review_session::GuardianReviewSessionOutcome;
use super::review_session::GuardianReviewSessionParams;
use super::review_session::build_guardian_review_session_config;
@@ -73,13 +85,79 @@ pub(crate) fn guardian_timeout_message() -> String {
GUARDIAN_TIMEOUT_INSTRUCTIONS.to_string()
}
/// What a guardian review session run produced: the review outcome together
/// with whatever session metadata was available for it.
#[derive(Debug)]
pub(super) struct GuardianReviewSessionResult {
/// How the review ended.
pub(super) outcome: GuardianReviewOutcome,
/// Metadata describing the guardian session; `None` when no metadata is
/// available (for example on failures returned before a review session runs).
pub(super) metadata: Option<GuardianReviewSessionMetadata>,
}
#[derive(Debug)]
pub(super) enum GuardianReviewOutcome {
Completed(anyhow::Result<GuardianAssessment>),
Completed(GuardianAssessment),
Failed(GuardianReviewFailure),
TimedOut,
Aborted,
}
impl GuardianReviewSessionResult {
fn completed(
assessment: GuardianAssessment,
metadata: Option<GuardianReviewSessionMetadata>,
) -> Self {
Self {
outcome: GuardianReviewOutcome::Completed(assessment),
metadata,
}
}
fn failed(
failure: GuardianReviewFailure,
metadata: Option<GuardianReviewSessionMetadata>,
) -> Self {
Self {
outcome: GuardianReviewOutcome::Failed(failure),
metadata,
}
}
fn timed_out(metadata: Option<GuardianReviewSessionMetadata>) -> Self {
Self {
outcome: GuardianReviewOutcome::TimedOut,
metadata,
}
}
fn aborted(metadata: Option<GuardianReviewSessionMetadata>) -> Self {
Self {
outcome: GuardianReviewOutcome::Aborted,
metadata,
}
}
}
/// Why a guardian review failed, carrying the underlying error for each stage.
#[derive(Debug)]
pub(super) enum GuardianReviewFailure {
/// Failure reported to analytics as `PromptBuildError`.
PromptBuild(anyhow::Error),
/// Failure reported to analytics as `SessionError`.
Session(anyhow::Error),
/// Failure reported to analytics as `ParseError`.
Parse(anyhow::Error),
}
impl GuardianReviewFailure {
fn reason(&self) -> GuardianReviewFailureReason {
match self {
Self::PromptBuild(_) => GuardianReviewFailureReason::PromptBuildError,
Self::Session(_) => GuardianReviewFailureReason::SessionError,
Self::Parse(_) => GuardianReviewFailureReason::ParseError,
}
}
fn error(&self) -> &anyhow::Error {
match self {
Self::PromptBuild(err) | Self::Session(err) | Self::Parse(err) => err,
}
}
}
fn guardian_risk_level_str(level: GuardianRiskLevel) -> &'static str {
match level {
GuardianRiskLevel::Low => "low",
@@ -89,6 +167,177 @@ fn guardian_risk_level_str(level: GuardianRiskLevel) -> &'static str {
}
}
/// Converts an approval request into the `GuardianReviewedAction` reported to
/// analytics.
///
/// Only the fields named in each arm are copied; every other field of the
/// request is dropped via `..`.
fn guardian_reviewed_action(request: &GuardianApprovalRequest) -> GuardianReviewedAction {
    match request {
        GuardianApprovalRequest::Shell {
            sandbox_permissions: sandbox,
            additional_permissions: extra_permissions,
            ..
        } => GuardianReviewedAction::Shell {
            sandbox_permissions: *sandbox,
            additional_permissions: extra_permissions.clone(),
        },
        GuardianApprovalRequest::ExecCommand {
            sandbox_permissions: sandbox,
            additional_permissions: extra_permissions,
            tty: uses_tty,
            ..
        } => GuardianReviewedAction::UnifiedExec {
            sandbox_permissions: *sandbox,
            additional_permissions: extra_permissions.clone(),
            tty: *uses_tty,
        },
        // This arm is only compiled on unix targets.
        #[cfg(unix)]
        GuardianApprovalRequest::Execve {
            source: command_source,
            program: program_path,
            additional_permissions: extra_permissions,
            ..
        } => GuardianReviewedAction::Execve {
            source: *command_source,
            program: program_path.clone(),
            additional_permissions: extra_permissions.clone(),
        },
        // Patch requests contribute no fields beyond the action type.
        GuardianApprovalRequest::ApplyPatch { .. } => GuardianReviewedAction::ApplyPatch {},
        GuardianApprovalRequest::NetworkAccess {
            protocol: network_protocol,
            port: network_port,
            ..
        } => GuardianReviewedAction::NetworkAccess {
            protocol: *network_protocol,
            port: *network_port,
        },
        GuardianApprovalRequest::McpToolCall {
            server: mcp_server,
            tool_name: mcp_tool,
            connector_id: mcp_connector_id,
            connector_name: mcp_connector_name,
            tool_title: mcp_tool_title,
            ..
        } => GuardianReviewedAction::McpToolCall {
            server: mcp_server.clone(),
            tool_name: mcp_tool.clone(),
            connector_id: mcp_connector_id.clone(),
            connector_name: mcp_connector_name.clone(),
            tool_title: mcp_tool_title.clone(),
        },
    }
}
/// Per-review values captured once when a guardian review starts and reused
/// for the analytics event emitted when that review reaches a terminal state.
struct GuardianReviewAnalyticsContext {
/// Identifier of the thread the review belongs to.
thread_id: String,
/// Identifier of the turn the review is attributed to.
turn_id: String,
/// Identifier of this review.
review_id: String,
/// Item the approval request targets, when the request has one.
target_item_id: Option<String>,
/// Where the approval request came from (e.g. main turn or delegated subagent).
approval_request_source: GuardianApprovalRequestSource,
/// Analytics representation of the action under review.
reviewed_action: GuardianReviewedAction,
/// Start timestamp reported as `started_at` on the event.
started_at: u64,
/// Monotonic start instant used to compute the completion latency.
started_instant: Instant,
}
/// Terminal-state values for a guardian review, combined with the review's
/// context when the analytics event is emitted.
struct GuardianReviewAnalyticsResult {
/// Final decision reported for the review.
decision: GuardianReviewDecision,
/// Terminal status reported for the review.
terminal_status: GuardianReviewTerminalStatus,
/// Failure reason, when one is reported for the review.
failure_reason: Option<GuardianReviewFailureReason>,
/// Risk level from the guardian assessment, when one was produced.
risk_level: Option<GuardianRiskLevel>,
/// User-authorization level from the guardian assessment, when one was produced.
user_authorization: Option<GuardianUserAuthorization>,
/// Allow/deny outcome from the guardian assessment, when one was produced.
outcome: Option<GuardianAssessmentOutcome>,
/// Thread id of the guardian session, when session metadata is available.
guardian_thread_id: Option<String>,
/// Kind of guardian session, when session metadata is available.
guardian_session_kind: Option<GuardianReviewSessionKind>,
/// Model used by the guardian session, when session metadata is available.
guardian_model: Option<String>,
/// Reasoning effort of the guardian session, when known.
guardian_reasoning_effort: Option<String>,
/// Whether the guardian session had prior review context, when known.
had_prior_review_context: Option<bool>,
/// Whether the reviewed action was truncated before being shown to the guardian.
reviewed_action_truncated: bool,
/// Token usage of the guardian session, when available.
token_usage: Option<TokenUsage>,
/// Time to first token in milliseconds, when available.
time_to_first_token_ms: Option<u64>,
/// Completion timestamp reported as `completed_at` on the event.
completed_at: u64,
}
impl GuardianReviewAnalyticsResult {
    /// Builds a baseline result (denied, failed closed, no assessment fields)
    /// and, when session metadata is present, fills in the guardian session
    /// fields from it.
    ///
    /// Callers override `decision`, `terminal_status`, and the assessment
    /// fields with struct-update syntax on top of the returned value.
    fn from_session_metadata(
        metadata: Option<GuardianReviewSessionMetadata>,
        completed_at: u64,
    ) -> Self {
        let baseline = Self {
            decision: GuardianReviewDecision::Denied,
            terminal_status: GuardianReviewTerminalStatus::FailedClosed,
            failure_reason: None,
            risk_level: None,
            user_authorization: None,
            outcome: None,
            guardian_thread_id: None,
            guardian_session_kind: None,
            guardian_model: None,
            guardian_reasoning_effort: None,
            had_prior_review_context: None,
            reviewed_action_truncated: false,
            token_usage: None,
            time_to_first_token_ms: None,
            completed_at,
        };

        match metadata {
            None => baseline,
            Some(session) => Self {
                guardian_thread_id: Some(session.guardian_thread_id),
                guardian_session_kind: Some(session.guardian_session_kind),
                guardian_model: Some(session.guardian_model),
                guardian_reasoning_effort: session.guardian_reasoning_effort,
                had_prior_review_context: Some(session.had_prior_review_context),
                reviewed_action_truncated: session.reviewed_action_truncated,
                token_usage: session.token_usage,
                ..baseline
            },
        }
    }
}
impl GuardianReviewAnalyticsContext {
    /// Emits one guardian review analytics event that combines this context
    /// with the terminal `result`.
    ///
    /// Does nothing unless `Feature::GeneralAnalytics` is enabled for the turn.
    /// The completion latency is measured from `started_instant` at the moment
    /// this method is called.
    fn track(&self, session: &Session, turn: &TurnContext, result: GuardianReviewAnalyticsResult) {
        if !turn.config.features.enabled(Feature::GeneralAnalytics) {
            return;
        }

        // `as_millis()` yields a `u128`; saturate at `u64::MAX` rather than
        // letting an `as` cast silently wrap the value.
        let completion_latency_ms =
            u64::try_from(self.started_instant.elapsed().as_millis()).unwrap_or(u64::MAX);
        let review_timeout_ms =
            u64::try_from(GUARDIAN_REVIEW_TIMEOUT.as_millis()).unwrap_or(u64::MAX);

        session
            .services
            .analytics_events_client
            .track_guardian_review(codex_analytics::GuardianReviewEventParams {
                thread_id: self.thread_id.clone(),
                turn_id: self.turn_id.clone(),
                review_id: self.review_id.clone(),
                target_item_id: self.target_item_id.clone(),
                approval_request_source: self.approval_request_source,
                reviewed_action: self.reviewed_action.clone(),
                reviewed_action_truncated: result.reviewed_action_truncated,
                decision: result.decision,
                terminal_status: result.terminal_status,
                failure_reason: result.failure_reason,
                risk_level: result.risk_level,
                user_authorization: result.user_authorization,
                outcome: result.outcome,
                guardian_thread_id: result.guardian_thread_id,
                guardian_session_kind: result.guardian_session_kind,
                guardian_model: result.guardian_model,
                guardian_reasoning_effort: result.guardian_reasoning_effort,
                had_prior_review_context: result.had_prior_review_context,
                review_timeout_ms,
                // TODO(rhan-oai): plumb nested Guardian review session tool-call counts.
                tool_call_count: None,
                time_to_first_token_ms: result.time_to_first_token_ms,
                completion_latency_ms: Some(completion_latency_ms),
                started_at: self.started_at,
                completed_at: Some(result.completed_at),
                // Token counts are reported only when the session supplied usage data.
                input_tokens: result.token_usage.as_ref().map(|usage| usage.input_tokens),
                cached_input_tokens: result
                    .token_usage
                    .as_ref()
                    .map(|usage| usage.cached_input_tokens),
                output_tokens: result.token_usage.as_ref().map(|usage| usage.output_tokens),
                reasoning_output_tokens: result
                    .token_usage
                    .as_ref()
                    .map(|usage| usage.reasoning_output_tokens),
                total_tokens: result.token_usage.as_ref().map(|usage| usage.total_tokens),
            });
    }
}
/// Whether this turn should route `on-request` approval prompts through the
/// guardian reviewer instead of surfacing them to the user. ARC may still
/// block actions earlier in the flow.
@@ -116,11 +365,24 @@ async fn run_guardian_review(
review_id: String,
request: GuardianApprovalRequest,
retry_reason: Option<String>,
approval_request_source: GuardianApprovalRequestSource,
external_cancel: Option<CancellationToken>,
) -> ReviewDecision {
let started_at = now_unix_seconds();
let started_instant = Instant::now();
let target_item_id = guardian_request_target_item_id(&request).map(str::to_string);
let assessment_turn_id = guardian_request_turn_id(&request, &turn.sub_id).to_string();
let action_summary = guardian_assessment_action(&request);
let analytics_context = GuardianReviewAnalyticsContext {
thread_id: session.conversation_id.to_string(),
turn_id: assessment_turn_id.clone(),
review_id: review_id.clone(),
target_item_id: target_item_id.clone(),
approval_request_source,
reviewed_action: guardian_reviewed_action(&request),
started_at,
started_instant,
};
session
.send_event(
turn.as_ref(),
@@ -142,6 +404,19 @@ async fn run_guardian_review(
.as_ref()
.is_some_and(CancellationToken::is_cancelled)
{
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsResult {
decision: GuardianReviewDecision::Aborted,
terminal_status: GuardianReviewTerminalStatus::Aborted,
failure_reason: Some(GuardianReviewFailureReason::Cancelled),
..GuardianReviewAnalyticsResult::from_session_metadata(
/*metadata*/ None,
now_unix_seconds(),
)
},
);
session
.send_event(
turn.as_ref(),
@@ -163,28 +438,78 @@ async fn run_guardian_review(
let schema = guardian_output_schema();
let terminal_action = action_summary.clone();
let outcome = Box::pin(run_guardian_review_session(
let GuardianReviewSessionResult { outcome, metadata } = Box::pin(run_guardian_review_session(
session.clone(),
turn.clone(),
request,
retry_reason,
retry_reason.clone(),
schema,
external_cancel,
))
.await;
let completed_at = now_unix_seconds();
let result =
|metadata| GuardianReviewAnalyticsResult::from_session_metadata(metadata, completed_at);
let assessment = match outcome {
GuardianReviewOutcome::Completed(Ok(assessment)) => assessment,
GuardianReviewOutcome::Completed(Err(err)) => GuardianAssessment {
risk_level: GuardianRiskLevel::High,
user_authorization: GuardianUserAuthorization::Unknown,
outcome: GuardianAssessmentOutcome::Deny,
rationale: format!("Automatic approval review failed: {err}"),
},
GuardianReviewOutcome::Completed(assessment) => {
let approved = matches!(assessment.outcome, GuardianAssessmentOutcome::Allow);
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsResult {
decision: if approved {
GuardianReviewDecision::Approved
} else {
GuardianReviewDecision::Denied
},
terminal_status: if approved {
GuardianReviewTerminalStatus::Approved
} else {
GuardianReviewTerminalStatus::Denied
},
failure_reason: None,
risk_level: Some(assessment.risk_level),
user_authorization: Some(assessment.user_authorization),
outcome: Some(assessment.outcome),
..result(metadata)
},
);
assessment
}
GuardianReviewOutcome::Failed(failure) => {
let rationale = format!("Automatic approval review failed: {}", failure.error());
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsResult {
decision: GuardianReviewDecision::Denied,
terminal_status: GuardianReviewTerminalStatus::FailedClosed,
failure_reason: Some(failure.reason()),
..result(metadata)
},
);
GuardianAssessment {
risk_level: GuardianRiskLevel::High,
user_authorization: GuardianUserAuthorization::Unknown,
outcome: GuardianAssessmentOutcome::Deny,
rationale,
}
}
GuardianReviewOutcome::TimedOut => {
let rationale =
"Automatic approval review timed out while evaluating the requested approval."
.to_string();
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsResult {
decision: GuardianReviewDecision::Denied,
terminal_status: GuardianReviewTerminalStatus::TimedOut,
failure_reason: Some(GuardianReviewFailureReason::Timeout),
..result(metadata)
},
);
session
.send_event(
turn.as_ref(),
@@ -212,6 +537,16 @@ async fn run_guardian_review(
return ReviewDecision::TimedOut;
}
GuardianReviewOutcome::Aborted => {
analytics_context.track(
session.as_ref(),
turn.as_ref(),
GuardianReviewAnalyticsResult {
decision: GuardianReviewDecision::Aborted,
terminal_status: GuardianReviewTerminalStatus::Aborted,
failure_reason: Some(GuardianReviewFailureReason::Cancelled),
..result(metadata)
},
);
session
.send_event(
turn.as_ref(),
@@ -311,6 +646,7 @@ pub(crate) async fn review_approval_request(
review_id,
request,
retry_reason,
GuardianApprovalRequestSource::MainTurn,
/*external_cancel*/ None,
))
.await
@@ -322,16 +658,18 @@ pub(crate) async fn review_approval_request_with_cancel(
review_id: String,
request: GuardianApprovalRequest,
retry_reason: Option<String>,
approval_request_source: GuardianApprovalRequestSource,
cancel_token: CancellationToken,
) -> ReviewDecision {
Box::pin(run_guardian_review(
run_guardian_review(
Arc::clone(session),
Arc::clone(turn),
review_id,
request,
retry_reason,
approval_request_source,
Some(cancel_token),
))
)
.await
}
@@ -356,11 +694,16 @@ pub(super) async fn run_guardian_review_session(
retry_reason: Option<String>,
schema: serde_json::Value,
external_cancel: Option<CancellationToken>,
) -> GuardianReviewOutcome {
) -> GuardianReviewSessionResult {
let live_network_config = match session.services.network_proxy.as_ref() {
Some(network_proxy) => match network_proxy.proxy().current_cfg().await {
Ok(config) => Some(config),
Err(err) => return GuardianReviewOutcome::Completed(Err(err)),
Err(err) => {
return GuardianReviewSessionResult::failed(
GuardianReviewFailure::PromptBuild(err),
/*metadata*/ None,
);
}
},
None => None,
};
@@ -410,37 +753,94 @@ pub(super) async fn run_guardian_review_session(
);
let guardian_config = match guardian_config {
Ok(config) => config,
Err(err) => return GuardianReviewOutcome::Completed(Err(err)),
Err(err) => {
return GuardianReviewSessionResult::failed(
GuardianReviewFailure::PromptBuild(err),
/*metadata*/ None,
);
}
};
match Box::pin(
session
.guardian_review_session
.run_review(GuardianReviewSessionParams {
parent_session: Arc::clone(&session),
parent_turn: turn.clone(),
spawn_config: guardian_config,
request,
retry_reason,
schema,
model: guardian_model,
reasoning_effort: guardian_reasoning_effort,
reasoning_summary: turn.reasoning_summary,
personality: turn.personality,
external_cancel,
}),
)
.await
{
GuardianReviewSessionOutcome::Completed(Ok(last_agent_message)) => {
GuardianReviewOutcome::Completed(parse_guardian_assessment(
last_agent_message.as_deref(),
))
let (session_outcome, session_metadata) = Box::pin(session.guardian_review_session.run_review(
GuardianReviewSessionParams {
parent_session: Arc::clone(&session),
parent_turn: turn.clone(),
spawn_config: guardian_config,
request,
retry_reason,
schema,
model: guardian_model,
reasoning_effort: guardian_reasoning_effort,
reasoning_summary: turn.reasoning_summary,
personality: turn.personality,
external_cancel,
},
))
.await;
match session_outcome {
GuardianReviewSessionOutcome::Completed(Ok(last_agent_message)) => match last_agent_message
{
Some(last_agent_message) => {
match parse_guardian_assessment(Some(&last_agent_message)) {
Ok(assessment) => {
GuardianReviewSessionResult::completed(assessment, session_metadata)
}
Err(err) => GuardianReviewSessionResult::failed(
GuardianReviewFailure::Parse(err),
session_metadata,
),
}
}
None => GuardianReviewSessionResult::failed(
GuardianReviewFailure::Session(anyhow::anyhow!(
"guardian review completed without an assessment payload"
)),
session_metadata,
),
},
GuardianReviewSessionOutcome::Completed(Err(err)) => GuardianReviewSessionResult::failed(
GuardianReviewFailure::Session(err),
session_metadata,
),
GuardianReviewSessionOutcome::PromptBuildFailed(err) => {
GuardianReviewSessionResult::failed(
GuardianReviewFailure::PromptBuild(err),
session_metadata,
)
}
GuardianReviewSessionOutcome::Completed(Err(err)) => {
GuardianReviewOutcome::Completed(Err(err))
GuardianReviewSessionOutcome::TimedOut => {
GuardianReviewSessionResult::timed_out(session_metadata)
}
GuardianReviewSessionOutcome::Aborted => {
GuardianReviewSessionResult::aborted(session_metadata)
}
GuardianReviewSessionOutcome::TimedOut => GuardianReviewOutcome::TimedOut,
GuardianReviewSessionOutcome::Aborted => GuardianReviewOutcome::Aborted,
}
}
#[cfg(test)]
mod review_tests {
    use super::*;

    /// Every `GuardianReviewFailure` variant must report its own
    /// `GuardianReviewFailureReason` so the failure kinds stay distinguishable.
    #[test]
    fn guardian_review_failure_reason_distinguishes_failure_kinds() {
        let from_parse =
            GuardianReviewFailure::Parse(anyhow::anyhow!("bad guardian JSON")).reason();
        let from_prompt_build =
            GuardianReviewFailure::PromptBuild(anyhow::anyhow!("bad prompt/config")).reason();
        let from_session =
            GuardianReviewFailure::Session(anyhow::anyhow!("guardian runtime failed")).reason();

        assert!(matches!(
            from_parse,
            GuardianReviewFailureReason::ParseError
        ));
        assert!(matches!(
            from_prompt_build,
            GuardianReviewFailureReason::PromptBuildError
        ));
        assert!(matches!(
            from_session,
            GuardianReviewFailureReason::SessionError
        ));
    }
}

View File

@@ -5,6 +5,7 @@ use std::sync::Arc;
use std::time::Duration;
use anyhow::anyhow;
use codex_analytics::GuardianReviewSessionKind;
use codex_protocol::config_types::Personality;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use codex_protocol::models::DeveloperInstructions;
@@ -17,6 +18,7 @@ use codex_protocol::protocol::Op;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::TokenUsage;
use serde_json::Value;
use tokio::sync::Mutex;
use tokio_util::sync::CancellationToken;
@@ -58,10 +60,22 @@ const GUARDIAN_FOLLOWUP_REVIEW_REMINDER: &str = concat!(
/// Terminal result of running one guardian review on a review session.
#[derive(Debug)]
pub(crate) enum GuardianReviewSessionOutcome {
/// The review reached a terminal result: `Ok` carries the guardian turn's last
/// agent message (which may be absent), `Err` carries the error that ended it.
Completed(anyhow::Result<Option<String>>),
/// Spawning the review session, or building/submitting the review prompt,
/// failed before the guardian turn could be awaited.
PromptBuildFailed(anyhow::Error),
/// The review deadline elapsed before the guardian turn finished.
TimedOut,
/// The review was cancelled externally or the guardian turn was aborted.
Aborted,
}
/// Descriptive metadata about the guardian session that handled a review,
/// returned alongside the review outcome.
#[derive(Debug, Clone)]
pub(crate) struct GuardianReviewSessionMetadata {
/// Conversation id of the guardian review session, rendered as a string.
pub(crate) guardian_thread_id: String,
/// How the review session was obtained (for example a newly spawned trunk,
/// a reused trunk, or an ephemeral fork).
pub(crate) guardian_session_kind: GuardianReviewSessionKind,
/// Model requested for the guardian review.
pub(crate) guardian_model: String,
/// Reasoning effort requested for the review, stringified; `None` when unset.
pub(crate) guardian_reasoning_effort: Option<String>,
/// `true` when the review prompt was built in delta mode, i.e. on top of an
/// earlier review in the same session.
pub(crate) had_prior_review_context: bool,
/// Whether the reviewed action was truncated while building the prompt;
/// stays `false` if the prompt was never built.
pub(crate) reviewed_action_truncated: bool,
/// Token usage accrued during this review (difference between the session
/// totals sampled before and after the turn); `None` when the turn did not
/// complete or a usage reading was unavailable.
pub(crate) token_usage: Option<TokenUsage>,
}
pub(crate) struct GuardianReviewSessionParams {
pub(crate) parent_session: Arc<Session>,
pub(crate) parent_turn: Arc<TurnContext>,
@@ -101,6 +115,21 @@ struct GuardianReviewState {
last_committed_fork_snapshot: Option<GuardianReviewForkSnapshot>,
}
/// Returns `true` when the prompt is built in `Delta` mode, which carries a
/// cursor into an earlier review transcript; every other mode yields `false`.
fn had_prior_review_context(prompt_mode: &GuardianPromptMode) -> bool {
matches!(prompt_mode, GuardianPromptMode::Delta { .. })
}
/// Computes the per-field token usage accrued between two usage snapshots.
///
/// Each field is `end - start`, clamped to zero so that a counter which went
/// backwards between the snapshots is reported as no usage rather than as a
/// negative amount. The subtraction saturates, so extreme or corrupted counter
/// values cannot overflow before the clamp is applied.
fn token_usage_delta(start: &TokenUsage, end: &TokenUsage) -> TokenUsage {
    TokenUsage {
        input_tokens: end.input_tokens.saturating_sub(start.input_tokens).max(0),
        cached_input_tokens: end
            .cached_input_tokens
            .saturating_sub(start.cached_input_tokens)
            .max(0),
        output_tokens: end
            .output_tokens
            .saturating_sub(start.output_tokens)
            .max(0),
        reasoning_output_tokens: end
            .reasoning_output_tokens
            .saturating_sub(start.reasoning_output_tokens)
            .max(0),
        total_tokens: end.total_tokens.saturating_sub(start.total_tokens).max(0),
    }
}
struct EphemeralReviewCleanup {
state: Arc<Mutex<GuardianReviewSessionState>>,
review_session: Option<Arc<GuardianReviewSession>>,
@@ -267,10 +296,14 @@ impl GuardianReviewSessionManager {
pub(crate) async fn run_review(
&self,
params: GuardianReviewSessionParams,
) -> GuardianReviewSessionOutcome {
) -> (
GuardianReviewSessionOutcome,
Option<GuardianReviewSessionMetadata>,
) {
let deadline = tokio::time::Instant::now() + GUARDIAN_REVIEW_TIMEOUT;
let next_reuse_key = GuardianReviewSessionReuseKey::from_spawn_config(&params.spawn_config);
let mut stale_trunk_to_shutdown = None;
let mut spawned_trunk = false;
let trunk_candidate = match run_before_review_deadline(
deadline,
params.external_cancel.as_ref(),
@@ -304,16 +337,17 @@ impl GuardianReviewSessionManager {
{
Ok(Ok(review_session)) => Arc::new(review_session),
Ok(Err(err)) => {
return GuardianReviewSessionOutcome::Completed(Err(err));
return (GuardianReviewSessionOutcome::PromptBuildFailed(err), None);
}
Err(outcome) => return outcome,
Err(outcome) => return (outcome, None),
};
state.trunk = Some(Arc::clone(&review_session));
spawned_trunk = true;
}
state.trunk.as_ref().cloned()
}
Err(outcome) => return outcome,
Err(outcome) => return (outcome, None),
};
if let Some(review_session) = stale_trunk_to_shutdown {
@@ -321,9 +355,12 @@ impl GuardianReviewSessionManager {
}
let Some(trunk) = trunk_candidate else {
return GuardianReviewSessionOutcome::Completed(Err(anyhow!(
"guardian review session was not available after spawn"
)));
return (
GuardianReviewSessionOutcome::Completed(Err(anyhow!(
"guardian review session was not available after spawn"
))),
None,
);
};
if trunk.reuse_key != next_reuse_key {
@@ -349,20 +386,30 @@ impl GuardianReviewSessionManager {
}
};
let (outcome, keep_review_session) =
Box::pin(run_review_on_session(trunk.as_ref(), &params, deadline)).await;
let guardian_session_kind = if spawned_trunk {
GuardianReviewSessionKind::TrunkNew
} else {
GuardianReviewSessionKind::TrunkReused
};
let (outcome, keep_review_session, metadata) = Box::pin(run_review_on_session(
trunk.as_ref(),
&params,
guardian_session_kind,
deadline,
))
.await;
if keep_review_session && matches!(outcome, GuardianReviewSessionOutcome::Completed(_)) {
trunk.refresh_last_committed_fork_snapshot().await;
}
drop(trunk_guard);
if keep_review_session {
outcome
(outcome, Some(metadata))
} else {
if let Some(review_session) = self.remove_trunk_if_current(&trunk).await {
review_session.shutdown_in_background();
}
outcome
(outcome, Some(metadata))
}
}
@@ -459,7 +506,10 @@ impl GuardianReviewSessionManager {
reuse_key: GuardianReviewSessionReuseKey,
deadline: tokio::time::Instant,
fork_snapshot: Option<GuardianReviewForkSnapshot>,
) -> GuardianReviewSessionOutcome {
) -> (
GuardianReviewSessionOutcome,
Option<GuardianReviewSessionMetadata>,
) {
let spawn_cancel_token = CancellationToken::new();
let mut fork_config = params.spawn_config.clone();
fork_config.ephemeral = true;
@@ -478,17 +528,18 @@ impl GuardianReviewSessionManager {
.await
{
Ok(Ok(review_session)) => Arc::new(review_session),
Ok(Err(err)) => return GuardianReviewSessionOutcome::Completed(Err(err)),
Err(outcome) => return outcome,
Ok(Err(err)) => return (GuardianReviewSessionOutcome::PromptBuildFailed(err), None),
Err(outcome) => return (outcome, None),
};
self.register_active_ephemeral(Arc::clone(&review_session))
.await;
let mut cleanup =
EphemeralReviewCleanup::new(Arc::clone(&self.state), Arc::clone(&review_session));
let (outcome, _) = Box::pin(run_review_on_session(
let (outcome, _, metadata) = Box::pin(run_review_on_session(
review_session.as_ref(),
&params,
GuardianReviewSessionKind::EphemeralForked,
deadline,
))
.await;
@@ -496,7 +547,7 @@ impl GuardianReviewSessionManager {
cleanup.disarm();
review_session.shutdown_in_background();
}
outcome
(outcome, Some(metadata))
}
}
@@ -543,8 +594,13 @@ async fn spawn_guardian_review_session(
async fn run_review_on_session(
review_session: &GuardianReviewSession,
params: &GuardianReviewSessionParams,
guardian_session_kind: GuardianReviewSessionKind,
deadline: tokio::time::Instant,
) -> (GuardianReviewSessionOutcome, bool) {
) -> (
GuardianReviewSessionOutcome,
bool,
GuardianReviewSessionMetadata,
) {
let (send_followup_reminder, prompt_mode) = {
let state = review_session.state.lock().await;
@@ -559,6 +615,15 @@ async fn run_review_on_session(
(send_followup_reminder, prompt_mode)
};
let mut guardian_metadata = GuardianReviewSessionMetadata {
guardian_thread_id: review_session.codex.session.conversation_id.to_string(),
guardian_session_kind,
guardian_model: params.model.clone(),
guardian_reasoning_effort: params.reasoning_effort.map(|effort| effort.to_string()),
had_prior_review_context: had_prior_review_context(&prompt_mode),
reviewed_action_truncated: false,
token_usage: None,
};
if send_followup_reminder {
append_guardian_followup_reminder(review_session).await;
}
@@ -583,6 +648,9 @@ async fn run_review_on_session(
prompt_mode,
)
.await?;
let reviewed_action_truncated = prompt_items.reviewed_action_truncated;
let token_usage_at_review_start =
review_session.codex.session.total_token_usage().await;
review_session
.codex
@@ -602,29 +670,48 @@ async fn run_review_on_session(
})
.await?;
Ok::<GuardianTranscriptCursor, anyhow::Error>(prompt_items.transcript_cursor)
Ok::<(GuardianTranscriptCursor, bool, Option<TokenUsage>), anyhow::Error>((
prompt_items.transcript_cursor,
reviewed_action_truncated,
token_usage_at_review_start,
))
}),
)
.await;
let submit_result = match submit_result {
Ok(submit_result) => submit_result,
Err(outcome) => return (outcome, false),
};
let transcript_cursor = match submit_result {
Ok(transcript_cursor) => transcript_cursor,
Err(err) => {
return (GuardianReviewSessionOutcome::Completed(Err(err)), false);
}
Err(outcome) => return (outcome, false, guardian_metadata),
};
let (transcript_cursor, reviewed_action_truncated, token_usage_at_review_start) =
match submit_result {
Ok(submit_result) => submit_result,
Err(err) => {
return (
GuardianReviewSessionOutcome::PromptBuildFailed(err),
false,
guardian_metadata,
);
}
};
guardian_metadata.reviewed_action_truncated = reviewed_action_truncated;
let outcome =
wait_for_guardian_review(review_session, deadline, params.external_cancel.as_ref()).await;
if matches!(outcome.0, GuardianReviewSessionOutcome::Completed(_)) {
if outcome.2
&& let Some(token_usage_at_review_start) = token_usage_at_review_start
&& let Some(total_token_usage) = review_session.codex.session.total_token_usage().await
{
guardian_metadata.token_usage = Some(token_usage_delta(
&token_usage_at_review_start,
&total_token_usage,
));
}
let mut state = review_session.state.lock().await;
state.prior_review_count = state.prior_review_count.saturating_add(1);
state.last_reviewed_transcript_cursor = Some(transcript_cursor);
}
outcome
(outcome.0, outcome.1, guardian_metadata)
}
async fn append_guardian_followup_reminder(review_session: &GuardianReviewSession) {
@@ -653,7 +740,7 @@ async fn wait_for_guardian_review(
review_session: &GuardianReviewSession,
deadline: tokio::time::Instant,
external_cancel: Option<&CancellationToken>,
) -> (GuardianReviewSessionOutcome, bool) {
) -> (GuardianReviewSessionOutcome, bool, bool) {
let timeout = tokio::time::sleep_until(deadline);
tokio::pin!(timeout);
let mut last_error_message: Option<String> = None;
@@ -662,7 +749,7 @@ async fn wait_for_guardian_review(
tokio::select! {
_ = &mut timeout => {
let keep_review_session = interrupt_and_drain_turn(&review_session.codex).await.is_ok();
return (GuardianReviewSessionOutcome::TimedOut, keep_review_session);
return (GuardianReviewSessionOutcome::TimedOut, keep_review_session, false);
}
_ = async {
if let Some(cancel_token) = external_cancel {
@@ -672,7 +759,7 @@ async fn wait_for_guardian_review(
}
} => {
let keep_review_session = interrupt_and_drain_turn(&review_session.codex).await.is_ok();
return (GuardianReviewSessionOutcome::Aborted, keep_review_session);
return (GuardianReviewSessionOutcome::Aborted, keep_review_session, false);
}
event = review_session.codex.next_event() => {
match event {
@@ -684,18 +771,20 @@ async fn wait_for_guardian_review(
return (
GuardianReviewSessionOutcome::Completed(Err(anyhow!(error_message))),
true,
true,
);
}
return (
GuardianReviewSessionOutcome::Completed(Ok(turn_complete.last_agent_message)),
true,
true,
);
}
EventMsg::Error(error) => {
last_error_message = Some(error.message);
}
EventMsg::TurnAborted(_) => {
return (GuardianReviewSessionOutcome::Aborted, true);
return (GuardianReviewSessionOutcome::Aborted, true, false);
}
_ => {}
},
@@ -703,6 +792,7 @@ async fn wait_for_guardian_review(
return (
GuardianReviewSessionOutcome::Completed(Err(err.into())),
false,
false,
);
}
}
@@ -954,4 +1044,44 @@ mod tests {
assert_eq!(outcome.unwrap(), 42);
assert!(!cancel_token.is_cancelled());
}
/// `Full` prompts carry no earlier review context, while `Delta` prompts do.
#[test]
fn had_prior_review_context_tracks_prompt_mode() {
    let full_mode = GuardianPromptMode::Full;
    let delta_mode = GuardianPromptMode::Delta {
        cursor: GuardianTranscriptCursor {
            parent_history_version: 7,
            transcript_entry_count: 42,
        },
    };

    assert!(!had_prior_review_context(&full_mode));
    assert!(had_prior_review_context(&delta_mode));
}
/// Fields that shrink between the two snapshots must come back as zero, while
/// fields that grow must come back as the plain difference.
#[test]
fn token_usage_delta_never_reports_negative_usage() {
    let before = TokenUsage {
        input_tokens: 10,
        cached_input_tokens: 8,
        output_tokens: 6,
        reasoning_output_tokens: 4,
        total_tokens: 28,
    };
    let after = TokenUsage {
        input_tokens: 15,
        cached_input_tokens: 7,
        output_tokens: 10,
        reasoning_output_tokens: 2,
        total_tokens: 34,
    };
    let expected = TokenUsage {
        input_tokens: 5,
        cached_input_tokens: 0,
        output_tokens: 4,
        reasoning_output_tokens: 0,
        total_tokens: 6,
    };

    let actual = token_usage_delta(&before, &after);

    assert_eq!(actual, expected);
}
}

View File

@@ -15,6 +15,7 @@ use crate::config_loader::NetworkDomainPermissionsToml;
use crate::config_loader::RequirementSource;
use crate::config_loader::Sourced;
use crate::test_support;
use codex_analytics::GuardianApprovalRequestSource;
use codex_config::config_toml::ConfigToml;
use codex_network_proxy::NetworkProxyConfig;
use codex_protocol::ThreadId;
@@ -567,11 +568,12 @@ fn collect_guardian_transcript_entries_includes_recent_tool_calls_and_output() {
fn guardian_truncate_text_keeps_prefix_suffix_and_xml_marker() {
let content = "prefix ".repeat(200) + &" suffix".repeat(200);
let truncated = guardian_truncate_text(&content, /*token_cap*/ 20);
let (truncated, was_truncated) = guardian_truncate_text(&content, /*token_cap*/ 20);
assert!(truncated.starts_with("prefix"));
assert!(truncated.contains("<truncated omitted_approx_tokens=\""));
assert!(truncated.ends_with("suffix"));
assert!(was_truncated);
}
#[test]
@@ -584,11 +586,29 @@ fn format_guardian_action_pretty_truncates_large_string_fields() -> serde_json::
patch: patch.clone(),
};
let rendered = format_guardian_action_pretty(&action)?;
let rendered = format_guardian_action_pretty_with_truncation(&action)?;
assert!(rendered.contains("\"tool\": \"apply_patch\""));
assert!(rendered.contains("<truncated omitted_approx_tokens="));
assert!(rendered.len() < patch.len());
assert!(rendered.text.contains("\"tool\": \"apply_patch\""));
assert!(rendered.text.contains("<truncated omitted_approx_tokens="));
assert!(rendered.text.len() < patch.len());
assert!(rendered.truncated);
Ok(())
}
/// A tiny patch must render with its tool name and without the truncated flag.
#[test]
fn format_guardian_action_pretty_reports_no_truncation_for_small_payload() -> serde_json::Result<()>
{
    let small_patch = GuardianApprovalRequest::ApplyPatch {
        id: String::from("patch-1"),
        cwd: test_path_buf("/tmp").abs(),
        files: vec![],
        patch: String::from("line\n"),
    };

    let formatted = format_guardian_action_pretty_with_truncation(&small_patch)?;

    assert!(formatted.text.contains("\"tool\": \"apply_patch\""));
    assert!(!formatted.truncated);
    Ok(())
}
@@ -701,6 +721,7 @@ async fn cancelled_guardian_review_emits_terminal_abort_without_warning() {
.to_string(),
},
/*retry_reason*/ None,
GuardianApprovalRequestSource::MainTurn,
cancel_token,
)
.await;
@@ -916,10 +937,28 @@ async fn guardian_review_request_layout_matches_model_visible_request_snapshot()
/*external_cancel*/ None,
)
.await;
let GuardianReviewOutcome::Completed(Ok(assessment)) = outcome else {
let GuardianReviewSessionResult {
outcome: GuardianReviewOutcome::Completed(assessment),
metadata,
} = outcome
else {
panic!("expected guardian assessment");
};
let metadata = metadata.expect("guardian session metadata");
assert_eq!(assessment.outcome, GuardianAssessmentOutcome::Allow);
assert_ne!(
metadata.guardian_thread_id,
session.conversation_id.to_string()
);
ThreadId::from_string(&metadata.guardian_thread_id)
.expect("guardian thread id should be a valid UUID");
assert!(matches!(
metadata.guardian_session_kind,
codex_analytics::GuardianReviewSessionKind::TrunkNew
));
assert_eq!(metadata.guardian_model, "gpt-5.4");
assert_eq!(metadata.guardian_reasoning_effort.as_deref(), Some("low"));
assert!(!metadata.had_prior_review_context);
let request = request_log.single_request();
let mut settings = Settings::clone_current();
@@ -1125,18 +1164,62 @@ async fn guardian_reuses_prompt_cache_key_and_appends_prior_reviews() -> anyhow:
)
.await;
let GuardianReviewOutcome::Completed(Ok(first_assessment)) = first_outcome else {
let GuardianReviewSessionResult {
outcome: GuardianReviewOutcome::Completed(first_assessment),
metadata: first_metadata,
} = first_outcome
else {
panic!("expected first guardian assessment");
};
let GuardianReviewOutcome::Completed(Ok(second_assessment)) = second_outcome else {
let first_metadata = first_metadata.expect("first guardian session metadata");
let GuardianReviewSessionResult {
outcome: GuardianReviewOutcome::Completed(second_assessment),
metadata: second_metadata,
} = second_outcome
else {
panic!("expected second guardian assessment");
};
let GuardianReviewOutcome::Completed(Ok(third_assessment)) = third_outcome else {
let second_metadata = second_metadata.expect("second guardian session metadata");
let GuardianReviewSessionResult {
outcome: GuardianReviewOutcome::Completed(third_assessment),
metadata: third_metadata,
} = third_outcome
else {
panic!("expected third guardian assessment");
};
let third_metadata = third_metadata.expect("third guardian session metadata");
assert_eq!(first_assessment.outcome, GuardianAssessmentOutcome::Allow);
assert_eq!(second_assessment.outcome, GuardianAssessmentOutcome::Allow);
assert_eq!(third_assessment.outcome, GuardianAssessmentOutcome::Allow);
assert!(matches!(
first_metadata.guardian_session_kind,
codex_analytics::GuardianReviewSessionKind::TrunkNew
));
assert!(matches!(
second_metadata.guardian_session_kind,
codex_analytics::GuardianReviewSessionKind::TrunkReused
));
assert!(matches!(
third_metadata.guardian_session_kind,
codex_analytics::GuardianReviewSessionKind::TrunkReused
));
ThreadId::from_string(&first_metadata.guardian_thread_id)
.expect("first guardian thread id should be a valid UUID");
ThreadId::from_string(&second_metadata.guardian_thread_id)
.expect("second guardian thread id should be a valid UUID");
ThreadId::from_string(&third_metadata.guardian_thread_id)
.expect("third guardian thread id should be a valid UUID");
assert!(!first_metadata.had_prior_review_context);
assert!(second_metadata.had_prior_review_context);
assert!(third_metadata.had_prior_review_context);
assert_eq!(
first_metadata.guardian_thread_id,
second_metadata.guardian_thread_id
);
assert_eq!(
second_metadata.guardian_thread_id,
third_metadata.guardian_thread_id
);
let requests = request_log.requests();
assert_eq!(requests.len(), 3);

View File

@@ -100,6 +100,14 @@ pub enum GuardianUserAuthorization {
High,
}
/// Final allow/deny outcome returned by the guardian reviewer.
// Serde renames every variant to its lowercase name (`rename_all = "lowercase"`).
// NOTE(review): the notes below are plain `//` comments on purpose; `///` text on
// `JsonSchema`/`TS` types presumably flows into generated schema/bindings —
// confirm before converting them to doc comments.
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "lowercase")]
pub enum GuardianAssessmentOutcome {
// Serialized as `"allow"`.
Allow,
// Serialized as `"deny"`.
Deny,
}
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum GuardianAssessmentStatus {

View File

@@ -67,6 +67,7 @@ pub use crate::approvals::ExecPolicyAmendment;
pub use crate::approvals::GuardianAssessmentAction;
pub use crate::approvals::GuardianAssessmentDecisionSource;
pub use crate::approvals::GuardianAssessmentEvent;
pub use crate::approvals::GuardianAssessmentOutcome;
pub use crate::approvals::GuardianAssessmentStatus;
pub use crate::approvals::GuardianCommandSource;
pub use crate::approvals::GuardianRiskLevel;