mirror of
https://github.com/openai/codex.git
synced 2026-04-23 22:24:57 +00:00
[codex-analytics] guardian review truncation
This commit is contained in:
@@ -9,7 +9,7 @@ use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::GUARDIAN_MAX_ACTION_STRING_TOKENS;
|
||||
use super::prompt::guardian_truncate_text;
|
||||
use super::prompt::guardian_truncate_text_with_metadata;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub(crate) enum GuardianApprovalRequest {
|
||||
@@ -167,32 +167,49 @@ fn guardian_command_source_tool_name(source: GuardianCommandSource) -> &'static
|
||||
}
|
||||
}
|
||||
|
||||
fn truncate_guardian_action_value(value: Value) -> Value {
|
||||
fn truncate_guardian_action_value(value: Value) -> (Value, bool) {
|
||||
match value {
|
||||
Value::String(text) => Value::String(guardian_truncate_text(
|
||||
&text,
|
||||
GUARDIAN_MAX_ACTION_STRING_TOKENS,
|
||||
)),
|
||||
Value::Array(values) => Value::Array(
|
||||
values
|
||||
Value::String(text) => {
|
||||
let (text, truncated) =
|
||||
guardian_truncate_text_with_metadata(&text, GUARDIAN_MAX_ACTION_STRING_TOKENS);
|
||||
(Value::String(text), truncated)
|
||||
}
|
||||
Value::Array(values) => {
|
||||
let mut truncated = false;
|
||||
let values = values
|
||||
.into_iter()
|
||||
.map(truncate_guardian_action_value)
|
||||
.collect::<Vec<_>>(),
|
||||
),
|
||||
.map(|value| {
|
||||
let (value, value_truncated) = truncate_guardian_action_value(value);
|
||||
truncated |= value_truncated;
|
||||
value
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
(Value::Array(values), truncated)
|
||||
}
|
||||
Value::Object(values) => {
|
||||
let mut entries = values.into_iter().collect::<Vec<_>>();
|
||||
entries.sort_by(|(left, _), (right, _)| left.cmp(right));
|
||||
Value::Object(
|
||||
entries
|
||||
.into_iter()
|
||||
.map(|(key, value)| (key, truncate_guardian_action_value(value)))
|
||||
.collect(),
|
||||
)
|
||||
let mut truncated = false;
|
||||
let values = entries
|
||||
.into_iter()
|
||||
.map(|(key, value)| {
|
||||
let (value, value_truncated) = truncate_guardian_action_value(value);
|
||||
truncated |= value_truncated;
|
||||
(key, value)
|
||||
})
|
||||
.collect();
|
||||
(Value::Object(values), truncated)
|
||||
}
|
||||
other => other,
|
||||
other => (other, false),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) struct FormattedGuardianAction {
|
||||
pub(crate) text: String,
|
||||
pub(crate) truncated: bool,
|
||||
}
|
||||
|
||||
pub(crate) fn guardian_approval_request_to_json(
|
||||
action: &GuardianApprovalRequest,
|
||||
) -> serde_json::Result<Value> {
|
||||
@@ -382,10 +399,13 @@ pub(crate) fn guardian_request_turn_id<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn format_guardian_action_pretty(
|
||||
pub(crate) fn format_guardian_action_pretty_with_truncation(
|
||||
action: &GuardianApprovalRequest,
|
||||
) -> serde_json::Result<String> {
|
||||
let mut value = guardian_approval_request_to_json(action)?;
|
||||
value = truncate_guardian_action_value(value);
|
||||
serde_json::to_string_pretty(&value)
|
||||
) -> serde_json::Result<FormattedGuardianAction> {
|
||||
let value = guardian_approval_request_to_json(action)?;
|
||||
let (value, truncated) = truncate_guardian_action_value(value);
|
||||
Ok(FormattedGuardianAction {
|
||||
text: serde_json::to_string_pretty(&value)?,
|
||||
truncated,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ pub(crate) struct GuardianRejection {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
use approval_request::format_guardian_action_pretty;
|
||||
use approval_request::format_guardian_action_pretty_with_truncation;
|
||||
#[cfg(test)]
|
||||
use approval_request::guardian_assessment_action;
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -19,7 +19,7 @@ use super::GUARDIAN_RECENT_ENTRY_LIMIT;
|
||||
use super::GuardianApprovalRequest;
|
||||
use super::GuardianAssessment;
|
||||
use super::TRUNCATION_TAG;
|
||||
use super::approval_request::format_guardian_action_pretty;
|
||||
use super::approval_request::format_guardian_action_pretty_with_truncation;
|
||||
|
||||
/// Transcript entry retained for guardian review after filtering.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
@@ -56,6 +56,7 @@ impl GuardianTranscriptEntryKind {
|
||||
pub(crate) struct GuardianPromptItems {
|
||||
pub(crate) items: Vec<UserInput>,
|
||||
pub(crate) transcript_cursor: GuardianTranscriptCursor,
|
||||
pub(crate) reviewed_action_truncated: bool,
|
||||
}
|
||||
|
||||
/// Points to the end of the transcript that the guardian has already reviewed.
|
||||
@@ -91,7 +92,7 @@ pub(crate) async fn build_guardian_prompt_items(
|
||||
parent_history_version: history.history_version(),
|
||||
transcript_entry_count: transcript_entries.len(),
|
||||
};
|
||||
let planned_action_json = format_guardian_action_pretty(&request)?;
|
||||
let planned_action = format_guardian_action_pretty_with_truncation(&request)?;
|
||||
|
||||
let prompt_shape = match mode {
|
||||
GuardianPromptMode::Full => GuardianPromptShape::Full,
|
||||
@@ -176,11 +177,12 @@ pub(crate) async fn build_guardian_prompt_items(
|
||||
.to_string(),
|
||||
);
|
||||
push_text("Planned action JSON:\n".to_string());
|
||||
push_text(format!("{planned_action_json}\n"));
|
||||
push_text(format!("{}\n", planned_action.text));
|
||||
push_text(">>> APPROVAL REQUEST END\n".to_string());
|
||||
Ok(GuardianPromptItems {
|
||||
items,
|
||||
transcript_cursor,
|
||||
reviewed_action_truncated: planned_action.truncated,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -420,20 +422,23 @@ pub(crate) fn collect_guardian_transcript_entries(
|
||||
entries
|
||||
}
|
||||
|
||||
pub(crate) fn guardian_truncate_text(content: &str, token_cap: usize) -> String {
|
||||
pub(crate) fn guardian_truncate_text_with_metadata(
|
||||
content: &str,
|
||||
token_cap: usize,
|
||||
) -> (String, bool) {
|
||||
if content.is_empty() {
|
||||
return String::new();
|
||||
return (String::new(), false);
|
||||
}
|
||||
|
||||
let max_bytes = approx_bytes_for_tokens(token_cap);
|
||||
if content.len() <= max_bytes {
|
||||
return content.to_string();
|
||||
return (content.to_string(), false);
|
||||
}
|
||||
|
||||
let omitted_tokens = approx_tokens_from_byte_count(content.len().saturating_sub(max_bytes));
|
||||
let marker = format!("<{TRUNCATION_TAG} omitted_approx_tokens=\"{omitted_tokens}\" />");
|
||||
if max_bytes <= marker.len() {
|
||||
return marker;
|
||||
return (marker, true);
|
||||
}
|
||||
|
||||
let available_bytes = max_bytes.saturating_sub(marker.len());
|
||||
@@ -441,7 +446,11 @@ pub(crate) fn guardian_truncate_text(content: &str, token_cap: usize) -> String
|
||||
let suffix_budget = available_bytes.saturating_sub(prefix_budget);
|
||||
let (prefix, suffix) = split_guardian_truncation_bounds(content, prefix_budget, suffix_budget);
|
||||
|
||||
format!("{prefix}{marker}{suffix}")
|
||||
(format!("{prefix}{marker}{suffix}"), true)
|
||||
}
|
||||
|
||||
pub(crate) fn guardian_truncate_text(content: &str, token_cap: usize) -> String {
|
||||
guardian_truncate_text_with_metadata(content, token_cap).0
|
||||
}
|
||||
|
||||
fn split_guardian_truncation_bounds(
|
||||
|
||||
@@ -234,7 +234,7 @@ fn guardian_review_metadata_fields(
|
||||
guardian_model: Some(metadata.guardian_model),
|
||||
guardian_reasoning_effort: metadata.guardian_reasoning_effort,
|
||||
had_prior_review_context: Some(metadata.had_prior_review_context),
|
||||
reviewed_action_truncated: false,
|
||||
reviewed_action_truncated: metadata.reviewed_action_truncated,
|
||||
token_usage: metadata.token_usage,
|
||||
time_to_first_token_ms: None,
|
||||
},
|
||||
|
||||
@@ -72,6 +72,7 @@ pub(crate) struct GuardianReviewSessionMetadata {
|
||||
pub(crate) guardian_model: String,
|
||||
pub(crate) guardian_reasoning_effort: Option<String>,
|
||||
pub(crate) had_prior_review_context: bool,
|
||||
pub(crate) reviewed_action_truncated: bool,
|
||||
pub(crate) token_usage: Option<TokenUsage>,
|
||||
}
|
||||
|
||||
@@ -620,6 +621,7 @@ async fn run_review_on_session(
|
||||
guardian_model: params.model.clone(),
|
||||
guardian_reasoning_effort: params.reasoning_effort.map(|effort| effort.to_string()),
|
||||
had_prior_review_context: had_prior_review_context(&prompt_mode),
|
||||
reviewed_action_truncated: false,
|
||||
token_usage: None,
|
||||
};
|
||||
if send_followup_reminder {
|
||||
@@ -646,6 +648,7 @@ async fn run_review_on_session(
|
||||
prompt_mode,
|
||||
)
|
||||
.await?;
|
||||
let reviewed_action_truncated = prompt_items.reviewed_action_truncated;
|
||||
let token_usage_at_review_start =
|
||||
review_session.codex.session.total_token_usage().await;
|
||||
|
||||
@@ -667,8 +670,9 @@ async fn run_review_on_session(
|
||||
})
|
||||
.await?;
|
||||
|
||||
Ok::<(GuardianTranscriptCursor, Option<TokenUsage>), anyhow::Error>((
|
||||
Ok::<(GuardianTranscriptCursor, bool, Option<TokenUsage>), anyhow::Error>((
|
||||
prompt_items.transcript_cursor,
|
||||
reviewed_action_truncated,
|
||||
token_usage_at_review_start,
|
||||
))
|
||||
}),
|
||||
@@ -678,16 +682,18 @@ async fn run_review_on_session(
|
||||
Ok(submit_result) => submit_result,
|
||||
Err(outcome) => return (outcome, false, guardian_metadata),
|
||||
};
|
||||
let (transcript_cursor, token_usage_at_review_start) = match submit_result {
|
||||
Ok(submit_result) => submit_result,
|
||||
Err(err) => {
|
||||
return (
|
||||
GuardianReviewSessionOutcome::PromptBuildFailed(err),
|
||||
false,
|
||||
guardian_metadata,
|
||||
);
|
||||
}
|
||||
};
|
||||
let (transcript_cursor, reviewed_action_truncated, token_usage_at_review_start) =
|
||||
match submit_result {
|
||||
Ok(submit_result) => submit_result,
|
||||
Err(err) => {
|
||||
return (
|
||||
GuardianReviewSessionOutcome::PromptBuildFailed(err),
|
||||
false,
|
||||
guardian_metadata,
|
||||
);
|
||||
}
|
||||
};
|
||||
guardian_metadata.reviewed_action_truncated = reviewed_action_truncated;
|
||||
|
||||
let outcome =
|
||||
wait_for_guardian_review(review_session, deadline, params.external_cancel.as_ref()).await;
|
||||
|
||||
@@ -584,11 +584,29 @@ fn format_guardian_action_pretty_truncates_large_string_fields() -> serde_json::
|
||||
patch: patch.clone(),
|
||||
};
|
||||
|
||||
let rendered = format_guardian_action_pretty(&action)?;
|
||||
let rendered = format_guardian_action_pretty_with_truncation(&action)?;
|
||||
|
||||
assert!(rendered.contains("\"tool\": \"apply_patch\""));
|
||||
assert!(rendered.contains("<truncated omitted_approx_tokens="));
|
||||
assert!(rendered.len() < patch.len());
|
||||
assert!(rendered.text.contains("\"tool\": \"apply_patch\""));
|
||||
assert!(rendered.text.contains("<truncated omitted_approx_tokens="));
|
||||
assert!(rendered.text.len() < patch.len());
|
||||
assert!(rendered.truncated);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_guardian_action_pretty_reports_no_truncation_for_small_payload() -> serde_json::Result<()>
|
||||
{
|
||||
let action = GuardianApprovalRequest::ApplyPatch {
|
||||
id: "patch-1".to_string(),
|
||||
cwd: test_path_buf("/tmp").abs(),
|
||||
files: Vec::new(),
|
||||
patch: "line\n".to_string(),
|
||||
};
|
||||
|
||||
let rendered = format_guardian_action_pretty_with_truncation(&action)?;
|
||||
|
||||
assert!(rendered.text.contains("\"tool\": \"apply_patch\""));
|
||||
assert!(!rendered.truncated);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -916,10 +934,24 @@ async fn guardian_review_request_layout_matches_model_visible_request_snapshot()
|
||||
/*external_cancel*/ None,
|
||||
)
|
||||
.await;
|
||||
let GuardianReviewOutcome::Completed(Ok(assessment), _) = outcome else {
|
||||
let GuardianReviewOutcome::Completed(Ok(assessment), metadata) = outcome else {
|
||||
panic!("expected guardian assessment");
|
||||
};
|
||||
let metadata = metadata.expect("guardian session metadata");
|
||||
assert_eq!(assessment.outcome, GuardianAssessmentOutcome::Allow);
|
||||
assert_ne!(
|
||||
metadata.guardian_thread_id,
|
||||
session.conversation_id.to_string()
|
||||
);
|
||||
ThreadId::from_string(&metadata.guardian_thread_id)
|
||||
.expect("guardian thread id should be a valid UUID");
|
||||
assert!(matches!(
|
||||
metadata.guardian_session_kind,
|
||||
codex_analytics::GuardianReviewSessionKind::TrunkNew
|
||||
));
|
||||
assert_eq!(metadata.guardian_model, "gpt-5.4");
|
||||
assert_eq!(metadata.guardian_reasoning_effort.as_deref(), Some("low"));
|
||||
assert!(!metadata.had_prior_review_context);
|
||||
|
||||
let request = request_log.single_request();
|
||||
let mut settings = Settings::clone_current();
|
||||
@@ -1125,18 +1157,53 @@ async fn guardian_reuses_prompt_cache_key_and_appends_prior_reviews() -> anyhow:
|
||||
)
|
||||
.await;
|
||||
|
||||
let GuardianReviewOutcome::Completed(Ok(first_assessment), _) = first_outcome else {
|
||||
let GuardianReviewOutcome::Completed(Ok(first_assessment), first_metadata) = first_outcome
|
||||
else {
|
||||
panic!("expected first guardian assessment");
|
||||
};
|
||||
let GuardianReviewOutcome::Completed(Ok(second_assessment), _) = second_outcome else {
|
||||
let first_metadata = first_metadata.expect("first guardian session metadata");
|
||||
let GuardianReviewOutcome::Completed(Ok(second_assessment), second_metadata) = second_outcome
|
||||
else {
|
||||
panic!("expected second guardian assessment");
|
||||
};
|
||||
let GuardianReviewOutcome::Completed(Ok(third_assessment), _) = third_outcome else {
|
||||
let second_metadata = second_metadata.expect("second guardian session metadata");
|
||||
let GuardianReviewOutcome::Completed(Ok(third_assessment), third_metadata) = third_outcome
|
||||
else {
|
||||
panic!("expected third guardian assessment");
|
||||
};
|
||||
let third_metadata = third_metadata.expect("third guardian session metadata");
|
||||
assert_eq!(first_assessment.outcome, GuardianAssessmentOutcome::Allow);
|
||||
assert_eq!(second_assessment.outcome, GuardianAssessmentOutcome::Allow);
|
||||
assert_eq!(third_assessment.outcome, GuardianAssessmentOutcome::Allow);
|
||||
assert!(matches!(
|
||||
first_metadata.guardian_session_kind,
|
||||
codex_analytics::GuardianReviewSessionKind::TrunkNew
|
||||
));
|
||||
assert!(matches!(
|
||||
second_metadata.guardian_session_kind,
|
||||
codex_analytics::GuardianReviewSessionKind::TrunkReused
|
||||
));
|
||||
assert!(matches!(
|
||||
third_metadata.guardian_session_kind,
|
||||
codex_analytics::GuardianReviewSessionKind::TrunkReused
|
||||
));
|
||||
ThreadId::from_string(&first_metadata.guardian_thread_id)
|
||||
.expect("first guardian thread id should be a valid UUID");
|
||||
ThreadId::from_string(&second_metadata.guardian_thread_id)
|
||||
.expect("second guardian thread id should be a valid UUID");
|
||||
ThreadId::from_string(&third_metadata.guardian_thread_id)
|
||||
.expect("third guardian thread id should be a valid UUID");
|
||||
assert!(!first_metadata.had_prior_review_context);
|
||||
assert!(second_metadata.had_prior_review_context);
|
||||
assert!(third_metadata.had_prior_review_context);
|
||||
assert_eq!(
|
||||
first_metadata.guardian_thread_id,
|
||||
second_metadata.guardian_thread_id
|
||||
);
|
||||
assert_eq!(
|
||||
second_metadata.guardian_thread_id,
|
||||
third_metadata.guardian_thread_id
|
||||
);
|
||||
|
||||
let requests = request_log.requests();
|
||||
assert_eq!(requests.len(), 3);
|
||||
|
||||
Reference in New Issue
Block a user