fix(guardian, app-server): introduce guardian review ids (#17298)

## Description This PR introduces `review_id` as the stable identifier for guardian reviews and exposes it in app-server `item/autoApprovalReview/started` and `item/autoApprovalReview/completed` events. Internally, guardian rejection state is now keyed by `review_id` instead of the reviewed tool item ID. `target_item_id` is still included when a review maps to a concrete thread item, but it is no longer overloaded as the review lifecycle identifier. ## Motivation We'd like to give users the ability to preempt a guardian review while it's running (approve or decline). However, we can't implement the API that allows the user to override a running guardian review because we didn't have a unique `review_id` per guardian review. Using `target_item_id` is not correct since: - with execve reviews, there can be multiple execve calls (and therefore guardian reviews) per shell command - with network policy reviews, there is no target item ID The PR that actually implements user overrides will use `review_id` as the stable identifier.
2026-04-30 09:26:44 +00:00 · 2026-04-10 16:21:02 -07:00
parent 7999b0f60f
commit a3be74143a
36 changed files with 577 additions and 172 deletions
--- a/codex-rs/app-server-protocol/src/protocol/v2.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v2.rs
@@ -9,6 +9,7 @@ use codex_protocol::account::PlanType;
 use codex_protocol::approvals::ElicitationRequest as CoreElicitationRequest;
 use codex_protocol::approvals::ExecPolicyAmendment as CoreExecPolicyAmendment;
 use codex_protocol::approvals::GuardianAssessmentAction as CoreGuardianAssessmentAction;
+use codex_protocol::approvals::GuardianAssessmentDecisionSource as CoreGuardianAssessmentDecisionSource;
 use codex_protocol::approvals::GuardianCommandSource as CoreGuardianCommandSource;
 use codex_protocol::approvals::NetworkApprovalContext as CoreNetworkApprovalContext;
 use codex_protocol::approvals::NetworkApprovalProtocol as CoreNetworkApprovalProtocol;
@@ -4565,6 +4566,22 @@ pub enum GuardianApprovalReviewStatus {
    Aborted,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+/// [UNSTABLE] Source that produced a terminal guardian approval review decision.
+pub enum AutoReviewDecisionSource {
+    Agent,
+}
+
+impl From<CoreGuardianAssessmentDecisionSource> for AutoReviewDecisionSource {
+    fn from(value: CoreGuardianAssessmentDecisionSource) -> Self {
+        match value {
+            CoreGuardianAssessmentDecisionSource::Agent => Self::Agent,
+        }
+    }
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)]
 #[serde(rename_all = "lowercase")]
 #[ts(export_to = "v2/")]
@@ -5321,14 +5338,23 @@ pub struct ItemStartedNotification {
 #[ts(export_to = "v2/")]
 /// [UNSTABLE] Temporary notification payload for guardian automatic approval
 /// review. This shape is expected to change soon.
-///
-/// TODO(ccunningham): Attach guardian review state to the reviewed tool item's
-/// lifecycle instead of sending separate standalone review notifications so the
-/// app-server API can persist and replay review state via `thread/read`.
 pub struct ItemGuardianApprovalReviewStartedNotification {
    pub thread_id: String,
    pub turn_id: String,
-    pub target_item_id: String,
+    /// Stable identifier for this review.
+    pub review_id: String,
+    /// Identifier for the reviewed item or tool call when one exists.
+    ///
+    /// In most cases, one review maps to one target item. The exceptions are
+    /// - execve reviews, where a single command may contain multiple execve
+    ///   calls to review (only possible when using the shell_zsh_fork feature)
+    /// - network policy reviews, where there is no target item
+    ///
+    /// A network call is triggered by a CommandExecution item, so having a
+    /// target_item_id set to the CommandExecution item would be misleading
+    /// because the review is about the network call, not the command execution.
+    /// Therefore, target_item_id is set to None for network policy reviews.
+    pub target_item_id: Option<String>,
    pub review: GuardianApprovalReview,
    pub action: GuardianApprovalReviewAction,
 }
@@ -5338,14 +5364,24 @@ pub struct ItemGuardianApprovalReviewStartedNotification {
 #[ts(export_to = "v2/")]
 /// [UNSTABLE] Temporary notification payload for guardian automatic approval
 /// review. This shape is expected to change soon.
-///
-/// TODO(ccunningham): Attach guardian review state to the reviewed tool item's
-/// lifecycle instead of sending separate standalone review notifications so the
-/// app-server API can persist and replay review state via `thread/read`.
 pub struct ItemGuardianApprovalReviewCompletedNotification {
    pub thread_id: String,
    pub turn_id: String,
-    pub target_item_id: String,
+    /// Stable identifier for this review.
+    pub review_id: String,
+    /// Identifier for the reviewed item or tool call when one exists.
+    ///
+    /// In most cases, one review maps to one target item. The exceptions are
+    /// - execve reviews, where a single command may contain multiple execve
+    ///   calls to review (only possible when using the shell_zsh_fork feature)
+    /// - network policy reviews, where there is no target item
+    ///
+    /// A network call is triggered by a CommandExecution item, so having a
+    /// target_item_id set to the CommandExecution item would be misleading
+    /// because the review is about the network call, not the command execution.
+    /// Therefore, target_item_id is set to None for network policy reviews.
+    pub target_item_id: Option<String>,
+    pub decision_source: AutoReviewDecisionSource,
    pub review: GuardianApprovalReview,
    pub action: GuardianApprovalReviewAction,
 }