From bb6134c028fd2539b3c0a76c6c3bc6635784ac3e Mon Sep 17 00:00:00 2001 From: alexsong-oai Date: Mon, 11 May 2026 15:41:38 -0700 Subject: [PATCH] Stop uploading accepted line fingerprints (#22180) ## Summary - keep accepted-line diff parsing and fingerprint hashing logic locally - stop uploading path/line hash fingerprints in the accepted-line analytics event payload - keep aggregate accepted added/deleted line counts in the event ## Testing - just fmt - cargo test -p codex-analytics - just fix -p codex-analytics --- codex-rs/analytics/src/accepted_lines.rs | 106 ++++++------------ .../analytics/src/analytics_client_tests.rs | 45 ++------ codex-rs/analytics/src/client_tests.rs | 6 +- 3 files changed, 45 insertions(+), 112 deletions(-) diff --git a/codex-rs/analytics/src/accepted_lines.rs b/codex-rs/analytics/src/accepted_lines.rs index 07a0e33694..fd94bb2827 100644 --- a/codex-rs/analytics/src/accepted_lines.rs +++ b/codex-rs/analytics/src/accepted_lines.rs @@ -7,9 +7,6 @@ use codex_git_utils::get_git_remote_urls_assume_git_repo; use sha1::Digest; use std::path::Path; -const ACCEPTED_LINE_FINGERPRINT_EVENT_TARGET_BYTES: usize = 2 * 1024 * 1024; -const ACCEPTED_LINE_FINGERPRINT_EVENT_FIXED_BYTES: usize = 1024; - #[derive(Clone, Debug, PartialEq, Eq)] pub struct AcceptedLineFingerprintSummary { pub accepted_added_lines: u64, @@ -97,39 +94,38 @@ pub fn fingerprint_hash(domain: &str, value: &str) -> String { pub(crate) fn accepted_line_fingerprint_event_requests( input: AcceptedLineFingerprintEventInput, ) -> Vec { - let chunks = accepted_line_fingerprint_chunks(input.line_fingerprints); - chunks - .into_iter() - .enumerate() - .map(|(index, line_fingerprints)| { - let is_first_chunk = index == 0; - TrackEventRequest::AcceptedLineFingerprints(Box::new( - CodexAcceptedLineFingerprintsEventRequest { - event_type: "codex_accepted_line_fingerprints", - event_params: CodexAcceptedLineFingerprintsEventParams { - event_type: input.event_type, - turn_id: input.turn_id.clone(), - thread_id: input.thread_id.clone(), - product_surface: input.product_surface.clone(), - model_slug: input.model_slug.clone(), - completed_at: input.completed_at, - repo_hash: input.repo_hash.clone(), - accepted_added_lines: if is_first_chunk { - input.accepted_added_lines - } else { - 0 - }, - accepted_deleted_lines: if is_first_chunk { - input.accepted_deleted_lines - } else { - 0 - }, - line_fingerprints, - }, - }, - )) - }) - .collect() + let AcceptedLineFingerprintEventInput { + event_type, + turn_id, + thread_id, + product_surface, + model_slug, + completed_at, + repo_hash, + accepted_added_lines, + accepted_deleted_lines, + line_fingerprints: _line_fingerprints, + } = input; + + vec![TrackEventRequest::AcceptedLineFingerprints(Box::new( + CodexAcceptedLineFingerprintsEventRequest { + event_type: "codex_accepted_line_fingerprints", + event_params: CodexAcceptedLineFingerprintsEventParams { + event_type, + turn_id, + thread_id, + product_surface, + model_slug, + completed_at, + repo_hash, + accepted_added_lines, + accepted_deleted_lines, + // Keep computing local fingerprints for parsing tests and future attribution, + // but do not upload path/line hashes in the analytics event payload. + line_fingerprints: Vec::new(), + }, + }, + ))] } pub async fn accepted_line_repo_hash_for_cwd(cwd: &Path) -> Option { @@ -172,44 +168,6 @@ fn normalize_effective_line(line: &str) -> Option { Some(normalized) } -fn accepted_line_fingerprint_chunks( - line_fingerprints: Vec, -) -> Vec> { - if line_fingerprints.is_empty() { - return vec![Vec::new()]; - } - - let mut chunks = Vec::new(); - let mut current = Vec::new(); - let mut current_bytes = ACCEPTED_LINE_FINGERPRINT_EVENT_FIXED_BYTES; - - for fingerprint in line_fingerprints { - let item_bytes = accepted_line_fingerprint_json_bytes(&fingerprint); - let separator_bytes = usize::from(!current.is_empty()); - if !current.is_empty() - && current_bytes + separator_bytes + item_bytes - > ACCEPTED_LINE_FINGERPRINT_EVENT_TARGET_BYTES - { - chunks.push(current); - current = Vec::new(); - current_bytes = ACCEPTED_LINE_FINGERPRINT_EVENT_FIXED_BYTES; - } - current_bytes += usize::from(!current.is_empty()) + item_bytes; - current.push(fingerprint); - } - - if !current.is_empty() { - chunks.push(current); - } - chunks -} - -fn accepted_line_fingerprint_json_bytes(fingerprint: &AcceptedLineFingerprint) -> usize { - // {"path_hash":"...","line_hash":"..."} plus one byte of array comma - // accounted for by the caller when needed. - 32 + fingerprint.path_hash.len() + fingerprint.line_hash.len() -} - #[cfg(test)] mod tests { use super::*; diff --git a/codex-rs/analytics/src/analytics_client_tests.rs b/codex-rs/analytics/src/analytics_client_tests.rs index 1e0bc49da3..5b4a72229b 100644 --- a/codex-rs/analytics/src/analytics_client_tests.rs +++ b/codex-rs/analytics/src/analytics_client_tests.rs @@ -37,7 +37,6 @@ use crate::events::codex_hook_run_metadata; use crate::events::codex_plugin_metadata; use crate::events::codex_plugin_used_metadata; use crate::events::subagent_thread_started_event_request; -use crate::facts::AcceptedLineFingerprint; use crate::facts::AnalyticsFact; use crate::facts::AnalyticsJsonRpcError; use crate::facts::AppInvocation; @@ -1028,10 +1027,7 @@ fn accepted_line_fingerprints_event_serializes_expected_shape() { repo_hash: Some("repo-hash-1".to_string()), accepted_added_lines: 42, accepted_deleted_lines: 40, - line_fingerprints: vec![AcceptedLineFingerprint { - path_hash: "path-hash-1".to_string(), - line_hash: "line-hash-1".to_string(), - }], + line_fingerprints: Vec::new(), }, }, )); @@ -1052,19 +1048,14 @@ fn accepted_line_fingerprints_event_serializes_expected_shape() { "repo_hash": "repo-hash-1", "accepted_added_lines": 42, "accepted_deleted_lines": 40, - "line_fingerprints": [ - { - "path_hash": "path-hash-1", - "line_hash": "line-hash-1" - } - ] + "line_fingerprints": [] } }) ); } #[tokio::test] -async fn reducer_chunks_large_accepted_line_fingerprint_events_without_repeating_counts() { +async fn reducer_emits_large_accepted_line_aggregates_without_fingerprints() { let mut reducer = AnalyticsReducer::default(); let mut events = Vec::new(); @@ -1124,22 +1115,14 @@ index 1111111..2222222 _ => None, }) .collect::>(); - assert!(accepted_line_events.len() > 1); - let mut total_fingerprints = 0; - for (index, event) in accepted_line_events.iter().enumerate() { - assert_eq!(event.event_params.turn_id, "turn-2"); - assert_eq!(event.event_params.thread_id, "thread-2"); - total_fingerprints += event.event_params.line_fingerprints.len(); - if index == 0 { - assert_eq!(event.event_params.accepted_added_lines, 20_000); - assert_eq!(event.event_params.accepted_deleted_lines, 0); - } else { - assert_eq!(event.event_params.accepted_added_lines, 0); - assert_eq!(event.event_params.accepted_deleted_lines, 0); - } - assert!(serde_json::to_vec(event).expect("serialize chunk").len() < 2_100_000); - } - assert_eq!(total_fingerprints, 20_000); + assert_eq!(accepted_line_events.len(), 1); + let event = accepted_line_events[0]; + assert_eq!(event.event_params.turn_id, "turn-2"); + assert_eq!(event.event_params.thread_id, "thread-2"); + assert_eq!(event.event_params.accepted_added_lines, 20_000); + assert_eq!(event.event_params.accepted_deleted_lines, 0); + assert!(event.event_params.line_fingerprints.is_empty()); + assert!(serde_json::to_vec(event).expect("serialize event").len() < 2_100_000); } #[tokio::test] @@ -1206,11 +1189,7 @@ index 1111111..2222222 assert_eq!(accepted_line_events.len(), 1); let event = accepted_line_events[0]; assert_eq!(event.event_params.accepted_added_lines, 1); - assert_eq!(event.event_params.line_fingerprints.len(), 1); - assert_eq!( - event.event_params.line_fingerprints[0].line_hash, - crate::fingerprint_hash("line", "let latest_value = 2;") - ); + assert!(event.event_params.line_fingerprints.is_empty()); } #[test] diff --git a/codex-rs/analytics/src/client_tests.rs b/codex-rs/analytics/src/client_tests.rs index 2ddc21e477..71c46d808a 100644 --- a/codex-rs/analytics/src/client_tests.rs +++ b/codex-rs/analytics/src/client_tests.rs @@ -6,7 +6,6 @@ use crate::events::CodexAcceptedLineFingerprintsEventRequest; use crate::events::SkillInvocationEventParams; use crate::events::SkillInvocationEventRequest; use crate::events::TrackEventRequest; -use crate::facts::AcceptedLineFingerprint; use crate::facts::AnalyticsFact; use crate::facts::InvocationType; use codex_app_server_protocol::ApprovalsReviewer as AppServerApprovalsReviewer; @@ -53,10 +52,7 @@ fn sample_accepted_line_fingerprint_event(thread_id: &str) -> TrackEventRequest repo_hash: None, accepted_added_lines: 1, accepted_deleted_lines: 0, - line_fingerprints: vec![AcceptedLineFingerprint { - path_hash: "path-hash".to_string(), - line_hash: "line-hash".to_string(), - }], + line_fingerprints: Vec::new(), }, }, ))