Stop uploading accepted line fingerprints (#22180)

## Summary
- keep accepted-line diff parsing and fingerprint hashing logic locally
- stop uploading path/line hash fingerprints in the accepted-line
analytics event payload
- keep aggregate accepted added/deleted line counts in the event

## Testing
- just fmt
- cargo test -p codex-analytics
- just fix -p codex-analytics
This commit is contained in:
alexsong-oai
2026-05-11 15:41:38 -07:00
committed by GitHub
parent 4859d80ffe
commit bb6134c028
3 changed files with 45 additions and 112 deletions

View File

@@ -7,9 +7,6 @@ use codex_git_utils::get_git_remote_urls_assume_git_repo;
use sha1::Digest;
use std::path::Path;
const ACCEPTED_LINE_FINGERPRINT_EVENT_TARGET_BYTES: usize = 2 * 1024 * 1024;
const ACCEPTED_LINE_FINGERPRINT_EVENT_FIXED_BYTES: usize = 1024;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct AcceptedLineFingerprintSummary {
pub accepted_added_lines: u64,
@@ -97,39 +94,38 @@ pub fn fingerprint_hash(domain: &str, value: &str) -> String {
pub(crate) fn accepted_line_fingerprint_event_requests(
input: AcceptedLineFingerprintEventInput,
) -> Vec<TrackEventRequest> {
let chunks = accepted_line_fingerprint_chunks(input.line_fingerprints);
chunks
.into_iter()
.enumerate()
.map(|(index, line_fingerprints)| {
let is_first_chunk = index == 0;
TrackEventRequest::AcceptedLineFingerprints(Box::new(
CodexAcceptedLineFingerprintsEventRequest {
event_type: "codex_accepted_line_fingerprints",
event_params: CodexAcceptedLineFingerprintsEventParams {
event_type: input.event_type,
turn_id: input.turn_id.clone(),
thread_id: input.thread_id.clone(),
product_surface: input.product_surface.clone(),
model_slug: input.model_slug.clone(),
completed_at: input.completed_at,
repo_hash: input.repo_hash.clone(),
accepted_added_lines: if is_first_chunk {
input.accepted_added_lines
} else {
0
},
accepted_deleted_lines: if is_first_chunk {
input.accepted_deleted_lines
} else {
0
},
line_fingerprints,
},
},
))
})
.collect()
let AcceptedLineFingerprintEventInput {
event_type,
turn_id,
thread_id,
product_surface,
model_slug,
completed_at,
repo_hash,
accepted_added_lines,
accepted_deleted_lines,
line_fingerprints: _line_fingerprints,
} = input;
vec![TrackEventRequest::AcceptedLineFingerprints(Box::new(
CodexAcceptedLineFingerprintsEventRequest {
event_type: "codex_accepted_line_fingerprints",
event_params: CodexAcceptedLineFingerprintsEventParams {
event_type,
turn_id,
thread_id,
product_surface,
model_slug,
completed_at,
repo_hash,
accepted_added_lines,
accepted_deleted_lines,
// Keep computing local fingerprints for parsing tests and future attribution,
// but do not upload path/line hashes in the analytics event payload.
line_fingerprints: Vec::new(),
},
},
))]
}
pub async fn accepted_line_repo_hash_for_cwd(cwd: &Path) -> Option<String> {
@@ -172,44 +168,6 @@ fn normalize_effective_line(line: &str) -> Option<String> {
Some(normalized)
}
fn accepted_line_fingerprint_chunks(
line_fingerprints: Vec<AcceptedLineFingerprint>,
) -> Vec<Vec<AcceptedLineFingerprint>> {
if line_fingerprints.is_empty() {
return vec![Vec::new()];
}
let mut chunks = Vec::new();
let mut current = Vec::new();
let mut current_bytes = ACCEPTED_LINE_FINGERPRINT_EVENT_FIXED_BYTES;
for fingerprint in line_fingerprints {
let item_bytes = accepted_line_fingerprint_json_bytes(&fingerprint);
let separator_bytes = usize::from(!current.is_empty());
if !current.is_empty()
&& current_bytes + separator_bytes + item_bytes
> ACCEPTED_LINE_FINGERPRINT_EVENT_TARGET_BYTES
{
chunks.push(current);
current = Vec::new();
current_bytes = ACCEPTED_LINE_FINGERPRINT_EVENT_FIXED_BYTES;
}
current_bytes += usize::from(!current.is_empty()) + item_bytes;
current.push(fingerprint);
}
if !current.is_empty() {
chunks.push(current);
}
chunks
}
fn accepted_line_fingerprint_json_bytes(fingerprint: &AcceptedLineFingerprint) -> usize {
// {"path_hash":"...","line_hash":"..."} plus one byte of array comma
// accounted for by the caller when needed.
32 + fingerprint.path_hash.len() + fingerprint.line_hash.len()
}
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -37,7 +37,6 @@ use crate::events::codex_hook_run_metadata;
use crate::events::codex_plugin_metadata;
use crate::events::codex_plugin_used_metadata;
use crate::events::subagent_thread_started_event_request;
use crate::facts::AcceptedLineFingerprint;
use crate::facts::AnalyticsFact;
use crate::facts::AnalyticsJsonRpcError;
use crate::facts::AppInvocation;
@@ -1028,10 +1027,7 @@ fn accepted_line_fingerprints_event_serializes_expected_shape() {
repo_hash: Some("repo-hash-1".to_string()),
accepted_added_lines: 42,
accepted_deleted_lines: 40,
line_fingerprints: vec![AcceptedLineFingerprint {
path_hash: "path-hash-1".to_string(),
line_hash: "line-hash-1".to_string(),
}],
line_fingerprints: Vec::new(),
},
},
));
@@ -1052,19 +1048,14 @@ fn accepted_line_fingerprints_event_serializes_expected_shape() {
"repo_hash": "repo-hash-1",
"accepted_added_lines": 42,
"accepted_deleted_lines": 40,
"line_fingerprints": [
{
"path_hash": "path-hash-1",
"line_hash": "line-hash-1"
}
]
"line_fingerprints": []
}
})
);
}
#[tokio::test]
async fn reducer_chunks_large_accepted_line_fingerprint_events_without_repeating_counts() {
async fn reducer_emits_large_accepted_line_aggregates_without_fingerprints() {
let mut reducer = AnalyticsReducer::default();
let mut events = Vec::new();
@@ -1124,22 +1115,14 @@ index 1111111..2222222
_ => None,
})
.collect::<Vec<_>>();
assert!(accepted_line_events.len() > 1);
let mut total_fingerprints = 0;
for (index, event) in accepted_line_events.iter().enumerate() {
assert_eq!(event.event_params.turn_id, "turn-2");
assert_eq!(event.event_params.thread_id, "thread-2");
total_fingerprints += event.event_params.line_fingerprints.len();
if index == 0 {
assert_eq!(event.event_params.accepted_added_lines, 20_000);
assert_eq!(event.event_params.accepted_deleted_lines, 0);
} else {
assert_eq!(event.event_params.accepted_added_lines, 0);
assert_eq!(event.event_params.accepted_deleted_lines, 0);
}
assert!(serde_json::to_vec(event).expect("serialize chunk").len() < 2_100_000);
}
assert_eq!(total_fingerprints, 20_000);
assert_eq!(accepted_line_events.len(), 1);
let event = accepted_line_events[0];
assert_eq!(event.event_params.turn_id, "turn-2");
assert_eq!(event.event_params.thread_id, "thread-2");
assert_eq!(event.event_params.accepted_added_lines, 20_000);
assert_eq!(event.event_params.accepted_deleted_lines, 0);
assert!(event.event_params.line_fingerprints.is_empty());
assert!(serde_json::to_vec(event).expect("serialize event").len() < 2_100_000);
}
#[tokio::test]
@@ -1206,11 +1189,7 @@ index 1111111..2222222
assert_eq!(accepted_line_events.len(), 1);
let event = accepted_line_events[0];
assert_eq!(event.event_params.accepted_added_lines, 1);
assert_eq!(event.event_params.line_fingerprints.len(), 1);
assert_eq!(
event.event_params.line_fingerprints[0].line_hash,
crate::fingerprint_hash("line", "let latest_value = 2;")
);
assert!(event.event_params.line_fingerprints.is_empty());
}
#[test]

View File

@@ -6,7 +6,6 @@ use crate::events::CodexAcceptedLineFingerprintsEventRequest;
use crate::events::SkillInvocationEventParams;
use crate::events::SkillInvocationEventRequest;
use crate::events::TrackEventRequest;
use crate::facts::AcceptedLineFingerprint;
use crate::facts::AnalyticsFact;
use crate::facts::InvocationType;
use codex_app_server_protocol::ApprovalsReviewer as AppServerApprovalsReviewer;
@@ -53,10 +52,7 @@ fn sample_accepted_line_fingerprint_event(thread_id: &str) -> TrackEventRequest
repo_hash: None,
accepted_added_lines: 1,
accepted_deleted_lines: 0,
line_fingerprints: vec![AcceptedLineFingerprint {
path_hash: "path-hash".to_string(),
line_hash: "line-hash".to_string(),
}],
line_fingerprints: Vec::new(),
},
},
))