mirror of
https://github.com/openai/codex.git
synced 2026-05-18 02:02:30 +00:00
Stop uploading accepted line fingerprints (#22180)
## Summary - keep accepted-line diff parsing and fingerprint hashing logic locally - stop uploading path/line hash fingerprints in the accepted-line analytics event payload - keep aggregate accepted added/deleted line counts in the event ## Testing - just fmt - cargo test -p codex-analytics - just fix -p codex-analytics
This commit is contained in:
@@ -7,9 +7,6 @@ use codex_git_utils::get_git_remote_urls_assume_git_repo;
|
||||
use sha1::Digest;
|
||||
use std::path::Path;
|
||||
|
||||
const ACCEPTED_LINE_FINGERPRINT_EVENT_TARGET_BYTES: usize = 2 * 1024 * 1024;
|
||||
const ACCEPTED_LINE_FINGERPRINT_EVENT_FIXED_BYTES: usize = 1024;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct AcceptedLineFingerprintSummary {
|
||||
pub accepted_added_lines: u64,
|
||||
@@ -97,39 +94,38 @@ pub fn fingerprint_hash(domain: &str, value: &str) -> String {
|
||||
pub(crate) fn accepted_line_fingerprint_event_requests(
|
||||
input: AcceptedLineFingerprintEventInput,
|
||||
) -> Vec<TrackEventRequest> {
|
||||
let chunks = accepted_line_fingerprint_chunks(input.line_fingerprints);
|
||||
chunks
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(index, line_fingerprints)| {
|
||||
let is_first_chunk = index == 0;
|
||||
TrackEventRequest::AcceptedLineFingerprints(Box::new(
|
||||
CodexAcceptedLineFingerprintsEventRequest {
|
||||
event_type: "codex_accepted_line_fingerprints",
|
||||
event_params: CodexAcceptedLineFingerprintsEventParams {
|
||||
event_type: input.event_type,
|
||||
turn_id: input.turn_id.clone(),
|
||||
thread_id: input.thread_id.clone(),
|
||||
product_surface: input.product_surface.clone(),
|
||||
model_slug: input.model_slug.clone(),
|
||||
completed_at: input.completed_at,
|
||||
repo_hash: input.repo_hash.clone(),
|
||||
accepted_added_lines: if is_first_chunk {
|
||||
input.accepted_added_lines
|
||||
} else {
|
||||
0
|
||||
},
|
||||
accepted_deleted_lines: if is_first_chunk {
|
||||
input.accepted_deleted_lines
|
||||
} else {
|
||||
0
|
||||
},
|
||||
line_fingerprints,
|
||||
},
|
||||
},
|
||||
))
|
||||
})
|
||||
.collect()
|
||||
let AcceptedLineFingerprintEventInput {
|
||||
event_type,
|
||||
turn_id,
|
||||
thread_id,
|
||||
product_surface,
|
||||
model_slug,
|
||||
completed_at,
|
||||
repo_hash,
|
||||
accepted_added_lines,
|
||||
accepted_deleted_lines,
|
||||
line_fingerprints: _line_fingerprints,
|
||||
} = input;
|
||||
|
||||
vec![TrackEventRequest::AcceptedLineFingerprints(Box::new(
|
||||
CodexAcceptedLineFingerprintsEventRequest {
|
||||
event_type: "codex_accepted_line_fingerprints",
|
||||
event_params: CodexAcceptedLineFingerprintsEventParams {
|
||||
event_type,
|
||||
turn_id,
|
||||
thread_id,
|
||||
product_surface,
|
||||
model_slug,
|
||||
completed_at,
|
||||
repo_hash,
|
||||
accepted_added_lines,
|
||||
accepted_deleted_lines,
|
||||
// Keep computing local fingerprints for parsing tests and future attribution,
|
||||
// but do not upload path/line hashes in the analytics event payload.
|
||||
line_fingerprints: Vec::new(),
|
||||
},
|
||||
},
|
||||
))]
|
||||
}
|
||||
|
||||
pub async fn accepted_line_repo_hash_for_cwd(cwd: &Path) -> Option<String> {
|
||||
@@ -172,44 +168,6 @@ fn normalize_effective_line(line: &str) -> Option<String> {
|
||||
Some(normalized)
|
||||
}
|
||||
|
||||
fn accepted_line_fingerprint_chunks(
|
||||
line_fingerprints: Vec<AcceptedLineFingerprint>,
|
||||
) -> Vec<Vec<AcceptedLineFingerprint>> {
|
||||
if line_fingerprints.is_empty() {
|
||||
return vec![Vec::new()];
|
||||
}
|
||||
|
||||
let mut chunks = Vec::new();
|
||||
let mut current = Vec::new();
|
||||
let mut current_bytes = ACCEPTED_LINE_FINGERPRINT_EVENT_FIXED_BYTES;
|
||||
|
||||
for fingerprint in line_fingerprints {
|
||||
let item_bytes = accepted_line_fingerprint_json_bytes(&fingerprint);
|
||||
let separator_bytes = usize::from(!current.is_empty());
|
||||
if !current.is_empty()
|
||||
&& current_bytes + separator_bytes + item_bytes
|
||||
> ACCEPTED_LINE_FINGERPRINT_EVENT_TARGET_BYTES
|
||||
{
|
||||
chunks.push(current);
|
||||
current = Vec::new();
|
||||
current_bytes = ACCEPTED_LINE_FINGERPRINT_EVENT_FIXED_BYTES;
|
||||
}
|
||||
current_bytes += usize::from(!current.is_empty()) + item_bytes;
|
||||
current.push(fingerprint);
|
||||
}
|
||||
|
||||
if !current.is_empty() {
|
||||
chunks.push(current);
|
||||
}
|
||||
chunks
|
||||
}
|
||||
|
||||
fn accepted_line_fingerprint_json_bytes(fingerprint: &AcceptedLineFingerprint) -> usize {
|
||||
// {"path_hash":"...","line_hash":"..."} plus one byte of array comma
|
||||
// accounted for by the caller when needed.
|
||||
32 + fingerprint.path_hash.len() + fingerprint.line_hash.len()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
@@ -37,7 +37,6 @@ use crate::events::codex_hook_run_metadata;
|
||||
use crate::events::codex_plugin_metadata;
|
||||
use crate::events::codex_plugin_used_metadata;
|
||||
use crate::events::subagent_thread_started_event_request;
|
||||
use crate::facts::AcceptedLineFingerprint;
|
||||
use crate::facts::AnalyticsFact;
|
||||
use crate::facts::AnalyticsJsonRpcError;
|
||||
use crate::facts::AppInvocation;
|
||||
@@ -1028,10 +1027,7 @@ fn accepted_line_fingerprints_event_serializes_expected_shape() {
|
||||
repo_hash: Some("repo-hash-1".to_string()),
|
||||
accepted_added_lines: 42,
|
||||
accepted_deleted_lines: 40,
|
||||
line_fingerprints: vec![AcceptedLineFingerprint {
|
||||
path_hash: "path-hash-1".to_string(),
|
||||
line_hash: "line-hash-1".to_string(),
|
||||
}],
|
||||
line_fingerprints: Vec::new(),
|
||||
},
|
||||
},
|
||||
));
|
||||
@@ -1052,19 +1048,14 @@ fn accepted_line_fingerprints_event_serializes_expected_shape() {
|
||||
"repo_hash": "repo-hash-1",
|
||||
"accepted_added_lines": 42,
|
||||
"accepted_deleted_lines": 40,
|
||||
"line_fingerprints": [
|
||||
{
|
||||
"path_hash": "path-hash-1",
|
||||
"line_hash": "line-hash-1"
|
||||
}
|
||||
]
|
||||
"line_fingerprints": []
|
||||
}
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reducer_chunks_large_accepted_line_fingerprint_events_without_repeating_counts() {
|
||||
async fn reducer_emits_large_accepted_line_aggregates_without_fingerprints() {
|
||||
let mut reducer = AnalyticsReducer::default();
|
||||
let mut events = Vec::new();
|
||||
|
||||
@@ -1124,22 +1115,14 @@ index 1111111..2222222
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
assert!(accepted_line_events.len() > 1);
|
||||
let mut total_fingerprints = 0;
|
||||
for (index, event) in accepted_line_events.iter().enumerate() {
|
||||
assert_eq!(event.event_params.turn_id, "turn-2");
|
||||
assert_eq!(event.event_params.thread_id, "thread-2");
|
||||
total_fingerprints += event.event_params.line_fingerprints.len();
|
||||
if index == 0 {
|
||||
assert_eq!(event.event_params.accepted_added_lines, 20_000);
|
||||
assert_eq!(event.event_params.accepted_deleted_lines, 0);
|
||||
} else {
|
||||
assert_eq!(event.event_params.accepted_added_lines, 0);
|
||||
assert_eq!(event.event_params.accepted_deleted_lines, 0);
|
||||
}
|
||||
assert!(serde_json::to_vec(event).expect("serialize chunk").len() < 2_100_000);
|
||||
}
|
||||
assert_eq!(total_fingerprints, 20_000);
|
||||
assert_eq!(accepted_line_events.len(), 1);
|
||||
let event = accepted_line_events[0];
|
||||
assert_eq!(event.event_params.turn_id, "turn-2");
|
||||
assert_eq!(event.event_params.thread_id, "thread-2");
|
||||
assert_eq!(event.event_params.accepted_added_lines, 20_000);
|
||||
assert_eq!(event.event_params.accepted_deleted_lines, 0);
|
||||
assert!(event.event_params.line_fingerprints.is_empty());
|
||||
assert!(serde_json::to_vec(event).expect("serialize event").len() < 2_100_000);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -1206,11 +1189,7 @@ index 1111111..2222222
|
||||
assert_eq!(accepted_line_events.len(), 1);
|
||||
let event = accepted_line_events[0];
|
||||
assert_eq!(event.event_params.accepted_added_lines, 1);
|
||||
assert_eq!(event.event_params.line_fingerprints.len(), 1);
|
||||
assert_eq!(
|
||||
event.event_params.line_fingerprints[0].line_hash,
|
||||
crate::fingerprint_hash("line", "let latest_value = 2;")
|
||||
);
|
||||
assert!(event.event_params.line_fingerprints.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -6,7 +6,6 @@ use crate::events::CodexAcceptedLineFingerprintsEventRequest;
|
||||
use crate::events::SkillInvocationEventParams;
|
||||
use crate::events::SkillInvocationEventRequest;
|
||||
use crate::events::TrackEventRequest;
|
||||
use crate::facts::AcceptedLineFingerprint;
|
||||
use crate::facts::AnalyticsFact;
|
||||
use crate::facts::InvocationType;
|
||||
use codex_app_server_protocol::ApprovalsReviewer as AppServerApprovalsReviewer;
|
||||
@@ -53,10 +52,7 @@ fn sample_accepted_line_fingerprint_event(thread_id: &str) -> TrackEventRequest
|
||||
repo_hash: None,
|
||||
accepted_added_lines: 1,
|
||||
accepted_deleted_lines: 0,
|
||||
line_fingerprints: vec![AcceptedLineFingerprint {
|
||||
path_hash: "path-hash".to_string(),
|
||||
line_hash: "line-hash".to_string(),
|
||||
}],
|
||||
line_fingerprints: Vec::new(),
|
||||
},
|
||||
},
|
||||
))
|
||||
|
||||
Reference in New Issue
Block a user