Files
codex/codex-rs/analytics/src/lib.rs
alexsong-oai bbb6bf0a37 Emit accepted line fingerprint analytics (#21601)
## Why

Codex assisted-code attribution needs a client-side accepted-code source
that does not upload raw code. This adds a hash-only analytics event
derived from the turn diff so downstream attribution can compare
accepted Codex lines against commit or PR diffs.

## What Changed

- Parse accepted/effective added lines from the final turn diff and emit
`codex_accepted_line_fingerprints` analytics.
- Hash repo, path, and normalized line content before upload; raw code
and raw diffs are not included in the event.
- Chunk large fingerprint payloads and send accepted-line fingerprint
events in isolated requests while preserving normal batching for other
analytics events.
- Canonicalize Git remote URLs before repo hashing so SSH/HTTPS GitHub
remotes join to the same repo hash.
- Add parser coverage for unified diff hunk lines that look like `+++`
or `---` file headers.

## Verification

- `cargo test -p codex-analytics`
- `cargo test -p codex-git-utils canonicalize_git_remote_url`
- `just fix -p codex-analytics`
- `just bazel-lock-check`
- `git diff --check`
2026-05-08 12:16:24 -07:00

82 lines
2.4 KiB
Rust

mod accepted_lines;
mod client;
mod events;
mod facts;
mod reducer;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;
pub use accepted_lines::accepted_line_fingerprints_from_unified_diff;
pub use accepted_lines::fingerprint_hash;
pub use client::AnalyticsEventsClient;
pub use events::AppServerRpcTransport;
pub use events::GuardianApprovalRequestSource;
pub use events::GuardianReviewAnalyticsResult;
pub use events::GuardianReviewDecision;
pub use events::GuardianReviewEventParams;
pub use events::GuardianReviewFailureReason;
pub use events::GuardianReviewSessionKind;
pub use events::GuardianReviewTerminalStatus;
pub use events::GuardianReviewTrackContext;
pub use events::GuardianReviewedAction;
pub use facts::AcceptedLineFingerprint;
pub use facts::AnalyticsJsonRpcError;
pub use facts::AppInvocation;
pub use facts::CodexCompactionEvent;
pub use facts::CodexTurnSteerEvent;
pub use facts::CompactionImplementation;
pub use facts::CompactionPhase;
pub use facts::CompactionReason;
pub use facts::CompactionStatus;
pub use facts::CompactionStrategy;
pub use facts::CompactionTrigger;
pub use facts::HookRunFact;
pub use facts::InputError;
pub use facts::InvocationType;
pub use facts::SkillInvocation;
pub use facts::SubAgentThreadStartedInput;
pub use facts::ThreadInitializationMode;
pub use facts::TrackEventsContext;
pub use facts::TurnResolvedConfigFact;
pub use facts::TurnStatus;
pub use facts::TurnSteerRejectionReason;
pub use facts::TurnSteerRequestError;
pub use facts::TurnSteerResult;
pub use facts::TurnTokenUsageFact;
pub use facts::build_track_events_context;
#[cfg(test)]
mod analytics_client_tests;
pub fn now_unix_seconds() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_secs()
}
pub fn now_unix_millis() -> u64 {
u64::try_from(
SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap_or_default()
.as_millis(),
)
.unwrap_or(u64::MAX)
}
pub(crate) fn serialize_enum_as_string<T: serde::Serialize>(value: &T) -> Option<String> {
serde_json::to_value(value)
.ok()
.and_then(|value| value.as_str().map(str::to_string))
}
pub(crate) fn usize_to_u64(value: usize) -> u64 {
u64::try_from(value).unwrap_or(u64::MAX)
}
pub(crate) fn option_i64_to_u64(value: Option<i64>) -> Option<u64> {
value.and_then(|value| u64::try_from(value).ok())
}