Add OTEL metrics for hook runs (#18026)

# Why
We already emit analytics for completed hook runs, but we don't have
matching OTEL metrics to track hook volume and latency.

# What
- add `codex.hooks.run` and `codex.hooks.run.duration_ms`
- tag both metrics with `hook_name`, `source`, and `status`
- emit the metrics from the completed hook path

Verified locally against a dummy OTLP collector

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Abhinav
2026-04-16 14:30:38 -07:00
committed by GitHub
parent 55c3de75cb
commit d9c71d41a9
2 changed files with 87 additions and 0 deletions

View File

@@ -1,5 +1,6 @@
use std::future::Future;
use std::sync::Arc;
use std::time::Duration;
use codex_analytics::HookRunFact;
use codex_analytics::build_track_events_context;
@@ -10,6 +11,8 @@ use codex_hooks::PreToolUseRequest;
use codex_hooks::SessionStartOutcome;
use codex_hooks::UserPromptSubmitOutcome;
use codex_hooks::UserPromptSubmitRequest;
use codex_otel::HOOK_RUN_DURATION_METRIC;
use codex_otel::HOOK_RUN_METRIC;
use codex_protocol::items::TurnItem;
use codex_protocol::models::DeveloperInstructions;
use codex_protocol::models::ResponseInputItem;
@@ -17,7 +20,10 @@ use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::HookCompletedEvent;
use codex_protocol::protocol::HookEventName;
use codex_protocol::protocol::HookRunStatus;
use codex_protocol::protocol::HookRunSummary;
use codex_protocol::protocol::HookSource;
use codex_protocol::protocol::HookStartedEvent;
use codex_protocol::user_input::UserInput;
use serde_json::Value;
@@ -324,12 +330,29 @@ pub(crate) async fn emit_hook_completed_events(
completed_events: Vec<HookCompletedEvent>,
) {
for completed in completed_events {
emit_hook_completed_metrics(turn_context, &completed);
track_hook_completed_analytics(sess, turn_context, &completed);
sess.send_event(turn_context, EventMsg::HookCompleted(completed))
.await;
}
}
fn emit_hook_completed_metrics(turn_context: &TurnContext, completed: &HookCompletedEvent) {
let tags = hook_run_metric_tags(&completed.run);
turn_context
.session_telemetry
.counter(HOOK_RUN_METRIC, /*inc*/ 1, &tags);
if let Some(duration_ms) = completed.run.duration_ms
&& let Ok(duration_ms) = u64::try_from(duration_ms)
{
turn_context.session_telemetry.record_duration(
HOOK_RUN_DURATION_METRIC,
Duration::from_millis(duration_ms),
&tags,
);
}
}
fn track_hook_completed_analytics(
sess: &Arc<Session>,
turn_context: &Arc<TurnContext>,
@@ -364,6 +387,39 @@ fn hook_run_analytics_payload(
)
}
fn hook_run_metric_tags(run: &HookRunSummary) -> [(&'static str, &'static str); 3] {
let hook_name = match run.event_name {
HookEventName::PreToolUse => "PreToolUse",
HookEventName::PostToolUse => "PostToolUse",
HookEventName::SessionStart => "SessionStart",
HookEventName::UserPromptSubmit => "UserPromptSubmit",
HookEventName::Stop => "Stop",
};
let hook_source = match run.source {
HookSource::System => "system",
HookSource::User => "user",
HookSource::Project => "project",
HookSource::Mdm => "mdm",
HookSource::SessionFlags => "session_flags",
HookSource::LegacyManagedConfigFile => "legacy_managed_config_file",
HookSource::LegacyManagedConfigMdm => "legacy_managed_config_mdm",
HookSource::Unknown => "unknown",
};
let status = match run.status {
HookRunStatus::Running => "running",
HookRunStatus::Completed => "completed",
HookRunStatus::Failed => "failed",
HookRunStatus::Blocked => "blocked",
HookRunStatus::Stopped => "stopped",
};
[
("hook_name", hook_name),
("source", hook_source),
("status", status),
]
}
fn hook_permission_mode(turn_context: &TurnContext) -> String {
match turn_context.approval_policy.value() {
AskForApproval::Never => "bypassPermissions",
@@ -388,6 +444,7 @@ mod tests {
use super::additional_context_messages;
use super::hook_run_analytics_payload;
use super::hook_run_metric_tags;
use crate::codex::make_session_and_context;
use codex_protocol::protocol::HookCompletedEvent;
use codex_protocol::protocol::HookRunSummary;
@@ -463,6 +520,34 @@ mod tests {
assert_eq!(hook.status, HookRunStatus::Failed);
}
#[test]
fn hook_run_metric_tags_match_analytics_shape() {
let run = sample_hook_run(HookRunStatus::Blocked, HookSource::Project);
assert_eq!(
hook_run_metric_tags(&run),
[
("hook_name", "Stop"),
("source", "project"),
("status", "blocked"),
]
);
}
#[test]
fn hook_run_metric_tags_include_expanded_hook_sources() {
let run = sample_hook_run(HookRunStatus::Completed, HookSource::LegacyManagedConfigMdm);
assert_eq!(
hook_run_metric_tags(&run),
[
("hook_name", "Stop"),
("source", "legacy_managed_config_mdm"),
("status", "completed"),
]
);
}
fn sample_hook_run(status: HookRunStatus, source: HookSource) -> HookRunSummary {
HookRunSummary {
id: "stop:0:/tmp/hooks.json".to_string(),

View File

@@ -29,6 +29,8 @@ pub const TURN_TOKEN_USAGE_METRIC: &str = "codex.turn.token_usage";
pub const PROFILE_USAGE_METRIC: &str = "codex.profile.usage";
pub const CURATED_PLUGINS_STARTUP_SYNC_METRIC: &str = "codex.plugins.startup_sync";
pub const CURATED_PLUGINS_STARTUP_SYNC_FINAL_METRIC: &str = "codex.plugins.startup_sync.final";
pub const HOOK_RUN_METRIC: &str = "codex.hooks.run";
pub const HOOK_RUN_DURATION_METRIC: &str = "codex.hooks.run.duration_ms";
/// Total runtime of a startup prewarm attempt until it completes, tagged by final status.
pub const STARTUP_PREWARM_DURATION_METRIC: &str = "codex.startup_prewarm.duration_ms";
/// Age of the startup prewarm attempt when the first real turn resolves it, tagged by outcome.