[codex-analytics] rework thread_source for thread analytics (#20949)

## Summary
- make `thread_source` an explicit optional thread-level field on
`thread/start`, `thread/fork`, and returned thread payloads
- persist `thread_source` in rollout/session metadata so resumed live
threads retain the original value
- replace the old best-effort `session_source` -> `thread_source`
mapping with an explicit caller-supplied analytics classification

## Why
Before this change, analytics `thread_source` was populated by a
best-effort mapping from `session_source`. `session_source` describes
the runtime/client surface, not the actual thread-level origin, so that
projection could not reliably distinguish thread origins such as `user`,
`subagent`, and `memory_consolidation`, or any origins added in the future.

Making `thread_source` explicit keeps one thread-level analytics field
while letting callers provide the real classification directly instead
of recovering it indirectly from `session_source`.

## Impact
For new analytics events, `thread_source` now reflects the explicit
thread-level classification supplied by the caller rather than an
inferred value derived from `session_source`. Existing protocol fields
remain optional; callers that omit `threadSource` now produce a `null`
`thread_source` instead of a best-effort inferred value.

## Validation
- `just write-app-server-schema`
- `cargo test -p codex-analytics -p codex-core -p codex-app-server-protocol --no-run`
- `cargo test -p codex-app-server-protocol generated_ts_optional_nullable_fields_only_in_params`
- `cargo test -p codex-analytics thread_initialized_event_serializes_expected_shape`
- `cargo test -p codex-core resume_stopped_thread_from_rollout_preserves_thread_source`
This commit is contained in:
rhan-oai
2026-05-05 19:12:31 -07:00
committed by GitHub
parent 94db03d5af
commit b3d4f1a9f0
98 changed files with 896 additions and 90 deletions

View File

@@ -48,6 +48,7 @@ fn apply_session_meta_from_item(metadata: &mut ThreadMetadata, meta_line: &Sessi
}
metadata.id = meta_line.meta.id;
metadata.source = enum_to_string(&meta_line.meta.source);
metadata.thread_source = meta_line.meta.thread_source;
metadata.agent_nickname = meta_line.meta.agent_nickname.clone();
metadata.agent_role = meta_line.meta.agent_role.clone();
metadata.agent_path = meta_line.meta.agent_path.clone();
@@ -249,6 +250,7 @@ mod tests {
originator: "codex_cli_rs".to_string(),
cli_version: "0.0.0".to_string(),
source: SessionSource::Cli,
thread_source: None,
agent_path: None,
agent_nickname: None,
agent_role: None,
@@ -382,6 +384,7 @@ mod tests {
originator: "codex_cli_rs".to_string(),
cli_version: "0.0.0".to_string(),
source: SessionSource::Cli,
thread_source: None,
agent_path: None,
agent_nickname: None,
agent_role: None,
@@ -408,6 +411,7 @@ mod tests {
created_at,
updated_at: created_at,
source: "cli".to_string(),
thread_source: None,
agent_path: None,
agent_nickname: None,
agent_role: None,

View File

@@ -6,6 +6,7 @@ use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::ThreadSource;
use sqlx::Row;
use sqlx::sqlite::SqliteRow;
use std::path::PathBuf;
@@ -68,6 +69,8 @@ pub struct ThreadMetadata {
pub updated_at: DateTime<Utc>,
/// The session source (stringified enum).
pub source: String,
/// Optional analytics source classification for this thread.
pub thread_source: Option<ThreadSource>,
/// Optional random unique nickname assigned to an AgentControl-spawned sub-agent.
pub agent_nickname: Option<String>,
/// Optional role (agent_role) assigned to an AgentControl-spawned sub-agent.
@@ -117,6 +120,8 @@ pub struct ThreadMetadataBuilder {
pub updated_at: Option<DateTime<Utc>>,
/// The session source.
pub source: SessionSource,
/// Optional analytics source classification for this thread.
pub thread_source: Option<ThreadSource>,
/// Optional random unique nickname assigned to the session.
pub agent_nickname: Option<String>,
/// Optional role (agent_role) assigned to the session.
@@ -157,6 +162,7 @@ impl ThreadMetadataBuilder {
created_at,
updated_at: None,
source,
thread_source: None,
agent_nickname: None,
agent_role: None,
agent_path: None,
@@ -188,6 +194,7 @@ impl ThreadMetadataBuilder {
created_at,
updated_at,
source,
thread_source: self.thread_source,
agent_nickname: self.agent_nickname.clone(),
agent_role: self.agent_role.clone(),
agent_path: self
@@ -313,6 +320,7 @@ pub(crate) struct ThreadRow {
created_at: i64,
updated_at: i64,
source: String,
thread_source: Option<String>,
agent_nickname: Option<String>,
agent_role: Option<String>,
agent_path: Option<String>,
@@ -340,6 +348,7 @@ impl ThreadRow {
created_at: row.try_get("created_at")?,
updated_at: row.try_get("updated_at")?,
source: row.try_get("source")?,
thread_source: row.try_get("thread_source")?,
agent_nickname: row.try_get("agent_nickname")?,
agent_role: row.try_get("agent_role")?,
agent_path: row.try_get("agent_path")?,
@@ -371,6 +380,7 @@ impl TryFrom<ThreadRow> for ThreadMetadata {
created_at,
updated_at,
source,
thread_source,
agent_nickname,
agent_role,
agent_path,
@@ -389,12 +399,17 @@ impl TryFrom<ThreadRow> for ThreadMetadata {
git_branch,
git_origin_url,
} = row;
let thread_source = thread_source
.map(|thread_source| thread_source.parse())
.transpose()
.map_err(anyhow::Error::msg)?;
Ok(Self {
id: ThreadId::try_from(id)?,
rollout_path: PathBuf::from(rollout_path),
created_at: epoch_millis_to_datetime(created_at)?,
updated_at: epoch_millis_to_datetime(updated_at)?,
source,
thread_source,
agent_nickname,
agent_role,
agent_path,
@@ -480,6 +495,7 @@ mod tests {
created_at: 1_700_000_000,
updated_at: 1_700_000_100,
source: "cli".to_string(),
thread_source: None,
agent_nickname: None,
agent_role: None,
agent_path: None,
@@ -508,6 +524,7 @@ mod tests {
created_at: DateTime::<Utc>::from_timestamp(1_700_000_000, 0).expect("timestamp"),
updated_at: DateTime::<Utc>::from_timestamp(1_700_000_100, 0).expect("timestamp"),
source: "cli".to_string(),
thread_source: None,
agent_nickname: None,
agent_role: None,
agent_path: None,

View File

@@ -137,6 +137,7 @@ SELECT
threads.created_at_ms AS created_at,
threads.updated_at_ms AS updated_at,
threads.source,
threads.thread_source,
threads.agent_path,
threads.agent_nickname,
threads.agent_role,

View File

@@ -48,6 +48,7 @@ pub(super) fn test_thread_metadata(
created_at: now,
updated_at: now,
source: "cli".to_string(),
thread_source: None,
agent_nickname: None,
agent_role: None,
agent_path: None,

View File

@@ -13,6 +13,7 @@ SELECT
threads.created_at_ms AS created_at,
threads.updated_at_ms AS updated_at,
threads.source,
threads.thread_source,
threads.agent_nickname,
threads.agent_role,
threads.agent_path,
@@ -486,6 +487,7 @@ INSERT INTO threads (
created_at_ms,
updated_at_ms,
source,
thread_source,
agent_nickname,
agent_role,
agent_path,
@@ -505,7 +507,7 @@ INSERT INTO threads (
git_branch,
git_origin_url,
memory_mode
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO NOTHING
"#,
)
@@ -516,6 +518,11 @@ ON CONFLICT(id) DO NOTHING
.bind(datetime_to_epoch_millis(metadata.created_at))
.bind(datetime_to_epoch_millis(updated_at))
.bind(metadata.source.as_str())
.bind(
metadata
.thread_source
.map(codex_protocol::protocol::ThreadSource::as_str),
)
.bind(metadata.agent_nickname.as_deref())
.bind(metadata.agent_role.as_deref())
.bind(metadata.agent_path.as_deref())
@@ -683,6 +690,7 @@ INSERT INTO threads (
created_at_ms,
updated_at_ms,
source,
thread_source,
agent_nickname,
agent_role,
agent_path,
@@ -702,7 +710,7 @@ INSERT INTO threads (
git_branch,
git_origin_url,
memory_mode
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
rollout_path = excluded.rollout_path,
created_at = excluded.created_at,
@@ -710,6 +718,7 @@ ON CONFLICT(id) DO UPDATE SET
created_at_ms = excluded.created_at_ms,
updated_at_ms = excluded.updated_at_ms,
source = excluded.source,
thread_source = excluded.thread_source,
agent_nickname = excluded.agent_nickname,
agent_role = excluded.agent_role,
agent_path = excluded.agent_path,
@@ -737,6 +746,11 @@ ON CONFLICT(id) DO UPDATE SET
.bind(datetime_to_epoch_millis(metadata.created_at))
.bind(datetime_to_epoch_millis(updated_at))
.bind(metadata.source.as_str())
.bind(
metadata
.thread_source
.map(codex_protocol::protocol::ThreadSource::as_str),
)
.bind(metadata.agent_nickname.as_deref())
.bind(metadata.agent_role.as_deref())
.bind(metadata.agent_path.as_deref())
@@ -958,6 +972,7 @@ SELECT
threads.created_at_ms AS created_at,
threads.updated_at_ms AS updated_at,
threads.source,
threads.thread_source,
threads.agent_nickname,
threads.agent_role,
threads.agent_path,
@@ -1361,6 +1376,7 @@ mod tests {
originator: String::new(),
cli_version: String::new(),
source: SessionSource::Cli,
thread_source: None,
agent_path: None,
agent_nickname: None,
agent_role: None,
@@ -1419,6 +1435,7 @@ mod tests {
originator: String::new(),
cli_version: String::new(),
source: SessionSource::Cli,
thread_source: None,
agent_path: None,
agent_nickname: None,
agent_role: None,