[codex-analytics] rework thread_source for thread analytics (#20949)

## Summary
- make `thread_source` an explicit optional thread-level field on
`thread/start`, `thread/fork`, and returned thread payloads
- persist `thread_source` in rollout/session metadata so resumed live
threads retain the original value
- replace the old best-effort `session_source` -> `thread_source`
mapping with an explicit caller-supplied analytics classification

## Why
Before this change, analytics `thread_source` was populated by a
best-effort mapping from `session_source`. `session_source` describes
the runtime/client surface, not the actual thread-level origin, so that
projection was not accurate enough to distinguish cases such as `user`,
`subagent`, `memory_consolidation`, and future thread origins reliably.

Making `thread_source` explicit keeps one thread-level analytics field
while letting callers provide the real classification directly instead
of recovering it indirectly from `session_source`.

## Impact
For new analytics events, `thread_source` now reflects the explicit
thread-level classification supplied by the caller rather than an
inferred value derived from `session_source`. Existing protocol fields
remain optional; callers that omit `threadSource` now produce `null`
instead of a best-effort inferred value.

## Validation
- `just write-app-server-schema`
- `cargo test -p codex-analytics -p codex-core -p
codex-app-server-protocol --no-run`
- `cargo test -p codex-app-server-protocol
generated_ts_optional_nullable_fields_only_in_params`
- `cargo test -p codex-analytics
thread_initialized_event_serializes_expected_shape`
- `cargo test -p codex-core
resume_stopped_thread_from_rollout_preserves_thread_source`
This commit is contained in:
rhan-oai
2026-05-05 19:12:31 -07:00
committed by Channing Conger
parent 55c135600c
commit 5ef71a8e53
98 changed files with 896 additions and 90 deletions

View File

@@ -2503,6 +2503,18 @@ impl InitialHistory {
}),
}
}
/// Returns the `thread_source` recorded in the session metadata of a resumed thread.
///
/// Only `InitialHistory::Resumed` consults its history; every other variant
/// yields `None`. For a resumed history the items are scanned in order and the
/// first `RolloutItem::SessionMeta` whose `thread_source` is set wins. Session
/// metadata entries without a `thread_source` are skipped rather than ending
/// the search.
pub fn get_resumed_thread_source(&self) -> Option<ThreadSource> {
    match self {
        InitialHistory::Resumed(resumed) => resumed.history.iter().find_map(|entry| {
            if let RolloutItem::SessionMeta(meta_line) = entry {
                meta_line.meta.thread_source
            } else {
                None
            }
        }),
        // Listed explicitly (no wildcard) so a new variant forces a decision here.
        InitialHistory::New | InitialHistory::Cleared | InitialHistory::Forked(_) => None,
    }
}
}
fn session_cwd_from_items(items: &[RolloutItem]) -> Option<PathBuf> {
@@ -2528,6 +2540,44 @@ pub enum SessionSource {
Unknown,
}
// Thread-level origin classification used for analytics.
//
// Both serde and the TS bindings rename variants to snake_case, so the wire
// values are `user`, `subagent`, and `memory_consolidation`.
//
// NOTE(review): plain `//` comments are used on purpose; `///` docs on a type
// deriving `JsonSchema`/`TS` may be copied into generated schema output.
// Confirm before converting these to doc comments.
#[derive(Serialize, Deserialize, Clone, Copy, Debug, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(rename_all = "snake_case")]
pub enum ThreadSource {
// Presumably a thread started directly by a user; confirm against callers.
User,
// Presumably a thread spawned on behalf of another agent; confirm against callers.
Subagent,
// Presumably a thread created for memory consolidation work; confirm against callers.
MemoryConsolidation,
}
impl ThreadSource {
pub fn as_str(self) -> &'static str {
match self {
ThreadSource::User => "user",
ThreadSource::Subagent => "subagent",
ThreadSource::MemoryConsolidation => "memory_consolidation",
}
}
}
impl fmt::Display for ThreadSource {
    /// Writes the same label that `ThreadSource::as_str` returns.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = self.as_str();
        f.write_str(label)
    }
}
impl FromStr for ThreadSource {
type Err = String;
fn from_str(value: &str) -> Result<Self, Self::Err> {
match value {
"user" => Ok(ThreadSource::User),
"subagent" => Ok(ThreadSource::Subagent),
"memory_consolidation" => Ok(ThreadSource::MemoryConsolidation),
other => Err(format!("unknown thread source: {other}")),
}
}
}
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(rename_all = "snake_case")]
@@ -2588,16 +2638,6 @@ impl SessionSource {
})
}
/// Low-cardinality thread source label for analytics.
///
/// Maps CLI, VS Code, and exec sessions to `"user"`, internal sessions to
/// `"internal"`, and sub-agent sessions to `"subagent"`. MCP, custom, and
/// unknown sources have no label and yield `None`.
pub fn thread_source_name(&self) -> Option<&'static str> {
    let label = match self {
        // Sources without a meaningful analytics classification.
        SessionSource::Mcp | SessionSource::Custom(_) | SessionSource::Unknown => return None,
        SessionSource::Cli | SessionSource::VSCode | SessionSource::Exec => "user",
        SessionSource::Internal(_) => "internal",
        SessionSource::SubAgent(_) => "subagent",
    };
    Some(label)
}
/// Reports whether this source is the `SessionSource::Internal` variant,
/// regardless of which internal source it carries.
pub fn is_internal(&self) -> bool {
    matches!(*self, SessionSource::Internal(..))
}
@@ -2698,6 +2738,9 @@ pub struct SessionMeta {
pub cli_version: String,
#[serde(default)]
pub source: SessionSource,
/// Optional analytics source classification for this thread.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub thread_source: Option<ThreadSource>,
/// Optional random unique nickname assigned to an AgentControl-spawned sub-agent.
#[serde(skip_serializing_if = "Option::is_none")]
pub agent_nickname: Option<String>,
@@ -2728,6 +2771,7 @@ impl Default for SessionMeta {
originator: String::new(),
cli_version: String::new(),
source: SessionSource::default(),
thread_source: None,
agent_nickname: None,
agent_role: None,
agent_path: None,
@@ -3415,6 +3459,9 @@ pub struct SessionConfiguredEvent {
pub session_id: ThreadId,
#[serde(skip_serializing_if = "Option::is_none")]
pub forked_from_id: Option<ThreadId>,
/// Optional analytics source classification for this thread.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub thread_source: Option<ThreadSource>,
/// Optional user-facing thread name (may be unset).
#[serde(default, skip_serializing_if = "Option::is_none")]
@@ -3486,6 +3533,8 @@ impl<'de> Deserialize<'de> for SessionConfiguredEvent {
session_id: ThreadId,
forked_from_id: Option<ThreadId>,
#[serde(default)]
thread_source: Option<ThreadSource>,
#[serde(default)]
thread_name: Option<String>,
model: String,
model_provider_id: String,
@@ -3524,6 +3573,7 @@ impl<'de> Deserialize<'de> for SessionConfiguredEvent {
Ok(Self {
session_id: wire.session_id,
forked_from_id: wire.forked_from_id,
thread_source: wire.thread_source,
thread_name: wire.thread_name,
model: wire.model,
model_provider_id: wire.model_provider_id,
@@ -4011,28 +4061,6 @@ mod tests {
);
}
#[test]
fn session_source_thread_source_name_classifies_user_and_subagent_sources() {
    // Each row pairs a session source with the analytics label it should map to.
    let cases = [
        // Interactive / direct entry points are all reported as "user".
        (SessionSource::Cli, Some("user")),
        (SessionSource::VSCode, Some("user")),
        (SessionSource::Exec, Some("user")),
        // Internal and sub-agent sessions each get their own label.
        (
            SessionSource::Internal(InternalSessionSource::MemoryConsolidation),
            Some("internal"),
        ),
        (
            SessionSource::SubAgent(SubAgentSource::Review),
            Some("subagent"),
        ),
        // Sources with no analytics classification.
        (SessionSource::Mcp, None),
        (SessionSource::Custom("atlas".to_string()), None),
        (SessionSource::Unknown, None),
    ];

    for (source, expected) in cases {
        assert_eq!(source.thread_source_name(), expected);
    }
}
#[test]
fn session_source_restriction_product_defaults_non_subagent_sources_to_codex() {
assert_eq!(
@@ -5274,6 +5302,7 @@ mod tests {
msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
session_id: conversation_id,
forked_from_id: None,
thread_source: None,
thread_name: None,
model: "codex-mini-latest".to_string(),
model_provider_id: "openai".to_string(),