Compare commits

...

8 Commits

Author SHA1 Message Date
Albin Cassirer
8630f1f242 Restrict inference call lookups to the thread. 2026-04-16 10:10:50 -07:00
Albin Cassirer
70fb10676a Fix edge issues when sub agent threads are complete failures.
The edges were assuming that there are assistant messages to attach to but if everything fails straight away then we now fall back to attaching the edges to the sub agent thread instead.
2026-04-16 10:06:41 -07:00
Albin Cassirer
d364b9651e codex: fix CI failure on PR #17982 2026-04-15 16:49:30 -07:00
Albin Cassirer
55d7e431f3 Add debug trace reduction command 2026-04-15 16:09:08 -07:00
Albin Cassirer
c657343888 Trace rollout sessions and multi-agent v2 edges 2026-04-15 16:09:08 -07:00
Albin Cassirer
2abf9dcd4e Trace tool and code-mode runtime boundaries 2026-04-15 16:09:07 -07:00
Albin Cassirer
856ef510c2 Add core rollout trace recorder 2026-04-15 16:07:59 -07:00
Albin Cassirer
eff93282cf Add rollout trace crate 2026-04-15 16:07:59 -07:00
59 changed files with 11650 additions and 58 deletions

14
codex-rs/Cargo.lock generated
View File

@@ -1690,6 +1690,7 @@ dependencies = [
"codex-protocol",
"codex-responses-api-proxy",
"codex-rmcp-client",
"codex-rollout-trace",
"codex-sandboxing",
"codex-state",
"codex-stdio-to-uds",
@@ -1932,6 +1933,7 @@ dependencies = [
"codex-response-debug-context",
"codex-rmcp-client",
"codex-rollout",
"codex-rollout-trace",
"codex-sandboxing",
"codex-secrets",
"codex-shell-command",
@@ -2705,6 +2707,18 @@ dependencies = [
"uuid",
]
[[package]]
name = "codex-rollout-trace"
version = "0.0.0"
dependencies = [
"anyhow",
"codex-protocol",
"pretty_assertions",
"serde",
"serde_json",
"tempfile",
]
[[package]]
name = "codex-sandboxing"
version = "0.0.0"

View File

@@ -51,6 +51,7 @@ members = [
"protocol",
"realtime-webrtc",
"rollout",
"rollout-trace",
"rmcp-client",
"responses-api-proxy",
"response-debug-context",
@@ -159,6 +160,7 @@ codex-responses-api-proxy = { path = "responses-api-proxy" }
codex-response-debug-context = { path = "response-debug-context" }
codex-rmcp-client = { path = "rmcp-client" }
codex-rollout = { path = "rollout" }
codex-rollout-trace = { path = "rollout-trace" }
codex-sandboxing = { path = "sandboxing" }
codex-secrets = { path = "secrets" }
codex-shell-command = { path = "shell-command" }

View File

@@ -40,6 +40,7 @@ codex-mcp-server = { workspace = true }
codex-protocol = { workspace = true }
codex-responses-api-proxy = { workspace = true }
codex-rmcp-client = { workspace = true }
codex-rollout-trace = { workspace = true }
codex-sandboxing = { workspace = true }
codex-state = { workspace = true }
codex-stdio-to-uds = { workspace = true }

View File

@@ -22,6 +22,8 @@ use codex_exec::Command as ExecCommand;
use codex_exec::ReviewArgs;
use codex_execpolicy::ExecPolicyCheckCommand;
use codex_responses_api_proxy::Args as ResponsesApiProxyArgs;
use codex_rollout_trace::REDUCED_STATE_FILE_NAME;
use codex_rollout_trace::replay_bundle;
use codex_state::StateRuntime;
use codex_state::state_db_path;
use codex_tui::AppExitInfo;
@@ -190,6 +192,9 @@ enum DebugSubcommand {
/// Render the model-visible prompt input list as JSON.
PromptInput(DebugPromptInputCommand),
/// Replay a rollout trace bundle and write reduced state JSON.
TraceReduce(DebugTraceReduceCommand),
/// Internal: reset local memory state for a fresh start.
#[clap(hide = true)]
ClearMemories,
@@ -224,6 +229,17 @@ struct DebugPromptInputCommand {
images: Vec<PathBuf>,
}
// CLI arguments for the `debug trace-reduce` subcommand. The `///` field docs
// below double as clap-generated help text, so they are user-facing.
#[derive(Debug, Parser)]
struct DebugTraceReduceCommand {
    /// Trace bundle directory containing manifest.json and trace.jsonl.
    #[arg(value_name = "TRACE_BUNDLE")]
    trace_bundle: PathBuf,
    /// Output path for reduced RolloutTrace JSON. Defaults to TRACE_BUNDLE/state.json.
    #[arg(long = "output", short = 'o', value_name = "FILE")]
    output: Option<PathBuf>,
}
#[derive(Debug, Parser)]
struct ResumeCommand {
/// Conversation/session id (UUID) or thread name. UUIDs take precedence if it parses.
@@ -991,6 +1007,14 @@ async fn cli_main(arg0_paths: Arg0DispatchPaths) -> anyhow::Result<()> {
)
.await?;
}
DebugSubcommand::TraceReduce(cmd) => {
reject_remote_mode_for_subcommand(
root_remote.as_deref(),
root_remote_auth_token_env.as_deref(),
"debug trace-reduce",
)?;
run_debug_trace_reduce_command(cmd).await?;
}
DebugSubcommand::ClearMemories => {
reject_remote_mode_for_subcommand(
root_remote.as_deref(),
@@ -1192,6 +1216,19 @@ fn maybe_print_under_development_feature_warning(
);
}
async fn run_debug_trace_reduce_command(cmd: DebugTraceReduceCommand) -> anyhow::Result<()> {
let output = cmd
.output
.unwrap_or_else(|| cmd.trace_bundle.join(REDUCED_STATE_FILE_NAME));
let trace = replay_bundle(&cmd.trace_bundle)?;
let reduced_json = serde_json::to_vec_pretty(&trace)?;
tokio::fs::write(&output, reduced_json).await?;
println!("{}", output.display());
Ok(())
}
async fn run_debug_prompt_input_command(
cmd: DebugPromptInputCommand,
root_config_overrides: CliConfigOverrides,

View File

@@ -23,7 +23,9 @@ pub use runtime::DEFAULT_WAIT_YIELD_TIME_MS;
pub use runtime::ExecuteRequest;
pub use runtime::RuntimeResponse;
pub use runtime::WaitRequest;
pub use runtime::WaitResponse;
pub use service::CodeModeService;
pub use service::CodeModeToolInvocation;
pub use service::CodeModeTurnHost;
pub use service::CodeModeTurnWorker;

View File

@@ -10,6 +10,7 @@ use std::sync::mpsc as std_mpsc;
use std::thread;
use codex_protocol::ToolName;
use serde::Serialize;
use serde_json::Value as JsonValue;
use tokio::sync::mpsc;
@@ -25,6 +26,12 @@ const EXIT_SENTINEL: &str = "__codex_code_mode_exit__";
#[derive(Clone, Debug)]
pub struct ExecuteRequest {
/// Runtime cell id to use for this execution.
///
/// Hosts that need to trace work before JavaScript starts can allocate an id
/// first and pass it here. `None` keeps the service-owned allocation path
/// for callers that only need the id once a runtime response is returned.
pub cell_id: Option<String>,
pub tool_call_id: String,
pub enabled_tools: Vec<ToolDefinition>,
pub source: String,
@@ -41,6 +48,33 @@ pub struct WaitRequest {
}
/// Outcome of a `wait` call, distinguishing a live cell's response from the
/// synthetic response returned when the requested cell does not exist.
#[derive(Debug, PartialEq)]
pub enum WaitResponse {
    /// The requested cell was live when the wait command was accepted.
    ///
    /// Non-yielding responses from this variant are terminal lifecycle points
    /// for the matching code cell.
    Cell(RuntimeResponse),
    /// The requested cell was not live, so the response is only the result of
    /// the `wait` tool call. It must not be treated as a code-cell lifecycle
    /// event because there is no cell to complete.
    MissingCell(RuntimeResponse),
}
impl WaitResponse {
    /// Consumes the wrapper and yields the inner runtime response, discarding
    /// the live/missing distinction.
    pub fn into_runtime_response(self) -> RuntimeResponse {
        match self {
            WaitResponse::Cell(inner) => inner,
            WaitResponse::MissingCell(inner) => inner,
        }
    }

    /// Borrows the inner runtime response without consuming the wrapper.
    pub fn runtime_response(&self) -> &RuntimeResponse {
        match self {
            WaitResponse::Cell(inner) => inner,
            WaitResponse::MissingCell(inner) => inner,
        }
    }
}
#[derive(Debug, PartialEq, Serialize)]
pub enum RuntimeResponse {
Yielded {
cell_id: String,
@@ -331,6 +365,7 @@ mod tests {
fn execute_request(source: &str) -> ExecuteRequest {
ExecuteRequest {
cell_id: None,
tool_call_id: "call_1".to_string(),
enabled_tools: Vec::new(),
source: source.to_string(),

View File

@@ -21,14 +21,26 @@ use crate::runtime::RuntimeEvent;
use crate::runtime::RuntimeResponse;
use crate::runtime::TurnMessage;
use crate::runtime::WaitRequest;
use crate::runtime::WaitResponse;
use crate::runtime::spawn_runtime;
/// Nested tool request emitted by one code-mode cell.
///
/// Code mode owns the per-cell runtime id. Hosts should preserve it for
/// provenance/debugging, but should still assign their own runtime tool call id
/// if their tool-call graph requires globally unique ids.
pub struct CodeModeToolInvocation {
    /// Id of the code cell that issued this nested tool call.
    pub cell_id: String,
    /// Runtime-assigned id for the nested call; hosts requiring globally
    /// unique ids should mint their own rather than rely on this one.
    pub runtime_tool_call_id: String,
    /// Name of the tool the cell is invoking.
    pub tool_name: ToolName,
    /// JSON input payload for the tool, if the cell supplied one.
    pub input: Option<JsonValue>,
}
#[async_trait]
pub trait CodeModeTurnHost: Send + Sync {
async fn invoke_tool(
&self,
tool_name: ToolName,
input: Option<JsonValue>,
invocation: CodeModeToolInvocation,
cancellation_token: CancellationToken,
) -> Result<JsonValue, String>;
@@ -76,24 +88,44 @@ impl CodeModeService {
*self.inner.stored_values.lock().await = values;
}
pub async fn execute(&self, request: ExecuteRequest) -> Result<RuntimeResponse, String> {
let cell_id = self
.inner
/// Reserves the runtime cell id for a future `execute` request.
///
/// The runtime can issue nested tool calls before the first `execute`
/// response is returned. Hosts that need a parent trace object for those
/// nested calls should allocate the cell id up front and pass it back on the
/// `ExecuteRequest`.
pub fn allocate_cell_id(&self) -> String {
self.inner
.next_cell_id
.fetch_add(1, Ordering::Relaxed)
.to_string();
.to_string()
}
pub async fn execute(&self, request: ExecuteRequest) -> Result<RuntimeResponse, String> {
let cell_id = request
.cell_id
.clone()
.unwrap_or_else(|| self.allocate_cell_id());
let mut sessions = self.inner.sessions.lock().await;
if sessions.contains_key(&cell_id) {
return Err(format!("exec cell {cell_id} already exists"));
}
// Keep the session registry locked through insertion so a caller-owned
// cell id cannot race with another execute and replace a live runtime.
let (event_tx, event_rx) = mpsc::unbounded_channel();
let (runtime_tx, runtime_terminate_handle) = spawn_runtime(request.clone(), event_tx)?;
let (control_tx, control_rx) = mpsc::unbounded_channel();
let (response_tx, response_rx) = oneshot::channel();
self.inner.sessions.lock().await.insert(
sessions.insert(
cell_id.clone(),
SessionHandle {
control_tx: control_tx.clone(),
runtime_tx: runtime_tx.clone(),
},
);
drop(sessions);
tokio::spawn(run_session_control(
Arc::clone(&self.inner),
@@ -113,7 +145,7 @@ impl CodeModeService {
.map_err(|_| "exec runtime ended unexpectedly".to_string())
}
pub async fn wait(&self, request: WaitRequest) -> Result<RuntimeResponse, String> {
pub async fn wait(&self, request: WaitRequest) -> Result<WaitResponse, String> {
let cell_id = request.cell_id.clone();
let handle = self
.inner
@@ -123,7 +155,7 @@ impl CodeModeService {
.get(&request.cell_id)
.cloned();
let Some(handle) = handle else {
return Ok(missing_cell_response(cell_id));
return Ok(WaitResponse::MissingCell(missing_cell_response(cell_id)));
};
let (response_tx, response_rx) = oneshot::channel();
let control_message = if request.terminate {
@@ -135,11 +167,13 @@ impl CodeModeService {
}
};
if handle.control_tx.send(control_message).is_err() {
return Ok(missing_cell_response(cell_id));
return Ok(WaitResponse::MissingCell(missing_cell_response(cell_id)));
}
match response_rx.await {
Ok(response) => Ok(response),
Err(_) => Ok(missing_cell_response(request.cell_id)),
Ok(response) => Ok(WaitResponse::Cell(response)),
Err(_) => Ok(WaitResponse::MissingCell(missing_cell_response(
request.cell_id,
))),
}
}
@@ -181,9 +215,14 @@ impl CodeModeService {
let host = Arc::clone(&host);
let inner = Arc::clone(&inner);
tokio::spawn(async move {
let response = host
.invoke_tool(name, input, CancellationToken::new())
.await;
let invocation = CodeModeToolInvocation {
cell_id: cell_id.clone(),
runtime_tool_call_id: id.clone(),
tool_name: name,
input,
};
let response =
host.invoke_tool(invocation, CancellationToken::new()).await;
let runtime_tx = inner
.sessions
.lock()
@@ -482,6 +521,8 @@ mod tests {
use super::RuntimeResponse;
use super::SessionControlCommand;
use super::SessionControlContext;
use super::WaitRequest;
use super::WaitResponse;
use super::run_session_control;
use crate::FunctionCallOutputContentItem;
use crate::runtime::ExecuteRequest;
@@ -490,6 +531,7 @@ mod tests {
fn execute_request(source: &str) -> ExecuteRequest {
ExecuteRequest {
cell_id: None,
tool_call_id: "call_1".to_string(),
enabled_tools: Vec::new(),
source: source.to_string(),
@@ -832,6 +874,30 @@ image({
);
}
#[tokio::test]
async fn wait_reports_missing_cell_separately_from_runtime_results() {
    // Waiting on a cell id that was never executed must surface as
    // `MissingCell`, carrying the synthetic "not found" result rather than a
    // real code-cell lifecycle event.
    let service = CodeModeService::new();
    let response = service
        .wait(WaitRequest {
            cell_id: "missing".to_string(),
            yield_time_ms: 1,
            terminate: false,
        })
        .await
        .unwrap();
    assert_eq!(
        response,
        WaitResponse::MissingCell(RuntimeResponse::Result {
            cell_id: "missing".to_string(),
            content_items: Vec::new(),
            stored_values: HashMap::new(),
            error_text: Some("exec cell missing not found".to_string()),
        })
    );
}
#[tokio::test]
async fn terminate_waits_for_runtime_shutdown_before_responding() {
let inner = test_inner();

View File

@@ -53,6 +53,7 @@ codex-plugin = { workspace = true }
codex-protocol = { workspace = true }
codex-response-debug-context = { workspace = true }
codex-rollout = { workspace = true }
codex-rollout-trace = { workspace = true }
codex-rmcp-client = { workspace = true }
codex-sandboxing = { workspace = true }
codex-state = { workspace = true }

View File

@@ -77,6 +77,9 @@ use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::W3cTraceContext;
use codex_rollout_trace::CompactionTraceContext;
use codex_rollout_trace::InferenceTraceAttempt;
use codex_rollout_trace::InferenceTraceContext;
use codex_tools::create_tools_json_for_responses_api;
use eventsource_stream::Event;
use eventsource_stream::EventStreamError;
@@ -408,6 +411,7 @@ impl ModelClient {
effort: Option<ReasoningEffortConfig>,
summary: ReasoningSummaryConfig,
session_telemetry: &SessionTelemetry,
compaction_trace: &CompactionTraceContext,
) -> Result<Vec<ResponseItem>> {
if prompt.input.is_empty() {
return Ok(Vec::new());
@@ -462,10 +466,18 @@ impl ModelClient {
extra_headers.extend(build_conversation_headers(Some(
self.state.conversation_id.to_string(),
)));
client
.compact_input(&payload, extra_headers)
.await
.map_err(map_api_error)
let trace_attempt = compaction_trace.start_attempt(&payload);
match client.compact_input(&payload, extra_headers).await {
Ok(output) => {
trace_attempt.record_completed(&output);
Ok(output)
}
Err(err) => {
let err = map_api_error(err);
trace_attempt.record_failed(&err);
Err(err)
}
}
}
pub(crate) async fn create_realtime_call_with_headers(
@@ -1140,6 +1152,7 @@ impl ModelClientSession {
summary: ReasoningSummaryConfig,
service_tier: Option<ServiceTier>,
turn_metadata_header: Option<&str>,
inference_trace: &InferenceTraceContext,
) -> Result<ResponseStream> {
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
warn!(path, "Streaming from fixture");
@@ -1148,7 +1161,11 @@ impl ModelClientSession {
self.client.state.provider.stream_idle_timeout(),
)
.map_err(map_api_error)?;
let (stream, _last_request_rx) = map_response_stream(stream, session_telemetry.clone());
let (stream, _last_request_rx) = map_response_stream(
stream,
session_telemetry.clone(),
InferenceTraceAttempt::disabled(),
);
return Ok(stream);
}
@@ -1182,6 +1199,8 @@ impl ModelClientSession {
summary,
service_tier,
)?;
let inference_trace_attempt = inference_trace.start_attempt();
inference_trace_attempt.record_started(&request);
let client = ApiResponsesClient::new(
transport,
client_setup.api_provider,
@@ -1192,12 +1211,17 @@ impl ModelClientSession {
match stream_result {
Ok(stream) => {
let (stream, _) = map_response_stream(stream, session_telemetry.clone());
let (stream, _) = map_response_stream(
stream,
session_telemetry.clone(),
inference_trace_attempt,
);
return Ok(stream);
}
Err(ApiError::Transport(
unauthorized_transport @ TransportError::Http { status, .. },
)) if status == StatusCode::UNAUTHORIZED => {
inference_trace_attempt.record_failed(&unauthorized_transport);
pending_retry = PendingUnauthorizedRetry::from_recovery(
handle_unauthorized(
unauthorized_transport,
@@ -1208,7 +1232,11 @@ impl ModelClientSession {
);
continue;
}
Err(err) => return Err(map_api_error(err)),
Err(err) => {
let err = map_api_error(err);
inference_trace_attempt.record_failed(&err);
return Err(err);
}
}
}
}
@@ -1239,6 +1267,7 @@ impl ModelClientSession {
turn_metadata_header: Option<&str>,
warmup: bool,
request_trace: Option<W3cTraceContext>,
inference_trace: &InferenceTraceContext,
) -> Result<WebsocketStreamOutcome> {
let auth_manager = self.client.state.auth_manager.clone();
@@ -1313,17 +1342,35 @@ impl ModelClientSession {
let ws_request = self.prepare_websocket_request(ws_payload, &request);
self.websocket_session.last_request = Some(request);
let inference_trace_attempt = if warmup {
// Prewarm sends `generate=false`; it is connection setup, not a
// model inference attempt that should appear in rollout traces.
InferenceTraceAttempt::disabled()
} else {
inference_trace.start_attempt()
};
inference_trace_attempt.record_started(&ws_request);
let stream_result = self.websocket_session.connection.as_ref().ok_or_else(|| {
map_api_error(ApiError::Stream(
"websocket connection is unavailable".to_string(),
))
})?;
let stream_result = stream_result
let stream_result = match stream_result
.stream_request(ws_request, self.websocket_session.connection_reused())
.await
.map_err(map_api_error)?;
let (stream, last_request_rx) =
map_response_stream(stream_result, session_telemetry.clone());
{
Ok(stream_result) => stream_result,
Err(err) => {
let err = map_api_error(err);
inference_trace_attempt.record_failed(&err);
return Err(err);
}
};
let (stream, last_request_rx) = map_response_stream(
stream_result,
session_telemetry.clone(),
inference_trace_attempt,
);
self.websocket_session.last_response_rx = Some(last_request_rx);
return Ok(WebsocketStreamOutcome::Stream(stream));
}
@@ -1382,6 +1429,7 @@ impl ModelClientSession {
return Ok(());
}
let disabled_trace = InferenceTraceContext::disabled();
match self
.stream_responses_websocket(
prompt,
@@ -1393,6 +1441,7 @@ impl ModelClientSession {
turn_metadata_header,
/*warmup*/ true,
current_span_w3c_trace_context(),
&disabled_trace,
)
.await
{
@@ -1416,12 +1465,11 @@ impl ModelClientSession {
}
#[allow(clippy::too_many_arguments)]
/// Streams a single model request within the current turn.
/// Streams a single model request without rollout tracing.
///
/// The caller is responsible for passing per-turn settings explicitly (model selection,
/// reasoning settings, telemetry context, and turn metadata). This method will prefer the
/// Responses WebSocket transport when the provider supports it and it remains healthy, and will
/// fall back to the HTTP Responses API transport otherwise.
/// This is the public client API. It routes through the same transport code
/// as traced Codex turns, but supplies a disabled trace context so tracing
/// does not leak into callers that only need model streaming.
pub async fn stream(
&mut self,
prompt: &Prompt,
@@ -1431,6 +1479,37 @@ impl ModelClientSession {
summary: ReasoningSummaryConfig,
service_tier: Option<ServiceTier>,
turn_metadata_header: Option<&str>,
) -> Result<ResponseStream> {
let disabled_trace = InferenceTraceContext::disabled();
self.stream_with_trace(
prompt,
model_info,
session_telemetry,
effort,
summary,
service_tier,
turn_metadata_header,
&disabled_trace,
)
.await
}
#[allow(clippy::too_many_arguments)]
/// Streams a model request with an explicit rollout trace context.
///
/// The context may be enabled or disabled. Transport code records against it
/// unconditionally so HTTP, WebSocket, retry, and fallback paths do not need
/// separate trace/no-trace branches.
pub(crate) async fn stream_with_trace(
&mut self,
prompt: &Prompt,
model_info: &ModelInfo,
session_telemetry: &SessionTelemetry,
effort: Option<ReasoningEffortConfig>,
summary: ReasoningSummaryConfig,
service_tier: Option<ServiceTier>,
turn_metadata_header: Option<&str>,
inference_trace: &InferenceTraceContext,
) -> Result<ResponseStream> {
let wire_api = self.client.state.provider.wire_api;
match wire_api {
@@ -1448,6 +1527,7 @@ impl ModelClientSession {
turn_metadata_header,
/*warmup*/ false,
request_trace,
inference_trace,
)
.await?
{
@@ -1466,6 +1546,7 @@ impl ModelClientSession {
summary,
service_tier,
turn_metadata_header,
inference_trace,
)
.await
}
@@ -1561,6 +1642,7 @@ fn parent_thread_id_header_value(session_source: &SessionSource) -> Option<Strin
fn map_response_stream<S>(
api_stream: S,
session_telemetry: SessionTelemetry,
inference_trace_attempt: InferenceTraceAttempt,
) -> (ResponseStream, oneshot::Receiver<LastResponse>)
where
S: futures::Stream<Item = std::result::Result<ResponseEvent, ApiError>>
@@ -1601,6 +1683,11 @@ where
usage.total_tokens,
);
}
inference_trace_attempt.record_completed(
&response_id,
&token_usage,
&items_added,
);
if let Some(sender) = tx_last_response.take() {
let _ = sender.send(LastResponse {
response_id: response_id.clone(),
@@ -1625,6 +1712,7 @@ where
}
Err(err) => {
let mapped = map_api_error(err);
inference_trace_attempt.record_failed(&mapped);
if !logged_error {
session_telemetry.see_event_completed_failed(&mapped);
logged_error = true;

View File

@@ -299,6 +299,8 @@ use crate::rollout::RolloutRecorderParams;
use crate::rollout::map_session_init_error;
use crate::rollout::metadata;
use crate::rollout::policy::EventPersistenceMode;
use crate::rollout_trace::RolloutTraceRecorder;
use crate::rollout_trace::ThreadStartedTraceMetadata;
use crate::session_startup_prewarm::SessionStartupPrewarmHandle;
use crate::shell;
use crate::shell_snapshot::ShellSnapshot;
@@ -440,6 +442,7 @@ pub(crate) struct CodexSpawnArgs {
pub(crate) metrics_service_name: Option<String>,
pub(crate) inherited_shell_snapshot: Option<Arc<ShellSnapshot>>,
pub(crate) inherited_exec_policy: Option<Arc<ExecPolicyManager>>,
pub(crate) inherited_rollout_trace: Option<RolloutTraceRecorder>,
pub(crate) user_shell_override: Option<shell::Shell>,
pub(crate) parent_trace: Option<W3cTraceContext>,
pub(crate) analytics_events_client: Option<AnalyticsEventsClient>,
@@ -494,6 +497,7 @@ impl Codex {
inherited_shell_snapshot,
user_shell_override,
inherited_exec_policy,
inherited_rollout_trace,
parent_trace: _,
analytics_events_client,
} = args;
@@ -690,6 +694,7 @@ impl Codex {
agent_control,
environment,
analytics_events_client,
inherited_rollout_trace,
)
.await
.map_err(|e| {
@@ -1697,6 +1702,7 @@ impl Session {
agent_control: AgentControl,
environment: Option<Arc<Environment>>,
analytics_events_client: Option<AnalyticsEventsClient>,
inherited_rollout_trace: Option<RolloutTraceRecorder>,
) -> anyhow::Result<Arc<Self>> {
debug!(
"Configuring session: model={}; provider={:?}",
@@ -1833,6 +1839,40 @@ impl Session {
let rollout_path = rollout_recorder
.as_ref()
.map(|rec| rec.rollout_path().to_path_buf());
let trace_agent_path = session_configuration
.session_source
.get_agent_path()
.unwrap_or_else(codex_protocol::AgentPath::root);
let trace_task_name =
(!trace_agent_path.is_root()).then(|| trace_agent_path.name().to_string());
let trace_metadata = ThreadStartedTraceMetadata {
thread_id: conversation_id.to_string(),
agent_path: trace_agent_path.to_string(),
task_name: trace_task_name,
nickname: session_configuration.session_source.get_nickname(),
agent_role: session_configuration.session_source.get_agent_role(),
session_source: session_configuration.session_source.clone(),
cwd: session_configuration.cwd.to_path_buf(),
rollout_path: rollout_path.clone(),
model: session_configuration.collaboration_mode.model().to_string(),
provider_name: config.model_provider_id.clone(),
approval_policy: session_configuration.approval_policy.value().to_string(),
sandbox_policy: format!("{:?}", session_configuration.sandbox_policy.get()),
};
let rollout_trace = if let Some(rollout_trace) = inherited_rollout_trace {
rollout_trace.record_thread_started(trace_metadata);
Some(rollout_trace)
} else if matches!(
session_configuration.session_source,
SessionSource::SubAgent(SubAgentSource::ThreadSpawn { .. })
) {
// Spawned child threads are part of their root rollout tree. If
// the parent had no trace recorder, do not create an orphan child
// bundle that looks like an independent rollout.
None
} else {
RolloutTraceRecorder::maybe_create(conversation_id, trace_metadata)
};
let mut post_session_configured_events = Vec::<Event>::new();
@@ -2107,6 +2147,7 @@ impl Session {
analytics_events_client,
hooks,
rollout: Mutex::new(rollout_recorder),
rollout_trace,
user_shell: Arc::new(default_shell),
agent_identity_manager: Arc::new(AgentIdentityManager::new(
config.as_ref(),
@@ -2873,6 +2914,18 @@ impl Session {
/// Persist the event to rollout and send it to clients.
pub(crate) async fn send_event(&self, turn_context: &TurnContext, msg: EventMsg) {
let legacy_source = msg.clone();
if let Some(rollout_trace) = &self.services.rollout_trace {
rollout_trace.record_codex_turn_event(
self.conversation_id.to_string(),
&turn_context.sub_id,
&legacy_source,
);
rollout_trace.record_tool_call_event(
self.conversation_id.to_string(),
turn_context.sub_id.clone(),
&legacy_source,
);
}
let event = Event {
id: turn_context.sub_id.clone(),
msg,
@@ -2925,13 +2978,19 @@ impl Session {
return;
}
self.forward_child_completion_to_parent(*parent_thread_id, child_agent_path, status)
.await;
self.forward_child_completion_to_parent(
turn_context,
*parent_thread_id,
child_agent_path,
status,
)
.await;
}
/// Sends the standard completion envelope from a spawned MultiAgentV2 child to its parent.
async fn forward_child_completion_to_parent(
&self,
turn_context: &TurnContext,
parent_thread_id: ThreadId,
child_agent_path: &codex_protocol::AgentPath,
status: AgentStatus,
@@ -2949,9 +3008,19 @@ impl Session {
child_agent_path.clone(),
parent_agent_path,
Vec::new(),
message,
message.clone(),
/*trigger_turn*/ false,
);
if let Some(rollout_trace) = &self.services.rollout_trace {
rollout_trace.record_agent_result_interaction(
self.conversation_id.to_string(),
turn_context.sub_id.clone(),
parent_thread_id.to_string(),
child_agent_path.as_str(),
&message,
&status,
);
}
if let Err(err) = self
.services
.agent_control
@@ -2990,6 +3059,9 @@ impl Session {
// Persist the event into rollout (recorder filters as needed)
let rollout_items = vec![RolloutItem::EventMsg(event.msg.clone())];
self.persist_rollout_items(&rollout_items).await;
if let Some(rollout_trace) = &self.services.rollout_trace {
rollout_trace.record_protocol_event(&event.msg);
}
self.deliver_event_raw(event).await;
}
@@ -5887,6 +5959,12 @@ mod handlers {
msg: EventMsg::ShutdownComplete,
};
sess.send_event_raw(event).await;
if let Some(rollout_trace) = &sess.services.rollout_trace {
rollout_trace.record_thread_ended(
sess.conversation_id.to_string(),
codex_rollout_trace::RolloutStatus::Completed,
);
}
true
}
@@ -7843,8 +7921,19 @@ async fn try_run_sampling_request(
auth_mode = sess.services.auth_manager.auth_mode(),
features = sess.features.enabled_features(),
);
let inference_trace = sess.services.rollout_trace.as_ref().map_or_else(
codex_rollout_trace::InferenceTraceContext::disabled,
|trace| {
trace.inference_trace_context(
sess.conversation_id.to_string(),
turn_context.sub_id.clone(),
turn_context.model_info.slug.clone(),
turn_context.provider.name.clone(),
)
},
);
let mut stream = client_session
.stream(
.stream_with_trace(
prompt,
&turn_context.model_info,
&turn_context.session_telemetry,
@@ -7852,6 +7941,7 @@ async fn try_run_sampling_request(
turn_context.reasoning_summary,
turn_context.config.service_tier,
turn_metadata_header,
&inference_trace,
)
.instrument(trace_span!("stream_request"))
.or_cancel(&cancellation_token)

View File

@@ -95,6 +95,7 @@ pub(crate) async fn run_codex_thread_interactive(
inherited_shell_snapshot: None,
user_shell_override: None,
inherited_exec_policy: Some(Arc::clone(&parent_session.services.exec_policy)),
inherited_rollout_trace: None,
parent_trace: None,
analytics_events_client: Some(parent_session.services.analytics_events_client.clone()),
}))

View File

@@ -2776,6 +2776,7 @@ async fn session_new_fails_when_zsh_fork_enabled_without_zsh_path() {
.expect("create environment"),
)),
/*analytics_events_client*/ None,
/*inherited_rollout_trace*/ None,
)
.await;
@@ -2898,6 +2899,7 @@ pub(crate) async fn make_session_and_context() -> (Session, TurnContext) {
..HooksConfig::default()
}),
rollout: Mutex::new(None),
rollout_trace: None,
user_shell: Arc::new(default_user_shell()),
agent_identity_manager: Arc::new(crate::agent_identity::AgentIdentityManager::new(
config.as_ref(),
@@ -3758,6 +3760,7 @@ pub(crate) async fn make_session_and_context_with_dynamic_tools_and_rx(
..HooksConfig::default()
}),
rollout: Mutex::new(None),
rollout_trace: None,
user_shell: Arc::new(default_user_shell()),
agent_identity_manager: Arc::new(crate::agent_identity::AgentIdentityManager::new(
config.as_ref(),
@@ -5720,6 +5723,7 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
tracker: Arc::clone(&turn_diff_tracker),
call_id,
tool_name: codex_tools::ToolName::plain(tool_name),
source: crate::tools::context::ToolCallSource::Direct,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": params.command.clone(),
@@ -5798,6 +5802,7 @@ async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request()
tracker: Arc::clone(&tracker),
call_id: "exec-call".to_string(),
tool_name: codex_tools::ToolName::plain("exec_command"),
source: crate::tools::context::ToolCallSource::Direct,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"cmd": "echo hi",

View File

@@ -145,6 +145,7 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
call_id: "test-call".to_string(),
tool_name: codex_tools::ToolName::plain("shell"),
source: crate::tools::context::ToolCallSource::Direct,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": params.command.clone(),
@@ -211,6 +212,7 @@ async fn guardian_allows_unified_exec_additional_permissions_requests_past_polic
tracker: Arc::clone(&tracker),
call_id: "exec-call".to_string(),
tool_name: codex_tools::ToolName::plain("exec_command"),
source: crate::tools::context::ToolCallSource::Direct,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"cmd": "echo hi",
@@ -324,6 +326,7 @@ async fn shell_handler_allows_sticky_turn_permissions_without_inline_request_per
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
call_id: "sticky-turn-grant".to_string(),
tool_name: codex_tools::ToolName::plain("shell"),
source: crate::tools::context::ToolCallSource::Direct,
payload: ToolPayload::Function {
arguments: serde_json::json!({
"command": [
@@ -449,6 +452,7 @@ async fn guardian_subagent_does_not_inherit_parent_exec_policy_rules() {
metrics_service_name: None,
inherited_shell_snapshot: None,
inherited_exec_policy: Some(Arc::new(parent_exec_policy)),
inherited_rollout_trace: None,
user_shell_override: None,
parent_trace: None,
analytics_events_client: None,

View File

@@ -13,6 +13,7 @@ use crate::context_manager::ContextManager;
use crate::context_manager::TotalTokenUsageBreakdown;
use crate::context_manager::estimate_response_item_model_visible_bytes;
use crate::context_manager::is_codex_generated_item;
use crate::rollout_trace::CompactionCheckpointTracePayload;
use codex_analytics::CompactionImplementation;
use codex_analytics::CompactionPhase;
use codex_analytics::CompactionReason;
@@ -26,6 +27,7 @@ use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::CompactedItem;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::TurnStartedEvent;
use codex_rollout_trace::CompactionTraceContext;
use futures::TryFutureExt;
use tokio_util::sync::CancellationToken;
use tracing::error;
@@ -114,7 +116,11 @@ async fn run_remote_compact_task_inner_impl(
turn_context: &Arc<TurnContext>,
initial_context_injection: InitialContextInjection,
) -> CodexResult<()> {
let compaction_item = TurnItem::ContextCompaction(ContextCompactionItem::new());
let context_compaction_item = ContextCompactionItem::new();
// Use the UI compaction item ID as the trace compaction ID so protocol lifecycle events,
// endpoint attempts, and the installed history checkpoint all have one join key.
let compaction_id = context_compaction_item.id.clone();
let compaction_item = TurnItem::ContextCompaction(context_compaction_item);
sess.emit_turn_item_started(turn_context, &compaction_item)
.await;
let mut history = sess.clone_history().await;
@@ -131,6 +137,10 @@ async fn run_remote_compact_task_inner_impl(
"trimmed history items before remote compaction"
);
}
// This is the history selected for remote compaction, after any trimming required to fit the
// compact endpoint. The checkpoint below records it separately from the next sampling request,
// whose prompt will repeat current developer/context prefix items.
let trace_input_history = history.raw_items().to_vec();
// Required to keep `/undo` available after compaction
let ghost_snapshots: Vec<ResponseItem> = history
.raw_items()
@@ -157,6 +167,21 @@ async fn run_remote_compact_task_inner_impl(
personality: turn_context.personality,
output_schema: None,
};
// Remote compaction is the only compaction shape rollout tracing supports. The trace context
// records the exact `/responses/compact` request and response; normal sampling requests remain
// traced through the inference path.
let compaction_trace = sess.services.rollout_trace.as_ref().map_or_else(
CompactionTraceContext::disabled,
|trace| {
trace.compaction_trace_context(
sess.conversation_id.to_string(),
turn_context.sub_id.clone(),
compaction_id.clone(),
turn_context.model_info.slug.clone(),
turn_context.provider.name.clone(),
)
},
);
let mut new_history = sess
.services
@@ -167,6 +192,7 @@ async fn run_remote_compact_task_inner_impl(
turn_context.reasoning_effort,
turn_context.reasoning_summary,
&turn_context.session_telemetry,
&compaction_trace,
)
.or_else(|err| async {
let total_usage_breakdown = sess.get_total_token_usage_breakdown().await;
@@ -200,6 +226,20 @@ async fn run_remote_compact_task_inner_impl(
message: String::new(),
replacement_history: Some(new_history.clone()),
};
if let Some(trace) = sess.services.rollout_trace.as_ref() {
// Install is the semantic boundary where the compact endpoint's output becomes live
// thread history. Keep it distinct from the later inference request so the reducer can
// still represent repeated developer/context prefix items exactly as the model saw them.
trace.record_compaction_installed(
sess.conversation_id.to_string(),
turn_context.sub_id.clone(),
compaction_id,
&CompactionCheckpointTracePayload {
input_history: &trace_input_history,
replacement_history: &new_history,
},
);
}
sess.replace_compacted_history(new_history, reference_context_item, compacted_item)
.await;
sess.recompute_token_usage(turn_context).await;

View File

@@ -137,6 +137,7 @@ pub use project_doc::LOCAL_PROJECT_DOC_FILENAME;
pub use project_doc::discover_project_doc_paths;
pub use project_doc::read_project_docs;
mod rollout;
mod rollout_trace;
pub(crate) mod safety;
mod session_rollout_init_error;
pub mod shell;

View File

@@ -0,0 +1,886 @@
//! Opt-in producer for the rollout trace bundle.
//!
//! This module is the deliberately thin bridge from `codex-core` into
//! `codex-rollout-trace`. Core emits raw observations; the trace crate's
//! offline reducer owns the semantic graph.
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use crate::agent::AgentStatus;
use crate::tools::context::ToolCallSource;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use codex_protocol::ThreadId;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::ExecCommandSource;
use codex_protocol::protocol::ExecCommandStatus;
use codex_protocol::protocol::PatchApplyStatus;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::TurnAbortReason;
use codex_rollout_trace::AgentThreadId;
use codex_rollout_trace::CodeCellRuntimeStatus;
use codex_rollout_trace::CodeModeRuntimeToolId;
use codex_rollout_trace::CompactionTraceContext;
use codex_rollout_trace::ExecutionStatus;
use codex_rollout_trace::InferenceTraceContext;
use codex_rollout_trace::ModelVisibleCallId;
use codex_rollout_trace::RawPayloadKind;
use codex_rollout_trace::RawPayloadRef;
use codex_rollout_trace::RawToolCallRequester;
use codex_rollout_trace::RawTraceEventContext;
use codex_rollout_trace::RawTraceEventPayload;
use codex_rollout_trace::RolloutStatus;
use codex_rollout_trace::ToolCallKind;
use codex_rollout_trace::ToolCallSummary;
use codex_rollout_trace::TraceWriter;
use serde::Serialize;
use tracing::debug;
use tracing::warn;
use uuid::Uuid;
/// Environment variable that enables local trace-bundle recording.
///
/// The value is a root directory. Each independent root session gets one child
/// bundle directory. Spawned child threads share their root session's bundle so
/// one reduced `state.json` describes the whole multi-agent rollout tree.
///
/// Bundle directories are named `trace-{trace_id}-{thread_id}` (see
/// `RolloutTraceRecorder::create_in_root`).
pub(crate) const CODEX_ROLLOUT_TRACE_ROOT_ENV: &str = "CODEX_ROLLOUT_TRACE_ROOT";
/// Lightweight handle stored in `SessionServices`.
///
/// Cloning the handle is cheap; all sequencing and file ownership remains
/// inside `TraceWriter`.
#[derive(Clone, Debug)]
pub(crate) struct RolloutTraceRecorder {
    /// Shared writer that owns the bundle files and event sequencing.
    writer: Arc<TraceWriter>,
    /// Thread id of the root session. Only this thread's shutdown emits
    /// `RolloutEnded` (see `record_thread_ended`).
    root_thread_id: AgentThreadId,
}
/// Metadata captured once at thread/session start.
///
/// This payload is intentionally operational rather than reduced: it is a raw
/// payload that later reducers can mine as the reduced thread model evolves.
#[derive(Serialize)]
pub(crate) struct ThreadStartedTraceMetadata {
    /// Stringified id of the thread this metadata describes.
    pub(crate) thread_id: String,
    /// Position of the thread in the multi-agent tree, e.g. `/root` or
    /// `/root/repo_file_counter`.
    pub(crate) agent_path: String,
    /// Task name for spawned sub-agent threads; `None` for root sessions.
    pub(crate) task_name: Option<String>,
    /// Human-friendly nickname assigned to a sub-agent, if any.
    pub(crate) nickname: Option<String>,
    /// Role label of a sub-agent (e.g. "worker"), if any.
    pub(crate) agent_role: Option<String>,
    /// How the session was started (exec, sub-agent spawn, ...).
    pub(crate) session_source: SessionSource,
    /// Working directory the session runs in.
    pub(crate) cwd: PathBuf,
    /// Path of the session's rollout JSONL file, when one exists.
    pub(crate) rollout_path: Option<PathBuf>,
    /// Model slug configured for the session.
    pub(crate) model: String,
    /// Provider name configured for the session.
    pub(crate) provider_name: String,
    /// Debug-formatted approval policy at session start.
    pub(crate) approval_policy: String,
    /// Debug-formatted sandbox policy at session start.
    pub(crate) sandbox_policy: String,
}
/// History replacement checkpoint persisted when compaction installs new live history.
///
/// The checkpoint keeps compaction separate from ordinary sampling snapshots:
/// `input_history` is the live thread history selected for compaction, while
/// `replacement_history` is what future prompts may carry after the checkpoint.
#[derive(Serialize)]
pub(crate) struct CompactionCheckpointTracePayload<'a> {
    /// Live thread history selected for compaction (after any trimming).
    pub(crate) input_history: &'a [ResponseItem],
    /// Processed history that replaces the live thread history.
    pub(crate) replacement_history: &'a [ResponseItem],
}
/// Raw invocation payload for the canonical Codex tool boundary.
///
/// Protocol events may add runtime detail later, but this envelope preserves
/// the caller-facing request for both direct model calls and code-mode nested
/// calls.
#[derive(Serialize)]
struct DispatchedToolTraceRequest<'a> {
    /// Resolved tool name as dispatched by the registry.
    tool_name: &'a str,
    /// Optional namespace qualifying `tool_name`.
    tool_namespace: Option<&'a str>,
    /// JSON rendering of the tool payload (see `dispatched_tool_payload`).
    payload: serde_json::Value,
}
/// Raw response payload for dispatch-level tool trace events.
/// Raw response payload for dispatch-level tool trace events.
#[derive(Serialize)]
#[serde(rename_all = "snake_case", tag = "type")]
enum DispatchedToolTraceResponse<'a> {
    /// Result returned to the model for a direct tool call.
    DirectResponse {
        response_item: &'a ResponseInputItem,
    },
    /// Result returned to the code-mode runtime for a nested call.
    CodeModeResponse {
        value: serde_json::Value,
    },
    /// Dispatch failed; carries the error text.
    Error {
        error: &'a str,
    },
}
/// Raw code-mode response captured at the runtime boundary.
///
/// The reducer keeps the graph small and uses this payload as evidence for
/// future viewers that need exact content items or stored-value details.
#[derive(Serialize)]
struct CodeCellResponseTracePayload<'a> {
    /// The runtime response exactly as produced by the code-mode runtime.
    response: &'a codex_code_mode::RuntimeResponse,
}
/// Trace-only payload for the notification a finished child sends back to its parent.
/// Trace-only payload for the notification a finished child sends back to its parent.
#[derive(Serialize)]
struct AgentResultTracePayload<'a> {
    /// Agent path of the completed child thread.
    child_agent_path: &'a str,
    /// Completion message delivered to the parent.
    message: &'a str,
    /// Final status of the child agent.
    status: &'a AgentStatus,
}
impl RolloutTraceRecorder {
    /// Creates and starts a trace bundle if `CODEX_ROLLOUT_TRACE_ROOT` is set.
    ///
    /// Trace startup is best-effort. A tracing failure must not make the Codex
    /// session unusable, because traces are diagnostic and can be enabled while
    /// debugging unrelated production failures.
    pub(crate) fn maybe_create(
        thread_id: ThreadId,
        metadata: ThreadStartedTraceMetadata,
    ) -> Option<Self> {
        // Tracing is opt-in: no env var means no recorder at all.
        let root = std::env::var_os(CODEX_ROLLOUT_TRACE_ROOT_ENV)?;
        let root = PathBuf::from(root);
        match Self::create_in_root(root.as_path(), thread_id, metadata) {
            Ok(recorder) => Some(recorder),
            Err(err) => {
                warn!("failed to initialize rollout trace recorder: {err:#}");
                None
            }
        }
    }

    /// Builds the bundle directory, writer, and initial lifecycle events.
    ///
    /// Emits `RolloutStarted` followed by the root thread's `ThreadStarted`.
    fn create_in_root(
        root: &Path,
        thread_id: ThreadId,
        metadata: ThreadStartedTraceMetadata,
    ) -> anyhow::Result<Self> {
        let trace_id = Uuid::new_v4().to_string();
        let thread_id = thread_id.to_string();
        // One bundle directory per root session; spawned children reuse it.
        let bundle_dir = root.join(format!("trace-{trace_id}-{thread_id}"));
        let writer = TraceWriter::create(
            &bundle_dir,
            trace_id.clone(),
            thread_id.clone(),
            thread_id.clone(),
        )?;
        let recorder = Self {
            writer: Arc::new(writer),
            root_thread_id: thread_id.clone(),
        };
        recorder.append_best_effort(RawTraceEventPayload::RolloutStarted {
            trace_id,
            root_thread_id: thread_id,
        });
        recorder.record_thread_started(metadata);
        debug!("recording rollout trace at {}", bundle_dir.display());
        Ok(recorder)
    }

    // Test-only entry point that bypasses the env-var gate in `maybe_create`.
    #[cfg(test)]
    pub(crate) fn create_in_root_for_test(
        root: &Path,
        thread_id: ThreadId,
        metadata: ThreadStartedTraceMetadata,
    ) -> anyhow::Result<Self> {
        Self::create_in_root(root, thread_id, metadata)
    }

    /// Wraps selected UI/protocol events in the trace bundle.
    ///
    /// We intentionally skip high-volume stream deltas here. Inference/tool
    /// hooks emit typed raw events; protocol wrappers are debug breadcrumbs, not
    /// the canonical transcript.
    pub(crate) fn record_protocol_event(&self, event: &EventMsg) {
        // Only events whitelisted by `wrapped_protocol_event_type` are kept.
        let Some(event_type) = wrapped_protocol_event_type(event) else {
            return;
        };
        // A failed payload write is already logged; drop the event silently.
        let event_payload =
            match self.write_json_payload_best_effort(RawPayloadKind::ProtocolEvent, event) {
                Some(event_payload) => event_payload,
                None => return,
            };
        self.append_best_effort(RawTraceEventPayload::ProtocolEventObserved {
            event_type: event_type.to_string(),
            event_payload,
        });
    }

    /// Emits the lifecycle event and metadata for one thread in this rollout tree.
    ///
    /// Root sessions call this immediately after `RolloutStarted`; spawned
    /// child sessions call it on the inherited recorder. Keeping children in
    /// the root bundle preserves one raw payload namespace and one reduced
    /// `RolloutTrace` for the whole multi-agent task.
    pub(crate) fn record_thread_started(&self, metadata: ThreadStartedTraceMetadata) {
        // Serialize the full metadata first; the event then moves only the
        // identifying fields out of it.
        let metadata_payload =
            self.write_json_payload_best_effort(RawPayloadKind::SessionMetadata, &metadata);
        self.append_best_effort(RawTraceEventPayload::ThreadStarted {
            thread_id: metadata.thread_id,
            agent_path: metadata.agent_path,
            metadata_payload,
        });
    }

    /// Emits typed turn lifecycle events from the UI/protocol lifecycle.
    ///
    /// `default_turn_id` is used for aborts whose event carries no turn id.
    pub(crate) fn record_codex_turn_event(
        &self,
        thread_id: AgentThreadId,
        default_turn_id: &str,
        event: &EventMsg,
    ) {
        match event {
            EventMsg::TurnStarted(event) => {
                self.append_with_context_best_effort(
                    thread_id.clone(),
                    event.turn_id.clone(),
                    RawTraceEventPayload::CodexTurnStarted {
                        codex_turn_id: event.turn_id.clone(),
                        thread_id,
                    },
                );
            }
            EventMsg::TurnComplete(event) => {
                self.append_with_context_best_effort(
                    thread_id,
                    event.turn_id.clone(),
                    RawTraceEventPayload::CodexTurnEnded {
                        codex_turn_id: event.turn_id.clone(),
                        status: ExecutionStatus::Completed,
                    },
                );
            }
            EventMsg::TurnAborted(event) => {
                let turn_id = event
                    .turn_id
                    .clone()
                    .unwrap_or_else(|| default_turn_id.to_string());
                self.append_with_context_best_effort(
                    thread_id,
                    turn_id.clone(),
                    RawTraceEventPayload::CodexTurnEnded {
                        codex_turn_id: turn_id,
                        status: execution_status_for_abort_reason(&event.reason),
                    },
                );
            }
            _ => {}
        }
    }

    /// Emits typed runtime tool events from existing protocol lifecycle events.
    ///
    /// The protocol event stays separate from the caller-facing invocation and
    /// result payloads. Reducers attach it to `ToolCall.raw_runtime_payload_ids`
    /// and can also use it to build richer objects such as terminal operations.
    pub(crate) fn record_tool_call_event(
        &self,
        thread_id: AgentThreadId,
        codex_turn_id: String,
        event: &EventMsg,
    ) {
        let Some(payload) = self.tool_call_trace_payload(event) else {
            return;
        };
        self.append_with_context_best_effort(thread_id, codex_turn_id, payload);
    }

    /// Emits the parent runtime object for one model-authored code-mode cell.
    ///
    /// This must run before JavaScript starts because the runtime can request
    /// nested tools before the initial custom-tool response is available.
    pub(crate) fn record_code_cell_started(
        &self,
        thread_id: AgentThreadId,
        codex_turn_id: String,
        runtime_cell_id: &str,
        model_visible_call_id: &str,
        source_js: &str,
    ) {
        self.append_with_context_best_effort(
            thread_id,
            codex_turn_id,
            RawTraceEventPayload::CodeCellStarted {
                runtime_cell_id: runtime_cell_id.to_string(),
                model_visible_call_id: model_visible_call_id.to_string(),
                source_js: source_js.to_string(),
            },
        );
    }

    /// Emits the first response returned by the public code-mode `exec` tool.
    ///
    /// A yielded response returns control to the model while the cell keeps
    /// running. A terminal response is followed by `CodeCellEnded` so the
    /// reducer can distinguish "first model-visible output" from runtime end.
    pub(crate) fn record_code_cell_initial_response(
        &self,
        thread_id: AgentThreadId,
        codex_turn_id: String,
        response: &codex_code_mode::RuntimeResponse,
    ) {
        let response_payload = self.code_cell_response_payload(response);
        self.append_with_context_best_effort(
            thread_id,
            codex_turn_id,
            RawTraceEventPayload::CodeCellInitialResponse {
                runtime_cell_id: code_cell_runtime_id(response).to_string(),
                status: code_cell_status_for_runtime_response(response),
                response_payload,
            },
        );
    }

    /// Emits the terminal lifecycle point for a code-mode cell.
    pub(crate) fn record_code_cell_ended(
        &self,
        thread_id: AgentThreadId,
        codex_turn_id: String,
        response: &codex_code_mode::RuntimeResponse,
    ) {
        let response_payload = self.code_cell_response_payload(response);
        self.append_with_context_best_effort(
            thread_id,
            codex_turn_id,
            RawTraceEventPayload::CodeCellEnded {
                runtime_cell_id: code_cell_runtime_id(response).to_string(),
                status: code_cell_status_for_runtime_response(response),
                response_payload,
            },
        );
    }

    /// Emits a generic lifecycle start for direct/code-mode tools without a
    /// richer protocol-backed lifecycle.
    ///
    /// The registry calls this after it has resolved a concrete handler. At that
    /// point we know the tool call is valid, but we are still before
    /// approval/pre-use hooks, so blocked tools are represented as failed tool
    /// executions instead of disappearing from the trace.
    pub(crate) fn record_dispatched_tool_call_started(&self, invocation: &ToolInvocation) {
        let request = DispatchedToolTraceRequest {
            tool_name: invocation.tool_name.name.as_str(),
            tool_namespace: invocation.tool_name.namespace.as_deref(),
            payload: dispatched_tool_payload(&invocation.payload),
        };
        let request_payload =
            self.write_json_payload_best_effort(RawPayloadKind::ToolInvocation, &request);
        let (model_visible_call_id, code_mode_runtime_tool_id, requester) =
            dispatched_tool_requester_fields(invocation);
        self.append_with_context_best_effort(
            invocation.session.conversation_id.to_string(),
            invocation.turn.sub_id.clone(),
            RawTraceEventPayload::ToolCallStarted {
                tool_call_id: invocation.call_id.clone(),
                model_visible_call_id,
                code_mode_runtime_tool_id,
                requester,
                kind: dispatched_tool_kind(invocation),
                summary: ToolCallSummary::Generic {
                    label: dispatched_tool_label(invocation),
                    input_preview: Some(truncate_preview(&invocation.payload.log_payload())),
                    output_preview: None,
                },
                invocation_payload: request_payload,
            },
        );
    }

    /// Emits the caller-facing result for a dispatch-level tool lifecycle.
    pub(crate) fn record_dispatched_tool_call_ended(
        &self,
        invocation: &ToolInvocation,
        status: ExecutionStatus,
        result: &dyn ToolOutput,
        response_call_id: &str,
        tool_payload: &ToolPayload,
    ) {
        // Deferred so the borrow in `DirectResponse` can outlive the match arm.
        let direct_response_item;
        let response = match invocation.source {
            ToolCallSource::Direct => {
                direct_response_item = result.to_response_item(response_call_id, tool_payload);
                DispatchedToolTraceResponse::DirectResponse {
                    response_item: &direct_response_item,
                }
            }
            ToolCallSource::CodeMode { .. } => DispatchedToolTraceResponse::CodeModeResponse {
                value: result.code_mode_result(tool_payload),
            },
            // JS-REPL calls are not traced at this boundary.
            ToolCallSource::JsRepl => return,
        };
        self.append_dispatched_tool_call_ended(invocation, status, &response);
    }

    /// Emits a failed end event for a dispatch-level tool lifecycle.
    pub(crate) fn record_dispatched_tool_call_failed(
        &self,
        invocation: &ToolInvocation,
        error: &str,
    ) {
        let response = DispatchedToolTraceResponse::Error { error };
        self.append_dispatched_tool_call_ended(invocation, ExecutionStatus::Failed, &response);
    }

    // Shared tail of the dispatch-level end/failed paths: serialize the
    // response and append the `ToolCallEnded` event.
    fn append_dispatched_tool_call_ended(
        &self,
        invocation: &ToolInvocation,
        status: ExecutionStatus,
        response: &DispatchedToolTraceResponse<'_>,
    ) {
        let response_payload =
            self.write_json_payload_best_effort(RawPayloadKind::ToolResult, response);
        self.append_with_context_best_effort(
            invocation.session.conversation_id.to_string(),
            invocation.turn.sub_id.clone(),
            RawTraceEventPayload::ToolCallEnded {
                tool_call_id: invocation.call_id.clone(),
                status,
                result_payload: response_payload,
            },
        );
    }

    /// Builds reusable inference trace context for one Codex turn.
    ///
    /// The returned context is intentionally not "an inference call" yet.
    /// Transport code owns retry/fallback attempts and calls `start_attempt`
    /// only after it has built the concrete request payload for that attempt.
    pub(crate) fn inference_trace_context(
        &self,
        thread_id: AgentThreadId,
        codex_turn_id: String,
        model: String,
        provider_name: String,
    ) -> InferenceTraceContext {
        InferenceTraceContext::enabled(
            Arc::clone(&self.writer),
            thread_id,
            codex_turn_id,
            model,
            provider_name,
        )
    }

    /// Builds remote-compaction trace context for one checkpoint.
    ///
    /// Rollout tracing currently has a first-class checkpoint model only for remote compaction.
    /// The compact endpoint is a model-facing request whose output replaces live history, so it
    /// needs both request/response attempt events and a later checkpoint event when processed
    /// replacement history is installed.
    pub(crate) fn compaction_trace_context(
        &self,
        thread_id: AgentThreadId,
        codex_turn_id: String,
        compaction_id: String,
        model: String,
        provider_name: String,
    ) -> CompactionTraceContext {
        CompactionTraceContext::enabled(
            Arc::clone(&self.writer),
            thread_id,
            codex_turn_id,
            compaction_id,
            model,
            provider_name,
        )
    }

    /// Emits the checkpoint where remote-compacted history replaces live thread history.
    ///
    /// This checkpoint is deliberately separate from the compact endpoint response: Codex filters
    /// and reinjects context before replacement history becomes live. The reducer uses this event
    /// to connect the pre-compaction history to the processed replacement items without treating
    /// repeated developer/context prefix items as part of the replacement itself.
    pub(crate) fn record_compaction_installed(
        &self,
        thread_id: AgentThreadId,
        codex_turn_id: String,
        compaction_id: String,
        checkpoint: &CompactionCheckpointTracePayload<'_>,
    ) {
        // Without the serialized checkpoint the event would be meaningless, so
        // a payload-write failure drops the whole event.
        let Some(checkpoint_payload) =
            self.write_json_payload_best_effort(RawPayloadKind::CompactionCheckpoint, checkpoint)
        else {
            return;
        };
        self.append_with_context_best_effort(
            thread_id,
            codex_turn_id,
            RawTraceEventPayload::CompactionInstalled {
                compaction_id,
                checkpoint_payload,
            },
        );
    }

    /// Emits the v2 child-to-parent completion message as an explicit graph edge.
    ///
    /// This notification is not a tool call in the child: it is runtime delivery
    /// from the completed child turn into the parent's mailbox. Without a
    /// trace-owned edge the viewer would have to infer the relationship from a
    /// later parent prompt snapshot, which loses the runtime timing and source.
    pub(crate) fn record_agent_result_interaction(
        &self,
        child_thread_id: AgentThreadId,
        child_codex_turn_id: String,
        parent_thread_id: AgentThreadId,
        child_agent_path: &str,
        message: &str,
        status: &AgentStatus,
    ) {
        let carried_payload = self.write_json_payload_best_effort(
            RawPayloadKind::AgentResult,
            &AgentResultTracePayload {
                child_agent_path,
                message,
                status,
            },
        );
        self.append_with_context_best_effort(
            child_thread_id.clone(),
            child_codex_turn_id.clone(),
            RawTraceEventPayload::AgentResultObserved {
                // Deterministic edge id built from the endpoints and the turn.
                edge_id: format!(
                    "edge:agent_result:{child_thread_id}:{child_codex_turn_id}:{parent_thread_id}"
                ),
                child_thread_id,
                child_codex_turn_id,
                parent_thread_id,
                message: message.to_string(),
                carried_payload,
            },
        );
    }

    /// Emits terminal trace events for graceful session shutdown.
    ///
    /// Child agent sessions share their root recorder, so ending a child thread
    /// must not close the whole rollout. Only the root thread's shutdown emits
    /// `RolloutEnded`.
    pub(crate) fn record_thread_ended(&self, thread_id: AgentThreadId, status: RolloutStatus) {
        self.append_best_effort(RawTraceEventPayload::ThreadEnded {
            thread_id: thread_id.clone(),
            status: status.clone(),
        });
        if thread_id == self.root_thread_id {
            self.append_best_effort(RawTraceEventPayload::RolloutEnded { status });
        }
    }

    // Serializes `payload` into the bundle; logs and returns `None` on failure.
    fn write_json_payload_best_effort(
        &self,
        kind: RawPayloadKind,
        payload: &impl Serialize,
    ) -> Option<codex_rollout_trace::RawPayloadRef> {
        match self.writer.write_json_payload(kind, payload) {
            Ok(payload_ref) => Some(payload_ref),
            Err(err) => {
                warn!("failed to write rollout trace payload: {err:#}");
                None
            }
        }
    }

    // Appends a context-free event; failures are logged, never propagated.
    fn append_best_effort(&self, payload: RawTraceEventPayload) {
        if let Err(err) = self.writer.append(payload) {
            warn!("failed to append rollout trace event: {err:#}");
        }
    }

    // Appends an event tagged with the emitting thread and Codex turn.
    fn append_with_context_best_effort(
        &self,
        thread_id: AgentThreadId,
        codex_turn_id: String,
        payload: RawTraceEventPayload,
    ) {
        let context = RawTraceEventContext {
            thread_id: Some(thread_id),
            codex_turn_id: Some(codex_turn_id),
        };
        if let Err(err) = self.writer.append_with_context(context, payload) {
            warn!("failed to append rollout trace event: {err:#}");
        }
    }

    // Maps protocol tool lifecycle events onto typed runtime started/ended
    // payloads. Exec events sourced from the user shell are excluded; other
    // event kinds return `None` and are not traced here.
    fn tool_call_trace_payload(&self, event: &EventMsg) -> Option<RawTraceEventPayload> {
        match event {
            EventMsg::ExecCommandBegin(event) if event.source != ExecCommandSource::UserShell => {
                self.tool_runtime_started_payload(&event.call_id, event)
            }
            EventMsg::ExecCommandEnd(event) if event.source != ExecCommandSource::UserShell => self
                .tool_runtime_ended_payload(
                    &event.call_id,
                    execution_status_for_exec_status(&event.status),
                    event,
                ),
            EventMsg::PatchApplyBegin(event) => {
                self.tool_runtime_started_payload(&event.call_id, event)
            }
            EventMsg::PatchApplyEnd(event) => self.tool_runtime_ended_payload(
                &event.call_id,
                execution_status_for_patch_status(&event.status),
                event,
            ),
            EventMsg::McpToolCallBegin(event) => {
                self.tool_runtime_started_payload(&event.call_id, event)
            }
            EventMsg::McpToolCallEnd(event) => self.tool_runtime_ended_payload(
                &event.call_id,
                if event.result.is_ok() {
                    ExecutionStatus::Completed
                } else {
                    ExecutionStatus::Failed
                },
                event,
            ),
            EventMsg::CollabAgentSpawnBegin(event) => {
                self.tool_runtime_started_payload(&event.call_id, event)
            }
            // A spawn without a new thread id is treated as a failed spawn.
            EventMsg::CollabAgentSpawnEnd(event) => self.tool_runtime_ended_payload(
                &event.call_id,
                if event.new_thread_id.is_some() {
                    ExecutionStatus::Completed
                } else {
                    ExecutionStatus::Failed
                },
                event,
            ),
            EventMsg::CollabAgentInteractionBegin(event) => {
                self.tool_runtime_started_payload(&event.call_id, event)
            }
            EventMsg::CollabAgentInteractionEnd(event) => {
                self.tool_runtime_ended_payload(&event.call_id, ExecutionStatus::Completed, event)
            }
            EventMsg::CollabWaitingBegin(event) => {
                self.tool_runtime_started_payload(&event.call_id, event)
            }
            EventMsg::CollabWaitingEnd(event) => {
                self.tool_runtime_ended_payload(&event.call_id, ExecutionStatus::Completed, event)
            }
            EventMsg::CollabCloseBegin(event) => {
                self.tool_runtime_started_payload(&event.call_id, event)
            }
            EventMsg::CollabCloseEnd(event) => {
                self.tool_runtime_ended_payload(&event.call_id, ExecutionStatus::Completed, event)
            }
            _ => None,
        }
    }

    // Serializes the protocol event and wraps it as a runtime-started payload;
    // returns `None` when the payload could not be written.
    fn tool_runtime_started_payload(
        &self,
        tool_call_id: &str,
        event: &impl Serialize,
    ) -> Option<RawTraceEventPayload> {
        let runtime_payload =
            self.write_json_payload_best_effort(RawPayloadKind::ToolRuntimeEvent, event)?;
        Some(RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: tool_call_id.to_string(),
            runtime_payload,
        })
    }

    // Serializes the protocol event and wraps it as a runtime-ended payload
    // with the given status; returns `None` when the write failed.
    fn tool_runtime_ended_payload(
        &self,
        tool_call_id: &str,
        status: ExecutionStatus,
        event: &impl Serialize,
    ) -> Option<RawTraceEventPayload> {
        let runtime_payload =
            self.write_json_payload_best_effort(RawPayloadKind::ToolRuntimeEvent, event)?;
        Some(RawTraceEventPayload::ToolCallRuntimeEnded {
            tool_call_id: tool_call_id.to_string(),
            status,
            runtime_payload,
        })
    }

    // Writes the full code-mode runtime response as a raw tool-result payload.
    fn code_cell_response_payload(
        &self,
        response: &codex_code_mode::RuntimeResponse,
    ) -> Option<RawPayloadRef> {
        self.write_json_payload_best_effort(
            RawPayloadKind::ToolResult,
            &CodeCellResponseTracePayload { response },
        )
    }
}
/// Maps a turn-abort reason to the trace execution-status vocabulary.
///
/// Every abort reason maps to `Cancelled`: the turn was stopped deliberately,
/// not because of an error. The arms stay exhaustive so a new reason forces a
/// conscious mapping decision here.
fn execution_status_for_abort_reason(reason: &TurnAbortReason) -> ExecutionStatus {
    match reason {
        TurnAbortReason::Interrupted => ExecutionStatus::Cancelled,
        TurnAbortReason::Replaced => ExecutionStatus::Cancelled,
        TurnAbortReason::ReviewEnded => ExecutionStatus::Cancelled,
    }
}
/// Maps an exec-command completion status to the trace execution status.
///
/// `Declined` is the user refusing the command, so it is recorded as a
/// cancellation rather than a failure.
fn execution_status_for_exec_status(status: &ExecCommandStatus) -> ExecutionStatus {
    match status {
        ExecCommandStatus::Declined => ExecutionStatus::Cancelled,
        ExecCommandStatus::Failed => ExecutionStatus::Failed,
        ExecCommandStatus::Completed => ExecutionStatus::Completed,
    }
}
/// Maps a patch-apply completion status to the trace execution status.
///
/// Mirrors `execution_status_for_exec_status`: a declined patch is a
/// cancellation, not a failure.
fn execution_status_for_patch_status(status: &PatchApplyStatus) -> ExecutionStatus {
    match status {
        PatchApplyStatus::Declined => ExecutionStatus::Cancelled,
        PatchApplyStatus::Failed => ExecutionStatus::Failed,
        PatchApplyStatus::Completed => ExecutionStatus::Completed,
    }
}
fn code_cell_runtime_id(response: &codex_code_mode::RuntimeResponse) -> &str {
match response {
codex_code_mode::RuntimeResponse::Yielded { cell_id, .. }
| codex_code_mode::RuntimeResponse::Terminated { cell_id, .. }
| codex_code_mode::RuntimeResponse::Result { cell_id, .. } => cell_id,
}
}
fn code_cell_status_for_runtime_response(
response: &codex_code_mode::RuntimeResponse,
) -> CodeCellRuntimeStatus {
match response {
codex_code_mode::RuntimeResponse::Yielded { .. } => CodeCellRuntimeStatus::Yielded,
codex_code_mode::RuntimeResponse::Terminated { .. } => CodeCellRuntimeStatus::Terminated,
codex_code_mode::RuntimeResponse::Result { error_text, .. } => {
if error_text.is_some() {
CodeCellRuntimeStatus::Failed
} else {
CodeCellRuntimeStatus::Completed
}
}
}
}
/// Splits the invocation source into the three requester-related trace fields.
///
/// Returns `(model_visible_call_id, code_mode_runtime_tool_id, requester)`:
/// direct calls expose the model-visible call id, code-mode calls expose the
/// runtime tool id and the owning cell, and JS-REPL calls are attributed to the
/// model with no ids.
fn dispatched_tool_requester_fields(
    invocation: &ToolInvocation,
) -> (
    Option<ModelVisibleCallId>,
    Option<CodeModeRuntimeToolId>,
    RawToolCallRequester,
) {
    let (model_visible, runtime_tool, requester);
    match &invocation.source {
        ToolCallSource::Direct => {
            model_visible = Some(invocation.call_id.clone());
            runtime_tool = None;
            requester = RawToolCallRequester::Model;
        }
        ToolCallSource::CodeMode {
            cell_id,
            runtime_tool_call_id,
        } => {
            model_visible = None;
            runtime_tool = Some(runtime_tool_call_id.clone());
            requester = RawToolCallRequester::CodeCell {
                runtime_cell_id: cell_id.clone(),
            };
        }
        ToolCallSource::JsRepl => {
            model_visible = None;
            runtime_tool = None;
            requester = RawToolCallRequester::Model;
        }
    }
    (model_visible, runtime_tool, requester)
}
/// Classifies a dispatched tool invocation for the trace.
///
/// MCP payloads are classified from the payload itself; everything else is
/// classified from the resolved tool name, falling back to `Other` for names
/// the trace has no dedicated kind for.
fn dispatched_tool_kind(invocation: &ToolInvocation) -> ToolCallKind {
    match &invocation.payload {
        ToolPayload::Mcp { server, tool, .. } => ToolCallKind::Mcp {
            server: server.clone(),
            tool: tool.clone(),
        },
        _ => match invocation.tool_name.name.as_str() {
            "exec_command" | "local_shell" | "shell" | "shell_command" => ToolCallKind::ExecCommand,
            "write_stdin" => ToolCallKind::WriteStdin,
            "apply_patch" => ToolCallKind::ApplyPatch,
            "web_search" | "web_search_preview" => ToolCallKind::Web,
            "image_generation" | "image_query" => ToolCallKind::ImageGeneration,
            "spawn_agent" => ToolCallKind::SpawnAgent,
            "send_message" => ToolCallKind::SendMessage,
            "followup_task" => ToolCallKind::AssignAgentTask,
            "wait_agent" => ToolCallKind::WaitAgent,
            "close_agent" => ToolCallKind::CloseAgent,
            other => ToolCallKind::Other {
                name: other.to_string(),
            },
        },
    }
}
/// Builds the human-readable label for a dispatched tool call.
///
/// MCP calls get a `mcp:{server}:{tool}` label; all other calls use the
/// tool name's `Display` form.
fn dispatched_tool_label(invocation: &ToolInvocation) -> String {
    match &invocation.payload {
        ToolPayload::Mcp { server, tool, .. } => format!("mcp:{server}:{tool}"),
        _ => invocation.tool_name.to_string(),
    }
}
fn dispatched_tool_payload(payload: &ToolPayload) -> serde_json::Value {
match payload {
ToolPayload::Function { arguments } => serde_json::json!({
"type": "function",
"arguments": arguments,
}),
ToolPayload::ToolSearch { arguments } => serde_json::json!({
"type": "tool_search",
"arguments": arguments,
}),
ToolPayload::Custom { input } => serde_json::json!({
"type": "custom",
"input": input,
}),
ToolPayload::LocalShell { params } => serde_json::json!({
"type": "local_shell",
"command": params.command,
"workdir": params.workdir,
"timeout_ms": params.timeout_ms,
}),
ToolPayload::Mcp {
server,
tool,
raw_arguments,
} => serde_json::json!({
"type": "mcp",
"server": server,
"tool": tool,
"raw_arguments": raw_arguments,
}),
}
}
/// Truncates `value` to at most 160 characters for trace previews, appending
/// `"..."` only when characters were actually dropped.
///
/// Uses a single `char_indices` pass: the original implementation collected
/// the first 160 chars and then re-counted the entire string with
/// `chars().count()`, scanning long payloads twice. Slicing at the byte offset
/// of the first dropped char is equivalent (char boundaries are respected) and
/// touches each char at most once.
fn truncate_preview(value: &str) -> String {
    const MAX_PREVIEW_CHARS: usize = 160;
    match value.char_indices().nth(MAX_PREVIEW_CHARS) {
        // `nth` returns the byte offset of the first char beyond the limit,
        // which is exactly where the preview must stop.
        Some((cut, _)) => {
            let mut preview = value[..cut].to_string();
            preview.push_str("...");
            preview
        }
        // 160 chars or fewer: return the whole string with no ellipsis,
        // matching the original `count() > MAX` check.
        None => value.to_string(),
    }
}
fn wrapped_protocol_event_type(event: &EventMsg) -> Option<&'static str> {
match event {
EventMsg::SessionConfigured(_) => Some("session_configured"),
EventMsg::TurnStarted(_) => Some("turn_started"),
EventMsg::TurnComplete(_) => Some("turn_complete"),
EventMsg::TurnAborted(_) => Some("turn_aborted"),
EventMsg::ThreadNameUpdated(_) => Some("thread_name_updated"),
EventMsg::ThreadRolledBack(_) => Some("thread_rolled_back"),
EventMsg::Error(_) => Some("error"),
EventMsg::Warning(_) => Some("warning"),
EventMsg::ShutdownComplete => Some("shutdown_complete"),
_ => None,
}
}
#[cfg(test)]
#[path = "rollout_trace_tests.rs"]
mod tests;

View File

@@ -0,0 +1,163 @@
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use codex_protocol::AgentPath;
use codex_protocol::ThreadId;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use codex_rollout_trace::ExecutionStatus;
use codex_rollout_trace::RawTraceEventPayload;
use codex_rollout_trace::RolloutStatus;
use tempfile::TempDir;
use super::*;
// Ending the root thread must close the whole rollout: the replayed bundle
// reports the terminal status, the root thread id, and exactly one raw
// payload (the session metadata written at thread start).
#[test]
fn create_in_root_writes_replayable_lifecycle_events() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let thread_id = ThreadId::new();
    let recorder = RolloutTraceRecorder::create_in_root(
        temp.path(),
        thread_id,
        ThreadStartedTraceMetadata {
            thread_id: thread_id.to_string(),
            agent_path: "/root".to_string(),
            task_name: None,
            nickname: None,
            agent_role: None,
            session_source: SessionSource::Exec,
            cwd: PathBuf::from("/workspace"),
            rollout_path: Some(PathBuf::from("/tmp/rollout.jsonl")),
            model: "gpt-test".to_string(),
            provider_name: "test-provider".to_string(),
            approval_policy: "never".to_string(),
            sandbox_policy: format!("{:?}", SandboxPolicy::DangerFullAccess),
        },
    )
    .expect("trace recorder");
    // Root thread id matches the recorder's root, so RolloutEnded is emitted too.
    recorder.record_thread_ended(thread_id.to_string(), RolloutStatus::Completed);
    let bundle_dir = single_bundle_dir(temp.path())?;
    let replayed = codex_rollout_trace::replay_bundle(&bundle_dir)?;
    assert_eq!(replayed.status, RolloutStatus::Completed);
    assert_eq!(replayed.root_thread_id, thread_id.to_string());
    assert_eq!(replayed.threads[&thread_id.to_string()].agent_path, "/root");
    // Only the session metadata payload was written.
    assert_eq!(replayed.raw_payloads.len(), 1);
    Ok(())
}
// A spawned child thread must record into the root session's bundle: one
// bundle directory total, two threads in the replay, and the rollout still
// `Running` because only the child (not the root) ended.
#[test]
fn spawned_thread_start_appends_to_root_bundle() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let root_thread_id = ThreadId::new();
    let child_thread_id = ThreadId::new();
    let recorder = RolloutTraceRecorder::create_in_root(
        temp.path(),
        root_thread_id,
        minimal_metadata(root_thread_id),
    )
    .expect("trace recorder");
    // Child thread start is recorded on the inherited root recorder.
    recorder.record_thread_started(ThreadStartedTraceMetadata {
        thread_id: child_thread_id.to_string(),
        agent_path: "/root/repo_file_counter".to_string(),
        task_name: Some("repo_file_counter".to_string()),
        nickname: Some("Kepler".to_string()),
        agent_role: Some("worker".to_string()),
        session_source: SessionSource::SubAgent(SubAgentSource::ThreadSpawn {
            parent_thread_id: root_thread_id,
            depth: 1,
            agent_path: Some(
                AgentPath::try_from("/root/repo_file_counter").map_err(anyhow::Error::msg)?,
            ),
            agent_nickname: Some("Kepler".to_string()),
            agent_role: Some("worker".to_string()),
        }),
        cwd: PathBuf::from("/workspace"),
        rollout_path: Some(PathBuf::from("/tmp/child-rollout.jsonl")),
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        approval_policy: "never".to_string(),
        sandbox_policy: format!("{:?}", SandboxPolicy::DangerFullAccess),
    });
    recorder.record_thread_ended(child_thread_id.to_string(), RolloutStatus::Completed);
    let bundle_dir = single_bundle_dir(temp.path())?;
    let replayed = codex_rollout_trace::replay_bundle(&bundle_dir)?;
    // Exactly one bundle directory: the child did not create its own.
    assert_eq!(fs::read_dir(temp.path())?.count(), 1);
    assert_eq!(replayed.threads.len(), 2);
    assert_eq!(
        replayed.threads[&child_thread_id.to_string()].agent_path,
        "/root/repo_file_counter"
    );
    // The root thread never ended, so the rollout itself is still running.
    assert_eq!(replayed.status, RolloutStatus::Running);
    assert_eq!(
        replayed.threads[&child_thread_id.to_string()]
            .execution
            .status,
        ExecutionStatus::Completed
    );
    // One metadata payload per thread started (root + child).
    assert_eq!(replayed.raw_payloads.len(), 2);
    Ok(())
}
#[test]
fn protocol_wrapper_records_selected_events_as_raw_payloads() -> anyhow::Result<()> {
    // `record_protocol_event` should persist the observed event into the raw
    // event log (`trace.jsonl`) with its snake_case event-type tag.
    let temp = TempDir::new()?;
    let thread_id = ThreadId::new();
    let recorder =
        RolloutTraceRecorder::create_in_root(temp.path(), thread_id, minimal_metadata(thread_id))
            .expect("trace recorder");
    recorder.record_protocol_event(&EventMsg::ShutdownComplete);
    let event_log = fs::read_to_string(single_bundle_dir(temp.path())?.join("trace.jsonl"))?;
    // Every line of trace.jsonl is a RawTraceEvent; scan for the observed
    // protocol event among the bootstrap events.
    let protocol_event_seen = event_log.lines().any(|line| {
        let event: codex_rollout_trace::RawTraceEvent =
            serde_json::from_str(line).expect("raw trace event");
        matches!(
            event.payload,
            RawTraceEventPayload::ProtocolEventObserved {
                event_type,
                ..
            } if event_type == "shutdown_complete"
        )
    });
    assert!(protocol_event_seen);
    Ok(())
}
/// Smallest valid thread-start metadata: a root `/root` exec-sourced thread
/// with no task, nickname, role, or rollout path.
fn minimal_metadata(thread_id: ThreadId) -> ThreadStartedTraceMetadata {
    ThreadStartedTraceMetadata {
        thread_id: thread_id.to_string(),
        agent_path: "/root".to_string(),
        task_name: None,
        nickname: None,
        agent_role: None,
        session_source: SessionSource::Exec,
        cwd: PathBuf::from("/workspace"),
        rollout_path: None,
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        approval_policy: "never".to_string(),
        sandbox_policy: "danger-full-access".to_string(),
    }
}
/// Returns the path of the sole entry under `root`, asserting that exactly
/// one bundle directory was created.
fn single_bundle_dir(root: &Path) -> anyhow::Result<PathBuf> {
    let mut paths: Vec<PathBuf> = Vec::new();
    for entry in fs::read_dir(root)? {
        paths.push(entry?.path());
    }
    paths.sort();
    assert_eq!(paths.len(), 1);
    Ok(paths.remove(0))
}

View File

@@ -11,6 +11,7 @@ use crate::exec_policy::ExecPolicyManager;
use crate::guardian::GuardianRejection;
use crate::mcp::McpManager;
use crate::plugins::PluginsManager;
use crate::rollout_trace::RolloutTraceRecorder;
use crate::skills_watcher::SkillsWatcher;
use crate::tools::code_mode::CodeModeService;
use crate::tools::network_approval::NetworkApprovalService;
@@ -42,6 +43,7 @@ pub(crate) struct SessionServices {
pub(crate) analytics_events_client: AnalyticsEventsClient,
pub(crate) hooks: Hooks,
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
pub(crate) rollout_trace: Option<RolloutTraceRecorder>,
pub(crate) user_shell: Arc<crate::shell::Shell>,
pub(crate) agent_identity_manager: Arc<AgentIdentityManager>,
pub(crate) shell_snapshot_tx: watch::Sender<Option<Arc<crate::shell_snapshot::ShellSnapshot>>>,

View File

@@ -41,6 +41,7 @@ use codex_protocol::protocol::Op;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::SessionConfiguredEvent;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::TurnAbortReason;
use codex_protocol::protocol::TurnAbortedEvent;
use codex_protocol::protocol::W3cTraceContext;
@@ -925,6 +926,9 @@ impl ThreadManagerState {
}
Some(_) | None => crate::file_watcher::WatchRegistration::default(),
};
let inherited_rollout_trace = self
.inherited_rollout_trace_for_source(&session_source)
.await;
let CodexSpawnOk {
codex, thread_id, ..
} = Codex::spawn(CodexSpawnArgs {
@@ -944,6 +948,7 @@ impl ThreadManagerState {
metrics_service_name,
inherited_shell_snapshot,
inherited_exec_policy,
inherited_rollout_trace,
user_shell_override,
parent_trace,
analytics_events_client: self.analytics_events_client.clone(),
@@ -988,6 +993,24 @@ impl ThreadManagerState {
pub(crate) fn notify_thread_created(&self, thread_id: ThreadId) {
let _ = self.thread_created_tx.send(thread_id);
}
/// Returns the parent thread's rollout-trace recorder when `session_source`
/// is a v2 thread-spawn child, so the child appends to the root bundle
/// instead of opening its own.
///
/// Returns `None` for every other session source, and also when the parent
/// thread can no longer be looked up or carries no recorder.
async fn inherited_rollout_trace_for_source(
    &self,
    session_source: &SessionSource,
) -> Option<crate::rollout_trace::RolloutTraceRecorder> {
    // Only v2 thread-spawn children inherit a recorder. Independent
    // top-level threads still create their own rollout bundles.
    let SessionSource::SubAgent(SubAgentSource::ThreadSpawn {
        parent_thread_id, ..
    }) = session_source
    else {
        return None;
    };
    // Best-effort: a missing parent thread simply means no inherited trace.
    self.get_thread(*parent_thread_id)
        .await
        .ok()
        .and_then(|thread| thread.codex.session.services.rollout_trace.clone())
}
}
/// Return a fork snapshot cut strictly before the nth user message (0-based).

View File

@@ -30,12 +30,25 @@ impl CodeModeExecuteHandler {
.code_mode_service
.stored_values()
.await;
// Allocate before starting V8 so the trace can create the parent
// CodeCell before model-authored JavaScript issues nested tool calls.
let runtime_cell_id = exec.session.services.code_mode_service.allocate_cell_id();
if let Some(trace) = &exec.session.services.rollout_trace {
trace.record_code_cell_started(
exec.session.conversation_id.to_string(),
exec.turn.sub_id.clone(),
&runtime_cell_id,
&call_id,
&args.code,
);
}
let started_at = std::time::Instant::now();
let response = exec
.session
.services
.code_mode_service
.execute(codex_code_mode::ExecuteRequest {
cell_id: Some(runtime_cell_id.clone()),
tool_call_id: call_id,
enabled_tools,
source: args.code,
@@ -45,6 +58,23 @@ impl CodeModeExecuteHandler {
})
.await
.map_err(FunctionCallError::RespondToModel)?;
if let Some(trace) = &exec.session.services.rollout_trace {
// The initial response is the model-visible custom-tool return.
// Yielded cells keep running, so terminal lifecycle is only emitted
// here when the first response also ended the runtime.
trace.record_code_cell_initial_response(
exec.session.conversation_id.to_string(),
exec.turn.sub_id.clone(),
&response,
);
if !matches!(response, codex_code_mode::RuntimeResponse::Yielded { .. }) {
trace.record_code_cell_ended(
exec.session.conversation_id.to_string(),
exec.turn.sub_id.clone(),
&response,
);
}
}
handle_runtime_response(&exec, response, args.max_output_tokens, started_at)
.await
.map_err(FunctionCallError::RespondToModel)
@@ -62,6 +92,15 @@ impl ToolHandler for CodeModeExecuteHandler {
matches!(payload, ToolPayload::Custom { .. })
}
/// The public code-mode `exec` tool is represented by the first-class
/// CodeCell lifecycle rather than a dispatch-level ToolCall.
fn uses_first_class_trace_object(&self, invocation: &ToolInvocation) -> bool {
    // `exec` is represented by the first-class CodeCell lifecycle. The
    // dispatch-level ToolCall event would duplicate the same runtime
    // boundary as a less precise object.
    //
    // Only the plain (un-namespaced) public tool name with a Custom payload
    // is that canonical boundary; everything else uses the default policy.
    matches!(invocation.payload, ToolPayload::Custom { .. })
        && invocation.tool_name.namespace.is_none()
        && invocation.tool_name.name == PUBLIC_TOOL_NAME
}
async fn handle(&self, invocation: ToolInvocation) -> Result<Self::Output, FunctionCallError> {
let ToolInvocation {
session,

View File

@@ -7,6 +7,7 @@ use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use codex_code_mode::CodeModeToolInvocation;
use codex_code_mode::CodeModeTurnHost;
use codex_code_mode::RuntimeResponse;
use codex_protocol::models::FunctionCallOutputContentItem;
@@ -73,6 +74,10 @@ impl CodeModeService {
self.inner.replace_stored_values(values).await;
}
/// Reserves a fresh runtime cell id from the underlying code-mode runtime.
///
/// Callers allocate the id before `execute` so tracing can create the
/// parent CodeCell ahead of any nested tool calls issued by the cell's
/// JavaScript.
pub(crate) fn allocate_cell_id(&self) -> String {
    self.inner.allocate_cell_id()
}
pub(crate) async fn execute(
&self,
request: codex_code_mode::ExecuteRequest,
@@ -83,7 +88,7 @@ impl CodeModeService {
pub(crate) async fn wait(
&self,
request: codex_code_mode::WaitRequest,
) -> Result<RuntimeResponse, String> {
) -> Result<codex_code_mode::WaitResponse, String> {
self.inner.wait(request).await
}
@@ -118,15 +123,13 @@ struct CoreTurnHost {
impl CodeModeTurnHost for CoreTurnHost {
async fn invoke_tool(
&self,
tool_name: ToolName,
input: Option<JsonValue>,
invocation: CodeModeToolInvocation,
cancellation_token: CancellationToken,
) -> Result<JsonValue, String> {
call_nested_tool(
self.exec.clone(),
self.tool_runtime.clone(),
tool_name,
input,
invocation,
cancellation_token,
)
.await
@@ -298,10 +301,15 @@ async fn build_nested_router(exec: &ExecContext) -> ToolRouter {
async fn call_nested_tool(
exec: ExecContext,
tool_runtime: ToolCallRuntime,
tool_name: ToolName,
input: Option<JsonValue>,
invocation: CodeModeToolInvocation,
cancellation_token: CancellationToken,
) -> Result<JsonValue, FunctionCallError> {
let CodeModeToolInvocation {
cell_id,
runtime_tool_call_id,
tool_name,
input,
} = invocation;
if tool_name.namespace.is_none() && tool_name.name == PUBLIC_TOOL_NAME {
return Err(FunctionCallError::RespondToModel(format!(
"{PUBLIC_TOOL_NAME} cannot invoke itself"
@@ -335,7 +343,14 @@ async fn call_nested_tool(
payload,
};
let result = tool_runtime
.handle_tool_call_with_source(call, ToolCallSource::CodeMode, cancellation_token)
.handle_tool_call_with_source(
call,
ToolCallSource::CodeMode {
cell_id,
runtime_tool_call_id,
},
cancellation_token,
)
.await?;
Ok(result.code_mode_result())
}

View File

@@ -61,7 +61,7 @@ impl ToolHandler for CodeModeWaitHandler {
let args: ExecWaitArgs = parse_arguments(&arguments)?;
let exec = ExecContext { session, turn };
let started_at = std::time::Instant::now();
let response = exec
let wait_response = exec
.session
.services
.code_mode_service
@@ -72,9 +72,28 @@ impl ToolHandler for CodeModeWaitHandler {
})
.await
.map_err(FunctionCallError::RespondToModel)?;
handle_runtime_response(&exec, response, args.max_tokens, started_at)
.await
.map_err(FunctionCallError::RespondToModel)
let response = wait_response.runtime_response();
if matches!(&wait_response, codex_code_mode::WaitResponse::Cell(_))
&& !matches!(response, codex_code_mode::RuntimeResponse::Yielded { .. })
&& let Some(trace) = &exec.session.services.rollout_trace
{
// Only a live-cell wait can close a CodeCell. A missing
// cell is still an ordinary `wait` tool result, but there
// is no runtime object for the reducer to complete.
trace.record_code_cell_ended(
exec.session.conversation_id.to_string(),
exec.turn.sub_id.clone(),
response,
);
}
handle_runtime_response(
&exec,
wait_response.into_runtime_response(),
args.max_tokens,
started_at,
)
.await
.map_err(FunctionCallError::RespondToModel)
}
_ => Err(FunctionCallError::RespondToModel(format!(
"{WAIT_TOOL_NAME} expects JSON arguments"

View File

@@ -28,11 +28,18 @@ use tokio::sync::Mutex;
pub type SharedTurnDiffTracker = Arc<Mutex<TurnDiffTracker>>;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ToolCallSource {
Direct,
JsRepl,
CodeMode,
CodeMode {
/// Runtime cell that issued the nested tool request.
cell_id: String,
/// Code-mode's per-cell tool invocation id. This is useful for
/// debugging the JS/runtime bridge, but it is not the Codex tool call id
/// because the runtime id only needs to be unique within one cell.
runtime_tool_call_id: String,
},
}
#[derive(Clone)]
@@ -42,6 +49,7 @@ pub struct ToolInvocation {
pub tracker: SharedTurnDiffTracker,
pub call_id: String,
pub tool_name: ToolName,
pub source: ToolCallSource,
pub payload: ToolPayload,
}

View File

@@ -70,6 +70,7 @@ fn invocation(
tracker: Arc::new(Mutex::new(TurnDiffTracker::default())),
call_id: "call-1".to_string(),
tool_name: codex_tools::ToolName::plain(tool_name),
source: crate::tools::context::ToolCallSource::Direct,
payload,
}
}

View File

@@ -30,6 +30,7 @@ async fn multi_agent_v2_request_user_input_rejects_subagent_threads() {
tracker: Arc::new(Mutex::new(TurnDiffTracker::default())),
call_id: "call-1".to_string(),
tool_name: codex_tools::ToolName::plain(REQUEST_USER_INPUT_TOOL_NAME),
source: crate::tools::context::ToolCallSource::Direct,
payload: ToolPayload::Function {
arguments: json!({
"questions": [{

View File

@@ -228,6 +228,7 @@ async fn shell_pre_tool_use_payload_uses_joined_command() {
tracker: Arc::new(Mutex::new(TurnDiffTracker::new())),
call_id: "call-41".to_string(),
tool_name: codex_tools::ToolName::plain("shell"),
source: crate::tools::context::ToolCallSource::Direct,
payload,
}),
Some(crate::tools::registry::PreToolUsePayload {
@@ -253,6 +254,7 @@ async fn shell_command_pre_tool_use_payload_uses_raw_command() {
tracker: Arc::new(Mutex::new(TurnDiffTracker::new())),
call_id: "call-42".to_string(),
tool_name: codex_tools::ToolName::plain("shell_command"),
source: crate::tools::context::ToolCallSource::Direct,
payload,
}),
Some(crate::tools::registry::PreToolUsePayload {

View File

@@ -213,6 +213,7 @@ async fn exec_command_pre_tool_use_payload_uses_raw_command() {
tracker: Arc::new(Mutex::new(TurnDiffTracker::new())),
call_id: "call-43".to_string(),
tool_name: codex_tools::ToolName::plain("exec_command"),
source: crate::tools::context::ToolCallSource::Direct,
payload,
}),
Some(crate::tools::registry::PreToolUsePayload {
@@ -236,6 +237,7 @@ async fn exec_command_pre_tool_use_payload_skips_write_stdin() {
tracker: Arc::new(Mutex::new(TurnDiffTracker::new())),
call_id: "call-44".to_string(),
tool_name: codex_tools::ToolName::plain("write_stdin"),
source: crate::tools::context::ToolCallSource::Direct,
payload,
}),
None

View File

@@ -8,8 +8,10 @@ use crate::hook_runtime::record_additional_contexts;
use crate::hook_runtime::run_post_tool_use_hooks;
use crate::hook_runtime::run_pre_tool_use_hooks;
use crate::memories::usage::emit_metric_for_tool_read;
use crate::rollout_trace::RolloutTraceRecorder;
use crate::sandbox_tags::sandbox_tag;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::ToolCallSource;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -22,6 +24,7 @@ use codex_hooks::HookToolInputLocalShell;
use codex_hooks::HookToolKind;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::protocol::SandboxPolicy;
use codex_rollout_trace::ExecutionStatus;
use codex_tools::ConfiguredToolSpec;
use codex_tools::ToolName;
use codex_tools::ToolSpec;
@@ -74,6 +77,16 @@ pub trait ToolHandler: Send + Sync {
None
}
/// Returns `true` when this handler is represented by a trace object other
/// than `ToolCall`.
///
/// Protocol events are runtime observations on the `ToolCall`; they do not
/// suppress the canonical tool boundary. The public code-mode `exec` tool is
/// the exception because `CodeCell` owns that model-visible boundary.
fn uses_first_class_trace_object(&self, _invocation: &ToolInvocation) -> bool {
false
}
/// Perform the actual [ToolInvocation] and returns a [ToolOutput] containing
/// the final output to return to the model.
fn handle(
@@ -132,6 +145,8 @@ trait AnyToolHandler: Send + Sync {
result: &dyn ToolOutput,
) -> Option<PostToolUsePayload>;
fn uses_first_class_trace_object(&self, invocation: &ToolInvocation) -> bool;
fn handle_any<'a>(
&'a self,
invocation: ToolInvocation,
@@ -163,6 +178,10 @@ where
ToolHandler::post_tool_use_payload(self, call_id, payload, result)
}
fn uses_first_class_trace_object(&self, invocation: &ToolInvocation) -> bool {
ToolHandler::uses_first_class_trace_object(self, invocation)
}
fn handle_any<'a>(
&'a self,
invocation: ToolInvocation,
@@ -184,6 +203,60 @@ pub struct ToolRegistry {
handlers: HashMap<ToolName, Arc<dyn AnyToolHandler>>,
}
/// No-op capable trace handle for one resolved tool dispatch.
///
/// The registry has several early-return paths after a handler is selected:
/// pre-use hooks, handler execution, after-use hooks, and result status all
/// affect the trace lifecycle. Keeping the trace eligibility and event writes
/// behind this helper makes those paths say what happened instead of repeating
/// the Direct/CodeMode/JsRepl/first-class-object policy at each branch.
struct DispatchTrace {
    // `None` when this dispatch is not eligible for tracing (non-canonical
    // source, first-class trace object, or no recorder on the session).
    recorder: Option<RolloutTraceRecorder>,
}
impl DispatchTrace {
    /// Decides trace eligibility once, at handler-resolution time.
    fn new(invocation: &ToolInvocation, handler: &dyn AnyToolHandler) -> Self {
        // Direct model calls and code-mode nested calls are canonical tool
        // boundaries; JsRepl calls and handlers that own a first-class trace
        // object are not.
        let should_trace = matches!(
            invocation.source,
            ToolCallSource::Direct | ToolCallSource::CodeMode { .. }
        ) && !handler.uses_first_class_trace_object(invocation);
        let recorder = should_trace
            .then(|| invocation.session.services.rollout_trace.clone())
            .flatten();
        Self { recorder }
    }
    /// Emits the started event for an eligible dispatch; no-op otherwise.
    fn record_started(&self, invocation: &ToolInvocation) {
        if let Some(recorder) = &self.recorder {
            recorder.record_dispatched_tool_call_started(invocation);
        }
    }
    /// Emits the ended event, mapping the result's logging-success flag onto
    /// the trace execution status.
    fn record_completed(&self, invocation: &ToolInvocation, result: &AnyToolResult) {
        if let Some(recorder) = &self.recorder {
            let status = if result.result.success_for_logging() {
                ExecutionStatus::Completed
            } else {
                ExecutionStatus::Failed
            };
            recorder.record_dispatched_tool_call_ended(
                invocation,
                status,
                result.result.as_ref(),
                &result.call_id,
                &result.payload,
            );
        }
    }
    /// Emits the failed event with the error's display text; no-op when
    /// tracing is disabled for this dispatch.
    fn record_failed(&self, invocation: &ToolInvocation, error: &FunctionCallError) {
        if let Some(recorder) = &self.recorder {
            recorder.record_dispatched_tool_call_failed(invocation, &error.to_string());
        }
    }
}
impl ToolRegistry {
fn new(handlers: HashMap<ToolName, Arc<dyn AnyToolHandler>>) -> Self {
Self { handlers }
@@ -288,6 +361,9 @@ impl ToolRegistry {
return Err(FunctionCallError::Fatal(message));
}
let dispatch_trace = DispatchTrace::new(&invocation, handler.as_ref());
dispatch_trace.record_started(&invocation);
if let Some(pre_tool_use_payload) = handler.pre_tool_use_payload(&invocation)
&& let Some(reason) = run_pre_tool_use_hooks(
&invocation.session,
@@ -297,10 +373,12 @@ impl ToolRegistry {
)
.await
{
return Err(FunctionCallError::RespondToModel(format!(
let err = FunctionCallError::RespondToModel(format!(
"Command blocked by PreToolUse hook: {reason}. Command: {}",
pre_tool_use_payload.command
)));
));
dispatch_trace.record_failed(&invocation, &err);
return Err(err);
}
let is_mutating = handler.is_mutating(&invocation).await;
@@ -383,6 +461,7 @@ impl ToolRegistry {
.await;
if let Some(err) = hook_abort_error {
dispatch_trace.record_failed(&invocation, &err);
return Err(err);
}
@@ -422,9 +501,13 @@ impl ToolRegistry {
let result = guard.take().ok_or_else(|| {
FunctionCallError::Fatal("tool produced no output".to_string())
})?;
dispatch_trace.record_completed(&invocation, &result);
Ok(result)
}
Err(err) => Err(err),
Err(err) => {
dispatch_trace.record_failed(&invocation, &err);
Err(err)
}
}
}
}

View File

@@ -1,7 +1,25 @@
use super::*;
use crate::codex::make_session_and_context;
use crate::rollout_trace::RolloutTraceRecorder;
use crate::rollout_trace::ThreadStartedTraceMetadata;
use crate::tools::code_mode::CodeModeWaitHandler;
use crate::tools::code_mode::WAIT_TOOL_NAME;
use crate::turn_diff_tracker::TurnDiffTracker;
use codex_protocol::config_types::ModeKind;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::TurnStartedEvent;
use codex_rollout_trace::ToolCallRequester;
use pretty_assertions::assert_eq;
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use tempfile::TempDir;
struct TestHandler;
#[derive(Default)]
struct TestHandler {
first_class_trace_object: bool,
}
impl ToolHandler for TestHandler {
type Output = crate::tools::context::FunctionToolOutput;
@@ -10,15 +28,22 @@ impl ToolHandler for TestHandler {
ToolKind::Function
}
fn uses_first_class_trace_object(&self, _invocation: &ToolInvocation) -> bool {
self.first_class_trace_object
}
async fn handle(&self, _invocation: ToolInvocation) -> Result<Self::Output, FunctionCallError> {
unreachable!("test handler should not be invoked")
Ok(crate::tools::context::FunctionToolOutput::from_text(
"ok".to_string(),
Some(true),
))
}
}
#[test]
fn handler_looks_up_namespaced_aliases_explicitly() {
let plain_handler = Arc::new(TestHandler) as Arc<dyn AnyToolHandler>;
let namespaced_handler = Arc::new(TestHandler) as Arc<dyn AnyToolHandler>;
let plain_handler = Arc::new(TestHandler::default()) as Arc<dyn AnyToolHandler>;
let namespaced_handler = Arc::new(TestHandler::default()) as Arc<dyn AnyToolHandler>;
let namespace = "mcp__codex_apps__gmail";
let tool_name = "gmail_get_recent_emails";
let plain_name = codex_tools::ToolName::plain(tool_name);
@@ -49,3 +74,248 @@ fn handler_looks_up_namespaced_aliases_explicitly() {
.is_some_and(|handler| Arc::ptr_eq(handler, &namespaced_handler))
);
}
#[tokio::test]
async fn dispatch_lifecycle_trace_records_direct_and_code_mode_requesters() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let (mut session, turn) = make_session_and_context().await;
    attach_test_trace(&mut session, &turn, temp.path())?;
    // Open a CodeCell up front so the code-mode dispatch below can be
    // attributed to it by the reducer.
    session
        .services
        .rollout_trace
        .as_ref()
        .expect("trace recorder")
        .record_code_cell_started(
            session.conversation_id.to_string(),
            turn.sub_id.clone(),
            "cell-1",
            "call-code",
            "await tools.test_tool({})",
        );
    let registry = ToolRegistry::new(HashMap::from([(
        codex_tools::ToolName::plain("test_tool"),
        Arc::new(TestHandler::default()) as Arc<dyn AnyToolHandler>,
    )]));
    let session = Arc::new(session);
    let turn = Arc::new(turn);
    // Dispatch once as a direct model call...
    registry
        .dispatch_any(test_invocation(
            Arc::clone(&session),
            Arc::clone(&turn),
            "direct-call",
            "test_tool",
            ToolCallSource::Direct,
            "{}",
        ))
        .await?;
    // ...and once as a nested code-mode call from the cell opened above.
    registry
        .dispatch_any(test_invocation(
            session,
            turn,
            "code-mode-call",
            "test_tool",
            ToolCallSource::CodeMode {
                cell_id: "cell-1".to_string(),
                runtime_tool_call_id: "tool-1".to_string(),
            },
            "{}",
        ))
        .await?;
    let replayed = codex_rollout_trace::replay_bundle(single_bundle_dir(temp.path())?)?;
    // Direct dispatch: model-visible, requested by the model itself.
    assert_eq!(
        replayed.tool_calls["direct-call"].model_visible_call_id,
        Some("direct-call".to_string()),
    );
    assert_eq!(
        replayed.tool_calls["direct-call"].requester,
        ToolCallRequester::Model,
    );
    assert!(
        replayed.tool_calls["direct-call"]
            .raw_invocation_payload_id
            .is_some(),
        "dispatch tracing should keep the tool invocation payload",
    );
    assert!(
        replayed.tool_calls["direct-call"]
            .raw_result_payload_id
            .is_some(),
        "direct calls should keep the model-facing result payload",
    );
    // Code-mode dispatch: not model-visible, requested by the CodeCell and
    // tagged with the runtime's per-cell tool id.
    assert_eq!(
        replayed.tool_calls["code-mode-call"].model_visible_call_id,
        None,
    );
    assert_eq!(
        replayed.tool_calls["code-mode-call"].code_mode_runtime_tool_id,
        Some("tool-1".to_string()),
    );
    assert_eq!(
        replayed.tool_calls["code-mode-call"].requester,
        ToolCallRequester::CodeCell {
            code_cell_id: "code_cell:call-code".to_string(),
        },
    );
    assert!(
        replayed.tool_calls["code-mode-call"]
            .raw_result_payload_id
            .is_some(),
        "code-mode calls should keep the result returned to JavaScript",
    );
    Ok(())
}
#[tokio::test]
async fn dispatch_lifecycle_trace_skips_noncanonical_boundaries() -> anyhow::Result<()> {
    // JsRepl-sourced dispatches are not canonical tool boundaries...
    assert_dispatch_trace_skips(
        Arc::new(TestHandler::default()) as Arc<dyn AnyToolHandler>,
        ToolCallSource::JsRepl,
    )
    .await?;
    // ...and neither are handlers that own a first-class trace object.
    assert_dispatch_trace_skips(
        Arc::new(TestHandler {
            first_class_trace_object: true,
        }) as Arc<dyn AnyToolHandler>,
        ToolCallSource::Direct,
    )
    .await
}
/// Dispatches one call through `handler` with the given `source` and asserts
/// that the reduced trace contains no ToolCall objects at all.
async fn assert_dispatch_trace_skips(
    handler: Arc<dyn AnyToolHandler>,
    source: ToolCallSource,
) -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let (mut session, turn) = make_session_and_context().await;
    attach_test_trace(&mut session, &turn, temp.path())?;
    let registry = ToolRegistry::new(HashMap::from([(
        codex_tools::ToolName::plain("test_tool"),
        handler,
    )]));
    let session = Arc::new(session);
    let turn = Arc::new(turn);
    registry
        .dispatch_any(test_invocation(
            session,
            turn,
            "skipped-call",
            "test_tool",
            source,
            "{}",
        ))
        .await?;
    let replayed = codex_rollout_trace::replay_bundle(single_bundle_dir(temp.path())?)?;
    // The dispatch itself succeeded but was never traced.
    assert_eq!(replayed.tool_calls, Default::default());
    Ok(())
}
#[tokio::test]
async fn missing_code_mode_wait_traces_only_the_wait_tool_call() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let (mut session, turn) = make_session_and_context().await;
    attach_test_trace(&mut session, &turn, temp.path())?;
    let registry = ToolRegistry::new(HashMap::from([(
        codex_tools::ToolName::plain(WAIT_TOOL_NAME),
        Arc::new(CodeModeWaitHandler) as Arc<dyn AnyToolHandler>,
    )]));
    let session = Arc::new(session);
    let turn = Arc::new(turn);
    // Wait on a cell id that was never started: the wait tool itself still
    // runs and is traced, but there is no CodeCell for the reducer to close.
    registry
        .dispatch_any(test_invocation(
            session,
            turn,
            "wait-call",
            WAIT_TOOL_NAME,
            ToolCallSource::Direct,
            r#"{"cell_id":"noop","terminate":true}"#,
        ))
        .await?;
    let replayed = codex_rollout_trace::replay_bundle(single_bundle_dir(temp.path())?)?;
    // No CodeCell objects, yet the wait ToolCall kept its result payload.
    assert_eq!(replayed.code_cells.len(), 0);
    assert!(
        replayed.tool_calls["wait-call"]
            .raw_result_payload_id
            .is_some()
    );
    Ok(())
}
/// Builds a function-payload `ToolInvocation` for dispatch tests, with a
/// fresh diff tracker and the given call id, tool name, source, and raw
/// JSON argument string.
fn test_invocation(
    session: Arc<crate::codex::Session>,
    turn: Arc<crate::codex::TurnContext>,
    call_id: &str,
    tool_name: &str,
    source: ToolCallSource,
    arguments: &str,
) -> ToolInvocation {
    ToolInvocation {
        session,
        turn,
        tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
        call_id: call_id.to_string(),
        tool_name: codex_tools::ToolName::plain(tool_name),
        source,
        payload: ToolPayload::Function {
            arguments: arguments.to_string(),
        },
    }
}
/// Installs a fresh trace recorder on `session` (bundle rooted at `root`)
/// and records one started turn so subsequent dispatch events have a turn
/// to attach to.
fn attach_test_trace(
    session: &mut crate::codex::Session,
    turn: &crate::codex::TurnContext,
    root: &Path,
) -> anyhow::Result<()> {
    let thread_id = session.conversation_id;
    let recorder = RolloutTraceRecorder::create_in_root_for_test(
        root,
        thread_id,
        ThreadStartedTraceMetadata {
            thread_id: thread_id.to_string(),
            agent_path: "/root".to_string(),
            task_name: None,
            nickname: None,
            agent_role: None,
            session_source: SessionSource::Exec,
            cwd: PathBuf::from("/workspace"),
            rollout_path: None,
            model: "gpt-test".to_string(),
            provider_name: "test-provider".to_string(),
            approval_policy: "never".to_string(),
            sandbox_policy: "danger-full-access".to_string(),
        },
    )?;
    // The turn-started event gives dispatch traces a live turn context.
    recorder.record_codex_turn_event(
        thread_id.to_string(),
        &turn.sub_id,
        &EventMsg::TurnStarted(TurnStartedEvent {
            turn_id: turn.sub_id.clone(),
            started_at: None,
            model_context_window: None,
            collaboration_mode_kind: ModeKind::default(),
        }),
    );
    session.services.rollout_trace = Some(recorder);
    Ok(())
}
/// Returns the path of the sole entry under `root`, asserting that exactly
/// one bundle directory was created.
fn single_bundle_dir(root: &Path) -> anyhow::Result<PathBuf> {
    let mut paths: Vec<PathBuf> = Vec::new();
    for entry in fs::read_dir(root)? {
        paths.push(entry?.path());
    }
    paths.sort();
    assert_eq!(paths.len(), 1);
    Ok(paths.remove(0))
}

View File

@@ -271,7 +271,7 @@ impl ToolRouter {
let direct_js_repl_call = tool_name.namespace.is_none()
&& matches!(tool_name.name.as_str(), "js_repl" | "js_repl_reset");
if source == ToolCallSource::Direct
if matches!(source, ToolCallSource::Direct)
&& turn.tools_config.js_repl_tools_only
&& !direct_js_repl_call
{
@@ -287,6 +287,7 @@ impl ToolRouter {
tracker,
call_id,
tool_name,
source,
payload,
};

View File

@@ -0,0 +1,6 @@
load("//:defs.bzl", "codex_rust_crate")
codex_rust_crate(
name = "rollout-trace",
crate_name = "codex_rollout_trace",
)

View File

@@ -0,0 +1,23 @@
[package]
edition.workspace = true
license.workspace = true
name = "codex-rollout-trace"
version.workspace = true
[lib]
doctest = false
name = "codex_rollout_trace"
path = "src/lib.rs"
[lints]
workspace = true
[dependencies]
anyhow = { workspace = true }
codex-protocol = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
[dev-dependencies]
pretty_assertions = { workspace = true }
tempfile = { workspace = true }

View File

@@ -0,0 +1,200 @@
# Rollout Trace
Rollout tracing is an opt-in diagnostic path for understanding what happened
during a Codex session. It records raw runtime evidence into a local bundle, then
replays that bundle into a semantic graph that a debugger or UI can inspect.
The key design choice is: **observe first, interpret later**.
Hot-path Codex code does not try to build the final graph while the session is
running. It writes ordered raw events and payload references. The offline reducer
then decides which events became model-visible conversation, which events were
runtime work, and how information moved between threads, tools, code cells, and
terminal sessions.
## What This Gives Us
Rollout traces make failures debuggable when the normal transcript is not enough.
They preserve enough evidence to answer questions like:
- Which model request produced this tool call?
- Did this output come from the model-visible transcript, a code-mode runtime
value, a terminal operation, or an agent notification?
- Which code-mode `exec` cell issued a nested tool call?
- Which terminal operation created or reused a running process?
- Which multi-agent v2 tool call spawned, messaged, received from, or closed a
child thread?
The reduced `state.json` is intentionally not just a transcript. It is a graph of
model-visible conversation plus the runtime objects that explain how Codex got
there.
## System Shape
```mermaid
flowchart TD
subgraph Runtime["codex-core runtime"]
Protocol["protocol lifecycle\nthread start/end, turn start/end"]
Inference["inference + compaction\nrequests, responses, checkpoints"]
Tools["tool dispatch\ndirect model tools + code-mode nested tools"]
CodeMode["code-mode runtime\nexec cells, yields, waits, termination"]
Terminal["terminal runtime\nexec_command / write_stdin operations"]
Agents["multi_agent_v2\nspawn, task delivery, result, close"]
end
Recorder["RolloutTraceRecorder\nthin best-effort producer"]
Writer["TraceWriter\nassigns seq and writes payloads before events"]
subgraph Bundle["trace bundle"]
Manifest["manifest.json\ntrace_id, rollout_id, root_thread_id"]
Events["trace.jsonl\nordered raw event spine"]
Payloads["payloads/*.json\nlarge raw evidence"]
end
Reducer["replay_bundle\ndeterministic offline reducer"]
subgraph State["state.json"]
Threads["threads + turns"]
Conversation["conversation_items\nwhat the model saw"]
RuntimeObjects["inference_calls, tool_calls,\ncode_cells, terminals, compactions"]
Edges["interaction_edges\nspawn, task, result, close"]
RawRefs["raw_payload refs"]
end
Protocol --> Recorder
Inference --> Recorder
Tools --> Recorder
CodeMode --> Recorder
Terminal --> Recorder
Agents --> Recorder
Recorder --> Writer
Writer --> Manifest
Writer --> Payloads
Writer --> Events
Manifest --> Reducer
Events --> Reducer
Payloads --> Reducer
Reducer --> Threads
Reducer --> Conversation
Reducer --> RuntimeObjects
Reducer --> Edges
Reducer --> RawRefs
```
The recorder is deliberately small. It is enabled by `CODEX_ROLLOUT_TRACE_ROOT`
and must never make a Codex session fail just because tracing failed. Core emits
raw observations; this crate owns the bundle schema, writer API, and reducer.
## Bundle Layout
A trace bundle contains:
- `manifest.json`: trace identity and bundle metadata.
- `trace.jsonl`: append-only raw events ordered by writer-assigned `seq`.
- `payloads/*.json`: raw requests, responses, tool inputs/results, runtime
events, terminal output, compaction data, and protocol snapshots.
- `state.json`: optional reducer output written by `codex debug trace-reduce`.
`trace_id` identifies this diagnostic artifact. `rollout_id` identifies the
Codex rollout/session being observed. Keeping those separate lets us reason about
the stored trace without confusing it with the product-level session identity.
To reduce a bundle:
```bash
codex debug trace-reduce <trace-bundle>
```
By default this writes `<trace-bundle>/state.json`.
## Raw Evidence vs Reduced Graph
```mermaid
flowchart LR
Model["model-visible payloads\nrequests and response output items"]
Runtime["runtime observations\ntool dispatch, terminal output, code-mode JSON"]
RawPayloads["payloads/*.json\nexact evidence"]
Reducer["reducer"]
Conversation["ConversationItem\nwhat the model saw"]
ToolCall["ToolCall\nruntime tool boundary"]
CodeCell["CodeCell\nmodel-authored exec cell"]
TerminalOperation["TerminalOperation\ncommand/write/poll"]
InteractionEdge["InteractionEdge\ninformation flow"]
Model --> RawPayloads
Runtime --> RawPayloads
RawPayloads --> Reducer
Reducer --> Conversation
Reducer --> ToolCall
Reducer --> CodeCell
Reducer --> TerminalOperation
Reducer --> InteractionEdge
CodeCell --> ToolCall
ToolCall --> TerminalOperation
ToolCall --> InteractionEdge
Conversation --> InteractionEdge
```
This distinction is the reason the model has both raw payload references and
semantic objects. A code-mode nested tool call, for example, has JSON input and
output at the JavaScript runtime boundary, but the model-visible transcript only
contains the surrounding `exec` custom tool call and its eventual output.
The reducer keeps those facts separate:
- `ConversationItem` records what appeared in model-facing requests/responses.
- `ToolCall`, `CodeCell`, `TerminalOperation`, `InferenceCall`, and
`Compaction` record runtime/debug boundaries.
- `InteractionEdge` records information flow between objects, such as a
`spawn_agent` tool call delivering a task into a child thread.
- `RawPayloadRef` points back to exact evidence when a viewer needs more detail
than the reduced graph stores inline.
## Multi-Agent v2
Multi-agent v2 child threads share the root trace writer. That means one root
bundle reduces into one graph containing the parent thread, child threads, and
the edges between them.
```mermaid
flowchart LR
RootTool["root ToolCall\nspawn_agent / followup_task / send_message"]
ChildInput["child ConversationItem\ninjected task/message"]
ChildThread["child AgentThread"]
ChildResult["child assistant ConversationItem\nresult message"]
RootNotice["root ConversationItem\nsubagent notification"]
CloseTool["root ToolCall\nclose_agent"]
TargetThread["target AgentThread"]
RootTool -- "spawn/task edge" --> ChildInput
ChildInput --> ChildThread
ChildThread --> ChildResult
ChildResult -- "agent_result edge" --> RootNotice
CloseTool -- "close_agent edge" --> TargetThread
```
Top-level independent threads still get independent bundles. Spawned child
threads are different: they are part of the same rollout tree, so they belong in
the same raw event log, payload directory, and reduced `state.json`.
## Reducer Invariants
The reducer is strict where the raw evidence should be self-consistent:
- raw events are replayed in `seq` order;
- payload files must exist before events refer to them;
- reduced object IDs are stable within one replay;
- runtime events may be queued until the model-visible source or delivery target
has been observed;
- model-visible conversation is derived from model-facing payloads, not from
runtime convenience output;
- runtime payloads are evidence, not proof that the model saw the same bytes.
Those invariants let the reduced graph stay small while preserving a path back
to the original evidence whenever a debugger needs to explain why an object or
edge exists.

View File

@@ -0,0 +1,49 @@
//! Trace bundle manifest and local layout constants.
use serde::Deserialize;
use serde::Serialize;
use crate::model::AgentThreadId;
// Bundle-relative names for the standard local trace layout.
pub(crate) const MANIFEST_FILE_NAME: &str = "manifest.json";
pub(crate) const RAW_EVENT_LOG_FILE_NAME: &str = "trace.jsonl";
pub(crate) const PAYLOADS_DIR_NAME: &str = "payloads";
/// Conventional file name for a reducer-written `RolloutTrace` cache.
pub const REDUCED_STATE_FILE_NAME: &str = "state.json";
// Schema versions for the on-disk manifest and the reduced trace cache; bump
// when the corresponding serialized shape changes incompatibly.
pub(crate) const TRACE_MANIFEST_SCHEMA_VERSION: u32 = 1;
pub(crate) const REDUCED_TRACE_SCHEMA_VERSION: u32 = 1;
/// Manifest stored at the root of a trace bundle.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub(crate) struct TraceBundleManifest {
    pub(crate) schema_version: u32,
    /// Identity of this diagnostic capture (distinct from the rollout itself).
    pub(crate) trace_id: String,
    /// Identity of the Codex rollout/session being observed.
    pub(crate) rollout_id: String,
    /// Root thread for the recorded rollout. Replay should fail rather than
    /// inventing a placeholder, because every reduced object is scoped back to
    /// this thread tree.
    pub(crate) root_thread_id: AgentThreadId,
    pub(crate) started_at_unix_ms: i64,
    /// Bundle-relative path of the append-only raw event log.
    pub(crate) raw_event_log: String,
    /// Bundle-relative directory holding raw JSON payload files.
    pub(crate) payloads_dir: String,
}
impl TraceBundleManifest {
    /// Builds a manifest that uses the standard local bundle layout.
    ///
    /// The raw event log and payload directory always take the conventional
    /// bundle-relative names; only the identity fields vary per capture.
    pub(crate) fn new(
        trace_id: String,
        rollout_id: String,
        root_thread_id: AgentThreadId,
        started_at_unix_ms: i64,
    ) -> Self {
        let raw_event_log = RAW_EVENT_LOG_FILE_NAME.to_owned();
        let payloads_dir = PAYLOADS_DIR_NAME.to_owned();
        Self {
            schema_version: TRACE_MANIFEST_SCHEMA_VERSION,
            raw_event_log,
            payloads_dir,
            trace_id,
            rollout_id,
            root_thread_id,
            started_at_unix_ms,
        }
    }
}

View File

@@ -0,0 +1,225 @@
//! Hot-path helpers for recording upstream remote compaction attempts.
//!
//! Remote compaction is a model-facing request with a different semantic role
//! from normal sampling. Keeping the no-op capable trace handle in this crate
//! lets `codex-core` record exact endpoint payloads without owning trace schema
//! details.
use std::fmt::Display;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use codex_protocol::models::ResponseItem;
use serde::Serialize;
use serde_json::Value as JsonValue;
use crate::inference::trace_response_item_json;
use crate::model::AgentThreadId;
use crate::model::CodexTurnId;
use crate::model::CompactionId;
use crate::model::CompactionRequestId;
use crate::payload::RawPayloadKind;
use crate::raw_event::RawTraceEventContext;
use crate::raw_event::RawTraceEventPayload;
use crate::writer::TraceWriter;
// Process-wide ordinal source for compaction request IDs; consumed by
// `next_compaction_request_id`.
static NEXT_COMPACTION_REQUEST: AtomicU64 = AtomicU64::new(1);
/// Turn-local remote compaction tracing context.
///
/// A compaction can retry its upstream request before installing one checkpoint. The context
/// owns the stable checkpoint ID; each request attempt gets a separate request ID.
#[derive(Clone, Debug)]
pub struct CompactionTraceContext {
    // Enabled/disabled split kept private so callers never branch on it.
    state: CompactionTraceContextState,
}
/// Internal enabled/disabled switch behind `CompactionTraceContext`.
#[derive(Clone, Debug)]
enum CompactionTraceContextState {
    Disabled,
    Enabled(EnabledCompactionTraceContext),
}
/// Recording state cloned into every enabled attempt.
#[derive(Clone, Debug)]
struct EnabledCompactionTraceContext {
    writer: Arc<TraceWriter>,
    thread_id: AgentThreadId,
    codex_turn_id: CodexTurnId,
    /// Stable checkpoint ID shared by all retried request attempts.
    compaction_id: CompactionId,
    model: String,
    provider_name: String,
}
/// One upstream request attempt made while computing a compaction checkpoint.
#[derive(Clone, Debug)]
pub struct CompactionTraceAttempt {
    state: CompactionTraceAttemptState,
}
/// Internal enabled/disabled switch behind `CompactionTraceAttempt`.
#[derive(Clone, Debug)]
enum CompactionTraceAttemptState {
    Disabled,
    Enabled(EnabledCompactionTraceAttempt),
}
#[derive(Clone, Debug)]
struct EnabledCompactionTraceAttempt {
    context: EnabledCompactionTraceContext,
    /// Unique per attempt; distinguishes retries of the same checkpoint.
    compaction_request_id: CompactionRequestId,
}
/// Serialized body recorded as the compact endpoint response payload.
#[derive(Serialize)]
struct TracedCompactionCompleted {
    output_items: Vec<JsonValue>,
}
impl CompactionTraceContext {
    /// Builds a context that accepts trace calls and records nothing.
    pub fn disabled() -> Self {
        let state = CompactionTraceContextState::Disabled;
        Self { state }
    }
    /// Builds an enabled context for upstream attempts that compute one checkpoint.
    pub fn enabled(
        writer: Arc<TraceWriter>,
        thread_id: AgentThreadId,
        codex_turn_id: CodexTurnId,
        compaction_id: CompactionId,
        model: String,
        provider_name: String,
    ) -> Self {
        let inner = EnabledCompactionTraceContext {
            writer,
            thread_id,
            codex_turn_id,
            compaction_id,
            model,
            provider_name,
        };
        Self {
            state: CompactionTraceContextState::Enabled(inner),
        }
    }
    /// Starts a new upstream attempt and records the exact compact endpoint request.
    pub fn start_attempt(&self, request: &impl Serialize) -> CompactionTraceAttempt {
        match &self.state {
            // A disabled context hands back a recorder that drops every call.
            CompactionTraceContextState::Disabled => CompactionTraceAttempt::disabled(),
            CompactionTraceContextState::Enabled(context) => {
                let attempt = CompactionTraceAttempt {
                    state: CompactionTraceAttemptState::Enabled(EnabledCompactionTraceAttempt {
                        context: context.clone(),
                        compaction_request_id: next_compaction_request_id(),
                    }),
                };
                attempt.record_started(request);
                attempt
            }
        }
    }
}
impl CompactionTraceAttempt {
    /// Builds an attempt that records nothing.
    fn disabled() -> Self {
        Self {
            state: CompactionTraceAttemptState::Disabled,
        }
    }
    /// Records the exact compact endpoint request object for this attempt.
    fn record_started(&self, request: &impl Serialize) {
        let CompactionTraceAttemptState::Enabled(attempt) = &self.state else {
            return;
        };
        // If the payload file cannot be written there is no evidence to point
        // at, so the started event is skipped rather than written dangling.
        let Some(request_payload) = write_json_payload_best_effort(
            &attempt.context.writer,
            RawPayloadKind::CompactionRequest,
            request,
        ) else {
            return;
        };
        append_with_context_best_effort(
            &attempt.context,
            RawTraceEventPayload::CompactionRequestStarted {
                compaction_id: attempt.context.compaction_id.clone(),
                compaction_request_id: attempt.compaction_request_id.clone(),
                thread_id: attempt.context.thread_id.clone(),
                codex_turn_id: attempt.context.codex_turn_id.clone(),
                model: attempt.context.model.clone(),
                provider_name: attempt.context.provider_name.clone(),
                request_payload,
            },
        );
    }
    /// Records the non-streaming compact endpoint response payload.
    ///
    /// Compaction responses use the same response-item preservation rules as
    /// inference streams: traces are evidence, while normal ResponseItem
    /// serialization is shaped for future request construction.
    pub fn record_completed(&self, output_items: &[ResponseItem]) {
        // Check the enabled state before serializing: a disabled attempt must
        // not pay the per-item JSON conversion cost on the hot path.
        let CompactionTraceAttemptState::Enabled(attempt) = &self.state else {
            return;
        };
        let response_payload = TracedCompactionCompleted {
            output_items: output_items.iter().map(trace_response_item_json).collect(),
        };
        let Some(response_payload) = write_json_payload_best_effort(
            &attempt.context.writer,
            RawPayloadKind::CompactionResponse,
            &response_payload,
        ) else {
            return;
        };
        append_with_context_best_effort(
            &attempt.context,
            RawTraceEventPayload::CompactionRequestCompleted {
                compaction_id: attempt.context.compaction_id.clone(),
                compaction_request_id: attempt.compaction_request_id.clone(),
                response_payload,
            },
        );
    }
    /// Records pre-response failures from the compact endpoint.
    pub fn record_failed(&self, error: impl Display) {
        let CompactionTraceAttemptState::Enabled(attempt) = &self.state else {
            return;
        };
        append_with_context_best_effort(
            &attempt.context,
            RawTraceEventPayload::CompactionRequestFailed {
                compaction_id: attempt.context.compaction_id.clone(),
                compaction_request_id: attempt.compaction_request_id.clone(),
                error: error.to_string(),
            },
        );
    }
}
/// Allocates a fresh process-unique compaction request ID.
fn next_compaction_request_id() -> CompactionRequestId {
    format!(
        "compaction_request:{}",
        NEXT_COMPACTION_REQUEST.fetch_add(1, Ordering::Relaxed)
    )
}
/// Persists `payload` as a JSON payload file, swallowing write errors.
fn write_json_payload_best_effort(
    writer: &TraceWriter,
    kind: RawPayloadKind,
    payload: &impl Serialize,
) -> Option<crate::RawPayloadRef> {
    match writer.write_json_payload(kind, payload) {
        Ok(reference) => Some(reference),
        // Tracing is best-effort: a failed payload write is simply dropped.
        Err(_) => None,
    }
}
/// Appends a raw event tagged with this context's thread/turn, ignoring errors.
fn append_with_context_best_effort(
    context: &EnabledCompactionTraceContext,
    payload: RawTraceEventPayload,
) {
    let thread_id = Some(context.thread_id.clone());
    let codex_turn_id = Some(context.codex_turn_id.clone());
    let event_context = RawTraceEventContext {
        thread_id,
        codex_turn_id,
    };
    // Best-effort: trace append failures never surface on the hot path.
    let _ = context.writer.append_with_context(event_context, payload);
}

View File

@@ -0,0 +1,369 @@
//! Hot-path helpers for recording upstream inference attempts.
//!
//! The model client should not need to know whether rollout tracing is enabled.
//! A disabled context records nothing, which keeps one-shot HTTP calls,
//! WebSocket reuse, and retry/fallback attempts on the same code path.
use std::fmt::Display;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::TokenUsage;
use serde::Serialize;
use serde_json::Value as JsonValue;
use crate::model::AgentThreadId;
use crate::model::CodexTurnId;
use crate::model::InferenceCallId;
use crate::payload::RawPayloadKind;
use crate::raw_event::RawTraceEventContext;
use crate::raw_event::RawTraceEventPayload;
use crate::writer::TraceWriter;
// Process-wide ordinal source for inference call IDs; consumed by
// `next_inference_call_id`.
static NEXT_INFERENCE_ATTEMPT: AtomicU64 = AtomicU64::new(1);
/// Turn-local inference tracing context.
///
/// This is intentionally a no-op capable handle instead of an `Option` at each
/// transport callsite. Whether tracing is enabled is a session concern; retry,
/// fallback, and stream mapping code should always be able to say what happened
/// without first branching on trace availability.
#[derive(Clone, Debug)]
pub struct InferenceTraceContext {
    // Enabled/disabled split kept private so callers never branch on it.
    state: InferenceTraceContextState,
}
/// Internal enabled/disabled switch behind `InferenceTraceContext`.
#[derive(Clone, Debug)]
enum InferenceTraceContextState {
    Disabled,
    Enabled(EnabledInferenceTraceContext),
}
/// Recording state cloned into every enabled attempt.
#[derive(Clone, Debug)]
struct EnabledInferenceTraceContext {
    writer: Arc<TraceWriter>,
    thread_id: AgentThreadId,
    codex_turn_id: CodexTurnId,
    model: String,
    provider_name: String,
}
/// One concrete upstream request attempt.
///
/// A Codex turn can create multiple attempts when auth recovery retries the
/// HTTP request or WebSocket setup falls back to HTTP. Completion is often
/// observed after the client returns the response stream, so attempts are
/// cloneable and self-contained.
#[derive(Clone, Debug)]
pub struct InferenceTraceAttempt {
    state: InferenceTraceAttemptState,
}
/// Internal enabled/disabled switch behind `InferenceTraceAttempt`.
#[derive(Clone, Debug)]
enum InferenceTraceAttemptState {
    Disabled,
    Enabled(EnabledInferenceTraceAttempt),
}
#[derive(Clone, Debug)]
struct EnabledInferenceTraceAttempt {
    context: EnabledInferenceTraceContext,
    /// Unique per attempt; distinguishes retries within one turn.
    inference_call_id: InferenceCallId,
}
/// Non-delta response payload saved when a traced inference stream completes.
///
/// We intentionally record completed output items instead of every stream delta
/// here. The raw stream can be added later as a separate payload class; this
/// response summary gives the reducer stable response identity, usage, and
/// model-visible output without duplicating high-volume text deltas.
#[derive(Serialize)]
struct TracedResponseStreamCompleted<'a> {
    response_id: &'a str,
    token_usage: &'a Option<TokenUsage>,
    output_items: Vec<JsonValue>,
}
impl InferenceTraceContext {
    /// Builds a context that accepts trace calls and records nothing.
    pub fn disabled() -> Self {
        let state = InferenceTraceContextState::Disabled;
        Self { state }
    }
    /// Builds an enabled context for all upstream attempts made by one Codex turn.
    pub fn enabled(
        writer: Arc<TraceWriter>,
        thread_id: AgentThreadId,
        codex_turn_id: CodexTurnId,
        model: String,
        provider_name: String,
    ) -> Self {
        let inner = EnabledInferenceTraceContext {
            writer,
            thread_id,
            codex_turn_id,
            model,
            provider_name,
        };
        Self {
            state: InferenceTraceContextState::Enabled(inner),
        }
    }
    /// Starts a new attempt after the concrete provider request has been built.
    pub fn start_attempt(&self) -> InferenceTraceAttempt {
        match &self.state {
            // A disabled context hands back a recorder that drops every call.
            InferenceTraceContextState::Disabled => InferenceTraceAttempt::disabled(),
            InferenceTraceContextState::Enabled(context) => InferenceTraceAttempt {
                state: InferenceTraceAttemptState::Enabled(EnabledInferenceTraceAttempt {
                    context: context.clone(),
                    inference_call_id: next_inference_call_id(),
                }),
            },
        }
    }
}
impl InferenceTraceAttempt {
    /// Builds an attempt that records nothing.
    pub fn disabled() -> Self {
        Self {
            state: InferenceTraceAttemptState::Disabled,
        }
    }
    /// Records the exact request object about to be sent to the model provider.
    pub fn record_started(&self, request: &impl Serialize) {
        let InferenceTraceAttemptState::Enabled(attempt) = &self.state else {
            return;
        };
        // If the payload file cannot be written there is no evidence to point
        // at, so the started event is skipped rather than written dangling.
        let Some(request_payload) = write_json_payload_best_effort(
            &attempt.context.writer,
            RawPayloadKind::InferenceRequest,
            request,
        ) else {
            return;
        };
        append_with_context_best_effort(
            &attempt.context,
            RawTraceEventPayload::InferenceStarted {
                inference_call_id: attempt.inference_call_id.clone(),
                thread_id: attempt.context.thread_id.clone(),
                codex_turn_id: attempt.context.codex_turn_id.clone(),
                model: attempt.context.model.clone(),
                provider_name: attempt.context.provider_name.clone(),
                request_payload,
            },
        );
    }
    /// Records a bounded, non-streaming summary of the completed response stream.
    ///
    /// The caller passes protocol-native response items so this crate owns the
    /// trace-specific serialization rules. That keeps codex-core focused on
    /// transport behavior while preserving trace evidence that normal request
    /// serialization intentionally omits.
    pub fn record_completed(
        &self,
        response_id: &str,
        token_usage: &Option<TokenUsage>,
        output_items: &[ResponseItem],
    ) {
        // Check the enabled state before serializing: a disabled attempt must
        // not pay the per-item JSON conversion cost on the hot path.
        let InferenceTraceAttemptState::Enabled(attempt) = &self.state else {
            return;
        };
        let response_payload = TracedResponseStreamCompleted {
            response_id,
            token_usage,
            output_items: output_items.iter().map(trace_response_item_json).collect(),
        };
        let Some(response_payload) = write_json_payload_best_effort(
            &attempt.context.writer,
            RawPayloadKind::InferenceResponse,
            &response_payload,
        ) else {
            return;
        };
        append_with_context_best_effort(
            &attempt.context,
            RawTraceEventPayload::InferenceCompleted {
                inference_call_id: attempt.inference_call_id.clone(),
                response_id: Some(response_id.to_string()),
                response_payload,
            },
        );
    }
    /// Records pre-response and mid-stream failures.
    pub fn record_failed(&self, error: impl Display) {
        let InferenceTraceAttemptState::Enabled(attempt) = &self.state else {
            return;
        };
        append_with_context_best_effort(
            &attempt.context,
            RawTraceEventPayload::InferenceFailed {
                inference_call_id: attempt.inference_call_id.clone(),
                error: error.to_string(),
                // Mid-stream partial output capture is not wired up here.
                partial_response_payload: None,
            },
        );
    }
}
/// Serializes a response item for trace evidence rather than future request construction.
///
/// The protocol serializer intentionally omits some readable reasoning content
/// when shaping items for later model requests. Rollout traces need the item as
/// Codex received it, so this helper restores that content in the raw payload.
pub(crate) fn trace_response_item_json(item: &ResponseItem) -> JsonValue {
let mut value = serde_json::to_value(item).unwrap_or_else(|err| {
serde_json::json!({
"serialization_error": err.to_string(),
})
});
if let ResponseItem::Reasoning {
content: Some(content),
..
} = item
&& let JsonValue::Object(object) = &mut value
{
object.insert(
"content".to_string(),
serde_json::to_value(content).unwrap_or_else(|err| {
serde_json::json!({
"serialization_error": err.to_string(),
})
}),
);
}
value
}
/// Allocates a fresh process-unique inference call ID.
fn next_inference_call_id() -> InferenceCallId {
    format!(
        "inference:{}",
        NEXT_INFERENCE_ATTEMPT.fetch_add(1, Ordering::Relaxed)
    )
}
/// Persists `payload` as a JSON payload file, swallowing write errors.
fn write_json_payload_best_effort(
    writer: &TraceWriter,
    kind: RawPayloadKind,
    payload: &impl Serialize,
) -> Option<crate::RawPayloadRef> {
    match writer.write_json_payload(kind, payload) {
        Ok(reference) => Some(reference),
        // Tracing is best-effort: a failed payload write is simply dropped.
        Err(_) => None,
    }
}
/// Appends a raw event tagged with this context's thread/turn, ignoring errors.
fn append_with_context_best_effort(
    context: &EnabledInferenceTraceContext,
    payload: RawTraceEventPayload,
) {
    let thread_id = Some(context.thread_id.clone());
    let codex_turn_id = Some(context.codex_turn_id.clone());
    let event_context = RawTraceEventContext {
        thread_id,
        codex_turn_id,
    };
    // Best-effort: trace append failures never surface on the hot path.
    let _ = context.writer.append_with_context(event_context, payload);
}
#[cfg(test)]
mod tests {
    use std::sync::Arc;
    use codex_protocol::models::ReasoningItemContent;
    use codex_protocol::models::ReasoningItemReasoningSummary;
    use pretty_assertions::assert_eq;
    use serde_json::json;
    use tempfile::TempDir;
    use super::*;
    use crate::model::ExecutionStatus;
    use crate::replay_bundle;
    /// End-to-end round trip: an enabled context records one attempt into a
    /// bundle on disk, and `replay_bundle` reduces it back into one completed
    /// inference call with both payload files present.
    #[test]
    fn enabled_context_records_replayable_inference_attempt() -> anyhow::Result<()> {
        let temp = TempDir::new()?;
        let writer = Arc::new(TraceWriter::create(
            temp.path(),
            "trace-1".to_string(),
            "rollout-1".to_string(),
            "thread-root".to_string(),
        )?);
        // Thread and turn must exist before the inference events reference them.
        writer.append(RawTraceEventPayload::ThreadStarted {
            thread_id: "thread-root".to_string(),
            agent_path: "/root".to_string(),
            metadata_payload: None,
        })?;
        writer.append(RawTraceEventPayload::CodexTurnStarted {
            codex_turn_id: "turn-1".to_string(),
            thread_id: "thread-root".to_string(),
        })?;
        let context = InferenceTraceContext::enabled(
            writer,
            "thread-root".to_string(),
            "turn-1".to_string(),
            "gpt-test".to_string(),
            "test-provider".to_string(),
        );
        let attempt = context.start_attempt();
        attempt.record_started(&json!({
            "model": "gpt-test",
            "input": [{
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "hello"}]
            }],
        }));
        attempt.record_completed("resp-1", &None, &[]);
        let rollout = replay_bundle(temp.path())?;
        let inference = rollout
            .inference_calls
            .values()
            .next()
            .expect("recorded inference call");
        assert_eq!(rollout.inference_calls.len(), 1);
        assert_eq!(inference.thread_id, "thread-root");
        assert_eq!(inference.codex_turn_id, "turn-1");
        assert_eq!(inference.execution.status, ExecutionStatus::Completed);
        assert_eq!(inference.upstream_request_id, Some("resp-1".to_string()));
        // One request payload plus one response payload.
        assert_eq!(rollout.raw_payloads.len(), 2);
        Ok(())
    }
    /// The trace serializer must keep readable reasoning `content` that the
    /// normal protocol serializer drops when shaping items for future requests.
    #[test]
    fn traced_response_item_preserves_reasoning_content_omitted_by_normal_serializer() {
        let item = ResponseItem::Reasoning {
            id: "rs-1".to_string(),
            summary: vec![ReasoningItemReasoningSummary::SummaryText {
                text: "summary".to_string(),
            }],
            content: Some(vec![ReasoningItemContent::Text {
                text: "raw reasoning".to_string(),
            }]),
            encrypted_content: Some("encoded".to_string()),
        };
        let normal = serde_json::to_value(&item).expect("response item serializes");
        let traced = trace_response_item_json(&item);
        // Baseline check: the normal serializer really omits `content`.
        assert_eq!(normal.get("content"), None);
        assert_eq!(
            traced,
            json!({
                "type": "reasoning",
                "summary": [{"type": "summary_text", "text": "summary"}],
                "content": [{"type": "text", "text": "raw reasoning"}],
                "encrypted_content": "encoded",
            }),
        );
    }
}

View File

@@ -0,0 +1,49 @@
//! Trace bundle format, writer, and reducer for Codex rollouts.
//!
//! This crate owns the trace schema. Hot-path Codex code should depend on the
//! small writer API here; semantic replay and viewer projections stay outside
//! `codex-core`.
//!
//! See `README.md` for the system diagram and reducer model.
// Crate-private modules; the public surface is the re-exports below.
mod bundle;
mod compaction;
mod inference;
mod model;
mod payload;
mod raw_event;
mod reducer;
mod writer;
/// Conventional reduced-state cache name written next to a raw trace bundle.
pub use bundle::REDUCED_STATE_FILE_NAME;
/// No-op-capable handle for recording remote-compaction requests.
pub use compaction::CompactionTraceAttempt;
/// Shared recorder context for a compaction checkpoint.
pub use compaction::CompactionTraceContext;
/// No-op-capable handle for recording one upstream inference attempt.
pub use inference::InferenceTraceAttempt;
/// Shared recorder context for inference attempts within one Codex turn.
pub use inference::InferenceTraceContext;
/// Public reduced trace model returned by replay.
pub use model::*;
/// Stable identifier for one raw payload inside a rollout bundle.
pub use payload::RawPayloadId;
/// Coarse role labels for raw payload files.
pub use payload::RawPayloadKind;
/// Reference to a raw request/response/log payload stored in the bundle.
pub use payload::RawPayloadRef;
/// Monotonic sequence number assigned by the raw trace writer.
pub use raw_event::RawEventSeq;
/// Runtime requester observed before semantic reduction.
pub use raw_event::RawToolCallRequester;
/// One append-only raw trace event from `trace.jsonl`.
pub use raw_event::RawTraceEvent;
/// Event-envelope context supplied by hot-path trace producers.
pub use raw_event::RawTraceEventContext;
/// Typed payload for one raw trace event.
pub use raw_event::RawTraceEventPayload;
/// Replay a raw trace bundle and write/read its reduced `RolloutTrace`.
pub use reducer::replay_bundle;
/// Append-only writer used by hot-path Codex instrumentation.
pub use writer::TraceWriter;

View File

@@ -0,0 +1,176 @@
use serde::Deserialize;
use serde::Serialize;
use crate::payload::RawPayloadId;
use super::AgentThreadId;
use super::CodeCellId;
use super::CodexTurnId;
use super::CompactionId;
use super::ConversationItemId;
use super::EdgeId;
use super::InferenceCallId;
use super::ModelVisibleCallId;
use super::ToolCallId;
use super::session::ExecutionWindow;
/// One logical transcript item or transcript boundary.
///
/// The reducer builds conversation items primarily from inference request and
/// response payloads. Runtime objects can be listed in `produced_by`, but they
/// must not rewrite what the item body says the model saw. Structural items,
/// such as compaction markers, live in the same ordered list so conversation
/// views can show where the live history changed.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConversationItem {
    /// Reducer-assigned transcript item ID.
    pub item_id: ConversationItemId,
    /// Thread whose transcript contains this item.
    pub thread_id: AgentThreadId,
    /// Runtime activation that first introduced this item locally, when known.
    pub codex_turn_id: Option<CodexTurnId>,
    /// Wall-clock timestamp when this item was first observed.
    pub first_seen_at_unix_ms: i64,
    pub role: ConversationRole,
    /// Codex channel for assistant/tool content, when the item is channel-specific.
    pub channel: Option<ConversationChannel>,
    pub kind: ConversationItemKind,
    pub body: ConversationBody,
    /// Protocol/model `call_id` for function/custom tool call and output items.
    pub call_id: Option<ModelVisibleCallId>,
    /// Runtime or control-plane objects that caused this conversation item to exist.
    pub produced_by: Vec<ProducerRef>,
}
/// Model-visible role assigned to a conversation item.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConversationRole {
    System,
    Developer,
    User,
    Assistant,
    Tool,
}
/// Codex channel for model-visible content.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConversationChannel {
    Analysis,
    Commentary,
    Final,
    /// Remote compaction summaries are reintroduced as assistant summary-channel content.
    Summary,
}
/// Responses item category after normalization into the reduced transcript.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ConversationItemKind {
    Message,
    Reasoning,
    FunctionCall,
    FunctionCallOutput,
    CustomToolCall,
    CustomToolCallOutput,
    /// Structural marker inserted where live history was replaced by compaction.
    CompactionMarker,
}
/// Ordered content parts for a reduced conversation item.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ConversationBody {
    /// Renderable model-visible parts. Raw payload refs are used when the bytes
    /// are too large or too structured for the normal conversation path.
    pub parts: Vec<ConversationPart>,
}
/// One model-visible part inside a conversation item.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum ConversationPart {
    /// Plain model-visible text.
    Text {
        text: String,
    },
    /// A model-provided summary of content whose full form may also be present.
    ///
    /// Reasoning summaries are not interchangeable with raw reasoning text:
    /// both can be present in one payload, and replay/debug tooling needs to
    /// preserve which representation the model actually returned.
    Summary {
        text: String,
    },
    /// Opaque model-visible content that is intentionally not decoded here.
    ///
    /// Reasoning can be carried as `encrypted_content` with no readable text.
    /// Keeping that blob inline makes it part of item identity, unlike a raw
    /// payload reference whose ID changes every time the same item is replayed
    /// in a later inference request.
    Encoded {
        label: String,
        value: String,
    },
    /// Small JSON-ish body represented by a summary plus a raw ref.
    Json {
        summary: String,
        raw_payload_id: RawPayloadId,
    },
    /// Source code content with its language tag.
    Code {
        language: String,
        source: String,
    },
    /// Large or uncommon payload that should be lazy-loaded from details UI.
    PayloadRef {
        label: String,
        raw_payload_id: RawPayloadId,
    },
}
/// Explanation for where a conversation item came from.
///
/// This is deliberately plural at the call site: a function output can be both
/// model-visible conversation and the product of a runtime tool call.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum ProducerRef {
    UserInput,
    Inference { inference_call_id: InferenceCallId },
    Tool { tool_call_id: ToolCallId },
    CodeCell { code_cell_id: CodeCellId },
    InteractionEdge { edge_id: EdgeId },
    Compaction { compaction_id: CompactionId },
    /// Produced by the harness itself rather than a traced runtime object.
    Harness,
}
/// One outbound inference request and its response metadata.
///
/// Full upstream request/response bodies live behind raw payload refs. The
/// request/response item ID lists are the reduced, model-visible snapshot.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InferenceCall {
    pub inference_call_id: InferenceCallId,
    pub thread_id: AgentThreadId,
    pub codex_turn_id: CodexTurnId,
    /// Start/end/status window for this request attempt.
    pub execution: ExecutionWindow,
    pub model: String,
    pub provider_name: String,
    /// Upstream request ID returned by HTTP/proxy/engine infrastructure.
    pub upstream_request_id: Option<String>,
    /// Complete ordered input snapshot sent with this request.
    pub request_item_ids: Vec<ConversationItemId>,
    /// Ordered output items produced by this response.
    pub response_item_ids: Vec<ConversationItemId>,
    /// Runtime tool calls whose model-visible call item came from this response.
    pub tool_call_ids_started_by_response: Vec<ToolCallId>,
    /// Token usage, when the response reported it.
    pub usage: Option<TokenUsage>,
    /// Full upstream request payload as recorded at send time.
    pub raw_request_payload_id: RawPayloadId,
    /// Full upstream response payload. `None` while running or after pre-stream failures.
    pub raw_response_payload_id: Option<RawPayloadId>,
}
/// Token usage summary for one inference call.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TokenUsage {
    pub input_tokens: u64,
    /// Portion of input served from prompt cache — presumably a subset of
    /// `input_tokens`; confirm against the upstream usage schema.
    pub cached_input_tokens: u64,
    pub output_tokens: u64,
    pub reasoning_output_tokens: u64,
}

View File

@@ -0,0 +1,121 @@
//! Reduced rollout trace model.
//!
//! These types describe the deterministic replay output. They intentionally
//! separate model-visible conversation from runtime/debug objects.
use std::collections::BTreeMap;
use serde::Deserialize;
use serde::Serialize;
use crate::payload::RawPayloadId;
use crate::payload::RawPayloadRef;
mod conversation;
mod runtime;
mod session;
pub use conversation::*;
pub use runtime::*;
pub use session::*;
/// Codex conversation/session UUID.
pub type AgentThreadId = String;
/// Stable multi-agent routing path such as `/root` or `/root/search_docs`.
pub type AgentPath = String;
/// Runtime submission/activation UUID. This is not a chat turn.
pub type CodexTurnId = String;
/// Reduced transcript item ID assigned by the trace reducer.
pub type ConversationItemId = String;
/// Local ID for one outbound upstream inference request.
pub type InferenceCallId = String;
/// Reducer-owned ID for one runtime tool-call object.
pub type ToolCallId = String;
/// Responses `call_id` / custom-tool call ID visible in inference payloads.
pub type ModelVisibleCallId = String;
/// Tool invocation ID assigned inside the code-mode JavaScript runtime.
pub type CodeModeRuntimeToolId = String;
/// Reducer-owned ID for one model-authored `exec` JavaScript cell.
pub type CodeCellId = String;
/// Process/session ID returned by Codex's terminal runtime.
pub type TerminalId = String;
/// Reducer-owned ID for one command/write/poll operation against a terminal.
pub type TerminalOperationId = String;
/// Reducer-owned ID for one installed conversation-history checkpoint.
pub type CompactionId = String;
/// Reducer-owned ID for one upstream request that computes a compaction.
pub type CompactionRequestId = String;
/// Reducer-owned ID for one information-flow edge.
pub type EdgeId = String;
/// Reducer-owned ID for request/log correlation metadata.
pub type CorrelationId = String;
/// Canonical reduced graph for one Codex rollout.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RolloutTrace {
    pub schema_version: u32,
    /// Unique identity for this trace capture.
    ///
    /// `rollout_id` names the Codex rollout/session being observed. `trace_id`
    /// names the diagnostic artifact produced for that rollout, which keeps
    /// storage/replay identity separate from the product-level session identity.
    pub trace_id: String,
    /// CLI-visible rollout/run identity. Higher-level experiment/sample IDs wrap this object.
    pub rollout_id: String,
    pub started_at_unix_ms: i64,
    /// Wall-clock timestamp for terminal rollout status. `None` means running or partial trace.
    pub ended_at_unix_ms: Option<i64>,
    pub status: RolloutStatus,
    /// Root of the thread tree every reduced object is scoped under.
    pub root_thread_id: AgentThreadId,
    /// Agent threads keyed by thread ID.
    pub threads: BTreeMap<AgentThreadId, AgentThread>,
    /// Runtime activations keyed by turn ID.
    pub codex_turns: BTreeMap<CodexTurnId, CodexTurn>,
    /// Reduced transcript items keyed by reducer-assigned item ID.
    pub conversation_items: BTreeMap<ConversationItemId, ConversationItem>,
    /// Upstream inference requests keyed by local call ID.
    pub inference_calls: BTreeMap<InferenceCallId, InferenceCall>,
    /// Model-authored `exec` JavaScript cells keyed by reducer-owned cell ID.
    pub code_cells: BTreeMap<CodeCellId, CodeCell>,
    /// Runtime tool-call objects keyed by reducer-owned tool-call ID.
    pub tool_calls: BTreeMap<ToolCallId, ToolCall>,
    /// Terminal runtime sessions keyed by process/session ID returned by the runtime.
    pub terminal_sessions: BTreeMap<TerminalId, TerminalSession>,
    /// Commands/writes/polls against terminals keyed by reducer-owned operation ID.
    pub terminal_operations: BTreeMap<TerminalOperationId, TerminalOperation>,
    /// Installed compaction checkpoints keyed by checkpoint ID.
    pub compactions: BTreeMap<CompactionId, Compaction>,
    /// Upstream remote compaction calls keyed by local request ID.
    pub compaction_requests: BTreeMap<CompactionRequestId, CompactionRequest>,
    /// Information-flow edges between threads, cells, tools, and runtime resources.
    pub interaction_edges: BTreeMap<EdgeId, InteractionEdge>,
    /// Raw JSON payloads keyed by raw-payload ID. Most point at files outside this object.
    pub raw_payloads: BTreeMap<RawPayloadId, RawPayloadRef>,
}
impl RolloutTrace {
    /// Builds an empty reduced trace that a reducer can populate.
    ///
    /// A fresh trace starts in `Running` with no terminal timestamp; every
    /// object table begins empty and is filled during replay.
    pub(crate) fn new(
        schema_version: u32,
        trace_id: String,
        rollout_id: String,
        root_thread_id: AgentThreadId,
        started_at_unix_ms: i64,
    ) -> Self {
        Self {
            schema_version,
            trace_id,
            rollout_id,
            root_thread_id,
            started_at_unix_ms,
            ended_at_unix_ms: None,
            status: RolloutStatus::Running,
            threads: BTreeMap::default(),
            codex_turns: BTreeMap::default(),
            conversation_items: BTreeMap::default(),
            inference_calls: BTreeMap::default(),
            code_cells: BTreeMap::default(),
            tool_calls: BTreeMap::default(),
            terminal_sessions: BTreeMap::default(),
            terminal_operations: BTreeMap::default(),
            compactions: BTreeMap::default(),
            compaction_requests: BTreeMap::default(),
            interaction_edges: BTreeMap::default(),
            raw_payloads: BTreeMap::default(),
        }
    }
}

View File

@@ -0,0 +1,331 @@
use serde::Deserialize;
use serde::Serialize;
use crate::payload::RawPayloadId;
use crate::raw_event::RawEventSeq;
use super::AgentPath;
use super::AgentThreadId;
use super::CodeCellId;
use super::CodeModeRuntimeToolId;
use super::CodexTurnId;
use super::CompactionId;
use super::CompactionRequestId;
use super::ConversationItemId;
use super::EdgeId;
use super::ModelVisibleCallId;
use super::TerminalId;
use super::TerminalOperationId;
use super::ToolCallId;
use super::session::ExecutionWindow;
/// Runtime/debug object for one model-authored `exec` cell.
///
/// The JavaScript source and custom-tool outputs are still conversation items;
/// this object tracks the code-mode runtime boundary and nested runtime work.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CodeCell {
    /// Reducer-owned graph id derived from the model-visible `exec` call id.
    /// Runtime cell ids are stored separately because they are only handles for
    /// later waits and nested code-mode tools.
    pub code_cell_id: CodeCellId,
    /// Call id on the model-visible `exec` custom tool call item.
    pub model_visible_call_id: ModelVisibleCallId,
    /// Thread that owns this cell.
    pub thread_id: AgentThreadId,
    /// Codex turn in which this cell started.
    pub codex_turn_id: CodexTurnId,
    /// Conversation item containing the model-authored JavaScript.
    pub source_item_id: ConversationItemId,
    /// Model-visible custom tool output items attached to this cell.
    pub output_item_ids: Vec<ConversationItemId>,
    /// Raw code-mode runtime/session id, useful when matching runtime payloads.
    pub runtime_cell_id: Option<String>,
    /// Full JS-cell runtime window; yielded cells can outlive the initial custom call.
    pub execution: ExecutionWindow,
    /// Latest observed runtime lifecycle state.
    pub runtime_status: CodeCellRuntimeStatus,
    /// Wall-clock time of the runtime's first response, once observed.
    pub initial_response_at_unix_ms: Option<i64>,
    /// Sequence number of the runtime's first response.
    pub initial_response_seq: Option<RawEventSeq>,
    /// Wall-clock time at which the cell yielded, if it yielded.
    pub yielded_at_unix_ms: Option<i64>,
    /// Sequence number at which the cell yielded.
    pub yielded_seq: Option<RawEventSeq>,
    /// Model-authored JavaScript source for this cell.
    pub source_js: String,
    /// Tool calls whose requester is this code cell.
    pub nested_tool_call_ids: Vec<ToolCallId>,
    /// Code-mode wait tool calls associated with this cell.
    pub wait_tool_call_ids: Vec<ToolCallId>,
}
/// Code-mode runtime lifecycle.
///
/// `Completed`, `Failed`, and `Terminated` describe terminal results; the
/// remaining variants describe a still-live runtime.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CodeCellRuntimeStatus {
    /// The `exec` request has been accepted but the runtime has not yet started user code.
    Starting,
    /// Runtime is executing JavaScript and has not yet yielded or terminated.
    Running,
    /// Initial `exec` returned while JavaScript kept running in the background.
    Yielded,
    /// Runtime reached a normal terminal result.
    Completed,
    /// Runtime reached an error terminal result.
    Failed,
    /// Runtime was explicitly terminated.
    Terminated,
}
/// Installed conversation-history replacement boundary.
///
/// Duration-bearing upstream requests live in `CompactionRequest`. This object
/// is the checkpoint where replacement history became the live thread history.
/// The boundary marker and the model-visible summary are separate conversation
/// items: the marker says where history was replaced, while the summary is part
/// of `replacement_item_ids` when the compact endpoint returned one.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Compaction {
    /// Reducer-owned id for this compaction checkpoint.
    pub compaction_id: CompactionId,
    /// Thread whose history was replaced.
    pub thread_id: AgentThreadId,
    /// Codex turn during which the replacement was installed.
    pub codex_turn_id: CodexTurnId,
    /// Wall-clock time at which the replacement history was installed.
    pub installed_at_unix_ms: i64,
    /// Structural conversation item marking where pre-compaction history ended.
    pub marker_item_id: ConversationItemId,
    /// Upstream compaction request attempts that contributed to this checkpoint.
    pub request_ids: Vec<CompactionRequestId>,
    /// Logical conversation items present immediately before replacement.
    pub input_item_ids: Vec<ConversationItemId>,
    /// Replacement conversation items installed by the checkpoint.
    pub replacement_item_ids: Vec<ConversationItemId>,
}
/// One upstream remote request made while computing a compaction checkpoint.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CompactionRequest {
    /// Reducer-owned id for this individual request attempt.
    pub compaction_request_id: CompactionRequestId,
    /// Compaction checkpoint this attempt belongs to.
    pub compaction_id: CompactionId,
    /// Thread whose history is being compacted.
    pub thread_id: AgentThreadId,
    /// Codex turn during which the request ran.
    pub codex_turn_id: CodexTurnId,
    /// Request lifecycle window and terminal status.
    pub execution: ExecutionWindow,
    /// Upstream model used for this attempt.
    pub model: String,
    /// Upstream provider used for this attempt.
    pub provider_name: String,
    /// Full compaction request payload.
    pub raw_request_payload_id: RawPayloadId,
    /// Full compaction response payload. `None` while running or after pre-response failures.
    pub raw_response_payload_id: Option<RawPayloadId>,
}
/// Runtime operation requested by the model, a JS code cell, or Codex itself.
///
/// A `ToolCall` is not a chat transcript row. Model-visible call/output items
/// link to it through `model_visible_*_item_ids`; runtime-only tools can have
/// empty model-visible lists.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ToolCall {
    /// Reducer-owned graph id for this runtime tool call.
    pub tool_call_id: ToolCallId,
    /// Model-visible protocol call ID, if the model directly requested this tool.
    pub model_visible_call_id: Option<ModelVisibleCallId>,
    /// Code-mode runtime's internal tool invocation ID, if this call came from JS.
    pub code_mode_runtime_tool_id: Option<CodeModeRuntimeToolId>,
    /// Thread that owns this tool call.
    pub thread_id: AgentThreadId,
    /// Runtime activation that started the tool. Background work may outlive this turn.
    pub started_by_codex_turn_id: Option<CodexTurnId>,
    /// Tool execution window and terminal status.
    pub execution: ExecutionWindow,
    /// Who asked for this tool (model or code cell).
    pub requester: ToolCallRequester,
    /// Tool category.
    pub kind: ToolCallKind,
    /// Model-visible items that requested this tool, when any exist.
    pub model_visible_call_item_ids: Vec<ConversationItemId>,
    /// Model-visible items carrying this tool's output, when any exist.
    pub model_visible_output_item_ids: Vec<ConversationItemId>,
    /// Terminal operation started by this tool, when the tool touched a terminal.
    pub terminal_operation_id: Option<TerminalOperationId>,
    /// Bounded summary for card/list rendering.
    pub summary: ToolCallSummary,
    /// Original invocation at the Codex tool boundary.
    ///
    /// Direct model tools store the model's function/custom call payload here.
    /// Code-mode nested tools store the JSON call made by model-authored JS.
    /// Runtime protocol events are deliberately kept separate below because
    /// they describe how Codex executed the request, not what the caller sent.
    pub raw_invocation_payload_id: Option<RawPayloadId>,
    /// Result returned to the immediate requester.
    ///
    /// For direct tools this is the tool output item returned to the model; for
    /// code-mode nested tools this is the value returned to JavaScript.
    pub raw_result_payload_id: Option<RawPayloadId>,
    /// Runtime/protocol payloads observed while executing the tool.
    ///
    /// Examples include exec begin/end, patch begin/end, and MCP begin/end
    /// events. Reducers can use these to build richer runtime objects such as
    /// terminal operations without overwriting the canonical invocation/result.
    pub raw_runtime_payload_ids: Vec<RawPayloadId>,
}
/// Requester of a runtime tool.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum ToolCallRequester {
    /// The model requested the tool directly through a model-visible call.
    Model,
    /// Model-authored JavaScript requested the tool through code-mode.
    CodeCell {
        /// Reduced graph id of the requesting code cell.
        code_cell_id: CodeCellId,
    },
}
/// Runtime tool category.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum ToolCallKind {
    /// Command execution in a terminal.
    ExecCommand,
    /// Write/poll against an existing terminal's stdin.
    WriteStdin,
    /// File patch application.
    ApplyPatch,
    /// MCP tool invocation, identified by server and tool name.
    Mcp {
        server: String,
        tool: String,
    },
    Web,
    ImageGeneration,
    /// Multi-agent: spawn a child agent.
    SpawnAgent,
    /// Multi-agent: assign a task to an agent.
    AssignAgentTask,
    /// Multi-agent: send a message to an agent.
    SendMessage,
    /// Multi-agent wait operation. Code-mode wait is modeled separately.
    WaitAgent,
    /// Multi-agent: close a child agent.
    CloseAgent,
    /// Any other tool, carried by its raw name.
    Other {
        name: String,
    },
}
/// Bounded card/list summary for a tool call.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum ToolCallSummary {
    /// Tool is summarized by its terminal operation.
    Terminal { operation_id: TerminalOperationId },
    /// Multi-agent operation targeting another agent.
    Agent {
        target_agent_path: AgentPath,
        /// Task name/path segment when the operation creates or targets a task.
        task_name: Option<String>,
        /// Bounded preview of the carried message.
        message_preview: String,
    },
    /// Multi-agent wait operation.
    WaitAgent {
        /// Wait target, when narrower than "any child".
        target_agent_path: Option<AgentPath>,
        timeout_ms: Option<u64>,
    },
    /// Fallback summary with bounded input/output previews.
    Generic {
        label: String,
        input_preview: Option<String>,
        output_preview: Option<String>,
    },
}
/// Reusable terminal process/session returned by the runtime.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TerminalSession {
    /// Runtime terminal/process ID.
    pub terminal_id: TerminalId,
    /// Thread that owns this session.
    pub thread_id: AgentThreadId,
    /// Operation whose execution created this session.
    pub created_by_operation_id: TerminalOperationId,
    /// All operations that ran against this session, including the creator.
    pub operation_ids: Vec<TerminalOperationId>,
    /// Terminal lifetime. This can outlive the operation that created it.
    pub execution: ExecutionWindow,
}
/// One command/write/poll operation against a terminal session.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TerminalOperation {
    /// Reducer-owned id for this operation.
    pub operation_id: TerminalOperationId,
    /// Runtime terminal/process ID. `None` is legal only while the operation that creates it is starting.
    pub terminal_id: Option<TerminalId>,
    /// Tool call that performed this operation.
    pub tool_call_id: ToolCallId,
    /// Operation category (exec vs. stdin write).
    pub kind: TerminalOperationKind,
    /// Operation execution window. This is not necessarily the terminal session lifetime.
    pub execution: ExecutionWindow,
    /// Request summary as submitted to the runtime.
    pub request: TerminalRequest,
    /// Runtime-observed terminal result. Model-visible output links through observations.
    pub result: Option<TerminalResult>,
    /// Model-visible items that observed this operation.
    pub model_observations: Vec<TerminalModelObservation>,
    /// Raw runtime payloads observed for this operation.
    pub raw_payload_ids: Vec<RawPayloadId>,
}
/// Terminal operation category.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TerminalOperationKind {
    /// Run a command, creating or reusing a terminal.
    ExecCommand,
    /// Write to / poll an existing terminal's stdin.
    WriteStdin,
}
/// Terminal request summary.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum TerminalRequest {
    /// Request to run a command.
    ExecCommand {
        /// Command tokens as submitted to the runtime.
        command: Vec<String>,
        /// Pre-rendered display string for the command.
        display_command: String,
        /// Working directory for the command.
        cwd: String,
        /// Time budget before yielding, when the caller set one.
        yield_time_ms: Option<u64>,
        /// Output truncation budget, when the caller set one.
        max_output_tokens: Option<usize>,
    },
    /// Request to interact with an existing terminal.
    WriteStdin {
        /// Bytes/text sent to stdin. Empty string means poll/read without writing bytes.
        stdin: String,
        /// Time budget before yielding, when the caller set one.
        yield_time_ms: Option<u64>,
        /// Output truncation budget, when the caller set one.
        max_output_tokens: Option<usize>,
    },
}
/// Terminal result observed by the runtime.
///
/// This is debugger/runtime output. It is not proof that the model saw the same
/// bytes; link model-visible call/output items through `TerminalModelObservation`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TerminalResult {
    /// Process exit code. `None` if the process is still running or no exit status was produced.
    pub exit_code: Option<i32>,
    /// Captured standard output.
    pub stdout: String,
    /// Captured standard error.
    pub stderr: String,
    /// Tool runtime's formatted caller-facing output, when present.
    pub formatted_output: Option<String>,
    /// Token count before truncation, when the tool runtime reported it.
    pub original_token_count: Option<usize>,
    /// Streaming chunk ID, when this result was assembled from chunked terminal output.
    pub chunk_id: Option<String>,
}
/// Conversation items that observed a terminal operation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct TerminalModelObservation {
    /// Model-visible items that requested the observed operation.
    pub call_item_ids: Vec<ConversationItemId>,
    /// Model-visible items carrying the observed output.
    pub output_item_ids: Vec<ConversationItemId>,
    /// How the model came to see this operation.
    pub source: TerminalObservationSource,
}
/// Source of model-visible terminal observation.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TerminalObservationSource {
    /// The model called the terminal tool directly.
    DirectToolCall,
    /// The observation surfaced through a code cell's output.
    CodeCellOutput,
}
/// Directed information-flow relationship between trace objects.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct InteractionEdge {
    /// Reducer-owned id for this edge.
    pub edge_id: EdgeId,
    /// Edge category (spawn, message, result, ...).
    pub kind: InteractionEdgeKind,
    /// Where the information originated.
    pub source: TraceAnchor,
    /// Where the information was delivered.
    pub target: TraceAnchor,
    /// Wall-clock time at which the interaction began.
    pub started_at_unix_ms: i64,
    /// Wall-clock time at which the interaction ended, when known.
    pub ended_at_unix_ms: Option<i64>,
    /// Conversation items carried along this edge.
    pub carried_item_ids: Vec<ConversationItemId>,
    /// Raw payloads carried along this edge.
    pub carried_raw_payload_ids: Vec<RawPayloadId>,
}
/// Information-flow edge category.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum InteractionEdgeKind {
    /// Parent spawned a child agent.
    SpawnAgent,
    /// Parent assigned a task to an agent.
    AssignAgentTask,
    /// Message sent between agents.
    SendMessage,
    /// Child agent reported a result back to its parent.
    AgentResult,
    /// Parent closed a child agent.
    CloseAgent,
}
/// Typed pointer to one stable reduced-trace object.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum TraceAnchor {
    /// Anchors on a logical conversation item.
    ConversationItem { item_id: ConversationItemId },
    /// Anchors on a runtime tool call.
    ToolCall { tool_call_id: ToolCallId },
    /// Anchors on a whole agent thread.
    Thread { thread_id: AgentThreadId },
}

View File

@@ -0,0 +1,110 @@
use serde::Deserialize;
use serde::Serialize;
use crate::raw_event::RawEventSeq;
use super::AgentPath;
use super::AgentThreadId;
use super::CodexTurnId;
use super::ConversationItemId;
use super::EdgeId;
/// Coarse terminal status for the rollout.
///
/// `Running` is the only non-terminal state.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RolloutStatus {
    /// Writer has not seen a terminal rollout event.
    Running,
    /// Rollout ended normally.
    Completed,
    /// Rollout ended because an operation failed.
    Failed,
    /// Rollout was cancelled or otherwise stopped before normal completion.
    Aborted,
}
/// One Codex thread/session participating in the rollout.
///
/// Threads are agents in the multi-agent sense, but the root interactive
/// session is represented by the same object. Runtime objects live in top-level
/// maps and point back to their owning thread; only transcript order is stored
/// here because compaction/reconciliation makes it semantic.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AgentThread {
    /// Stable identity of this thread within the trace.
    pub thread_id: AgentThreadId,
    /// Stable routing identity. Viewer/search should prefer this over nickname.
    pub agent_path: AgentPath,
    /// Presentation hint. It can collide and must not be used as identity.
    pub nickname: Option<String>,
    /// How this thread came to exist (root session or spawned child).
    pub origin: AgentOrigin,
    /// Session lifecycle for this thread.
    ///
    /// Child threads can end independently from the root rollout, for example
    /// after a parent calls `close_agent`. Keeping this on the thread prevents
    /// those shutdowns from being mistaken for whole-rollout completion.
    pub execution: ExecutionWindow,
    /// Configured model presentation hint. Individual inference calls carry the actual upstream model.
    pub default_model: Option<String>,
    /// Logical conversation items first observed for this thread, in transcript order.
    pub conversation_item_ids: Vec<ConversationItemId>,
}
/// Provenance for a traced Codex thread.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum AgentOrigin {
    /// The root interactive session of the rollout.
    Root,
    /// A child thread spawned by another thread.
    Spawned {
        parent_thread_id: AgentThreadId,
        /// Interaction edge that carried the spawn task.
        spawn_edge_id: EdgeId,
        /// Stable path segment/task name selected by the parent/tool call.
        task_name: String,
        /// Selected agent role/type, for example `worker` or `explorer`.
        agent_role: String,
    },
}
/// Runtime interval for a typed trace object.
///
/// Wall-clock timestamps are for display and latency. Sequence numbers are the
/// causal ordering primitive and should be used to pair observations or break
/// same-millisecond ties.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ExecutionWindow {
    /// Wall-clock start time in Unix milliseconds.
    pub started_at_unix_ms: i64,
    /// Writer sequence number of the start event.
    pub started_seq: RawEventSeq,
    /// Wall-clock end time; `None` while the object is still live.
    pub ended_at_unix_ms: Option<i64>,
    /// Writer sequence number of the end event, when ended.
    pub ended_seq: Option<RawEventSeq>,
    /// Coarse lifecycle status for the window.
    pub status: ExecutionStatus,
}
/// Coarse lifecycle status for a runtime object.
///
/// `Running` is the only non-terminal state.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ExecutionStatus {
    /// Object is still live or the trace ended before its terminal event.
    Running,
    /// Object completed successfully.
    Completed,
    /// Object reached an error state.
    Failed,
    /// Object was cancelled by user/policy/runtime before completion.
    Cancelled,
    /// Object was aborted when its owner/runtime stopped.
    Aborted,
}
/// One activation of the Codex runtime for one thread.
///
/// A Codex turn groups protocol/runtime work for one thread activation.
/// It is not a user/assistant message pair; conversation belongs in
/// `ConversationItem`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct CodexTurn {
    /// Stable id for this activation.
    pub codex_turn_id: CodexTurnId,
    /// Thread this activation ran on.
    pub thread_id: AgentThreadId,
    /// Activation lifecycle window and terminal status.
    pub execution: ExecutionWindow,
    /// Conversation items that directly triggered this activation, when known.
    pub input_item_ids: Vec<ConversationItemId>,
}

View File

@@ -0,0 +1,49 @@
//! References to heavyweight trace payloads stored outside the reduced graph.
use serde::Deserialize;
use serde::Serialize;
/// Stable identifier for one raw payload inside a rollout bundle.
/// Plain string alias; referenced from reduced objects via `RawPayloadRef`.
pub type RawPayloadId = String;
/// Reference to a raw request/response/log payload.
///
/// `RolloutTrace` stores these references so normal timeline and conversation
/// rendering does not require the browser or reducer output to inline every
/// upstream request, tool response, or terminal log.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RawPayloadRef {
    /// Stable id of the referenced payload within the bundle.
    pub raw_payload_id: RawPayloadId,
    /// Payload role. This lets details UI choose syntax highlighting and labels
    /// without opening the payload file first.
    pub kind: RawPayloadKind,
    /// Path relative to the trace bundle root.
    ///
    /// The writer always materializes payloads as bundle-local files. Keeping
    /// this as a plain path avoids exposing storage modes we do not produce.
    pub path: String,
}
/// Coarse role of a raw payload.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type", content = "value")]
pub enum RawPayloadKind {
    /// Full upstream inference request.
    InferenceRequest,
    /// Full upstream inference response or non-delta response stream summary.
    InferenceResponse,
    /// Upstream compaction request.
    CompactionRequest,
    /// Trace-only checkpoint captured when processed replacement history is installed.
    CompactionCheckpoint,
    /// Upstream compaction response.
    CompactionResponse,
    /// Original tool invocation at the Codex tool boundary.
    ToolInvocation,
    /// Result returned to the tool's immediate requester.
    ToolResult,
    /// Raw runtime/protocol observation for an executing tool.
    ToolRuntimeEvent,
    /// Raw terminal runtime event or stream shard.
    TerminalRuntimeEvent,
    /// Wrapped UI/protocol event.
    ProtocolEvent,
    /// One-shot metadata captured when a Codex session/thread starts.
    SessionMetadata,
    /// Runtime notification payload carried when a child agent reports back to its parent.
    AgentResult,
}

View File

@@ -0,0 +1,285 @@
//! Append-only raw trace events.
use crate::model::AgentThreadId;
use crate::model::CodeCellRuntimeStatus;
use crate::model::CodexTurnId;
use crate::model::CompactionId;
use crate::model::CompactionRequestId;
use crate::model::EdgeId;
use crate::model::ExecutionStatus;
use crate::model::InferenceCallId;
use crate::model::ModelVisibleCallId;
use crate::model::RolloutStatus;
use crate::model::ToolCallId;
use crate::model::ToolCallKind;
use crate::model::ToolCallSummary;
use crate::payload::RawPayloadRef;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value;
/// Monotonic sequence number assigned by the raw trace writer.
pub type RawEventSeq = u64;
/// Current raw event envelope schema version.
/// Bump when the `RawTraceEvent` envelope shape changes incompatibly.
pub(crate) const RAW_TRACE_EVENT_SCHEMA_VERSION: u32 = 1;
/// One append-only raw trace event.
///
/// Every event uses the same envelope so partial replay and corruption checks
/// can run before the reducer understands the event-specific payload.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RawTraceEvent {
    /// Envelope schema version, currently `RAW_TRACE_EVENT_SCHEMA_VERSION`.
    pub schema_version: u32,
    /// Contiguous writer-assigned order inside one rollout event log.
    pub seq: RawEventSeq,
    /// Unix wall-clock timestamp in milliseconds. Use for display/latency.
    pub wall_time_unix_ms: i64,
    /// Rollout this event belongs to.
    pub rollout_id: String,
    /// Thread context, when the writer knew it.
    pub thread_id: Option<AgentThreadId>,
    /// Codex turn context, when the writer knew it.
    pub codex_turn_id: Option<CodexTurnId>,
    /// Event-specific typed payload.
    pub payload: RawTraceEventPayload,
}
/// Writer-supplied context that appears in the raw event envelope.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RawTraceEventContext {
    /// Thread to stamp into the envelope, when known.
    pub thread_id: Option<AgentThreadId>,
    /// Codex turn to stamp into the envelope, when known.
    pub codex_turn_id: Option<CodexTurnId>,
}
/// Runtime requester as observed at the raw tool boundary.
///
/// This intentionally uses runtime-local identifiers. The reducer is the only
/// place that maps these handles to graph identities such as `CodeCellId`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum RawToolCallRequester {
    /// The model requested the tool directly.
    Model,
    /// Model-authored JavaScript requested the tool through code-mode.
    CodeCell {
        /// Runtime-local code-mode cell handle.
        runtime_cell_id: String,
    },
}
/// Typed payload for a raw trace event.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum RawTraceEventPayload {
    /// Rollout began; carries the trace identity and root thread.
    RolloutStarted {
        trace_id: String,
        root_thread_id: AgentThreadId,
    },
    /// Rollout reached a terminal status.
    RolloutEnded {
        status: RolloutStatus,
    },
    /// A Codex thread/session started.
    ThreadStarted {
        thread_id: AgentThreadId,
        /// Stable agent path.
        agent_path: String,
        metadata_payload: Option<RawPayloadRef>,
    },
    /// A thread reached a terminal status.
    ThreadEnded {
        thread_id: AgentThreadId,
        status: RolloutStatus,
    },
    /// One Codex runtime activation began for a thread.
    CodexTurnStarted {
        codex_turn_id: CodexTurnId,
        thread_id: AgentThreadId,
    },
    /// The activation reached a terminal execution status.
    CodexTurnEnded {
        codex_turn_id: CodexTurnId,
        status: ExecutionStatus,
    },
    /// Upstream inference request was dispatched.
    InferenceStarted {
        inference_call_id: InferenceCallId,
        thread_id: AgentThreadId,
        codex_turn_id: CodexTurnId,
        model: String,
        provider_name: String,
        request_payload: RawPayloadRef,
    },
    /// Upstream inference finished with a full response payload.
    InferenceCompleted {
        inference_call_id: InferenceCallId,
        response_id: Option<String>,
        response_payload: RawPayloadRef,
    },
    /// Upstream inference failed before producing a full response.
    InferenceFailed {
        inference_call_id: InferenceCallId,
        error: String,
        /// Partial response payload, when stream events arrived before failure.
        partial_response_payload: Option<RawPayloadRef>,
    },
    /// A runtime tool call began.
    ToolCallStarted {
        tool_call_id: ToolCallId,
        /// Protocol/model call ID when this runtime call came from model output.
        model_visible_call_id: Option<String>,
        /// Code-mode runtime bridge ID when model-authored code issued this call.
        code_mode_runtime_tool_id: Option<String>,
        /// Runtime requester that caused this tool lifecycle.
        requester: RawToolCallRequester,
        kind: ToolCallKind,
        summary: ToolCallSummary,
        invocation_payload: Option<RawPayloadRef>,
    },
    /// Runtime began executing the tool.
    ToolCallRuntimeStarted {
        tool_call_id: ToolCallId,
        /// Runtime/protocol observation for how Codex began executing the tool.
        runtime_payload: RawPayloadRef,
    },
    /// Runtime finished executing the tool.
    ToolCallRuntimeEnded {
        tool_call_id: ToolCallId,
        status: ExecutionStatus,
        /// Runtime/protocol observation for how Codex finished executing the tool.
        runtime_payload: RawPayloadRef,
    },
    /// Tool call finished with a terminal status.
    ToolCallEnded {
        tool_call_id: ToolCallId,
        status: ExecutionStatus,
        result_payload: Option<RawPayloadRef>,
    },
    /// Code-mode runtime accepted a model-authored `exec` cell.
    CodeCellStarted {
        /// Runtime-local handle allocated by code mode for waits and nested tools.
        runtime_cell_id: String,
        /// Custom tool call id on the model-visible `exec` item.
        model_visible_call_id: ModelVisibleCallId,
        /// JavaScript source after the public `exec` wrapper has been parsed.
        source_js: String,
    },
    /// Runtime's first response for a code cell.
    CodeCellInitialResponse {
        /// Runtime-local handle, matching `CodeCellStarted`.
        runtime_cell_id: String,
        status: CodeCellRuntimeStatus,
        response_payload: Option<RawPayloadRef>,
    },
    /// Code cell reached a terminal runtime status.
    CodeCellEnded {
        /// Runtime-local handle, matching `CodeCellStarted`.
        runtime_cell_id: String,
        status: CodeCellRuntimeStatus,
        response_payload: Option<RawPayloadRef>,
    },
    /// Upstream compaction request was dispatched.
    CompactionRequestStarted {
        compaction_id: CompactionId,
        compaction_request_id: CompactionRequestId,
        thread_id: AgentThreadId,
        codex_turn_id: CodexTurnId,
        model: String,
        provider_name: String,
        request_payload: RawPayloadRef,
    },
    /// Upstream compaction request completed with a response payload.
    CompactionRequestCompleted {
        compaction_id: CompactionId,
        compaction_request_id: CompactionRequestId,
        response_payload: RawPayloadRef,
    },
    /// Upstream compaction request failed.
    CompactionRequestFailed {
        compaction_id: CompactionId,
        compaction_request_id: CompactionRequestId,
        error: String,
    },
    /// Checkpoint installation event for remote-compacted replacement history.
    CompactionInstalled {
        compaction_id: CompactionId,
        /// Trace-only checkpoint payload. Do not route this through public UI protocol.
        checkpoint_payload: RawPayloadRef,
    },
    /// Multi-agent v2 child-to-parent completion delivery.
    AgentResultObserved {
        edge_id: EdgeId,
        child_thread_id: AgentThreadId,
        child_codex_turn_id: CodexTurnId,
        parent_thread_id: AgentThreadId,
        message: String,
        /// Raw notification payload. This is evidence for the runtime delivery,
        /// not the parent-side model-visible item.
        carried_payload: Option<RawPayloadRef>,
    },
    /// Existing UI/protocol event wrapped into trace format.
    ProtocolEventObserved {
        event_type: String,
        event_payload: RawPayloadRef,
    },
    /// Structured payload for early instrumentation before a dedicated variant exists.
    Other {
        kind: String,
        summary: String,
        payloads: Vec<RawPayloadRef>,
        /// Small structured metadata. Large data belongs in `payloads`.
        metadata: Value,
    },
}
impl RawTraceEventPayload {
/// Raw payload refs that must exist before this raw event is appended.
pub(crate) fn raw_payload_refs(&self) -> Vec<&RawPayloadRef> {
match self {
RawTraceEventPayload::RolloutStarted { .. }
| RawTraceEventPayload::RolloutEnded { .. }
| RawTraceEventPayload::ThreadEnded { .. }
| RawTraceEventPayload::CodexTurnStarted { .. }
| RawTraceEventPayload::CodexTurnEnded { .. }
| RawTraceEventPayload::CompactionRequestFailed { .. }
| RawTraceEventPayload::CodeCellStarted { .. }
| RawTraceEventPayload::AgentResultObserved {
carried_payload: None,
..
} => Vec::new(),
RawTraceEventPayload::ThreadStarted {
metadata_payload, ..
} => metadata_payload.iter().collect(),
RawTraceEventPayload::InferenceStarted {
request_payload, ..
}
| RawTraceEventPayload::InferenceCompleted {
response_payload: request_payload,
..
}
| RawTraceEventPayload::CompactionRequestStarted {
request_payload, ..
}
| RawTraceEventPayload::CompactionRequestCompleted {
response_payload: request_payload,
..
}
| RawTraceEventPayload::CompactionInstalled {
checkpoint_payload: request_payload,
..
}
| RawTraceEventPayload::ProtocolEventObserved {
event_payload: request_payload,
..
} => vec![request_payload],
RawTraceEventPayload::InferenceFailed {
partial_response_payload,
..
}
| RawTraceEventPayload::ToolCallStarted {
invocation_payload: partial_response_payload,
..
}
| RawTraceEventPayload::ToolCallEnded {
result_payload: partial_response_payload,
..
}
| RawTraceEventPayload::CodeCellInitialResponse {
response_payload: partial_response_payload,
..
}
| RawTraceEventPayload::CodeCellEnded {
response_payload: partial_response_payload,
..
} => partial_response_payload.iter().collect(),
RawTraceEventPayload::AgentResultObserved {
carried_payload: Some(carried_payload),
..
} => vec![carried_payload],
RawTraceEventPayload::ToolCallRuntimeStarted {
runtime_payload, ..
}
| RawTraceEventPayload::ToolCallRuntimeEnded {
runtime_payload, ..
} => vec![runtime_payload],
RawTraceEventPayload::Other { payloads, .. } => payloads.iter().collect(),
}
}
}

View File

@@ -0,0 +1,738 @@
//! Code-mode reduction.
//!
//! A code cell is the runtime parent for model-authored `exec`
//! JavaScript. Nested tools, waits, and terminal operations hang off this
//! object so viewers can inspect runtime work without flattening it into the
//! model-visible conversation.
//!
//! The reducer has to reconcile two clocks:
//! - model-visible items come from inference request/response payloads;
//! - runtime work starts as soon as Codex dispatches the tool.
//!
//! In real traces `CodeCellStarted` can arrive before the inference completion
//! payload that contains the `custom_tool_call` item. We therefore queue starts
//! until their source conversation item exists, then attach runtime edges.
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use serde_json::Value;
use super::TraceReducer;
use crate::model::CodeCell;
use crate::model::CodeCellId;
use crate::model::CodeCellRuntimeStatus;
use crate::model::ConversationItemKind;
use crate::model::ExecutionStatus;
use crate::model::ExecutionWindow;
use crate::model::ProducerRef;
use crate::model::ToolCallId;
use crate::model::ToolCallRequester;
use crate::payload::RawPayloadRef;
use crate::raw_event::RawEventSeq;
use crate::raw_event::RawToolCallRequester;
/// Runtime start payload for one model-authored code-mode exec call.
///
/// The reduced id is already derived from the model-visible call id before this
/// reaches the code-cell reducer, so the reducer can reconcile runtime lifecycle
/// events against a stable graph identity.
pub(super) struct StartedCodeCell {
pub(super) code_cell_id: CodeCellId,
pub(super) runtime_cell_id: String,
pub(super) model_visible_call_id: crate::model::ModelVisibleCallId,
pub(super) source_js: String,
}
/// Queued code-cell start waiting for its model-visible source item.
///
/// Code execution can begin before inference stream completion records the
/// custom-tool call item that authored it. This wrapper keeps the original
/// event timing intact until that source item exists.
pub(super) struct PendingCodeCellStart {
    /// Original raw event sequence number of the start.
    pub(super) seq: RawEventSeq,
    /// Original wall-clock time of the start.
    pub(super) wall_time_unix_ms: i64,
    /// Thread id from the raw event envelope.
    pub(super) thread_id: String,
    /// Codex turn id from the raw event envelope, when present.
    pub(super) codex_turn_id: Option<String>,
    /// The start payload itself.
    pub(super) started: StartedCodeCell,
}
/// Lifecycle event observed before a queued code cell has materialized.
///
/// These events are replayed after the start is resolved so failed or very fast
/// cells do not lose runtime status while preserving source-item ownership.
pub(super) struct PendingCodeCellLifecycleEvent {
    /// Original raw event sequence number.
    pub(super) seq: RawEventSeq,
    /// Original wall-clock time of the event.
    pub(super) wall_time_unix_ms: i64,
    /// Which lifecycle transition was observed.
    pub(super) kind: PendingCodeCellLifecycleEventKind,
}
/// Runtime lifecycle transitions that can arrive while a code-cell start is queued.
pub(super) enum PendingCodeCellLifecycleEventKind {
    /// Runtime's first response for the cell.
    InitialResponse {
        runtime_cell_id: String,
        status: CodeCellRuntimeStatus,
    },
    /// Cell reached a terminal runtime status.
    Ended {
        status: CodeCellRuntimeStatus,
    },
}
impl TraceReducer {
/// Starts a code cell once its model-visible source item exists.
///
/// Runtime events are allowed to arrive before stream completion has
/// reduced the model output that requested `exec`. Queueing preserves the
/// event order while still requiring every final `CodeCell` to point at the
/// exact conversation item that authored its JavaScript.
pub(super) fn start_or_queue_code_cell(&mut self, pending: PendingCodeCellStart) -> Result<()> {
let code_cell_id = pending.started.code_cell_id.clone();
if self
.source_item_id_for_pending_code_cell(&pending)?
.is_none()
{
if self.rollout.code_cells.contains_key(&code_cell_id)
|| self.pending_code_cell_starts.contains_key(&code_cell_id)
{
bail!("duplicate code cell start for {code_cell_id}");
}
self.pending_code_cell_starts.insert(code_cell_id, pending);
return Ok(());
}
self.start_code_cell(pending)
}
/// Materializes any queued code-cell starts unlocked by newly reduced conversation items.
///
/// This is called after inference and compaction conversation reduction,
/// because those are the only paths that create model-visible items today.
pub(super) fn flush_pending_code_cell_starts(&mut self) -> Result<()> {
let mut ready_ids = Vec::new();
for (code_cell_id, pending) in &self.pending_code_cell_starts {
if self
.source_item_id_for_pending_code_cell(pending)?
.is_some()
{
ready_ids.push(code_cell_id.clone());
}
}
for code_cell_id in ready_ids {
let Some(pending) = self.pending_code_cell_starts.remove(&code_cell_id) else {
continue;
};
self.start_code_cell(pending)?;
}
Ok(())
}
    /// Inserts the reduced `CodeCell` once source ownership can be proven.
    ///
    /// Preconditions enforced here: the cell id is new, the raw event carried a
    /// Codex turn id, and the turn belongs to the claimed thread. Statement
    /// order matters: the cell must be in `rollout.code_cells` before output
    /// items are attached and queued lifecycle events are replayed.
    fn start_code_cell(&mut self, pending: PendingCodeCellStart) -> Result<()> {
        let PendingCodeCellStart {
            seq,
            wall_time_unix_ms,
            thread_id,
            codex_turn_id,
            started,
        } = pending;
        // Duplicate start for an already-materialized cell is a hard error.
        if self.rollout.code_cells.contains_key(&started.code_cell_id) {
            bail!("duplicate code cell start for {}", started.code_cell_id);
        }
        // Every reduced cell must be attributable to a Codex turn.
        let Some(codex_turn_id) = codex_turn_id else {
            bail!(
                "code cell start {} did not include a Codex turn id",
                started.code_cell_id
            );
        };
        self.validate_code_cell_turn(&thread_id, &codex_turn_id)?;
        let source_item_id = self.source_item_id_for_code_cell_start(
            &thread_id,
            &started.code_cell_id,
            &started.model_visible_call_id,
        )?;
        // Output items may already exist if the runtime finished very fast.
        let output_item_ids = self.model_visible_code_cell_item_ids(
            &thread_id,
            &started.model_visible_call_id,
            ConversationItemKind::CustomToolCallOutput,
        );
        // Runtime events may also have arrived while the start was queued.
        // Seed these reverse links from already-reduced tool calls so replay is
        // order-insensitive within the known trace causality.
        let requester = ToolCallRequester::CodeCell {
            code_cell_id: started.code_cell_id.clone(),
        };
        let nested_tool_call_ids = self
            .rollout
            .tool_calls
            .values()
            .filter(|tool_call| tool_call.requester == requester)
            .map(|tool_call| tool_call.tool_call_id.clone())
            .collect();
        self.rollout.code_cells.insert(
            started.code_cell_id.clone(),
            CodeCell {
                code_cell_id: started.code_cell_id.clone(),
                model_visible_call_id: started.model_visible_call_id,
                thread_id: thread_id.clone(),
                codex_turn_id,
                source_item_id,
                output_item_ids: output_item_ids.clone(),
                runtime_cell_id: Some(started.runtime_cell_id),
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                runtime_status: CodeCellRuntimeStatus::Starting,
                initial_response_at_unix_ms: None,
                initial_response_seq: None,
                yielded_at_unix_ms: None,
                yielded_seq: None,
                source_js: started.source_js,
                nested_tool_call_ids,
                wait_tool_call_ids: Vec::new(),
            },
        );
        // NOTE(review): the returned thread handle is unused; presumably this
        // call only asserts the thread exists — confirm against `thread_mut`.
        self.thread_mut(&thread_id)?;
        // Attach any pre-existing output items now that the cell exists.
        for item_id in output_item_ids {
            self.add_code_cell_output_item(&started.code_cell_id, &item_id)?;
        }
        // Replay lifecycle events that arrived while the start was queued.
        self.flush_pending_code_cell_lifecycle_events(&started.code_cell_id)?;
        Ok(())
    }
/// Returns the source item if the model-visible `exec` call has been reduced.
fn source_item_id_for_pending_code_cell(
&self,
pending: &PendingCodeCellStart,
) -> Result<Option<String>> {
Ok(self
.model_visible_code_cell_item_ids(
&pending.thread_id,
&pending.started.model_visible_call_id,
ConversationItemKind::CustomToolCall,
)
.into_iter()
.next())
}
/// Records the runtime's first response for a code cell, or waits for its source item.
///
/// Code-mode execution can start and fail before the inference response payload
/// that introduced the model-visible `exec` call has been reduced. In that
/// case the cell start is already pending; keep the lifecycle event beside it
/// instead of weakening the invariant that every reduced cell has a source
/// conversation item.
pub(super) fn record_or_queue_code_cell_initial_response(
&mut self,
seq: RawEventSeq,
wall_time_unix_ms: i64,
code_cell_id: CodeCellId,
runtime_cell_id: String,
status: CodeCellRuntimeStatus,
) -> Result<()> {
if !self.rollout.code_cells.contains_key(&code_cell_id) {
if self.pending_code_cell_starts.contains_key(&code_cell_id) {
self.queue_code_cell_lifecycle_event(
code_cell_id,
PendingCodeCellLifecycleEvent {
seq,
wall_time_unix_ms,
kind: PendingCodeCellLifecycleEventKind::InitialResponse {
runtime_cell_id,
status,
},
},
);
return Ok(());
}
bail!("code cell initial response referenced unknown cell {code_cell_id}");
}
self.record_code_cell_initial_response(
seq,
wall_time_unix_ms,
code_cell_id,
runtime_cell_id,
status,
)
}
fn record_code_cell_initial_response(
&mut self,
seq: RawEventSeq,
wall_time_unix_ms: i64,
code_cell_id: CodeCellId,
runtime_cell_id: String,
status: CodeCellRuntimeStatus,
) -> Result<()> {
let Some(cell) = self.rollout.code_cells.get_mut(&code_cell_id) else {
bail!("code cell initial response referenced unknown cell {code_cell_id}");
};
cell.runtime_cell_id = Some(runtime_cell_id);
if cell.initial_response_at_unix_ms.is_none() {
cell.initial_response_at_unix_ms = Some(wall_time_unix_ms);
cell.initial_response_seq = Some(seq);
}
if status == CodeCellRuntimeStatus::Yielded {
cell.yielded_at_unix_ms = Some(wall_time_unix_ms);
cell.yielded_seq = Some(seq);
}
cell.runtime_status = status;
Ok(())
}
/// Ends a code cell, or waits until its queued start can materialize.
///
/// This mirrors `record_or_queue_code_cell_initial_response`: the reducer is
/// strict about unknown cells, but a cell whose start is pending on the
/// model-visible source item is known and just needs its lifecycle replayed
/// after the source item appears.
pub(super) fn end_or_queue_code_cell(
&mut self,
seq: RawEventSeq,
wall_time_unix_ms: i64,
code_cell_id: CodeCellId,
status: CodeCellRuntimeStatus,
) -> Result<()> {
if !self.rollout.code_cells.contains_key(&code_cell_id) {
if self.pending_code_cell_starts.contains_key(&code_cell_id) {
self.queue_code_cell_lifecycle_event(
code_cell_id,
PendingCodeCellLifecycleEvent {
seq,
wall_time_unix_ms,
kind: PendingCodeCellLifecycleEventKind::Ended { status },
},
);
return Ok(());
}
bail!("code cell end referenced unknown cell {code_cell_id}");
}
self.end_code_cell(seq, wall_time_unix_ms, code_cell_id, status)
}
fn end_code_cell(
&mut self,
seq: RawEventSeq,
wall_time_unix_ms: i64,
code_cell_id: CodeCellId,
status: CodeCellRuntimeStatus,
) -> Result<()> {
let Some(cell) = self.rollout.code_cells.get_mut(&code_cell_id) else {
bail!("code cell end referenced unknown cell {code_cell_id}");
};
if cell.initial_response_at_unix_ms.is_none() {
cell.initial_response_at_unix_ms = Some(wall_time_unix_ms);
cell.initial_response_seq = Some(seq);
}
cell.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
cell.execution.ended_seq = Some(seq);
cell.execution.status = execution_status_for_code_cell(&status);
cell.runtime_status = status;
Ok(())
}
/// Closes unfinished code cells when their owning turn is interrupted.
///
/// A yielded code cell can outlive a completed turn and be resumed by a
/// later `wait`, so normal turn completion must not imply cell completion.
/// Cancellation/failure is different: the model-visible JS frame has been
/// abandoned even if nested terminal work reports late runtime events. In
/// that case leaving the cell `running` makes a completed trace look live.
pub(super) fn terminate_running_code_cells_for_turn_end(
&mut self,
seq: RawEventSeq,
wall_time_unix_ms: i64,
codex_turn_id: &str,
turn_status: &ExecutionStatus,
) -> Result<()> {
let runtime_status = match turn_status {
ExecutionStatus::Running | ExecutionStatus::Completed => return Ok(()),
ExecutionStatus::Failed => CodeCellRuntimeStatus::Failed,
ExecutionStatus::Cancelled | ExecutionStatus::Aborted => {
CodeCellRuntimeStatus::Terminated
}
};
let code_cell_ids: Vec<_> = self
.rollout
.code_cells
.values()
.filter(|cell| {
cell.codex_turn_id == codex_turn_id
&& cell.execution.status == ExecutionStatus::Running
})
.map(|cell| cell.code_cell_id.clone())
.collect();
for code_cell_id in code_cell_ids {
self.end_code_cell(seq, wall_time_unix_ms, code_cell_id, runtime_status.clone())?;
}
Ok(())
}
fn queue_code_cell_lifecycle_event(
&mut self,
code_cell_id: CodeCellId,
event: PendingCodeCellLifecycleEvent,
) {
let events = self
.pending_code_cell_lifecycle_events
.entry(code_cell_id)
.or_default();
events.push(event);
events.sort_by_key(|event| event.seq);
}
fn flush_pending_code_cell_lifecycle_events(&mut self, code_cell_id: &str) -> Result<()> {
let Some(events) = self.pending_code_cell_lifecycle_events.remove(code_cell_id) else {
return Ok(());
};
for event in events {
match event.kind {
PendingCodeCellLifecycleEventKind::InitialResponse {
runtime_cell_id,
status,
} => self.record_code_cell_initial_response(
event.seq,
event.wall_time_unix_ms,
code_cell_id.to_string(),
runtime_cell_id,
status,
)?,
PendingCodeCellLifecycleEventKind::Ended { status } => self.end_code_cell(
event.seq,
event.wall_time_unix_ms,
code_cell_id.to_string(),
status,
)?,
}
}
Ok(())
}
/// Links a nested tool call back to its parent code cell.
///
/// If the parent cell is still queued, the link is recovered later from already
/// reduced tool calls when the cell materializes.
pub(super) fn link_tool_call_to_code_cell(
&mut self,
tool_call_id: &ToolCallId,
requester: &ToolCallRequester,
) -> Result<()> {
let ToolCallRequester::CodeCell { code_cell_id } = requester else {
return Ok(());
};
let Some(cell) = self.rollout.code_cells.get_mut(code_cell_id) else {
// The cell start may still be queued behind the inference payload
// that contains its model-visible source item. `start_code_cell`
// backfills these already-reduced nested calls once the source
// ownership can be proven.
return Ok(());
};
push_unique(&mut cell.nested_tool_call_ids, tool_call_id);
Ok(())
}
    /// Records that a model-visible wait call is waiting on a runtime code cell.
    ///
    /// Wait calls are not nested JavaScript tools, so the relationship is inferred
    /// from the runtime cell id inside the function arguments.
    ///
    /// Non-`wait` tool calls and payload-less calls are ignored; malformed
    /// `wait` payloads (missing/invalid arguments or `cell_id`) are errors.
    pub(super) fn link_wait_tool_call_from_request_payload(
        &mut self,
        thread_id: &str,
        tool_call_id: &ToolCallId,
        request_payload: Option<&RawPayloadRef>,
    ) -> Result<()> {
        let Some(request_payload) = request_payload else {
            return Ok(());
        };
        let payload = self.read_payload_json(request_payload)?;
        // Only the `wait` tool participates in this linkage.
        if payload.get("tool_name").and_then(Value::as_str) != Some("wait") {
            return Ok(());
        }
        // `wait` is a normal model-visible function call, not a nested JS tool
        // request. The only stable edge back to the code cell is the runtime
        // `cell_id` inside the function arguments.
        let Some(arguments) = payload
            .get("payload")
            .and_then(|payload| payload.get("arguments"))
            .and_then(Value::as_str)
        else {
            bail!(
                "wait tool request payload {} did not contain function arguments",
                request_payload.raw_payload_id
            );
        };
        // Function arguments arrive as a JSON string; parse them to reach `cell_id`.
        let arguments: Value = serde_json::from_str(arguments).with_context(|| {
            format!(
                "wait tool request payload {} had invalid JSON arguments",
                request_payload.raw_payload_id
            )
        })?;
        let Some(runtime_cell_id) = arguments.get("cell_id").and_then(Value::as_str) else {
            bail!(
                "wait tool request payload {} did not contain cell_id",
                request_payload.raw_payload_id
            );
        };
        // Best-effort link: a wait can reference a runtime cell this reducer
        // has not seen (yet); that is not an error.
        let Some(code_cell_id) =
            self.code_cell_id_for_runtime_cell_id_if_known(thread_id, runtime_cell_id)
        else {
            return Ok(());
        };
        let Some(cell) = self.rollout.code_cells.get_mut(&code_cell_id) else {
            return Ok(());
        };
        push_unique(&mut cell.wait_tool_call_ids, tool_call_id);
        Ok(())
    }
/// Attaches a later-observed model-visible output item to its code cell.
///
/// This is used when an inference request carries a custom-tool output after
/// the runtime cell already exists.
pub(super) fn attach_model_visible_code_cell_item(
&mut self,
item_id: &str,
call_id: Option<&str>,
kind: &ConversationItemKind,
) -> Result<()> {
let Some(call_id) = call_id else {
return Ok(());
};
if *kind != ConversationItemKind::CustomToolCallOutput {
return Ok(());
}
// The output item can be observed after the CodeCell was created, e.g.
// when a later inference request carries the custom-tool result back to
// the model. Add the reverse ProducerRef at that later observation
// point instead of copying runtime bytes into the conversation model.
let code_cell_id = self.reduced_code_cell_id_for_model_visible_call(call_id);
if !self.rollout.code_cells.contains_key(&code_cell_id) {
return Ok(());
}
self.add_code_cell_output_item(&code_cell_id, item_id)
}
/// Resolves the owning thread for a code-cell runtime event.
///
/// Runtime events should carry a thread id, but older/raw paths may only have
/// the turn id. The fallback keeps replay strict while avoiding duplicate logic
/// in every code-cell event arm.
pub(super) fn code_cell_event_thread_id(
&self,
thread_id: Option<String>,
codex_turn_id: Option<&str>,
runtime_cell_id: &str,
event_name: &str,
) -> Result<String> {
if let Some(thread_id) = thread_id {
return Ok(thread_id);
}
let Some(codex_turn_id) = codex_turn_id else {
bail!("{event_name} {runtime_cell_id} did not include a thread id");
};
self.rollout
.codex_turns
.get(codex_turn_id)
.map(|turn| turn.thread_id.clone())
.with_context(|| {
format!(
"{event_name} {runtime_cell_id} referenced unknown Codex turn {codex_turn_id}"
)
})
}
/// Derives the stable reduced code-cell id from the model-visible exec call id.
pub(super) fn reduced_code_cell_id_for_model_visible_call(
&self,
model_visible_call_id: &str,
) -> CodeCellId {
// The model-visible `exec` call is the durable source identity. The
// runtime `cell_id` is only a thread-local handle used for later waits
// and nested tool calls.
format!("code_cell:{model_visible_call_id}")
}
/// Records the thread-local runtime cell id to reduced code-cell id mapping.
///
/// Runtime ids can repeat across threads, so callers must provide the owning
/// thread id when creating or resolving this bridge.
pub(super) fn record_runtime_code_cell_id(
&mut self,
thread_id: &str,
runtime_cell_id: &str,
code_cell_id: &str,
) -> Result<()> {
let key = runtime_code_cell_key(thread_id, runtime_cell_id);
if let Some(existing) = self.code_cell_ids_by_runtime.get(&key) {
if existing == code_cell_id {
return Ok(());
}
bail!(
"runtime code cell {runtime_cell_id} in thread {thread_id} mapped to both \
{existing} and {code_cell_id}"
);
}
self.code_cell_ids_by_runtime
.insert(key, code_cell_id.to_string());
Ok(())
}
/// Resolves a runtime cell id to the reduced code-cell id for the given thread.
pub(super) fn code_cell_id_for_runtime_cell_id(
&self,
thread_id: &str,
runtime_cell_id: &str,
event_name: &str,
) -> Result<CodeCellId> {
self.code_cell_id_for_runtime_cell_id_if_known(thread_id, runtime_cell_id)
.with_context(|| {
format!(
"{event_name} referenced unknown runtime cell {runtime_cell_id} \
in thread {thread_id}"
)
})
}
fn code_cell_id_for_runtime_cell_id_if_known(
&self,
thread_id: &str,
runtime_cell_id: &str,
) -> Option<CodeCellId> {
self.code_cell_ids_by_runtime
.get(&runtime_code_cell_key(thread_id, runtime_cell_id))
.cloned()
}
/// Converts a raw tool requester into the reduced graph requester.
///
/// Code-mode tool requests arrive with a runtime cell id, so this method is
/// the boundary that turns that runtime handle into a stable code-cell anchor.
pub(super) fn reduce_tool_call_requester(
&self,
thread_id: &str,
requester: RawToolCallRequester,
) -> Result<ToolCallRequester> {
match requester {
RawToolCallRequester::Model => Ok(ToolCallRequester::Model),
RawToolCallRequester::CodeCell { runtime_cell_id } => Ok(ToolCallRequester::CodeCell {
code_cell_id: self.code_cell_id_for_runtime_cell_id(
thread_id,
&runtime_cell_id,
"code-mode nested tool",
)?,
}),
}
}
fn validate_code_cell_turn(&self, thread_id: &str, codex_turn_id: &str) -> Result<()> {
if !self.rollout.threads.contains_key(thread_id) {
bail!("code cell start referenced unknown thread {thread_id}");
}
let Some(turn) = self.rollout.codex_turns.get(codex_turn_id) else {
bail!("code cell start referenced unknown Codex turn {codex_turn_id}");
};
if turn.thread_id != thread_id {
bail!(
"code cell start used thread {thread_id}, but Codex turn {codex_turn_id} belongs \
to {}",
turn.thread_id
);
}
Ok(())
}
fn model_visible_code_cell_item_ids(
&self,
thread_id: &str,
call_id: &str,
kind: ConversationItemKind,
) -> Vec<String> {
self.rollout
.conversation_items
.values()
.filter(|item| {
item.thread_id == thread_id
&& item.call_id.as_deref() == Some(call_id)
&& item.kind == kind
})
.map(|item| item.item_id.clone())
.collect()
}
fn source_item_id_for_code_cell_start(
&self,
thread_id: &str,
code_cell_id: &str,
model_visible_call_id: &str,
) -> Result<String> {
self.model_visible_code_cell_item_ids(
thread_id,
model_visible_call_id,
ConversationItemKind::CustomToolCall,
)
.into_iter()
.next()
.with_context(|| {
format!(
"code cell {code_cell_id} referenced model-visible call {model_visible_call_id}, \
but no custom tool call item was observed"
)
})
}
fn add_code_cell_output_item(&mut self, code_cell_id: &str, item_id: &str) -> Result<()> {
let Some(cell) = self.rollout.code_cells.get_mut(code_cell_id) else {
bail!("code cell {code_cell_id} disappeared during output linking");
};
push_unique(&mut cell.output_item_ids, item_id);
let Some(item) = self.rollout.conversation_items.get_mut(item_id) else {
bail!("conversation item {item_id} disappeared during code-cell output linking");
};
let producer = ProducerRef::CodeCell {
code_cell_id: code_cell_id.to_string(),
};
if !item.produced_by.contains(&producer) {
item.produced_by.push(producer);
}
Ok(())
}
}
/// Maps a runtime cell status onto the coarser execution-window status.
fn execution_status_for_code_cell(status: &CodeCellRuntimeStatus) -> ExecutionStatus {
    match status {
        CodeCellRuntimeStatus::Completed => ExecutionStatus::Completed,
        CodeCellRuntimeStatus::Failed => ExecutionStatus::Failed,
        CodeCellRuntimeStatus::Terminated => ExecutionStatus::Cancelled,
        // Anything not yet terminal — including a yield — is still running.
        CodeCellRuntimeStatus::Starting
        | CodeCellRuntimeStatus::Running
        | CodeCellRuntimeStatus::Yielded => ExecutionStatus::Running,
    }
}
/// Appends `item_id` to `items` unless an equal entry is already present.
fn push_unique(items: &mut Vec<String>, item_id: &str) {
    let already_present = items.iter().any(|existing| existing == item_id);
    if !already_present {
        items.push(item_id.to_owned());
    }
}
/// Builds the composite lookup key for a thread-scoped runtime cell id.
fn runtime_code_cell_key(thread_id: &str, runtime_cell_id: &str) -> (String, String) {
    (String::from(thread_id), String::from(runtime_cell_id))
}
#[cfg(test)]
#[path = "code_cell_tests.rs"]
mod tests;

View File

@@ -0,0 +1,423 @@
use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;
use crate::model::CodeCellRuntimeStatus;
use crate::model::ConversationItemKind;
use crate::model::ExecutionStatus;
use crate::model::ProducerRef;
use crate::model::ToolCallKind;
use crate::model::ToolCallSummary;
use crate::payload::RawPayloadKind;
use crate::raw_event::RawToolCallRequester;
use crate::raw_event::RawTraceEventPayload;
use crate::reducer::test_support::create_started_writer;
use crate::reducer::test_support::message;
use crate::reducer::test_support::start_turn;
use crate::reducer::test_support::start_turn_for_thread;
use crate::reducer::test_support::trace_context;
use crate::reducer::test_support::trace_context_for_thread;
use crate::replay_bundle;
// End-to-end lifecycle: an `exec` code cell gains its source item, a nested
// tool call, a cross-turn `wait` link, and a model-visible output item.
#[test]
fn code_cell_lifecycle_links_nested_tools_waits_and_outputs() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "count files")]
        }),
    )?;
    writer.append(RawTraceEventPayload::InferenceStarted {
        inference_call_id: "inference-1".to_string(),
        thread_id: "thread-root".to_string(),
        codex_turn_id: "turn-1".to_string(),
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        request_payload: request,
    })?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "custom_tool_call",
                "name": "exec",
                "call_id": "call-code",
                "input": "text('hi')"
            }]
        }),
    )?;
    // Runtime tool dispatch starts before the stream-completion hook has
    // reduced the model response that requested `exec`.
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodeCellStarted {
            runtime_cell_id: "1".to_string(),
            model_visible_call_id: "call-code".to_string(),
            source_js: "text('hi')".to_string(),
        },
    )?;
    writer.append(RawTraceEventPayload::InferenceCompleted {
        inference_call_id: "inference-1".to_string(),
        response_id: Some("resp-1".to_string()),
        response_payload: response,
    })?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodeCellInitialResponse {
            runtime_cell_id: "1".to_string(),
            status: CodeCellRuntimeStatus::Yielded,
            response_payload: None,
        },
    )?;
    // Nested tool call issued from inside the running cell's JS.
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "nested-tool-1".to_string(),
            model_visible_call_id: None,
            code_mode_runtime_tool_id: Some("tool-1".to_string()),
            requester: RawToolCallRequester::CodeCell {
                runtime_cell_id: "1".to_string(),
            },
            kind: ToolCallKind::ExecCommand,
            summary: ToolCallSummary::Generic {
                label: "exec_command".to_string(),
                input_preview: Some("pwd".to_string()),
                output_preview: None,
            },
            invocation_payload: None,
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "nested-tool-1".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: None,
        },
    )?;
    // Turn 2: the follow-up request carries the exec output back to the
    // model, and the model issues a `wait` on the same runtime cell.
    start_turn(&writer, "turn-2")?;
    let followup = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "previous_response_id": "resp-1",
            "input": [{
                "type": "custom_tool_call_output",
                "call_id": "call-code",
                "output": "Script running with cell ID 1"
            }]
        }),
    )?;
    writer.append(RawTraceEventPayload::InferenceStarted {
        inference_call_id: "inference-2".to_string(),
        thread_id: "thread-root".to_string(),
        codex_turn_id: "turn-2".to_string(),
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        request_payload: followup,
    })?;
    let wait_request = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "wait",
            "tool_namespace": null,
            "payload": {
                "type": "function",
                "arguments": "{\"cell_id\":\"1\"}"
            }
        }),
    )?;
    writer.append_with_context(
        trace_context("turn-2"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "wait-tool-1".to_string(),
            model_visible_call_id: Some("wait-call".to_string()),
            code_mode_runtime_tool_id: None,
            requester: RawToolCallRequester::Model,
            kind: ToolCallKind::Other {
                name: "wait".to_string(),
            },
            summary: ToolCallSummary::Generic {
                label: "wait".to_string(),
                input_preview: Some("{\"cell_id\":\"1\"}".to_string()),
                output_preview: None,
            },
            invocation_payload: Some(wait_request),
        },
    )?;
    writer.append_with_context(
        trace_context("turn-2"),
        RawTraceEventPayload::CodeCellEnded {
            runtime_cell_id: "1".to_string(),
            status: CodeCellRuntimeStatus::Completed,
            response_payload: None,
        },
    )?;
    // Replay the bundle and verify every edge the lifecycle should have created.
    let rollout = replay_bundle(temp.path())?;
    let code_cell_id = test_reduced_code_cell_id("call-code");
    let cell = &rollout.code_cells[&code_cell_id];
    let output_item_id = rollout.inference_calls["inference-2"]
        .request_item_ids
        .last()
        .expect("exec output item");
    assert_eq!(cell.thread_id, "thread-root");
    assert_eq!(cell.runtime_status, CodeCellRuntimeStatus::Completed);
    assert_eq!(cell.execution.status, ExecutionStatus::Completed);
    assert_eq!(cell.runtime_cell_id, Some("1".to_string()));
    assert_eq!(cell.nested_tool_call_ids, vec!["nested-tool-1"]);
    assert_eq!(cell.wait_tool_call_ids, vec!["wait-tool-1"]);
    assert_eq!(cell.output_item_ids, vec![output_item_id.clone()]);
    assert_eq!(
        rollout.conversation_items[output_item_id].produced_by,
        vec![ProducerRef::CodeCell {
            code_cell_id: code_cell_id.clone(),
        }]
    );
    assert_eq!(
        rollout.conversation_items[&cell.source_item_id].kind,
        ConversationItemKind::CustomToolCall,
    );
    Ok(())
}
// A cell that starts, responds, and ends before the inference response that
// carries its model-visible source item: every lifecycle event must be queued
// and replayed once the source item is finally reduced.
#[test]
fn fast_code_cell_lifecycle_waits_for_source_item() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "count files")]
        }),
    )?;
    writer.append(RawTraceEventPayload::InferenceStarted {
        inference_call_id: "inference-1".to_string(),
        thread_id: "thread-root".to_string(),
        codex_turn_id: "turn-1".to_string(),
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        request_payload: request,
    })?;
    // The full cell lifecycle runs before InferenceCompleted below.
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodeCellStarted {
            runtime_cell_id: "1".to_string(),
            model_visible_call_id: "call-code".to_string(),
            source_js: "not valid js".to_string(),
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodeCellInitialResponse {
            runtime_cell_id: "1".to_string(),
            status: CodeCellRuntimeStatus::Failed,
            response_payload: None,
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodeCellEnded {
            runtime_cell_id: "1".to_string(),
            status: CodeCellRuntimeStatus::Failed,
            response_payload: None,
        },
    )?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "custom_tool_call",
                "name": "exec",
                "call_id": "call-code",
                "input": "not valid js"
            }]
        }),
    )?;
    writer.append(RawTraceEventPayload::InferenceCompleted {
        inference_call_id: "inference-1".to_string(),
        response_id: Some("resp-1".to_string()),
        response_payload: response,
    })?;
    let rollout = replay_bundle(temp.path())?;
    let code_cell_id = test_reduced_code_cell_id("call-code");
    let cell = &rollout.code_cells[&code_cell_id];
    assert_eq!(cell.thread_id, "thread-root");
    assert_eq!(cell.runtime_status, CodeCellRuntimeStatus::Failed);
    assert_eq!(cell.execution.status, ExecutionStatus::Failed);
    assert_eq!(cell.runtime_cell_id, Some("1".to_string()));
    // The queued start must still resolve a real source conversation item.
    assert_eq!(
        rollout.conversation_items[&cell.source_item_id].kind,
        ConversationItemKind::CustomToolCall,
    );
    Ok(())
}
// A cancelled turn must close its still-running cell as Terminated/Cancelled,
// with the end markers taken from the turn-end event itself.
#[test]
fn cancelled_turn_terminates_unfinished_code_cell() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "count files")]
        }),
    )?;
    writer.append(RawTraceEventPayload::InferenceStarted {
        inference_call_id: "inference-1".to_string(),
        thread_id: "thread-root".to_string(),
        codex_turn_id: "turn-1".to_string(),
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        request_payload: request,
    })?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "custom_tool_call",
                "name": "exec",
                "call_id": "call-code",
                "input": "await tools.exec_command({cmd: 'slow'});"
            }]
        }),
    )?;
    writer.append(RawTraceEventPayload::InferenceCompleted {
        inference_call_id: "inference-1".to_string(),
        response_id: Some("resp-1".to_string()),
        response_payload: response,
    })?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodeCellStarted {
            runtime_cell_id: "1".to_string(),
            model_visible_call_id: "call-code".to_string(),
            source_js: "await tools.exec_command({cmd: 'slow'});".to_string(),
        },
    )?;
    // The cell never ends on its own; the turn is cancelled instead.
    let turn_end = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodexTurnEnded {
            codex_turn_id: "turn-1".to_string(),
            status: ExecutionStatus::Cancelled,
        },
    )?;
    let rollout = replay_bundle(temp.path())?;
    let code_cell_id = test_reduced_code_cell_id("call-code");
    let cell = &rollout.code_cells[&code_cell_id];
    assert_eq!(cell.runtime_status, CodeCellRuntimeStatus::Terminated);
    assert_eq!(cell.execution.status, ExecutionStatus::Cancelled);
    assert_eq!(cell.execution.ended_seq, Some(turn_end.seq));
    Ok(())
}
// Runtime cell ids are only unique per thread: two threads may both use
// runtime id "1", and each must resolve to its own reduced cell.
#[test]
fn runtime_code_cell_ids_can_repeat_across_threads() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    writer.append(RawTraceEventPayload::ThreadStarted {
        thread_id: "thread-child".to_string(),
        agent_path: "/root/child".to_string(),
        metadata_payload: None,
    })?;
    start_turn_for_thread(&writer, "thread-root", "turn-root")?;
    start_turn_for_thread(&writer, "thread-child", "turn-child")?;
    // Run the same start/complete/end script in both threads, reusing
    // runtime cell id "1" in each.
    for (thread_id, turn_id, inference_call_id, call_id) in [
        ("thread-root", "turn-root", "inference-root", "call-root"),
        (
            "thread-child",
            "turn-child",
            "inference-child",
            "call-child",
        ),
    ] {
        let request = writer.write_json_payload(
            RawPayloadKind::InferenceRequest,
            &json!({
                "input": [message("user", "run code")]
            }),
        )?;
        writer.append(RawTraceEventPayload::InferenceStarted {
            inference_call_id: inference_call_id.to_string(),
            thread_id: thread_id.to_string(),
            codex_turn_id: turn_id.to_string(),
            model: "gpt-test".to_string(),
            provider_name: "test-provider".to_string(),
            request_payload: request,
        })?;
        writer.append_with_context(
            trace_context_for_thread(thread_id, turn_id),
            RawTraceEventPayload::CodeCellStarted {
                runtime_cell_id: "1".to_string(),
                model_visible_call_id: call_id.to_string(),
                source_js: "text('hi')".to_string(),
            },
        )?;
        let response = writer.write_json_payload(
            RawPayloadKind::InferenceResponse,
            &json!({
                "response_id": format!("resp-{thread_id}"),
                "output_items": [{
                    "type": "custom_tool_call",
                    "name": "exec",
                    "call_id": call_id,
                    "input": "text('hi')"
                }]
            }),
        )?;
        writer.append(RawTraceEventPayload::InferenceCompleted {
            inference_call_id: inference_call_id.to_string(),
            response_id: Some(format!("resp-{thread_id}")),
            response_payload: response,
        })?;
        writer.append_with_context(
            trace_context_for_thread(thread_id, turn_id),
            RawTraceEventPayload::CodeCellEnded {
                runtime_cell_id: "1".to_string(),
                status: CodeCellRuntimeStatus::Completed,
                response_payload: None,
            },
        )?;
    }
    let rollout = replay_bundle(temp.path())?;
    let root_cell_id = test_reduced_code_cell_id("call-root");
    let child_cell_id = test_reduced_code_cell_id("call-child");
    assert_eq!(rollout.code_cells[&root_cell_id].thread_id, "thread-root");
    assert_eq!(rollout.code_cells[&child_cell_id].thread_id, "thread-child");
    // Both cells keep the same runtime handle without colliding.
    assert_eq!(
        rollout.code_cells[&root_cell_id].runtime_cell_id,
        Some("1".to_string())
    );
    assert_eq!(
        rollout.code_cells[&child_cell_id].runtime_cell_id,
        Some("1".to_string())
    );
    Ok(())
}
/// Mirrors the reducer's stable `code_cell:<call-id>` derivation for assertions.
fn test_reduced_code_cell_id(model_visible_call_id: &str) -> String {
    let mut code_cell_id = String::from("code_cell:");
    code_cell_id.push_str(model_visible_call_id);
    code_cell_id
}

View File

@@ -0,0 +1,183 @@
//! Reducer support for the remote compaction lifecycle.
//!
//! This module owns request/checkpoint bookkeeping. Conversation item reconciliation stays in
//! `conversation` because it depends on the same normalization and reuse invariants as inference
//! requests.
use anyhow::Result;
use anyhow::bail;
use super::TraceReducer;
use crate::model::Compaction;
use crate::model::CompactionRequest;
use crate::model::CompactionRequestId;
use crate::model::ExecutionStatus;
use crate::model::ExecutionWindow;
use crate::payload::RawPayloadRef;
use crate::raw_event::RawEventSeq;
impl TraceReducer {
    /// Starts one upstream request attempt for a compaction operation.
    ///
    /// Several request attempts may share one `compaction_id`; each attempt
    /// has its own unique `compaction_request_id`. Duplicate starts, unknown
    /// threads/turns, and a turn that belongs to a different thread are all
    /// errors.
    pub(super) fn start_compaction_request(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        started: StartedCompactionRequest,
    ) -> Result<()> {
        // Request ids identify a single attempt; seeing one twice means the
        // trace is inconsistent.
        if self
            .rollout
            .compaction_requests
            .contains_key(&started.compaction_request_id)
        {
            bail!(
                "duplicate compaction request start for {}",
                started.compaction_request_id
            );
        }
        self.thread_mut(&started.thread_id)?;
        let Some(turn) = self.rollout.codex_turns.get(&started.codex_turn_id) else {
            bail!(
                "compaction request {} referenced unknown codex turn {}",
                started.compaction_request_id,
                started.codex_turn_id
            );
        };
        // The claimed thread must own the claimed turn.
        if turn.thread_id != started.thread_id {
            bail!(
                "compaction request {} used thread {}, but codex turn {} belongs to {}",
                started.compaction_request_id,
                started.thread_id,
                started.codex_turn_id,
                turn.thread_id
            );
        }
        self.rollout.compaction_requests.insert(
            started.compaction_request_id.clone(),
            CompactionRequest {
                compaction_request_id: started.compaction_request_id,
                compaction_id: started.compaction_id,
                thread_id: started.thread_id,
                codex_turn_id: started.codex_turn_id,
                // The window stays Running until the completion event arrives.
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                model: started.model,
                provider_name: started.provider_name,
                raw_request_payload_id: started.request_payload.raw_payload_id,
                raw_response_payload_id: None,
            },
        );
        Ok(())
    }
    /// Completes an upstream compaction request attempt without modifying conversation history.
    ///
    /// The request/response payloads are evidence for the remote call. The live
    /// conversation changes only when a separate install event provides the checkpoint.
    pub(super) fn complete_compaction_request(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        compaction_id: String,
        compaction_request_id: CompactionRequestId,
        status: ExecutionStatus,
        response_payload: Option<RawPayloadRef>,
    ) -> Result<()> {
        let Some(request) = self
            .rollout
            .compaction_requests
            .get_mut(&compaction_request_id)
        else {
            bail!(
                "compaction request completion referenced unknown request {compaction_request_id}"
            );
        };
        // The completion must agree with the start about which compaction
        // operation this attempt belongs to.
        if request.compaction_id != compaction_id {
            bail!(
                "compaction request {compaction_request_id} completion used compaction {compaction_id}, but start used {}",
                request.compaction_id
            );
        }
        request.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
        request.execution.ended_seq = Some(seq);
        request.execution.status = status;
        request.raw_response_payload_id = response_payload.map(|payload| payload.raw_payload_id);
        Ok(())
    }
    /// Installs a compaction checkpoint into the reduced conversation graph.
    ///
    /// This is the semantic boundary where replacement history becomes the live
    /// thread history; request attempts alone do not imply that change.
    pub(super) fn reduce_compaction_installed_event(
        &mut self,
        wall_time_unix_ms: i64,
        thread_id: String,
        codex_turn_id: String,
        compaction_id: String,
        checkpoint_payload: RawPayloadRef,
    ) -> Result<()> {
        if self.rollout.compactions.contains_key(&compaction_id) {
            bail!("duplicate compaction install for {compaction_id}");
        }
        self.thread_mut(&thread_id)?;
        let Some(turn) = self.rollout.codex_turns.get(&codex_turn_id) else {
            bail!(
                "compaction install {compaction_id} referenced unknown codex turn {codex_turn_id}"
            );
        };
        if turn.thread_id != thread_id {
            bail!(
                "compaction install {compaction_id} used thread {thread_id}, but codex turn {codex_turn_id} belongs to {}",
                turn.thread_id
            );
        }
        let checkpoint = self.reduce_compaction_checkpoint(
            wall_time_unix_ms,
            &thread_id,
            codex_turn_id.as_str(),
            &compaction_id,
            &checkpoint_payload,
        )?;
        // Gather every request attempt that contributed to this compaction.
        let request_ids = self
            .rollout
            .compaction_requests
            .values()
            .filter(|request| request.compaction_id == compaction_id)
            .map(|request| request.compaction_request_id.clone())
            .collect();
        // Remember the replacement items so the next full inference request
        // for this thread is reconciled against post-compaction history.
        self.pending_compaction_replacement_item_ids
            .insert(thread_id.clone(), checkpoint.replacement_item_ids.clone());
        self.rollout.compactions.insert(
            compaction_id.clone(),
            Compaction {
                compaction_id,
                thread_id,
                codex_turn_id,
                installed_at_unix_ms: wall_time_unix_ms,
                marker_item_id: checkpoint.marker_item_id,
                request_ids,
                input_item_ids: checkpoint.input_item_ids,
                replacement_item_ids: checkpoint.replacement_item_ids,
            },
        );
        Ok(())
    }
}
/// Raw compaction-request start fields after dispatch has stripped the event envelope.
pub(super) struct StartedCompactionRequest {
    // Logical compaction operation this attempt belongs to; several request
    // attempts may share one compaction id.
    pub(super) compaction_id: String,
    // Unique id of this single upstream request attempt.
    pub(super) compaction_request_id: String,
    // Thread whose history is being compacted.
    pub(super) thread_id: String,
    // Codex turn that issued the request; must belong to `thread_id`.
    pub(super) codex_turn_id: String,
    // Model and provider used for the upstream call.
    pub(super) model: String,
    pub(super) provider_name: String,
    // Evidence payload for the outgoing request body.
    pub(super) request_payload: RawPayloadRef,
}
View File

@@ -0,0 +1,700 @@
//! Conversation reduction from model-facing payload snapshots.
//!
//! Inference request inputs and response outputs are both part of the logical
//! conversation because they are the payloads exchanged with the model. Runtime
//! observations, such as local tool output, stay outside the transcript until a
//! later model-facing payload carries their content.
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use serde_json::Value;
use self::normalize::NormalizedConversationItem;
use super::TraceReducer;
use crate::model::CompactionId;
use crate::model::ConversationBody;
use crate::model::ConversationItem;
use crate::model::ConversationItemKind;
use crate::model::ConversationPart;
use crate::model::ConversationRole;
use crate::model::InferenceCallId;
use crate::model::ProducerRef;
use crate::payload::RawPayloadRef;
mod normalize;
impl TraceReducer {
    /// Reduces an inference request input snapshot into model-visible conversation items.
    ///
    /// Request snapshots are reconciled by position against the previous model-visible
    /// snapshot for the thread so repeated history reuses ids while newly inserted
    /// items remain distinct.
    pub(super) fn reduce_inference_request(
        &mut self,
        wall_time_unix_ms: i64,
        inference_call_id: &InferenceCallId,
        thread_id: &str,
        codex_turn_id: &str,
        request_payload: &RawPayloadRef,
    ) -> Result<Vec<String>> {
        let payload = self.read_payload_json(request_payload)?;
        let Some(input) = payload.get("input") else {
            bail!(
                "inference request payload {} did not contain input",
                request_payload.raw_payload_id
            );
        };
        let Some(request_items) = input.as_array() else {
            bail!(
                "inference request payload {} had non-array input",
                request_payload.raw_payload_id
            );
        };
        let items = normalize::normalize_model_items(request_items, request_payload)?;
        let previous_response_id = payload.get("previous_response_id").and_then(Value::as_str);
        // After compaction, the next full request is compared against the installed replacement
        // history, not the pre-compaction prompt. Any repeated developer/context prefix that Codex
        // reinjects must therefore become a fresh post-compaction conversation item.
        let post_compaction_snapshot = if previous_response_id.is_none() {
            self.pending_compaction_replacement_item_ids
                .get(thread_id)
                .cloned()
        } else {
            None
        };
        let request_item_ids = if let Some(previous_response_id) = previous_response_id {
            // Streaming follow-up requests can send only the new input plus a
            // `previous_response_id`. The trace model still exposes the full
            // model-visible input, so rebuild the omitted prefix from the
            // previous request and response before reducing this delta.
            let previous_items = self
                .rollout
                .inference_calls
                .values()
                .find(|inference| {
                    inference.thread_id == thread_id
                        && inference.upstream_request_id.as_deref() == Some(previous_response_id)
                })
                .map(|inference| {
                    let mut ids = inference.request_item_ids.clone();
                    ids.extend(inference.response_item_ids.clone());
                    ids
                });
            let Some(mut item_ids) = previous_items else {
                bail!(
                    "incremental inference request {inference_call_id} referenced unknown previous_response_id {previous_response_id}"
                );
            };
            let delta_item_ids = self.reconcile_conversation_items(
                items,
                ReconcileItems {
                    thread_id,
                    codex_turn_id,
                    wall_time_unix_ms,
                    produced_by: Vec::new(),
                    start_index: item_ids.len(),
                    mode: ReconcileMode::AppendOnly,
                    snapshot_override: None,
                },
            )?;
            item_ids.extend(delta_item_ids);
            item_ids
        } else {
            self.reconcile_conversation_items(
                items,
                ReconcileItems {
                    thread_id,
                    codex_turn_id,
                    wall_time_unix_ms,
                    produced_by: Vec::new(),
                    start_index: 0,
                    mode: ReconcileMode::FullSnapshot,
                    snapshot_override: post_compaction_snapshot.as_deref(),
                },
            )?
        };
        self.append_thread_conversation_items(thread_id, &request_item_ids)?;
        if post_compaction_snapshot.is_some() {
            // The replacement baseline has been consumed by this full request;
            // later requests reconcile against the regular thread snapshot.
            self.pending_compaction_replacement_item_ids
                .remove(thread_id);
        }
        self.thread_conversation_snapshots
            .insert(thread_id.to_string(), request_item_ids.clone());
        Ok(request_item_ids)
    }
    /// Reduces an inference response payload into conversation items produced by the call.
    pub(super) fn reduce_inference_response(
        &mut self,
        wall_time_unix_ms: i64,
        inference_call_id: &InferenceCallId,
        response_payload: &RawPayloadRef,
    ) -> Result<Vec<String>> {
        let payload = self.read_payload_json(response_payload)?;
        let Some(output_items) = payload.get("output_items").and_then(Value::as_array) else {
            bail!(
                "inference response payload {} did not contain output_items",
                response_payload.raw_payload_id
            );
        };
        let Some((thread_id, codex_turn_id)) = self
            .rollout
            .inference_calls
            .get(inference_call_id)
            .map(|inference| (inference.thread_id.clone(), inference.codex_turn_id.clone()))
        else {
            bail!("inference response referenced unknown call {inference_call_id}");
        };
        let items = normalize::normalize_model_items(output_items, response_payload)?;
        // Response output is appended immediately: it was produced by the model,
        // so it is conversation even before a later request carries it forward.
        let append_at = self
            .thread_conversation_snapshots
            .get(&thread_id)
            .map_or(0, Vec::len);
        let response_item_ids = self.reconcile_conversation_items(
            items,
            ReconcileItems {
                thread_id: &thread_id,
                codex_turn_id: &codex_turn_id,
                wall_time_unix_ms,
                produced_by: vec![ProducerRef::Inference {
                    inference_call_id: inference_call_id.clone(),
                }],
                start_index: append_at,
                mode: ReconcileMode::AppendOnly,
                snapshot_override: None,
            },
        )?;
        self.append_thread_conversation_items(&thread_id, &response_item_ids)?;
        self.thread_conversation_snapshots
            .entry(thread_id)
            .or_default()
            .extend(response_item_ids.clone());
        // Token usage is optional; record it on the call only when present and parseable.
        if let Some(usage) = payload
            .get("token_usage")
            .and_then(normalize::token_usage_from_value)
            && let Some(inference) = self.rollout.inference_calls.get_mut(inference_call_id)
        {
            inference.usage = Some(usage);
        }
        Ok(response_item_ids)
    }
    /// Maps normalized payload items onto conversation item ids.
    ///
    /// Items are matched positionally against the previous snapshot; what happens on
    /// a positional mismatch depends on `context.mode`. Matched items reuse their id,
    /// unmatched ones become new conversation items attributed to the current turn.
    fn reconcile_conversation_items(
        &mut self,
        items: Vec<NormalizedConversationItem>,
        context: ReconcileItems<'_>,
    ) -> Result<Vec<String>> {
        let previous_snapshot = context.snapshot_override.map_or_else(
            || {
                self.thread_conversation_snapshots
                    .get(context.thread_id)
                    .cloned()
                    .unwrap_or_default()
            },
            <[_]>::to_vec,
        );
        let mut item_ids = Vec::with_capacity(items.len());
        for (offset, item) in items.into_iter().enumerate() {
            let index = context.start_index + offset;
            // `item` is consumed when a new conversation item is created, so keep a
            // copy for the post-reconciliation linking steps below.
            let tool_link_item = item.clone();
            self.ensure_call_id_consistency(context.thread_id, &item)?;
            self.ensure_reasoning_consistency(context.thread_id, &item)?;
            let item_id = if let Some(previous_item_id) = previous_snapshot.get(index) {
                if self.item_matches(previous_item_id, &item) {
                    previous_item_id.clone()
                } else if matches!(context.mode, ReconcileMode::FullSnapshot) {
                    self.find_matching_snapshot_item(&previous_snapshot, &item_ids, &item)
                        .unwrap_or_else(|| {
                            self.create_conversation_item(
                                context.thread_id,
                                Some(context.codex_turn_id.to_string()),
                                context.wall_time_unix_ms,
                                item,
                                context.produced_by.clone(),
                            )
                        })
                } else {
                    let codex_turn_id = context.codex_turn_id;
                    let thread_id = context.thread_id;
                    bail!(
                        "model conversation mismatch while reducing turn {codex_turn_id} for \
                         thread {thread_id} at item index {index}: existing item \
                         {previous_item_id} does not match the current model payload item"
                    );
                }
            } else if matches!(context.mode, ReconcileMode::FullSnapshot) {
                self.find_matching_snapshot_item(&previous_snapshot, &item_ids, &item)
                    .unwrap_or_else(|| {
                        self.create_conversation_item(
                            context.thread_id,
                            Some(context.codex_turn_id.to_string()),
                            context.wall_time_unix_ms,
                            item,
                            context.produced_by.clone(),
                        )
                    })
            } else {
                self.create_conversation_item(
                    context.thread_id,
                    Some(context.codex_turn_id.to_string()),
                    context.wall_time_unix_ms,
                    item,
                    context.produced_by.clone(),
                )
            };
            self.update_conversation_item_from_sighting(
                &item_id,
                &tool_link_item,
                &context.produced_by,
            )?;
            self.attach_model_visible_tool_item(
                &item_id,
                tool_link_item.call_id.as_deref(),
                &tool_link_item.kind,
            )?;
            self.attach_model_visible_code_cell_item(
                &item_id,
                tool_link_item.call_id.as_deref(),
                &tool_link_item.kind,
            )?;
            self.resolve_pending_agent_edges_for_item(&item_id)?;
            item_ids.push(item_id);
        }
        self.flush_pending_code_cell_starts()?;
        Ok(item_ids)
    }
    /// Reduces a compaction checkpoint payload into installed replacement history.
    ///
    /// The returned ids let the compaction reducer record both the boundary marker
    /// and the snapshot that future full requests should reconcile against.
    pub(super) fn reduce_compaction_checkpoint(
        &mut self,
        wall_time_unix_ms: i64,
        thread_id: &str,
        codex_turn_id: &str,
        compaction_id: &CompactionId,
        checkpoint_payload: &RawPayloadRef,
    ) -> Result<ReducedCompactionCheckpoint> {
        let payload = self.read_payload_json(checkpoint_payload)?;
        let input_history = required_array(&payload, "input_history", checkpoint_payload)?;
        let replacement_history =
            required_array(&payload, "replacement_history", checkpoint_payload)?;
        let input_items = normalize::normalize_model_items(input_history, checkpoint_payload)?;
        let replacement_items =
            normalize::normalize_model_items(replacement_history, checkpoint_payload)?;
        let input_candidates = self
            .thread_conversation_snapshots
            .get(thread_id)
            .cloned()
            .unwrap_or_default();
        let input_item_ids = self.reconcile_detached_conversation_items(
            input_items,
            DetachedReconcileItems {
                thread_id,
                codex_turn_id,
                wall_time_unix_ms,
                produced_by: Vec::new(),
                candidates: input_candidates,
            },
        )?;
        // A compaction checkpoint has two transcript effects. First, record the structural
        // boundary where old live history ended. Then append the replacement items, including
        // the provider-visible summary item if the compact endpoint returned one.
        let marker_item_id = self.create_conversation_item(
            thread_id,
            Some(codex_turn_id.to_string()),
            wall_time_unix_ms,
            NormalizedConversationItem {
                role: ConversationRole::Assistant,
                channel: None,
                kind: ConversationItemKind::CompactionMarker,
                // The summary is a separate model/provider-visible item. Keep the marker body
                // empty so transcript renderers cannot mistake the boundary for prompt content.
                body: ConversationBody { parts: Vec::new() },
                call_id: None,
            },
            vec![ProducerRef::Compaction {
                compaction_id: compaction_id.clone(),
            }],
        );
        let replacement_item_ids = self.reconcile_detached_conversation_items(
            replacement_items,
            DetachedReconcileItems {
                thread_id,
                codex_turn_id,
                wall_time_unix_ms,
                produced_by: vec![ProducerRef::Compaction {
                    compaction_id: compaction_id.clone(),
                }],
                // Replacement history is a rewrite boundary. Even if the compact endpoint emits
                // text that matches old history, the installed item is a new post-compaction
                // conversation item and should not reuse a pre-compaction ID.
                candidates: Vec::new(),
            },
        )?;
        self.append_thread_conversation_items(thread_id, &input_item_ids)?;
        self.append_thread_conversation_items(thread_id, std::slice::from_ref(&marker_item_id))?;
        self.append_thread_conversation_items(thread_id, &replacement_item_ids)?;
        Ok(ReducedCompactionCheckpoint {
            input_item_ids,
            marker_item_id,
            replacement_item_ids,
        })
    }
    /// Reconciles items against an explicit candidate set rather than a positional
    /// snapshot; each candidate id is reused at most once, everything else is created.
    fn reconcile_detached_conversation_items(
        &mut self,
        items: Vec<NormalizedConversationItem>,
        context: DetachedReconcileItems<'_>,
    ) -> Result<Vec<String>> {
        let mut item_ids = Vec::with_capacity(items.len());
        for item in items {
            // `item` is consumed on creation; keep a copy for the linking steps below.
            let tool_link_item = item.clone();
            self.ensure_call_id_consistency(context.thread_id, &item)?;
            self.ensure_reasoning_consistency(context.thread_id, &item)?;
            let item_id = self
                .find_matching_snapshot_item(&context.candidates, &item_ids, &item)
                .unwrap_or_else(|| {
                    self.create_conversation_item(
                        context.thread_id,
                        Some(context.codex_turn_id.to_string()),
                        context.wall_time_unix_ms,
                        item,
                        context.produced_by.clone(),
                    )
                });
            self.update_conversation_item_from_sighting(
                &item_id,
                &tool_link_item,
                &context.produced_by,
            )?;
            self.attach_model_visible_tool_item(
                &item_id,
                tool_link_item.call_id.as_deref(),
                &tool_link_item.kind,
            )?;
            self.attach_model_visible_code_cell_item(
                &item_id,
                tool_link_item.call_id.as_deref(),
                &tool_link_item.kind,
            )?;
            self.resolve_pending_agent_edges_for_item(&item_id)?;
            item_ids.push(item_id);
        }
        self.flush_pending_code_cell_starts()?;
        Ok(item_ids)
    }
    /// Allocates a fresh conversation item id and records the item in the rollout.
    fn create_conversation_item(
        &mut self,
        thread_id: &str,
        codex_turn_id: Option<String>,
        first_seen_at_unix_ms: i64,
        item: NormalizedConversationItem,
        produced_by: Vec<ProducerRef>,
    ) -> String {
        let item_id = self.next_conversation_item_id();
        self.rollout.conversation_items.insert(
            item_id.clone(),
            ConversationItem {
                item_id: item_id.clone(),
                thread_id: thread_id.to_string(),
                codex_turn_id,
                first_seen_at_unix_ms,
                role: item.role,
                channel: item.channel,
                kind: item.kind,
                body: item.body,
                call_id: item.call_id,
                produced_by,
            },
        );
        item_id
    }
    /// Folds a later sighting of an existing item back into it: merges reasoning
    /// bodies and records any producers not yet attributed to the item.
    fn update_conversation_item_from_sighting(
        &mut self,
        item_id: &str,
        normalized: &NormalizedConversationItem,
        produced_by: &[ProducerRef],
    ) -> Result<()> {
        let Some(item) = self.rollout.conversation_items.get_mut(item_id) else {
            bail!("conversation item {item_id} was referenced before it was created");
        };
        if item.kind == ConversationItemKind::Reasoning {
            merge_reasoning_body(&mut item.body, &normalized.body)?;
        }
        for producer in produced_by {
            if !item.produced_by.contains(producer) {
                item.produced_by.push(producer.clone());
            }
        }
        Ok(())
    }
    /// Appends item ids to the thread's conversation, skipping ids already present.
    fn append_thread_conversation_items(
        &mut self,
        thread_id: &str,
        item_ids: &[String],
    ) -> Result<()> {
        let thread = self.thread_mut(thread_id)?;
        for item_id in item_ids {
            if !thread.conversation_item_ids.contains(item_id) {
                thread.conversation_item_ids.push(item_id.clone());
            }
        }
        Ok(())
    }
    /// Finds a snapshot item not yet reused in this run whose content matches `normalized`.
    fn find_matching_snapshot_item(
        &self,
        previous_snapshot: &[String],
        used_item_ids: &[String],
        normalized: &NormalizedConversationItem,
    ) -> Option<String> {
        previous_snapshot
            .iter()
            .find(|item_id| {
                !used_item_ids.contains(item_id) && self.item_matches(item_id, normalized)
            })
            .cloned()
    }
    /// Rejects reuse of a model-visible call id with different content within a thread.
    fn ensure_call_id_consistency(
        &self,
        thread_id: &str,
        normalized: &NormalizedConversationItem,
    ) -> Result<()> {
        let Some(call_id) = normalized.call_id.as_deref() else {
            return Ok(());
        };
        for item in self.rollout.conversation_items.values() {
            if item.thread_id == thread_id
                && item.call_id.as_deref() == Some(call_id)
                && item.kind == normalized.kind
                && !conversation_item_matches(item, normalized)
            {
                bail!("model-visible call id {call_id} was reused with different content");
            }
        }
        Ok(())
    }
    /// Rejects reuse of a reasoning encrypted blob with conflicting readable content.
    fn ensure_reasoning_consistency(
        &self,
        thread_id: &str,
        normalized: &NormalizedConversationItem,
    ) -> Result<()> {
        if normalized.kind != ConversationItemKind::Reasoning {
            return Ok(());
        };
        let Some((label, value)) = reasoning_encoded_part(&normalized.body) else {
            return Ok(());
        };
        for item in self.rollout.conversation_items.values() {
            if item.thread_id == thread_id
                && item.kind == ConversationItemKind::Reasoning
                && item.channel == normalized.channel
                && reasoning_encoded_part(&item.body) == Some((label, value))
                && !reasoning_body_matches(&item.body, &normalized.body)
            {
                bail!("reasoning encrypted_content was reused with different readable content");
            }
        }
        Ok(())
    }
    /// Returns true when the stored item exists and matches the normalized content.
    fn item_matches(&self, item_id: &str, normalized: &NormalizedConversationItem) -> bool {
        let Some(item) = self.rollout.conversation_items.get(item_id) else {
            return false;
        };
        conversation_item_matches(item, normalized)
    }
    /// Returns the next sequential `conversation_item:<n>` id.
    fn next_conversation_item_id(&mut self) -> String {
        let ordinal = self.next_conversation_item_ordinal;
        self.next_conversation_item_ordinal += 1;
        format!("conversation_item:{ordinal}")
    }
}
/// How reconciliation treats a positional mismatch between a payload snapshot
/// and the previously reduced conversation.
#[derive(Clone, Copy)]
enum ReconcileMode {
    /// Full model requests are authoritative snapshots of the live context. The
    /// prompt builder can reorder already-observed items or replace history
    /// with synthetic summary messages, so item identity is "same content,
    /// reused at most once in this snapshot" rather than "same position only".
    FullSnapshot,
    /// Incremental request deltas and response outputs append to a known prefix.
    /// A mismatch at an occupied position means our reconstructed prefix is
    /// wrong and should fail replay.
    AppendOnly,
}
/// Context for reconciling an ordered run of normalized items against a thread snapshot.
struct ReconcileItems<'a> {
    /// Thread whose conversation is being reduced.
    thread_id: &'a str,
    /// Codex turn to attribute newly created items to.
    codex_turn_id: &'a str,
    /// Observation wall-clock time recorded on newly created items.
    wall_time_unix_ms: i64,
    /// Producers recorded on every item seen in this run.
    produced_by: Vec<ProducerRef>,
    /// Position in the previous snapshot where this run begins.
    start_index: usize,
    /// Snapshot-vs-append reconciliation semantics; see `ReconcileMode`.
    mode: ReconcileMode,
    /// Optional snapshot to reconcile against instead of the thread's latest one
    /// (used for post-compaction replacement history).
    snapshot_override: Option<&'a [String]>,
}
/// Context for reconciling items against an explicit candidate set rather than
/// a positional snapshot (used for compaction checkpoint histories).
struct DetachedReconcileItems<'a> {
    /// Thread whose conversation is being reduced.
    thread_id: &'a str,
    /// Codex turn to attribute newly created items to.
    codex_turn_id: &'a str,
    /// Observation wall-clock time recorded on newly created items.
    wall_time_unix_ms: i64,
    /// Producers recorded on every item seen in this run.
    produced_by: Vec<ProducerRef>,
    /// Existing item ids that may be reused when content matches; each id is
    /// consumed at most once.
    candidates: Vec<String>,
}
/// Conversation ids produced when a compaction checkpoint is installed.
///
/// The marker item records the boundary, while replacement items are the live
/// history that subsequent full requests should treat as their baseline.
pub(super) struct ReducedCompactionCheckpoint {
    /// Ids of the pre-compaction items the checkpoint consumed as input.
    pub(super) input_item_ids: Vec<String>,
    /// Id of the structural "history was cut here" marker item.
    pub(super) marker_item_id: String,
    /// Ids of the installed post-compaction replacement items.
    pub(super) replacement_item_ids: Vec<String>,
}
/// Looks up `key` in `payload` and requires it to be a JSON array.
///
/// Fails with a checkpoint-specific message naming the payload and key.
fn required_array<'a>(
    payload: &'a Value,
    key: &str,
    raw_payload: &RawPayloadRef,
) -> Result<&'a Vec<Value>> {
    let items = payload.get(key).and_then(Value::as_array);
    items.with_context(|| {
        let raw_payload_id = &raw_payload.raw_payload_id;
        format!("compaction checkpoint payload {raw_payload_id} did not contain array {key}")
    })
}
/// Returns true when a stored conversation item and a normalized payload item
/// carry the same identity (role, channel, kind, call id, and body).
fn conversation_item_matches(
    item: &ConversationItem,
    normalized: &NormalizedConversationItem,
) -> bool {
    if item.role != normalized.role
        || item.channel != normalized.channel
        || item.kind != normalized.kind
        || item.call_id != normalized.call_id
    {
        return false;
    }
    // Reasoning items compare through their stable encrypted identity; every
    // other kind compares structurally.
    let both_reasoning = item.kind == ConversationItemKind::Reasoning
        && normalized.kind == ConversationItemKind::Reasoning;
    if both_reasoning {
        reasoning_body_matches(&item.body, &normalized.body)
    } else {
        conversation_body_matches(&item.body, &normalized.body)
    }
}
/// Structural equality over body parts.
///
/// `Json` parts compare by summary only: the raw payload id is observation-local
/// and must not participate in identity.
fn conversation_body_matches(left: &ConversationBody, right: &ConversationBody) -> bool {
    if left.parts.len() != right.parts.len() {
        return false;
    }
    left.parts.iter().zip(&right.parts).all(|(lhs, rhs)| {
        if let (
            ConversationPart::Json {
                summary: left_summary,
                raw_payload_id: _,
            },
            ConversationPart::Json {
                summary: right_summary,
                raw_payload_id: _,
            },
        ) = (lhs, rhs)
        {
            left_summary == right_summary
        } else {
            lhs == rhs
        }
    })
}
/// Reasoning-aware body equality.
fn reasoning_body_matches(left: &ConversationBody, right: &ConversationBody) -> bool {
    if conversation_body_matches(left, right) {
        return true;
    }
    // The Responses API may return readable reasoning on completion, but later
    // request snapshots often replay only the encrypted blob. The blob is the
    // stable model-visible identity; readable text/summary is extra evidence
    // that must agree whenever both sides provide it.
    match (reasoning_encoded_part(left), reasoning_encoded_part(right)) {
        (Some(left_encoded), Some(right_encoded)) => {
            left_encoded == right_encoded && readable_reasoning_parts_match(left, right)
        }
        _ => false,
    }
}
/// Merges a new sighting of a reasoning body into the stored one.
///
/// Fails when the encrypted identity is reused with conflicting readable text;
/// otherwise upgrades an encrypted-only body once readable parts appear.
fn merge_reasoning_body(
    existing: &mut ConversationBody,
    incoming: &ConversationBody,
) -> Result<()> {
    // Structurally identical bodies need no merging.
    if conversation_body_matches(existing, incoming) {
        return Ok(());
    }
    if !reasoning_body_matches(existing, incoming) {
        bail!("reasoning encrypted_content was reused with different readable content");
    }
    let existing_is_opaque = readable_reasoning_parts(existing).is_empty();
    let incoming_is_readable = !readable_reasoning_parts(incoming).is_empty();
    if existing_is_opaque && incoming_is_readable {
        existing.parts = incoming.parts.clone();
    }
    Ok(())
}
/// Returns the first `Encoded` part of a body as a `(label, value)` pair, if any.
fn reasoning_encoded_part(body: &ConversationBody) -> Option<(&str, &str)> {
    for part in &body.parts {
        if let ConversationPart::Encoded { label, value } = part {
            return Some((label.as_str(), value.as_str()));
        }
    }
    None
}
/// Compares the readable (text/summary) reasoning parts of two bodies.
///
/// Readable text is optional evidence: it only has to agree when both sides carry it.
fn readable_reasoning_parts_match(left: &ConversationBody, right: &ConversationBody) -> bool {
    let left_parts = readable_reasoning_parts(left);
    let right_parts = readable_reasoning_parts(right);
    if left_parts.is_empty() || right_parts.is_empty() {
        return true;
    }
    left_parts == right_parts
}
/// Collects the readable parts of a body: `Text` and `Summary` only.
fn readable_reasoning_parts(body: &ConversationBody) -> Vec<&ConversationPart> {
    let mut readable = Vec::new();
    for part in &body.parts {
        if matches!(
            part,
            ConversationPart::Text { .. } | ConversationPart::Summary { .. }
        ) {
            readable.push(part);
        }
    }
    readable
}
#[cfg(test)]
#[path = "conversation_tests.rs"]
mod tests;

View File

@@ -0,0 +1,446 @@
//! Normalization from Responses-shaped JSON items into conversation item data.
use anyhow::Result;
use anyhow::bail;
use serde_json::Value;
use crate::model::ConversationBody;
use crate::model::ConversationChannel;
use crate::model::ConversationItemKind;
use crate::model::ConversationPart;
use crate::model::ConversationRole;
use crate::model::TokenUsage;
use crate::payload::RawPayloadRef;
/// Conversation fields parsed from one Responses item before trace identity.
///
/// IDs and provenance are assigned after positional reconciliation. Keeping the
/// normalized data separate from `ConversationItem` makes reuse vs insertion a
/// single reducer decision instead of something the parser has to know about.
#[derive(Clone)]
pub(super) struct NormalizedConversationItem {
    /// Speaker role parsed from the item.
    pub(super) role: ConversationRole,
    /// Channel, when the item carries one (derived from `phase` or the item type).
    pub(super) channel: Option<ConversationChannel>,
    /// Structural kind (message, reasoning, tool call, ...).
    pub(super) kind: ConversationItemKind,
    /// Parsed body parts.
    pub(super) body: ConversationBody,
    /// Tool-call correlation id, when the item carries one.
    pub(super) call_id: Option<String>,
}
/// Normalizes a slice of Responses-shaped items, failing on the first
/// unparseable item.
pub(super) fn normalize_model_items(
    items: &[Value],
    raw_payload: &RawPayloadRef,
) -> Result<Vec<NormalizedConversationItem>> {
    items
        .iter()
        .map(|item| normalize_model_item(item, raw_payload))
        .collect()
}
/// Parses a token-usage object; returns `None` when any counter field is missing.
pub(super) fn token_usage_from_value(value: &Value) -> Option<TokenUsage> {
    let input_tokens = u64_field(value, "input_tokens")?;
    let cached_input_tokens = u64_field(value, "cached_input_tokens")?;
    let output_tokens = u64_field(value, "output_tokens")?;
    let reasoning_output_tokens = u64_field(value, "reasoning_output_tokens")?;
    Some(TokenUsage {
        input_tokens,
        cached_input_tokens,
        output_tokens,
        reasoning_output_tokens,
    })
}
/// Dispatches one Responses item to the normalizer for its `type`, failing on
/// items without a string type or with an unsupported type.
fn normalize_model_item(
    item: &Value,
    raw_payload: &RawPayloadRef,
) -> Result<NormalizedConversationItem> {
    let Some(item_type) = item.get("type").and_then(Value::as_str) else {
        bail!(
            "model item in payload {} did not contain a string type",
            raw_payload.raw_payload_id
        );
    };
    match item_type {
        "message" => normalize_message_item(item, raw_payload),
        "reasoning" => normalize_reasoning_item(item, raw_payload),
        // Function calls keep their arguments (parsed JSON when possible) as the body.
        "function_call" => Ok(NormalizedConversationItem {
            role: ConversationRole::Assistant,
            channel: Some(ConversationChannel::Commentary),
            kind: ConversationItemKind::FunctionCall,
            body: raw_text_or_json_body(item.get("arguments"), raw_payload),
            call_id: item
                .get("call_id")
                .and_then(Value::as_str)
                .map(ToString::to_string),
        }),
        "function_call_output" => Ok(NormalizedConversationItem {
            role: ConversationRole::Tool,
            channel: Some(ConversationChannel::Commentary),
            kind: ConversationItemKind::FunctionCallOutput,
            body: tool_output_body(item.get("output"), raw_payload),
            call_id: item
                .get("call_id")
                .and_then(Value::as_str)
                .map(ToString::to_string),
        }),
        "custom_tool_call" => Ok(NormalizedConversationItem {
            role: ConversationRole::Assistant,
            channel: Some(ConversationChannel::Commentary),
            kind: ConversationItemKind::CustomToolCall,
            body: custom_tool_call_body(item, raw_payload),
            call_id: item
                .get("call_id")
                .and_then(Value::as_str)
                .map(ToString::to_string),
        }),
        "custom_tool_call_output" => Ok(NormalizedConversationItem {
            role: ConversationRole::Tool,
            channel: Some(ConversationChannel::Commentary),
            kind: ConversationItemKind::CustomToolCallOutput,
            body: tool_output_body(item.get("output"), raw_payload),
            call_id: item
                .get("call_id")
                .and_then(Value::as_str)
                .map(ToString::to_string),
        }),
        // Provider-executed calls are folded into FunctionCall with the whole
        // item JSON as the body.
        "tool_search_call" | "web_search_call" | "image_generation_call" | "local_shell_call" => {
            Ok(NormalizedConversationItem {
                role: ConversationRole::Assistant,
                channel: Some(ConversationChannel::Commentary),
                kind: ConversationItemKind::FunctionCall,
                body: json_body(item, raw_payload),
                call_id: item
                    .get("call_id")
                    .and_then(Value::as_str)
                    .map(ToString::to_string),
            })
        }
        "tool_search_output" | "mcp_tool_call_output" => Ok(NormalizedConversationItem {
            role: ConversationRole::Tool,
            channel: Some(ConversationChannel::Commentary),
            kind: ConversationItemKind::FunctionCallOutput,
            body: json_body(item, raw_payload),
            call_id: item
                .get("call_id")
                .and_then(Value::as_str)
                .map(ToString::to_string),
        }),
        // Remote compaction summaries re-enter the conversation as summary-channel
        // messages whose identity is the encoded summary (see `compaction_body`).
        "compaction" | "compaction_summary" => Ok(NormalizedConversationItem {
            role: ConversationRole::Assistant,
            channel: Some(ConversationChannel::Summary),
            kind: ConversationItemKind::Message,
            body: compaction_body(item, raw_payload)?,
            call_id: None,
        }),
        _ => bail!(
            "unsupported model item type {item_type} in payload {}",
            raw_payload.raw_payload_id
        ),
    }
}
fn normalize_message_item(
item: &Value,
raw_payload: &RawPayloadRef,
) -> Result<NormalizedConversationItem> {
let Some(role) = item.get("role").and_then(Value::as_str) else {
bail!(
"message item in payload {} did not contain a string role",
raw_payload.raw_payload_id
);
};
let Some(role) = role_from_str(role) else {
bail!(
"unsupported message role {role} in payload {}",
raw_payload.raw_payload_id
);
};
Ok(NormalizedConversationItem {
role,
channel: item
.get("phase")
.and_then(Value::as_str)
.and_then(channel_from_phase),
kind: ConversationItemKind::Message,
body: ConversationBody {
parts: content_parts(item.get("content"), raw_payload),
},
call_id: None,
})
}
/// Normalizes a `reasoning` item: readable `content`, `summary`, and the
/// encrypted payload all become body parts; at least one must be present.
fn normalize_reasoning_item(
    item: &Value,
    raw_payload: &RawPayloadRef,
) -> Result<NormalizedConversationItem> {
    let mut parts = Vec::new();
    append_reasoning_parts(
        item,
        "content",
        ReasoningPartKind::Content,
        raw_payload,
        &mut parts,
    )?;
    append_reasoning_parts(
        item,
        "summary",
        ReasoningPartKind::Summary,
        raw_payload,
        &mut parts,
    )?;
    if let Some(encrypted_content) = item.get("encrypted_content") {
        // A null encrypted_content is tolerated and treated like an absent field;
        // any other non-string value is a malformed item.
        let encrypted_content = match encrypted_content {
            Value::Null => None,
            Value::String(encrypted_content) => Some(encrypted_content),
            _ => {
                bail!(
                    "reasoning item in payload {} had non-string encrypted_content",
                    raw_payload.raw_payload_id
                );
            }
        };
        if let Some(encrypted_content) = encrypted_content {
            parts.push(ConversationPart::Encoded {
                label: "encrypted_content".to_string(),
                value: encrypted_content.to_string(),
            });
        }
    }
    if parts.is_empty() {
        bail!(
            "reasoning item in payload {} contained no content, summary, or encrypted_content",
            raw_payload.raw_payload_id
        );
    }
    Ok(NormalizedConversationItem {
        role: ConversationRole::Assistant,
        channel: Some(ConversationChannel::Analysis),
        kind: ConversationItemKind::Reasoning,
        body: ConversationBody { parts },
        call_id: None,
    })
}
/// Which reasoning sub-array a parsed part came from.
#[derive(Clone, Copy)]
enum ReasoningPartKind {
    /// Entries from the item's `content` array.
    Content,
    /// Entries from the item's `summary` array.
    Summary,
}
/// Parses `item[key]` as a reasoning part array and appends the parsed parts.
///
/// An absent key contributes nothing. For `Content`, an explicit JSON null is
/// also tolerated; for `Summary` a null would fail the array check below.
/// Entry types are validated per kind and each entry must carry string text.
fn append_reasoning_parts(
    item: &Value,
    key: &str,
    kind: ReasoningPartKind,
    raw_payload: &RawPayloadRef,
    parts: &mut Vec<ConversationPart>,
) -> Result<()> {
    let Some(items) = item.get(key) else {
        return Ok(());
    };
    if matches!((kind, items), (ReasoningPartKind::Content, Value::Null)) {
        return Ok(());
    }
    let Some(items) = items.as_array() else {
        bail!(
            "reasoning item in payload {} had non-array {key}",
            raw_payload.raw_payload_id
        );
    };
    for content_item in items {
        let Some(item_type) = content_item.get("type").and_then(Value::as_str) else {
            bail!(
                "reasoning item in payload {} had {key} entry without string type",
                raw_payload.raw_payload_id
            );
        };
        // `expected_type` is only used to name the entry kind in the error below.
        let expected_type = match kind {
            ReasoningPartKind::Content => {
                if !matches!(item_type, "reasoning_text" | "text") {
                    bail!(
                        "reasoning item in payload {} had unsupported content type {item_type}",
                        raw_payload.raw_payload_id
                    );
                }
                "content"
            }
            ReasoningPartKind::Summary => {
                if item_type != "summary_text" {
                    bail!(
                        "reasoning item in payload {} had unsupported summary type {item_type}",
                        raw_payload.raw_payload_id
                    );
                }
                "summary"
            }
        };
        let Some(text) = content_item.get("text").and_then(Value::as_str) else {
            bail!(
                "reasoning item in payload {} had {expected_type} entry without string text",
                raw_payload.raw_payload_id
            );
        };
        match kind {
            ReasoningPartKind::Content => parts.push(ConversationPart::Text {
                text: text.to_string(),
            }),
            ReasoningPartKind::Summary => parts.push(ConversationPart::Summary {
                text: text.to_string(),
            }),
        }
    }
    Ok(())
}
/// Maps a Responses role string onto a `ConversationRole`; unknown roles map to `None`.
fn role_from_str(role: &str) -> Option<ConversationRole> {
    let parsed = match role {
        "system" => ConversationRole::System,
        "developer" => ConversationRole::Developer,
        "user" => ConversationRole::User,
        "assistant" => ConversationRole::Assistant,
        "tool" => ConversationRole::Tool,
        _ => return None,
    };
    Some(parsed)
}
/// Maps a message `phase` string onto a channel; unknown phases map to `None`.
fn channel_from_phase(phase: &str) -> Option<ConversationChannel> {
    let channel = match phase {
        "commentary" => ConversationChannel::Commentary,
        "final_answer" => ConversationChannel::Final,
        "summary" => ConversationChannel::Summary,
        _ => return None,
    };
    Some(channel)
}
/// Parses a message `content` array into body parts.
///
/// Text-like entries become `Text` parts; anything else (including entries
/// without a type, or a non-array `content`) becomes a payload reference.
/// Always returns at least one part.
fn content_parts(content: Option<&Value>, raw_payload: &RawPayloadRef) -> Vec<ConversationPart> {
    let Some(entries) = content.and_then(Value::as_array) else {
        return vec![payload_ref_part("content", raw_payload)];
    };
    let mut parts: Vec<ConversationPart> = entries
        .iter()
        .filter_map(|entry| match entry.get("type").and_then(Value::as_str) {
            // Text entries without string text contribute nothing.
            Some("input_text" | "output_text" | "text") => entry
                .get("text")
                .and_then(Value::as_str)
                .map(|text| ConversationPart::Text {
                    text: text.to_string(),
                }),
            Some("input_image") => Some(payload_ref_part("input_image", raw_payload)),
            Some(other) => Some(payload_ref_part(other, raw_payload)),
            None => Some(payload_ref_part("content", raw_payload)),
        })
        .collect();
    if parts.is_empty() {
        parts.push(payload_ref_part("empty_content", raw_payload));
    }
    parts
}
/// Builds the body of a custom tool call.
///
/// String input becomes a code part for the `exec` tool (treated as JavaScript
/// source) or a plain text part otherwise; non-string input falls back to the
/// whole item as JSON.
fn custom_tool_call_body(item: &Value, raw_payload: &RawPayloadRef) -> ConversationBody {
    match item.get("input").and_then(Value::as_str) {
        None => json_body(item, raw_payload),
        Some(input) => {
            let is_exec = item.get("name").and_then(Value::as_str) == Some("exec");
            let part = if is_exec {
                ConversationPart::Code {
                    language: "javascript".to_string(),
                    source: input.to_string(),
                }
            } else {
                ConversationPart::Text {
                    text: input.to_string(),
                }
            };
            ConversationBody { parts: vec![part] }
        }
    }
}
/// Builds a body from a value that is usually JSON-encoded text.
///
/// A string that parses as JSON becomes a summarized JSON part; a non-JSON
/// string stays text; other values are summarized as-is; an absent value falls
/// back to a payload reference.
fn raw_text_or_json_body(value: Option<&Value>, raw_payload: &RawPayloadRef) -> ConversationBody {
    let Some(value) = value else {
        return ConversationBody {
            parts: vec![payload_ref_part("payload", raw_payload)],
        };
    };
    if let Value::String(text) = value {
        match serde_json::from_str::<Value>(text) {
            Ok(json) => json_body(&json, raw_payload),
            Err(_) => ConversationBody {
                parts: vec![ConversationPart::Text { text: text.clone() }],
            },
        }
    } else {
        json_body(value, raw_payload)
    }
}
/// Builds the body of a tool output value.
///
/// Strings stay text, arrays are parsed like message content, any other value
/// is summarized as JSON, and an absent output becomes a payload reference.
fn tool_output_body(output: Option<&Value>, raw_payload: &RawPayloadRef) -> ConversationBody {
    let Some(value) = output else {
        return ConversationBody {
            parts: vec![payload_ref_part("tool_output", raw_payload)],
        };
    };
    if let Value::String(text) = value {
        ConversationBody {
            parts: vec![ConversationPart::Text { text: text.clone() }],
        }
    } else if value.is_array() {
        ConversationBody {
            parts: content_parts(output, raw_payload),
        }
    } else {
        json_body(value, raw_payload)
    }
}
/// Builds the body of a compaction summary item from its required encrypted payload.
fn compaction_body(item: &Value, raw_payload: &RawPayloadRef) -> Result<ConversationBody> {
    // `type: "compaction"` is the remote-compaction summary that later re-enters model requests.
    // The structural "history was cut here" marker is inserted separately when the checkpoint is
    // installed; payload refs are observation-local, so the encoded summary itself is identity.
    match item.get("encrypted_content").and_then(Value::as_str) {
        Some(encrypted_content) => Ok(ConversationBody {
            parts: vec![ConversationPart::Encoded {
                label: "encrypted_content".to_string(),
                value: encrypted_content.to_string(),
            }],
        }),
        None => bail!(
            "compaction item in payload {} did not contain string encrypted_content",
            raw_payload.raw_payload_id
        ),
    }
}
/// Builds a single-part body holding a truncated JSON summary plus a reference
/// to the raw payload it came from.
fn json_body(value: &Value, raw_payload: &RawPayloadRef) -> ConversationBody {
    let part = ConversationPart::Json {
        summary: summarize_json(value),
        raw_payload_id: raw_payload.raw_payload_id.clone(),
    };
    ConversationBody { parts: vec![part] }
}
/// Builds a labeled reference to the raw payload for content we do not inline.
fn payload_ref_part(label: &str, raw_payload: &RawPayloadRef) -> ConversationPart {
    let raw_payload_id = raw_payload.raw_payload_id.clone();
    ConversationPart::PayloadRef {
        label: label.to_string(),
        raw_payload_id,
    }
}
/// Serializes a JSON value and truncates the result to a short display summary.
///
/// Truncation backs off to the nearest `char` boundary: `serde_json::to_string`
/// emits raw (unescaped) UTF-8, so byte 240 may fall inside a multi-byte
/// sequence and `String::truncate` would panic on a non-boundary index.
fn summarize_json(value: &Value) -> String {
    const MAX_JSON_SUMMARY_LEN: usize = 240;
    let mut summary =
        serde_json::to_string(value).unwrap_or_else(|_| "<unserializable json>".to_string());
    if summary.len() > MAX_JSON_SUMMARY_LEN {
        let cut = (0..=MAX_JSON_SUMMARY_LEN)
            .rev()
            .find(|&index| summary.is_char_boundary(index))
            // Index 0 is always a char boundary, so the search cannot fail.
            .unwrap_or(0);
        summary.truncate(cut);
        summary.push_str("...");
    }
    summary
}
fn u64_field(value: &Value, field: &str) -> Option<u64> {
value
.get(field)
.and_then(Value::as_i64)
.map(|value| value.max(0) as u64)
}

View File

@@ -0,0 +1,808 @@
use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;
use crate::model::ConversationChannel;
use crate::model::ConversationItemKind;
use crate::model::ConversationPart;
use crate::model::ExecutionStatus;
use crate::model::ProducerRef;
use crate::model::ToolCallKind;
use crate::model::ToolCallSummary;
use crate::payload::RawPayloadKind;
use crate::raw_event::RawTraceEventPayload;
use crate::reducer::test_support::append_inference_completion;
use crate::reducer::test_support::append_inference_start;
use crate::reducer::test_support::create_started_writer;
use crate::reducer::test_support::expect_replay_error;
use crate::reducer::test_support::message;
use crate::reducer::test_support::start_turn;
use crate::reducer::test_support::trace_context;
use crate::replay_bundle;
// Request snapshots reuse conversation item ids only positionally: the second
// request's trailing "ok" is identical content at a new position, so it must
// become a distinct item rather than dedupe to the first request's "ok".
#[test]
fn request_snapshots_reuse_history_without_deduping_new_identical_items() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let first_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "ok")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", first_request)?;
    start_turn(&writer, "turn-2")?;
    let second_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                message("user", "ok"),
                message("assistant", "ack"),
                message("user", "ok")
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", second_request)?;
    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"].request_item_ids;
    let second = &rollout.inference_calls["inference-2"].request_item_ids;
    // Position 0 reuses the prior item; position 2 (same text) must not.
    assert_eq!(first.len(), 1);
    assert_eq!(second.len(), 3);
    assert_eq!(second[0], first[0]);
    assert_ne!(second[2], first[0]);
    assert_eq!(rollout.conversation_items.len(), 3);
    assert_eq!(
        rollout.threads["thread-root"].conversation_item_ids,
        *second
    );
    Ok(())
}
// Output items from a completed response enter the thread transcript directly
// after the request items that produced them.
#[test]
fn response_outputs_enter_thread_conversation_on_completion() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "run tests")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [
                {
                    "type": "message",
                    "role": "assistant",
                    "content": [{"type": "output_text", "text": "tests passed"}]
                }
            ]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    let rollout = replay_bundle(temp.path())?;
    let inference = &rollout.inference_calls["inference-1"];
    // Expected transcript: request items followed by response items, in order.
    let mut expected_thread_items = inference.request_item_ids.clone();
    expected_thread_items.extend(inference.response_item_ids.clone());
    assert_eq!(inference.response_item_ids.len(), 1);
    assert_eq!(
        rollout.threads["thread-root"].conversation_item_ids,
        expected_thread_items,
    );
    Ok(())
}
// A later full snapshot that repeats a prior tool-call item verbatim at the
// matching position reuses that item's id instead of minting a duplicate.
#[test]
fn later_full_request_reuses_prior_json_tool_call_by_position() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "run tests")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "function_call",
                "name": "shell",
                "arguments": "{\"cmd\":\"cargo test\"}",
                "call_id": "call-1"
            }]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    start_turn(&writer, "turn-2")?;
    let next_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                message("user", "run tests"),
                {
                    "type": "function_call",
                    "name": "shell",
                    "arguments": "{\"cmd\":\"cargo test\"}",
                    "call_id": "call-1"
                }
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", next_request)?;
    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"];
    let second = &rollout.inference_calls["inference-2"];
    // Second request is exactly: prior request item, then prior response item.
    assert_eq!(
        second.request_item_ids,
        vec![
            first.request_item_ids[0].clone(),
            first.response_item_ids[0].clone(),
        ],
    );
    assert_eq!(rollout.conversation_items.len(), 2);
    Ok(())
}
// An incremental request (`previous_response_id`) inherits the prior request
// and response items, appending only the new function-call output item.
#[test]
fn incremental_request_carries_prior_request_and_response_items_forward() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "run tests")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "token_usage": {
                "input_tokens": 10,
                "cached_input_tokens": 1,
                "output_tokens": 5,
                "reasoning_output_tokens": 2,
                "total_tokens": 15
            },
            "output_items": [
                {
                    "type": "function_call",
                    "name": "shell",
                    "arguments": "{\"cmd\":\"cargo test\"}",
                    "call_id": "call-1"
                }
            ]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    start_turn(&writer, "turn-2")?;
    let incremental_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "type": "response.create",
            "previous_response_id": "resp-1",
            "input": [
                {
                    "type": "function_call_output",
                    "call_id": "call-1",
                    "output": "tests passed"
                }
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", incremental_request)?;
    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"];
    let second = &rollout.inference_calls["inference-2"];
    assert_eq!(first.response_item_ids.len(), 1);
    // Inherited prefix (request + response of call 1) plus the fresh output item.
    assert_eq!(
        second.request_item_ids,
        vec![
            first.request_item_ids[0].clone(),
            first.response_item_ids[0].clone(),
            rollout.threads["thread-root"].conversation_item_ids[2].clone(),
        ],
    );
    assert_eq!(
        rollout.threads["thread-root"].conversation_item_ids,
        second.request_item_ids,
    );
    // Token usage from the response payload is recorded on the first call.
    assert_eq!(
        first.usage.as_ref().map(|usage| usage.input_tokens),
        Some(10),
    );
    Ok(())
}
// A reordered snapshot keeps ids for items that already exist (wherever they
// moved to), while a genuinely new summary message gets a fresh item id.
#[test]
fn full_request_snapshot_can_reorder_existing_items_and_insert_summary() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                message("developer", "follow the repo rules"),
                message("user", "count files")
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    start_turn(&writer, "turn-2")?;
    let compacted_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                message("user", "count files"),
                message("user", "summary from a compacted prior attempt"),
                message("developer", "follow the repo rules")
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", compacted_request)?;
    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"].request_item_ids;
    let second = &rollout.inference_calls["inference-2"].request_item_ids;
    // Moved items keep their ids; the inserted summary is a brand-new item.
    assert_eq!(second[0], first[1]);
    assert_eq!(second[2], first[0]);
    assert_ne!(second[1], first[0]);
    assert_ne!(second[1], first[1]);
    assert_eq!(rollout.conversation_items.len(), 3);
    Ok(())
}
// A reasoning output item reduces into three parts — raw text, summary text,
// and the encoded blob — in that order.
#[test]
fn reasoning_body_preserves_text_summary_and_encoded_content() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "think visibly")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "reasoning",
                "content": [{"type": "reasoning_text", "text": "raw reasoning"}],
                "summary": [{"type": "summary_text", "text": "brief summary"}],
                "encrypted_content": "encoded-reasoning"
            }]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    let rollout = replay_bundle(temp.path())?;
    let reasoning_item_id = &rollout.inference_calls["inference-1"].response_item_ids[0];
    assert_eq!(
        rollout.conversation_items[reasoning_item_id].body.parts,
        vec![
            ConversationPart::Text {
                text: "raw reasoning".to_string(),
            },
            ConversationPart::Summary {
                text: "brief summary".to_string(),
            },
            ConversationPart::Encoded {
                label: "encrypted_content".to_string(),
                value: "encoded-reasoning".to_string(),
            },
        ],
    );
    Ok(())
}
// A follow-up request that echoes the reasoning item with only its encrypted
// content (text stripped) still resolves to the prior response item, and the
// reduced item keeps the readable text observed when it was produced.
#[test]
fn encrypted_reasoning_reuses_response_item_in_later_request() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let user = message("user", "count files");
    let function_call = json!({
        "type": "function_call",
        "name": "shell",
        "arguments": "{\"cmd\":\"find . -maxdepth 1 -type f | wc -l\"}",
        "call_id": "call-1"
    });
    // Same encrypted blob in two forms: with and without readable content.
    let encrypted_reasoning = json!({
        "type": "reasoning",
        "summary": [],
        "encrypted_content": "encoded-reasoning"
    });
    let readable_reasoning = json!({
        "type": "reasoning",
        "content": [{"type": "text", "text": "need count"}],
        "summary": [],
        "encrypted_content": "encoded-reasoning"
    });
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [user]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [
                readable_reasoning,
                function_call
            ]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    start_turn(&writer, "turn-2")?;
    let followup = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                user,
                encrypted_reasoning,
                function_call,
                {
                    "type": "function_call_output",
                    "call_id": "call-1",
                    "output": "31\n"
                }
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", followup)?;
    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"];
    let second = &rollout.inference_calls["inference-2"];
    let output_item_id = rollout.threads["thread-root"].conversation_item_ids[3].clone();
    assert_eq!(
        second.request_item_ids,
        vec![
            first.request_item_ids[0].clone(),
            first.response_item_ids[0].clone(),
            first.response_item_ids[1].clone(),
            output_item_id,
        ],
    );
    // The reused item retains the readable text from its original observation.
    assert_eq!(
        rollout.conversation_items[&first.response_item_ids[0]]
            .body
            .parts,
        vec![
            ConversationPart::Text {
                text: "need count".to_string(),
            },
            ConversationPart::Encoded {
                label: "encrypted_content".to_string(),
                value: "encoded-reasoning".to_string(),
            },
        ],
    );
    assert_eq!(rollout.conversation_items.len(), 4);
    assert_eq!(
        rollout.threads["thread-root"].conversation_item_ids,
        second.request_item_ids,
    );
    Ok(())
}
// Reusing one encrypted reasoning blob with conflicting readable text is an
// identity violation the reducer must reject rather than silently merge.
#[test]
fn same_encrypted_reasoning_with_different_text_is_reducer_error() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let user = message("user", "count files");
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [user]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "reasoning",
                "content": [{"type": "text", "text": "first text"}],
                "summary": [],
                "encrypted_content": "encoded-reasoning"
            }]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    start_turn(&writer, "turn-2")?;
    // Same encrypted_content, different readable content: must fail replay.
    let conflicting_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                user,
                {
                    "type": "reasoning",
                    "content": [{"type": "text", "text": "different text"}],
                    "summary": [],
                    "encrypted_content": "encoded-reasoning"
                }
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", conflicting_request)?;
    expect_replay_error(
        &temp,
        "reasoning encrypted_content was reused with different readable content",
    )
}
// A model-visible call id names one specific call; reusing it with different
// arguments is trace corruption and must abort replay.
#[test]
fn model_visible_call_id_reuse_with_different_content_is_reducer_error() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [{
                "type": "function_call",
                "name": "shell",
                "arguments": "{\"cmd\":\"cargo test\"}",
                "call_id": "call-1"
            }]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    start_turn(&writer, "turn-2")?;
    // Same call_id, different arguments.
    let conflicting_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [{
                "type": "function_call",
                "name": "shell",
                "arguments": "{\"cmd\":\"cargo check\"}",
                "call_id": "call-1"
            }]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", conflicting_request)?;
    expect_replay_error(
        &temp,
        "model-visible call id call-1 was reused with different content",
    )
}
// An unrecognized model item type must abort replay instead of being dropped.
#[test]
fn unsupported_model_item_is_reducer_error() -> anyhow::Result<()> {
    let bundle = TempDir::new()?;
    let trace = create_started_writer(&bundle)?;
    start_turn(&trace, "turn-1")?;
    let unknown_item = json!({
        "type": "new_unhandled_model_item",
        "payload": "must not be silently skipped"
    });
    let request_payload = trace.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({ "input": [unknown_item] }),
    )?;
    append_inference_start(&trace, "inference-1", "turn-1", request_payload)?;
    expect_replay_error(
        &bundle,
        "unsupported model item type new_unhandled_model_item",
    )
}
// A sampling request without an `input` array is malformed trace evidence.
#[test]
fn missing_request_input_is_reducer_error() -> anyhow::Result<()> {
    let bundle = TempDir::new()?;
    let trace = create_started_writer(&bundle)?;
    start_turn(&trace, "turn-1")?;
    let request_payload = trace.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({ "model": "gpt-test" }),
    )?;
    append_inference_start(&trace, "inference-1", "turn-1", request_payload)?;
    expect_replay_error(&bundle, "did not contain input")
}
// Incremental requests must chain off a response the reducer has already seen.
#[test]
fn unknown_previous_response_id_is_reducer_error() -> anyhow::Result<()> {
    let bundle = TempDir::new()?;
    let trace = create_started_writer(&bundle)?;
    start_turn(&trace, "turn-1")?;
    let request_payload = trace.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "previous_response_id": "resp-missing",
            "input": [message("user", "still here")]
        }),
    )?;
    append_inference_start(&trace, "inference-1", "turn-1", request_payload)?;
    expect_replay_error(&bundle, "unknown previous_response_id resp-missing")
}
// After a compaction checkpoint is installed, the next full request compares
// against the installed replacement history: repeated prefix messages become
// fresh items, replacement items are reused, and the compaction summary is a
// Message on the Summary channel carrying only the encoded blob.
#[test]
fn compaction_boundary_repeats_prefix_and_reuses_replacement_items() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let developer = message("developer", "follow repo rules");
    let user = message("user", "count files");
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [developer, user]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    let summary = message("user", "summary from compacted history");
    let compaction_summary = json!({
        "type": "compaction",
        "encrypted_content": "encrypted-summary",
    });
    // Install the checkpoint: pre-compaction input and its replacement history.
    let checkpoint = writer.write_json_payload(
        RawPayloadKind::CompactionCheckpoint,
        &json!({
            "input_history": [developer, user],
            "replacement_history": [user, summary, compaction_summary]
        }),
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CompactionInstalled {
            compaction_id: "compaction-1".to_string(),
            checkpoint_payload: checkpoint,
        },
    )?;
    start_turn(&writer, "turn-2")?;
    let post_compaction_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [developer, user, summary, compaction_summary]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", post_compaction_request)?;
    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"];
    let second = &rollout.inference_calls["inference-2"];
    let compaction = &rollout.compactions["compaction-1"];
    // Compaction input is exactly the pre-compaction request snapshot.
    assert_eq!(compaction.input_item_ids, first.request_item_ids);
    assert_eq!(second.request_item_ids.len(), 4);
    // Items after the repeated prefix reuse the installed replacement items.
    assert_eq!(
        &second.request_item_ids[1..],
        compaction.replacement_item_ids.as_slice()
    );
    // The structural marker is an empty-bodied item produced by the compaction.
    let marker = &rollout.conversation_items[&compaction.marker_item_id];
    assert_eq!(marker.kind, ConversationItemKind::CompactionMarker);
    assert_eq!(marker.body.parts, Vec::<ConversationPart>::new());
    assert_eq!(
        marker.produced_by,
        vec![ProducerRef::Compaction {
            compaction_id: "compaction-1".to_string()
        }],
    );
    // The repeated prefix is NOT the pre-compaction item; replacements are fresh too.
    assert_ne!(second.request_item_ids[0], first.request_item_ids[0]);
    assert_ne!(
        compaction.replacement_item_ids[0],
        first.request_item_ids[1]
    );
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[0]].produced_by,
        vec![ProducerRef::Compaction {
            compaction_id: "compaction-1".to_string()
        }],
    );
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[1]].produced_by,
        vec![ProducerRef::Compaction {
            compaction_id: "compaction-1".to_string()
        }],
    );
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[2]].channel,
        Some(ConversationChannel::Summary),
    );
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[2]].kind,
        ConversationItemKind::Message,
    );
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[2]]
            .body
            .parts,
        vec![ConversationPart::Encoded {
            label: "encrypted_content".to_string(),
            value: "encrypted-summary".to_string(),
        }],
    );
    Ok(())
}
// A tool call with a model-visible call id links both directions: back to the
// response item that requested it, and forward to the follow-up request's
// function_call_output item, which is marked as produced by the tool.
#[test]
fn tool_call_links_model_call_and_followup_output_items() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "run tests")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "function_call",
                "name": "exec_command",
                "arguments": "{\"cmd\":\"cargo test\"}",
                "call_id": "call-1"
            }]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    // Runtime tool lifecycle, keyed to the model-visible call id above.
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-1".to_string(),
            model_visible_call_id: Some("call-1".to_string()),
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::ExecCommand,
            summary: ToolCallSummary::Generic {
                label: "exec_command".to_string(),
                input_preview: Some("cargo test".to_string()),
                output_preview: None,
            },
            invocation_payload: None,
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "tool-1".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: None,
        },
    )?;
    start_turn(&writer, "turn-2")?;
    let followup = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "previous_response_id": "resp-1",
            "input": [{
                "type": "function_call_output",
                "call_id": "call-1",
                "output": "tests passed"
            }]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", followup)?;
    let rollout = replay_bundle(temp.path())?;
    let first_inference = &rollout.inference_calls["inference-1"];
    let second_inference = &rollout.inference_calls["inference-2"];
    let tool_call = &rollout.tool_calls["tool-1"];
    let output_item_id = second_inference
        .request_item_ids
        .last()
        .expect("follow-up output item");
    assert_eq!(
        first_inference.tool_call_ids_started_by_response,
        vec!["tool-1".to_string()],
    );
    assert_eq!(
        tool_call.model_visible_call_item_ids,
        first_inference.response_item_ids,
    );
    assert_eq!(
        tool_call.model_visible_output_item_ids,
        vec![output_item_id.clone()],
    );
    assert_eq!(
        rollout.conversation_items[output_item_id].produced_by,
        vec![ProducerRef::Tool {
            tool_call_id: "tool-1".to_string(),
        }],
    );
    Ok(())
}
// Inference starts must reference a codex turn that was started beforehand.
#[test]
fn inference_start_rejects_unknown_codex_turn() -> anyhow::Result<()> {
    let bundle = TempDir::new()?;
    let trace = create_started_writer(&bundle)?;
    let request_payload = trace.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "hello")]
        }),
    )?;
    append_inference_start(&trace, "inference-1", "turn-missing", request_payload)?;
    expect_replay_error(&bundle, "referenced unknown codex turn turn-missing")
}

View File

@@ -0,0 +1,143 @@
//! Inference call lifecycle reduction.
//!
//! Conversation request/response normalization lives in the conversation module;
//! this module owns the runtime envelope around those model-facing payloads.
use anyhow::Result;
use anyhow::bail;
use super::TraceReducer;
use crate::model::ExecutionStatus;
use crate::model::ExecutionWindow;
use crate::model::InferenceCall;
use crate::model::InferenceCallId;
use crate::payload::RawPayloadRef;
use crate::raw_event::RawEventSeq;
/// Raw inference-start fields after dispatch has stripped the common event envelope.
///
/// Keeping this as one argument prevents callsites from passing a long list of
/// adjacent strings whose ordering is easy to mix up.
pub(super) struct StartedInferenceCall {
    /// Identifier of the inference call; must not already exist in the rollout.
    pub(super) inference_call_id: InferenceCallId,
    /// Thread the call runs under; must match the owning turn's thread.
    pub(super) thread_id: String,
    /// Codex turn that issued the call; must already have been reduced.
    pub(super) codex_turn_id: String,
    /// Model name reported by the trace producer.
    pub(super) model: String,
    /// Provider the request was issued through.
    pub(super) provider_name: String,
    /// Reference to the raw request payload (the model-visible snapshot).
    pub(super) request_payload: RawPayloadRef,
}
impl TraceReducer {
    /// Starts an inference call and reduces its request payload into conversation items.
    ///
    /// Requests are model-visible transcript evidence, so the inference object is only
    /// inserted after the request snapshot has been normalized and linked to the turn.
    pub(super) fn start_inference_call(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        started: StartedInferenceCall,
    ) -> Result<()> {
        // Each inference call id may start at most once per trace.
        if self
            .rollout
            .inference_calls
            .contains_key(&started.inference_call_id)
        {
            bail!(
                "duplicate inference start for {}",
                started.inference_call_id
            );
        }
        let inference_call_id = started.inference_call_id.clone();
        let thread_id = started.thread_id.clone();
        let codex_turn_id = started.codex_turn_id.clone();
        let request_payload = started.request_payload.clone();
        // The referenced turn must exist and belong to the thread the event claims.
        let Some(turn) = self.rollout.codex_turns.get(&codex_turn_id) else {
            bail!(
                "inference start {inference_call_id} referenced unknown codex turn {codex_turn_id}"
            );
        };
        if turn.thread_id != thread_id {
            bail!(
                "inference start {inference_call_id} used thread {thread_id}, \
                but codex turn {codex_turn_id} belongs to {}",
                turn.thread_id
            );
        }
        let request_item_ids = self.reduce_inference_request(
            wall_time_unix_ms,
            &inference_call_id,
            &thread_id,
            &codex_turn_id,
            &request_payload,
        )?;
        // Only the error path of this lookup matters here: it rejects unknown
        // threads before the call object is inserted; the handle is unused.
        self.thread_mut(&thread_id)?;
        self.rollout.inference_calls.insert(
            inference_call_id.clone(),
            InferenceCall {
                inference_call_id,
                thread_id,
                codex_turn_id,
                // The window opens Running; completion fills the end fields.
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                model: started.model,
                provider_name: started.provider_name,
                upstream_request_id: None,
                request_item_ids,
                response_item_ids: Vec::new(),
                tool_call_ids_started_by_response: Vec::new(),
                usage: None,
                raw_request_payload_id: started.request_payload.raw_payload_id,
                raw_response_payload_id: None,
            },
        );
        Ok(())
    }
    /// Completes an inference call and, when present, reduces response output items.
    pub(super) fn complete_inference_call(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        inference_call_id: InferenceCallId,
        status: ExecutionStatus,
        response_id: Option<String>,
        response_payload: Option<RawPayloadRef>,
    ) -> Result<()> {
        // Fail fast with a precise error before any response reduction runs.
        if !self
            .rollout
            .inference_calls
            .contains_key(&inference_call_id)
        {
            bail!("inference completion referenced unknown call {inference_call_id}");
        }
        // Failed calls may complete without a payload; reduce outputs only when present.
        let response_item_ids = response_payload
            .as_ref()
            .map(|payload| {
                self.reduce_inference_response(wall_time_unix_ms, &inference_call_id, payload)
            })
            .transpose()?;
        // Re-borrow after reduction, which itself needed `&mut self`.
        let Some(inference) = self.rollout.inference_calls.get_mut(&inference_call_id) else {
            bail!("inference call {inference_call_id} disappeared during response reduction");
        };
        inference.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
        inference.execution.ended_seq = Some(seq);
        inference.execution.status = status;
        inference.upstream_request_id = response_id;
        inference.raw_response_payload_id = response_payload.map(|payload| payload.raw_payload_id);
        if let Some(response_item_ids) = response_item_ids {
            inference.response_item_ids = response_item_ids;
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,504 @@
//! Deterministic replay from raw trace events to `RolloutTrace`.
use std::collections::BTreeMap;
use std::fs::File;
use std::io::BufRead;
use std::io::BufReader;
use std::path::Path;
use std::path::PathBuf;
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use serde_json::Value;
use crate::bundle::MANIFEST_FILE_NAME;
use crate::bundle::RAW_EVENT_LOG_FILE_NAME;
use crate::bundle::REDUCED_TRACE_SCHEMA_VERSION;
use crate::bundle::TraceBundleManifest;
use crate::model::ExecutionStatus;
use crate::model::RolloutTrace;
use crate::payload::RawPayloadRef;
use crate::raw_event::RawTraceEvent;
use crate::raw_event::RawTraceEventPayload;
mod code_cell;
mod compaction;
mod conversation;
mod inference;
#[cfg(test)]
pub(crate) mod test_support;
mod thread;
mod tool;
use self::code_cell::PendingCodeCellLifecycleEvent;
use self::code_cell::PendingCodeCellStart;
use self::code_cell::StartedCodeCell;
use self::compaction::StartedCompactionRequest;
use self::inference::StartedInferenceCall;
use self::tool::ObservedAgentResultEdge;
use self::tool::PendingAgentInteractionEdge;
use self::tool::ToolCallStarted;
/// Replays a local trace bundle into a reduced rollout graph.
///
/// Reads the bundle manifest, then applies every raw trace event from the
/// event log in order. Blank log lines are skipped; unreadable or unparsable
/// lines abort replay with the 1-based line number attached.
///
/// # Errors
/// Fails if the manifest or event log cannot be opened or parsed, or if any
/// event violates a reducer invariant.
pub fn replay_bundle(bundle_dir: impl AsRef<Path>) -> Result<RolloutTrace> {
    let bundle_dir = bundle_dir.as_ref();
    // Resolve the manifest path once so both the open and the parse error can name it
    // (previously a missing manifest surfaced as a bare, path-less io::Error).
    let manifest_path = bundle_dir.join(MANIFEST_FILE_NAME);
    let manifest_file = File::open(&manifest_path)
        .with_context(|| format!("open {}", manifest_path.display()))?;
    let manifest: TraceBundleManifest = serde_json::from_reader(manifest_file)
        .with_context(|| format!("read {}", manifest_path.display()))?;
    let mut reducer = TraceReducer {
        rollout: RolloutTrace::new(
            REDUCED_TRACE_SCHEMA_VERSION,
            manifest.trace_id,
            manifest.rollout_id,
            manifest.root_thread_id,
            manifest.started_at_unix_ms,
        ),
        bundle_dir: bundle_dir.to_path_buf(),
        next_conversation_item_ordinal: 1,
        next_terminal_operation_ordinal: 1,
        thread_conversation_snapshots: BTreeMap::new(),
        pending_compaction_replacement_item_ids: BTreeMap::new(),
        code_cell_ids_by_runtime: BTreeMap::new(),
        pending_code_cell_starts: BTreeMap::new(),
        pending_code_cell_lifecycle_events: BTreeMap::new(),
        pending_agent_interaction_edges: Vec::new(),
    };
    let event_log_path = bundle_dir.join(RAW_EVENT_LOG_FILE_NAME);
    let event_log = File::open(&event_log_path)
        .with_context(|| format!("open trace event log {}", event_log_path.display()))?;
    for (line_index, line) in BufReader::new(event_log).lines().enumerate() {
        let line = line.with_context(|| format!("read trace event line {}", line_index + 1))?;
        if line.trim().is_empty() {
            continue;
        }
        let event: RawTraceEvent = serde_json::from_str(&line)
            .with_context(|| format!("parse trace event line {}", line_index + 1))?;
        reducer.apply_event(event)?;
    }
    // Spawn edges prefer the child task message as their target, but a child can
    // fail before that message is ever reduced. Only after replaying the whole
    // bundle do we know which spawn deliveries need the child-thread fallback.
    reducer.resolve_pending_spawn_edge_fallbacks()?;
    Ok(reducer.rollout)
}
/// Accumulating state for one deterministic replay pass over a raw trace event log.
struct TraceReducer {
    /// Reduced graph under construction; returned by `replay_bundle` on success.
    rollout: RolloutTrace,
    /// Root of the bundle on disk, used to resolve relative raw payload paths.
    bundle_dir: PathBuf,
    /// Monotonic counter for newly created conversation items (consumed in the
    /// conversation module; presumably mints stable item ids — confirm there).
    next_conversation_item_ordinal: u64,
    /// Monotonic counter for terminal operations (consumed elsewhere in the
    /// reducer; presumably mints operation ids — confirm at the use site).
    next_terminal_operation_ordinal: u64,
    /// Last model-visible conversation snapshot per thread.
    ///
    /// Requests and responses both advance this sequence because both are
    /// model-facing payloads. Repeated request snapshots reuse item IDs only
    /// when the same normalized item appears at the same position; identical
    /// content at a new position must remain a distinct conversation item.
    thread_conversation_snapshots: BTreeMap<String, Vec<String>>,
    /// Replacement snapshot installed by compaction but not yet seen in a sampling request.
    ///
    /// The first full request after compaction should compare against the installed replacement
    /// history, not against the pre-compaction request. That keeps repeated prefix/context messages
    /// as fresh post-compaction conversation items while still reusing the summary/replacement
    /// items that actually became live history.
    pending_compaction_replacement_item_ids: BTreeMap<String, Vec<String>>,
    /// Runtime cell ids indexed by thread-local code-mode handle.
    ///
    /// Reduced `CodeCellId`s are based on the model-visible `exec` call id
    /// because that is the durable source identity. Runtime lifecycle, nested
    /// tools, and `wait` calls arrive with the runtime-local `cell_id`, so this
    /// index is the one intentional bridge between those namespaces.
    code_cell_ids_by_runtime: BTreeMap<(String, String), String>,
    /// Code-cell starts whose model-visible `custom_tool_call` item has not
    /// been reduced yet.
    ///
    /// Core begins executing tools before the stream-completion hook records
    /// the response payload that requested them. Queueing keeps replay strict
    /// about eventual source-item ownership without requiring trace producers
    /// to reorder runtime events behind inference completion.
    pending_code_cell_starts: BTreeMap<String, PendingCodeCellStart>,
    /// Initial/end events that arrived while the matching start was queued.
    ///
    /// Fast cells can return before the inference response payload that proves
    /// the model-visible `exec` source item has been reduced. The start remains
    /// queued for ownership validation; these lifecycle events wait with it and
    /// are replayed in raw sequence order once the cell materializes.
    pending_code_cell_lifecycle_events: BTreeMap<String, Vec<PendingCodeCellLifecycleEvent>>,
    /// Multi-agent deliveries whose recipient-side transcript item has not been observed yet.
    ///
    /// V2 agent tools enqueue mailbox messages in the target thread. The trace event for the
    /// sending tool arrives before the recipient inference request materializes that mailbox item
    /// as a `ConversationItem`, so the reducer keeps the delivery edge pending until it can point
    /// at the exact model-visible item instead of a coarse thread.
    pending_agent_interaction_edges: Vec<PendingAgentInteractionEdge>,
}
impl TraceReducer {
fn read_payload_json(&self, payload: &RawPayloadRef) -> Result<Value> {
// Reducers keep raw bodies out of the graph, but typed replay sometimes
// needs a small subset of fields to build semantic objects.
let payload_path = self.bundle_dir.join(&payload.path);
let file = File::open(&payload_path)
.with_context(|| format!("open payload {}", payload.raw_payload_id))?;
serde_json::from_reader(file)
.with_context(|| format!("parse payload {}", payload.raw_payload_id))
}
fn apply_event(&mut self, event: RawTraceEvent) -> Result<()> {
// Raw payload refs are reducer-wide evidence, not owned by a single
// semantic arm. Keep this bookkeeping separate so typed reduction can
// stay strict without duplicating payload insertion in every case.
for payload in event.payload.raw_payload_refs() {
self.insert_raw_payload(payload);
}
match event.payload {
RawTraceEventPayload::RolloutStarted {
trace_id,
root_thread_id,
} => {
self.rollout.trace_id = trace_id;
self.rollout.root_thread_id = root_thread_id;
}
RawTraceEventPayload::RolloutEnded { status } => {
self.rollout.status = status;
self.rollout.ended_at_unix_ms = Some(event.wall_time_unix_ms);
}
RawTraceEventPayload::ThreadStarted {
thread_id,
agent_path,
metadata_payload,
} => {
self.start_thread(
event.seq,
event.wall_time_unix_ms,
thread_id,
agent_path,
metadata_payload,
)?;
}
RawTraceEventPayload::ThreadEnded { thread_id, status } => {
self.end_thread(event.seq, event.wall_time_unix_ms, thread_id, status)?;
}
RawTraceEventPayload::CodexTurnStarted {
codex_turn_id,
thread_id,
} => {
self.start_codex_turn(
event.seq,
event.wall_time_unix_ms,
codex_turn_id,
thread_id,
)?;
}
RawTraceEventPayload::CodexTurnEnded {
codex_turn_id,
status,
} => {
self.end_codex_turn(
event.seq,
event.wall_time_unix_ms,
event.thread_id,
codex_turn_id,
status,
)?;
}
RawTraceEventPayload::InferenceStarted {
inference_call_id,
thread_id,
codex_turn_id,
model,
provider_name,
request_payload,
} => {
self.start_inference_call(
event.seq,
event.wall_time_unix_ms,
StartedInferenceCall {
inference_call_id,
thread_id,
codex_turn_id,
model,
provider_name,
request_payload,
},
)?;
}
RawTraceEventPayload::InferenceCompleted {
inference_call_id,
response_id,
response_payload,
} => {
self.complete_inference_call(
event.seq,
event.wall_time_unix_ms,
inference_call_id,
ExecutionStatus::Completed,
response_id,
Some(response_payload),
)?;
}
RawTraceEventPayload::InferenceFailed {
inference_call_id,
partial_response_payload,
..
} => {
self.complete_inference_call(
event.seq,
event.wall_time_unix_ms,
inference_call_id,
ExecutionStatus::Failed,
/*response_id*/ None,
partial_response_payload,
)?;
}
RawTraceEventPayload::ProtocolEventObserved { .. } => {
// Protocol wrappers are raw debug breadcrumbs. Typed hooks own
// the reduced graph, so these payload refs are retained without
// creating semantic objects.
}
RawTraceEventPayload::ToolCallStarted {
tool_call_id,
model_visible_call_id,
code_mode_runtime_tool_id,
requester,
kind,
summary,
invocation_payload,
} => {
self.start_tool_call(
event.seq,
event.wall_time_unix_ms,
event.thread_id,
event.codex_turn_id,
ToolCallStarted {
tool_call_id,
model_visible_call_id,
code_mode_runtime_tool_id,
requester,
kind,
summary,
invocation_payload,
},
)?;
}
RawTraceEventPayload::ToolCallRuntimeStarted {
tool_call_id,
runtime_payload,
} => {
self.start_tool_runtime_observation(
event.seq,
event.wall_time_unix_ms,
tool_call_id,
runtime_payload,
)?;
}
RawTraceEventPayload::ToolCallRuntimeEnded {
tool_call_id,
status,
runtime_payload,
} => {
self.end_tool_runtime_observation(
event.seq,
event.wall_time_unix_ms,
tool_call_id,
status,
runtime_payload,
)?;
}
RawTraceEventPayload::ToolCallEnded {
tool_call_id,
status,
result_payload,
} => {
self.end_tool_call(
event.seq,
event.wall_time_unix_ms,
tool_call_id,
status,
result_payload,
)?;
}
RawTraceEventPayload::CodeCellStarted {
runtime_cell_id,
model_visible_call_id,
source_js,
} => {
let thread_id = self.code_cell_event_thread_id(
event.thread_id,
event.codex_turn_id.as_deref(),
&runtime_cell_id,
"code cell start",
)?;
let reduced_code_cell_id =
self.reduced_code_cell_id_for_model_visible_call(&model_visible_call_id);
self.record_runtime_code_cell_id(
&thread_id,
&runtime_cell_id,
&reduced_code_cell_id,
)?;
self.start_or_queue_code_cell(PendingCodeCellStart {
seq: event.seq,
wall_time_unix_ms: event.wall_time_unix_ms,
thread_id,
codex_turn_id: event.codex_turn_id,
started: StartedCodeCell {
code_cell_id: reduced_code_cell_id,
runtime_cell_id,
model_visible_call_id,
source_js,
},
})?;
}
RawTraceEventPayload::CodeCellInitialResponse {
runtime_cell_id,
status,
..
} => {
let thread_id = self.code_cell_event_thread_id(
event.thread_id,
event.codex_turn_id.as_deref(),
&runtime_cell_id,
"code cell initial response",
)?;
let code_cell_id = self.code_cell_id_for_runtime_cell_id(
&thread_id,
&runtime_cell_id,
"code cell initial response",
)?;
self.record_or_queue_code_cell_initial_response(
event.seq,
event.wall_time_unix_ms,
code_cell_id,
runtime_cell_id,
status,
)?;
}
RawTraceEventPayload::CodeCellEnded {
runtime_cell_id,
status,
..
} => {
let thread_id = self.code_cell_event_thread_id(
event.thread_id,
event.codex_turn_id.as_deref(),
&runtime_cell_id,
"code cell end",
)?;
let code_cell_id = self.code_cell_id_for_runtime_cell_id(
&thread_id,
&runtime_cell_id,
"code cell end",
)?;
self.end_or_queue_code_cell(
event.seq,
event.wall_time_unix_ms,
code_cell_id,
status,
)?;
}
RawTraceEventPayload::CompactionRequestStarted {
compaction_id,
compaction_request_id,
thread_id,
codex_turn_id,
model,
provider_name,
request_payload,
} => {
self.start_compaction_request(
event.seq,
event.wall_time_unix_ms,
StartedCompactionRequest {
compaction_id,
compaction_request_id,
thread_id,
codex_turn_id,
model,
provider_name,
request_payload,
},
)?;
}
RawTraceEventPayload::CompactionRequestCompleted {
compaction_id,
compaction_request_id,
response_payload,
} => {
self.complete_compaction_request(
event.seq,
event.wall_time_unix_ms,
compaction_id,
compaction_request_id,
ExecutionStatus::Completed,
Some(response_payload),
)?;
}
RawTraceEventPayload::CompactionRequestFailed {
compaction_id,
compaction_request_id,
..
} => {
self.complete_compaction_request(
event.seq,
event.wall_time_unix_ms,
compaction_id,
compaction_request_id,
ExecutionStatus::Failed,
/*response_payload*/ None,
)?;
}
RawTraceEventPayload::CompactionInstalled {
compaction_id,
checkpoint_payload,
} => {
let Some(thread_id) = event.thread_id else {
bail!("compaction installed event {compaction_id} did not include a thread id");
};
let Some(codex_turn_id) = event.codex_turn_id else {
bail!(
"compaction installed event {compaction_id} did not include a codex turn id"
);
};
self.reduce_compaction_installed_event(
event.wall_time_unix_ms,
thread_id,
codex_turn_id,
compaction_id,
checkpoint_payload,
)?;
}
RawTraceEventPayload::AgentResultObserved {
edge_id,
child_thread_id,
child_codex_turn_id,
parent_thread_id,
message,
carried_payload,
} => {
self.queue_agent_result_interaction_edge(ObservedAgentResultEdge {
wall_time_unix_ms: event.wall_time_unix_ms,
edge_id,
child_thread_id,
child_codex_turn_id,
parent_thread_id,
message,
carried_payload,
})?;
}
RawTraceEventPayload::Other { .. } => {
bail!("raw trace event has no reducer implementation");
}
}
Ok(())
}
fn insert_raw_payload(&mut self, payload: &RawPayloadRef) {
self.rollout
.raw_payloads
.insert(payload.raw_payload_id.clone(), payload.clone());
}
}

View File

@@ -0,0 +1,200 @@
//! Shared reducer test fixtures.
//!
//! These helpers only write common trace scaffolding. Scenario-specific event
//! sequences stay in each test so the behavior under test remains visible.
use serde_json::json;
use tempfile::TempDir;
use crate::model::ToolCallSummary;
use crate::payload::RawPayloadKind;
use crate::payload::RawPayloadRef;
use crate::raw_event::RawTraceEventContext;
use crate::raw_event::RawTraceEventPayload;
use crate::replay_bundle;
use crate::writer::TraceWriter;
/// Default root thread id used by most fixtures.
pub(crate) const ROOT_THREAD_ID: &str = "thread-root";
/// UUID-shaped root thread id for fixtures that exercise agent threads.
pub(crate) const AGENT_ROOT_THREAD_ID: &str = "019d0000-0000-7000-8000-000000000001";
/// Builds a model-visible chat message with a single `input_text` part.
pub(crate) fn message(role: &str, text: &str) -> serde_json::Value {
    let part = json!({"type": "input_text", "text": text});
    json!({
        "type": "message",
        "role": role,
        "content": [part]
    })
}
/// Builds the minimal generic tool summary used when a test only needs a label.
pub(crate) fn generic_summary(label: &str) -> ToolCallSummary {
    let label = label.to_owned();
    ToolCallSummary::Generic {
        label,
        input_preview: None,
        output_preview: None,
    }
}
/// Convenience wrapper: a started writer whose root thread is `ROOT_THREAD_ID`.
pub(crate) fn create_started_writer(temp: &TempDir) -> anyhow::Result<TraceWriter> {
    let writer = create_started_writer_for_thread(temp, ROOT_THREAD_ID, "/root")?;
    Ok(writer)
}
/// Convenience wrapper: a started writer whose root thread is `AGENT_ROOT_THREAD_ID`.
pub(crate) fn create_started_agent_writer(temp: &TempDir) -> anyhow::Result<TraceWriter> {
    let writer = create_started_writer_for_thread(temp, AGENT_ROOT_THREAD_ID, "/root")?;
    Ok(writer)
}
/// Creates a trace writer rooted in `temp` and immediately starts `thread_id`.
pub(crate) fn create_started_writer_for_thread(
    temp: &TempDir,
    thread_id: &str,
    agent_path: &str,
) -> anyhow::Result<TraceWriter> {
    let writer = TraceWriter::create(
        temp.path(),
        String::from("trace-1"),
        String::from("rollout-1"),
        thread_id.to_owned(),
    )?;
    start_thread(&writer, thread_id, agent_path)?;
    Ok(writer)
}
/// Appends a `ThreadStarted` event for `thread_id` with no metadata payload.
pub(crate) fn start_thread(
    writer: &TraceWriter,
    thread_id: &str,
    agent_path: &str,
) -> anyhow::Result<()> {
    let payload = RawTraceEventPayload::ThreadStarted {
        thread_id: thread_id.to_owned(),
        agent_path: agent_path.to_owned(),
        metadata_payload: None,
    };
    writer.append(payload)?;
    Ok(())
}
/// Starts `turn_id` on the default root thread.
pub(crate) fn start_turn(writer: &TraceWriter, turn_id: &str) -> anyhow::Result<()> {
    start_turn_for_thread(writer, ROOT_THREAD_ID, turn_id)?;
    Ok(())
}
/// Starts `turn_id` on the agent root thread.
pub(crate) fn start_agent_turn(writer: &TraceWriter, turn_id: &str) -> anyhow::Result<()> {
    start_turn_for_thread(writer, AGENT_ROOT_THREAD_ID, turn_id)?;
    Ok(())
}
/// Appends a `CodexTurnStarted` event for `turn_id` on `thread_id`.
pub(crate) fn start_turn_for_thread(
    writer: &TraceWriter,
    thread_id: &str,
    turn_id: &str,
) -> anyhow::Result<()> {
    let payload = RawTraceEventPayload::CodexTurnStarted {
        codex_turn_id: turn_id.to_owned(),
        thread_id: thread_id.to_owned(),
    };
    writer.append(payload)?;
    Ok(())
}
/// Builds an event context for `turn_id` on the default root thread.
pub(crate) fn trace_context(turn_id: &str) -> RawTraceEventContext {
    trace_context_for_thread(ROOT_THREAD_ID, turn_id)
}
/// Builds an event context for `turn_id` on the agent root thread.
pub(crate) fn trace_context_for_agent(turn_id: &str) -> RawTraceEventContext {
    trace_context_for_thread(AGENT_ROOT_THREAD_ID, turn_id)
}
/// Builds an event context carrying both the thread id and the turn id.
pub(crate) fn trace_context_for_thread(thread_id: &str, turn_id: &str) -> RawTraceEventContext {
    let thread_id = Some(thread_id.to_owned());
    let codex_turn_id = Some(turn_id.to_owned());
    RawTraceEventContext {
        thread_id,
        codex_turn_id,
    }
}
/// Appends an inference start for `inference_call_id` on the default root thread.
pub(crate) fn append_inference_start(
    writer: &TraceWriter,
    inference_call_id: &str,
    codex_turn_id: &str,
    request_payload: RawPayloadRef,
) -> anyhow::Result<()> {
    let thread_id = ROOT_THREAD_ID;
    append_inference_start_for_thread(
        writer,
        thread_id,
        codex_turn_id,
        inference_call_id,
        request_payload,
    )
}
/// Appends an `InferenceStarted` event with fixed test model/provider names.
pub(crate) fn append_inference_start_for_thread(
    writer: &TraceWriter,
    thread_id: &str,
    codex_turn_id: &str,
    inference_call_id: &str,
    request_payload: RawPayloadRef,
) -> anyhow::Result<()> {
    let event = RawTraceEventPayload::InferenceStarted {
        inference_call_id: inference_call_id.to_owned(),
        thread_id: thread_id.to_owned(),
        codex_turn_id: codex_turn_id.to_owned(),
        model: String::from("gpt-test"),
        provider_name: String::from("test-provider"),
        request_payload,
    };
    writer.append(event)?;
    Ok(())
}
/// Appends an `InferenceCompleted` event with the given response id and payload.
pub(crate) fn append_inference_completion(
    writer: &TraceWriter,
    inference_call_id: &str,
    response_id: &str,
    response_payload: RawPayloadRef,
) -> anyhow::Result<()> {
    let event = RawTraceEventPayload::InferenceCompleted {
        inference_call_id: inference_call_id.to_owned(),
        response_id: Some(response_id.to_owned()),
        response_payload,
    };
    writer.append(event)?;
    Ok(())
}
/// Writes an inference-request payload for `input` and starts the inference call.
pub(crate) fn append_inference_request(
    writer: &TraceWriter,
    thread_id: &str,
    turn_id: &str,
    inference_id: &str,
    input: Vec<serde_json::Value>,
) -> anyhow::Result<()> {
    let body = json!({ "input": input });
    let request = writer.write_json_payload(RawPayloadKind::InferenceRequest, &body)?;
    append_inference_start_for_thread(writer, thread_id, turn_id, inference_id, request)
}
/// Appends a full request/response inference pair in one call.
///
/// The response id is derived deterministically as `resp-{inference_id}` so
/// tests can predict it.
pub(crate) fn append_completed_inference(
    writer: &TraceWriter,
    thread_id: &str,
    turn_id: &str,
    inference_id: &str,
    input: Vec<serde_json::Value>,
    output_items: Vec<serde_json::Value>,
) -> anyhow::Result<()> {
    append_inference_request(writer, thread_id, turn_id, inference_id, input)?;
    let response_id = format!("resp-{inference_id}");
    let body = json!({
        "response_id": response_id.clone(),
        "output_items": output_items,
    });
    let response = writer.write_json_payload(RawPayloadKind::InferenceResponse, &body)?;
    let completion = RawTraceEventPayload::InferenceCompleted {
        inference_call_id: inference_id.to_owned(),
        response_id: Some(response_id),
        response_payload: response,
    };
    writer.append_with_context(trace_context_for_thread(thread_id, turn_id), completion)?;
    Ok(())
}
/// Asserts that replaying the bundle in `temp` fails with a message containing `expected`.
///
/// Panics when the replay unexpectedly succeeds.
pub(crate) fn expect_replay_error(temp: &TempDir, expected: &str) -> anyhow::Result<()> {
    match replay_bundle(temp.path()) {
        Ok(_) => panic!("expected replay error containing {expected}"),
        Err(err) => {
            let message = err.to_string();
            assert!(message.contains(expected), "unexpected error: {message}");
            Ok(())
        }
    }
}

View File

@@ -0,0 +1,264 @@
//! Thread and turn reduction.
//!
//! Threads are the container that every other reducer module links into. This
//! module owns the identity metadata parsing as well, so the central dispatcher
//! does not need to know the shape of multi-agent session-source payloads.
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use serde::Deserialize;
use serde_json::Value;
use super::TraceReducer;
use super::tool::spawn_edge_id;
use crate::model::AgentOrigin;
use crate::model::AgentThread;
use crate::model::CodexTurn;
use crate::model::CodexTurnId;
use crate::model::ExecutionStatus;
use crate::model::ExecutionWindow;
use crate::model::RolloutStatus;
use crate::payload::RawPayloadRef;
use crate::raw_event::RawEventSeq;
impl TraceReducer {
    /// Inserts a thread and derives its multi-agent identity from optional metadata.
    ///
    /// The raw event carries a denormalized agent path; when v2 subagent metadata is
    /// present, that metadata is authoritative because it also drives spawn edges and task names.
    pub(super) fn start_thread(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: String,
        agent_path: String,
        metadata_payload: Option<RawPayloadRef>,
    ) -> Result<()> {
        // A thread id is a primary key of the reduced graph; a second start
        // for the same id means the trace is corrupted or double-replayed.
        if self.rollout.threads.contains_key(&thread_id) {
            bail!("duplicate thread start for {thread_id}");
        }
        let metadata = metadata_payload
            .as_ref()
            .map(|payload| self.thread_started_metadata(payload))
            .transpose()?;
        let spawn = metadata
            .as_ref()
            .and_then(ThreadStartedMetadata::thread_spawn);
        // The v2 SessionSource is the authoritative child identity record.
        // Prefer its nested agent_path over the denormalized event field so
        // task derivation and the spawn edge are based on the same metadata.
        let agent_path = spawn
            .as_ref()
            .and_then(|spawn| spawn.agent_path.clone())
            .or_else(|| {
                metadata
                    .as_ref()
                    .and_then(|metadata| metadata.agent_path.clone())
            })
            .unwrap_or(agent_path);
        let nickname = metadata
            .as_ref()
            .and_then(|metadata| metadata.nickname.clone());
        let default_model = metadata
            .as_ref()
            .and_then(|metadata| metadata.model.clone());
        let origin = if let Some(spawn) = spawn {
            // Spawn edge ids are derived from the parent/child pair so the
            // same trace reduces to the same edge id on every replay.
            let edge_id = spawn_edge_id(&spawn.parent_thread_id, &thread_id);
            let task_name = spawn
                .task_name
                .clone()
                .unwrap_or_else(|| task_name_from_agent_path(&agent_path));
            let agent_role = spawn.agent_role.clone().unwrap_or_default();
            AgentOrigin::Spawned {
                parent_thread_id: spawn.parent_thread_id,
                spawn_edge_id: edge_id,
                task_name,
                agent_role,
            }
        } else {
            AgentOrigin::Root
        };
        self.rollout.threads.insert(
            thread_id.clone(),
            AgentThread {
                thread_id,
                agent_path,
                nickname,
                origin,
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                default_model,
                conversation_item_ids: Vec::new(),
            },
        );
        Ok(())
    }
    /// Marks a thread terminal without treating child shutdown as rollout completion.
    pub(super) fn end_thread(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: String,
        status: RolloutStatus,
    ) -> Result<()> {
        let thread = self.thread_mut(&thread_id)?;
        thread.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
        thread.execution.ended_seq = Some(seq);
        // Map the rollout-level status onto the thread's execution window.
        thread.execution.status = match status {
            RolloutStatus::Running => ExecutionStatus::Running,
            RolloutStatus::Completed => ExecutionStatus::Completed,
            RolloutStatus::Failed => ExecutionStatus::Failed,
            RolloutStatus::Aborted => ExecutionStatus::Aborted,
        };
        Ok(())
    }
    /// Starts a Codex turn inside an existing thread.
    pub(super) fn start_codex_turn(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        codex_turn_id: CodexTurnId,
        thread_id: String,
    ) -> Result<()> {
        if self.rollout.codex_turns.contains_key(&codex_turn_id) {
            bail!("duplicate codex turn start for {codex_turn_id}");
        }
        // Validate the owning thread exists before inserting the turn.
        self.thread_mut(&thread_id)?;
        self.rollout.codex_turns.insert(
            codex_turn_id.clone(),
            CodexTurn {
                codex_turn_id,
                thread_id,
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                input_item_ids: Vec::new(),
            },
        );
        Ok(())
    }
    /// Marks a Codex turn terminal and validates any thread id carried by the raw event.
    pub(super) fn end_codex_turn(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: Option<String>,
        codex_turn_id: CodexTurnId,
        status: ExecutionStatus,
    ) -> Result<()> {
        // A thread id on the end event is optional, but when present it must
        // agree with the thread recorded at turn start.
        if let Some(event_thread_id) = thread_id.as_deref()
            && let Some(turn) = self.rollout.codex_turns.get(&codex_turn_id)
            && turn.thread_id != event_thread_id
        {
            bail!(
                "codex turn end for {codex_turn_id} used thread {event_thread_id}, \
                but the turn belongs to {}",
                turn.thread_id
            );
        }
        let Some(turn) = self.rollout.codex_turns.get_mut(&codex_turn_id) else {
            bail!("codex turn end referenced unknown turn {codex_turn_id}");
        };
        turn.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
        turn.execution.ended_seq = Some(seq);
        turn.execution.status = status.clone();
        // Code cells still running when the turn ends inherit the turn's
        // terminal status rather than staying Running forever.
        self.terminate_running_code_cells_for_turn_end(
            seq,
            wall_time_unix_ms,
            &codex_turn_id,
            &status,
        )?;
        Ok(())
    }
    /// Returns a mutable thread or reports a reducer error tied to the unknown id.
    pub(super) fn thread_mut(&mut self, thread_id: &str) -> Result<&mut AgentThread> {
        self.rollout
            .threads
            .get_mut(thread_id)
            .with_context(|| format!("trace event referenced unknown thread {thread_id}"))
    }
    /// Reads and deserializes the optional thread-start metadata payload.
    fn thread_started_metadata(
        &self,
        metadata_payload: &RawPayloadRef,
    ) -> Result<ThreadStartedMetadata> {
        let value = self.read_payload_json(metadata_payload)?;
        serde_json::from_value(value)
            .with_context(|| format!("parse thread metadata {}", metadata_payload.raw_payload_id))
    }
}
/// Subset of the thread-start metadata payload the reducer consumes.
///
/// All fields are optional; missing fields fall back to the denormalized
/// values carried on the raw event itself.
#[derive(Deserialize)]
struct ThreadStartedMetadata {
    // Top-level fallbacks used when the nested thread_spawn record omits them.
    agent_path: Option<String>,
    task_name: Option<String>,
    nickname: Option<String>,
    agent_role: Option<String>,
    model: Option<String>,
    // Raw v2 SessionSource value; the spawn record is extracted lazily from it.
    session_source: Option<Value>,
}
impl ThreadStartedMetadata {
    /// Extracts the v2 subagent spawn record, if present.
    ///
    /// Values nested under `session_source.subagent.thread_spawn` win; the
    /// top-level metadata fields serve as fallbacks. `parent_thread_id` is
    /// mandatory — without it there is no spawn record at all.
    fn thread_spawn(&self) -> Option<ThreadSpawnMetadata> {
        let source = self.session_source.as_ref()?;
        let spawn = source.get("subagent")?.get("thread_spawn")?;
        let parent_thread_id = spawn.get("parent_thread_id")?.as_str()?.to_string();
        let agent_path = match spawn.get("agent_path").and_then(Value::as_str) {
            Some(path) => Some(path.to_string()),
            None => self.agent_path.clone(),
        };
        let task_name = spawn
            .get("task_name")
            .and_then(Value::as_str)
            .map(str::to_string)
            .or_else(|| self.task_name.clone())
            .or_else(|| agent_path.as_deref().map(task_name_from_agent_path));
        let agent_role = spawn
            .get("agent_role")
            .and_then(Value::as_str)
            .map(str::to_string)
            .or_else(|| self.agent_role.clone());
        Some(ThreadSpawnMetadata {
            parent_thread_id,
            agent_path,
            task_name,
            agent_role,
        })
    }
}
/// Normalized spawn record for a subagent thread, after fallback resolution.
struct ThreadSpawnMetadata {
    // Required: the thread that spawned this child.
    parent_thread_id: String,
    agent_path: Option<String>,
    task_name: Option<String>,
    agent_role: Option<String>,
}
/// Derives a task name from the last non-empty `/`-separated segment of a path.
///
/// Falls back to the whole input when every segment is empty (e.g. "/" or "").
fn task_name_from_agent_path(agent_path: &str) -> String {
    for segment in agent_path.rsplit('/') {
        if !segment.is_empty() {
            return segment.to_string();
        }
    }
    agent_path.to_string()
}

View File

@@ -0,0 +1,500 @@
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use super::TraceReducer;
use crate::model::CodeModeRuntimeToolId;
use crate::model::ConversationItemKind;
use crate::model::ExecutionStatus;
use crate::model::ExecutionWindow;
use crate::model::ModelVisibleCallId;
use crate::model::ProducerRef;
use crate::model::ToolCall;
use crate::model::ToolCallId;
use crate::model::ToolCallKind;
use crate::model::ToolCallSummary;
use crate::payload::RawPayloadRef;
use crate::raw_event::RawEventSeq;
use crate::raw_event::RawToolCallRequester;
mod agents;
mod terminal;
pub(super) use agents::ObservedAgentResultEdge;
pub(super) use agents::PendingAgentInteractionEdge;
pub(super) use agents::spawn_edge_id;
/// Raw tool-start fields after dispatch has stripped the common event envelope.
///
/// Tool starts carry several optional identity namespaces: model-visible calls,
/// code-mode runtime tools, and canonical invocation payloads. Grouping them keeps
/// the reducer callsite readable and avoids positional argument mistakes.
pub(super) struct ToolCallStarted {
    /// Canonical reducer-side id for this tool call.
    pub(super) tool_call_id: ToolCallId,
    /// Call id as the model sees it, when the call is model-visible.
    pub(super) model_visible_call_id: Option<ModelVisibleCallId>,
    /// Code-mode runtime tool that issued this call, if any.
    pub(super) code_mode_runtime_tool_id: Option<CodeModeRuntimeToolId>,
    /// Raw requester; reduced to a typed requester during start_tool_call.
    pub(super) requester: RawToolCallRequester,
    pub(super) kind: ToolCallKind,
    pub(super) summary: ToolCallSummary,
    /// Canonical invocation payload captured at dispatch, when recorded.
    pub(super) invocation_payload: Option<RawPayloadRef>,
}
impl TraceReducer {
    /// Starts a tool call and links it to model-visible items or runtime parents when available.
    ///
    /// Some tools also create richer domain objects, such as terminal operations, from
    /// the same invocation payload. The generic ToolCall remains the common index.
    pub(super) fn start_tool_call(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: Option<String>,
        codex_turn_id: Option<String>,
        started: ToolCallStarted,
    ) -> Result<()> {
        let tool_call_id = started.tool_call_id.clone();
        if self.rollout.tool_calls.contains_key(&tool_call_id) {
            bail!("duplicate tool call start for {tool_call_id}");
        }
        self.ensure_unique_model_visible_tool_call(
            started.model_visible_call_id.as_deref(),
            &tool_call_id,
        )?;
        // The event may omit the thread id; fall back to the owning turn.
        let thread_id = self.tool_thread_id(thread_id, codex_turn_id.as_deref())?;
        self.validate_tool_turn(&thread_id, codex_turn_id.as_deref())?;
        let model_visible_call_id = started.model_visible_call_id.clone();
        let requester = self.reduce_tool_call_requester(&thread_id, started.requester.clone())?;
        // Conversation items matching this call may already exist if the
        // inference transcript was reduced before the tool start.
        let model_visible_call_item_ids = model_visible_call_id
            .as_deref()
            .map(|call_id| {
                self.model_visible_tool_item_ids(
                    &thread_id,
                    call_id,
                    &[
                        ConversationItemKind::FunctionCall,
                        ConversationItemKind::CustomToolCall,
                    ],
                )
            })
            .unwrap_or_default();
        let model_visible_output_item_ids = model_visible_call_id
            .as_deref()
            .map(|call_id| {
                self.model_visible_tool_item_ids(
                    &thread_id,
                    call_id,
                    &[
                        ConversationItemKind::FunctionCallOutput,
                        ConversationItemKind::CustomToolCallOutput,
                    ],
                )
            })
            .unwrap_or_default();
        self.thread_mut(&thread_id)?;
        // Some terminal-like tools, notably write_stdin, do not emit a richer
        // runtime begin event. For those tools the canonical invocation is the
        // only place to recover the terminal/session join key.
        let terminal_operation_id = self.start_terminal_operation_from_invocation(
            seq,
            wall_time_unix_ms,
            &thread_id,
            &tool_call_id,
            &started.kind,
            started.invocation_payload.as_ref(),
        )?;
        // Terminal-backed tools should render through the richer terminal
        // operation instead of the generic tool summary captured by producers.
        let summary = terminal_operation_id
            .as_ref()
            .map(|operation_id| ToolCallSummary::Terminal {
                operation_id: operation_id.clone(),
            })
            .unwrap_or(started.summary);
        let raw_invocation_payload_id = started
            .invocation_payload
            .as_ref()
            .map(|payload| payload.raw_payload_id.clone());
        self.link_wait_tool_call_from_request_payload(
            &thread_id,
            &tool_call_id,
            started.invocation_payload.as_ref(),
        )?;
        self.rollout.tool_calls.insert(
            tool_call_id.clone(),
            ToolCall {
                tool_call_id: tool_call_id.clone(),
                model_visible_call_id,
                code_mode_runtime_tool_id: started.code_mode_runtime_tool_id,
                thread_id,
                started_by_codex_turn_id: codex_turn_id,
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                requester: requester.clone(),
                kind: started.kind,
                model_visible_call_item_ids,
                // Intentionally empty here: output items are attached below
                // via add_tool_output_item so the reverse edge is recorded too.
                model_visible_output_item_ids: Vec::new(),
                terminal_operation_id,
                summary,
                raw_invocation_payload_id,
                raw_result_payload_id: None,
                raw_runtime_payload_ids: Vec::new(),
            },
        );
        self.link_tool_call_to_code_cell(&tool_call_id, &requester)?;
        self.link_tool_to_inference_response(&tool_call_id);
        // Output items need the reverse ProducerRef edge as well, so attach
        // them after insertion through the same helper used by the transcript
        // reducer when the output is observed after the tool start.
        for item_id in model_visible_output_item_ids {
            self.add_tool_output_item(&tool_call_id, &item_id)?;
        }
        // The call/output items may have been observed before this tool start.
        // Re-sync after insertion so terminal observations get both directions
        // of the model-visible link.
        self.sync_terminal_model_observation(&tool_call_id)?;
        Ok(())
    }
    /// Completes the canonical tool call and any terminal operation driven by dispatch output.
    ///
    /// Protocol-backed terminal tools end from runtime events; direct tools
    /// may only have the canonical result payload, so this method handles both paths.
    pub(super) fn end_tool_call(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        tool_call_id: ToolCallId,
        status: ExecutionStatus,
        result_payload: Option<RawPayloadRef>,
    ) -> Result<()> {
        // Scope the mutable borrow so the terminal end below can re-borrow self.
        let (terminal_operation_id, thread_id, end_terminal_from_result) = {
            let Some(tool_call) = self.rollout.tool_calls.get_mut(&tool_call_id) else {
                bail!("tool call end referenced unknown call {tool_call_id}");
            };
            tool_call.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
            tool_call.execution.ended_seq = Some(seq);
            tool_call.execution.status = status.clone();
            tool_call.raw_result_payload_id = result_payload
                .as_ref()
                .map(|payload| payload.raw_payload_id.clone());
            (
                tool_call.terminal_operation_id.clone(),
                tool_call.thread_id.clone(),
                // Protocol-backed tools end terminal operations from
                // runtime observations. Dispatch result payloads are still kept
                // on ToolCall, but they are caller-facing and may be transformed
                // relative to the raw terminal output.
                tool_call.raw_runtime_payload_ids.is_empty(),
            )
        };
        if end_terminal_from_result && let Some(operation_id) = terminal_operation_id {
            self.end_terminal_operation(
                seq,
                wall_time_unix_ms,
                &thread_id,
                &operation_id,
                status,
                result_payload.as_ref(),
            )?;
        }
        self.attach_agent_interaction_tool_result(&tool_call_id, result_payload.as_ref())?;
        Ok(())
    }
    /// Records a runtime-begin observation for an already started tool call.
    ///
    /// Runtime observations enrich the generic tool with protocol facts and may
    /// create domain-specific children such as terminal operations or agent edges.
    pub(super) fn start_tool_runtime_observation(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        tool_call_id: ToolCallId,
        runtime_payload: RawPayloadRef,
    ) -> Result<()> {
        let (thread_id, _requester, kind, existing_terminal_operation_id) = {
            let Some(tool_call) = self.rollout.tool_calls.get_mut(&tool_call_id) else {
                bail!("tool runtime start referenced unknown call {tool_call_id}");
            };
            push_unique(
                &mut tool_call.raw_runtime_payload_ids,
                &runtime_payload.raw_payload_id,
            );
            (
                tool_call.thread_id.clone(),
                tool_call.requester.clone(),
                tool_call.kind.clone(),
                tool_call.terminal_operation_id.clone(),
            )
        };
        // A terminal tool may already own an operation created from its
        // invocation payload; a second runtime-created one would be ambiguous.
        if existing_terminal_operation_id.is_some()
            && matches!(kind, ToolCallKind::ExecCommand | ToolCallKind::WriteStdin)
        {
            bail!("tool runtime start would create a second terminal operation for {tool_call_id}");
        }
        // Protocol begin events carry runtime facts such as process ids and
        // cwd. These facts should create terminal rows, but they must not
        // replace the canonical invocation payload captured at dispatch.
        let terminal_operation_id = self.start_terminal_operation_from_runtime(
            seq,
            wall_time_unix_ms,
            &thread_id,
            &tool_call_id,
            &kind,
            &runtime_payload,
        )?;
        if let Some(operation_id) = &terminal_operation_id {
            let Some(tool_call) = self.rollout.tool_calls.get_mut(&tool_call_id) else {
                bail!("tool call {tool_call_id} disappeared during runtime start reduction");
            };
            if tool_call.terminal_operation_id.is_none() {
                tool_call.terminal_operation_id = Some(operation_id.clone());
                tool_call.summary = ToolCallSummary::Terminal {
                    operation_id: operation_id.clone(),
                };
            }
        }
        if terminal_operation_id.is_some() {
            self.sync_terminal_model_observation(&tool_call_id)?;
        }
        self.start_agent_interaction_from_runtime(&tool_call_id, &runtime_payload)?;
        Ok(())
    }
    /// Records a runtime-end observation for an already started tool call.
    pub(super) fn end_tool_runtime_observation(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        tool_call_id: ToolCallId,
        status: ExecutionStatus,
        runtime_payload: RawPayloadRef,
    ) -> Result<()> {
        let (thread_id, terminal_operation_id) = {
            let Some(tool_call) = self.rollout.tool_calls.get_mut(&tool_call_id) else {
                bail!("tool runtime end referenced unknown call {tool_call_id}");
            };
            push_unique(
                &mut tool_call.raw_runtime_payload_ids,
                &runtime_payload.raw_payload_id,
            );
            (
                tool_call.thread_id.clone(),
                tool_call.terminal_operation_id.clone(),
            )
        };
        if let Some(operation_id) = terminal_operation_id {
            self.end_terminal_operation(
                seq,
                wall_time_unix_ms,
                &thread_id,
                &operation_id,
                status,
                Some(&runtime_payload),
            )?;
        }
        self.end_agent_interaction_from_runtime(
            wall_time_unix_ms,
            &tool_call_id,
            &runtime_payload,
        )?;
        Ok(())
    }
    /// Attaches a conversation item observed after the tool call was reduced.
    ///
    /// Inference request/response ordering can expose call/output items after the
    /// runtime tool object exists, so transcript reduction calls back here to add
    /// reverse links without duplicating matching logic.
    pub(super) fn attach_model_visible_tool_item(
        &mut self,
        item_id: &str,
        call_id: Option<&str>,
        kind: &ConversationItemKind,
    ) -> Result<()> {
        let Some(call_id) = call_id else {
            return Ok(());
        };
        match kind {
            ConversationItemKind::FunctionCall | ConversationItemKind::CustomToolCall => {
                if let Some(tool_call_id) = self.single_tool_for_model_visible_call(call_id)? {
                    self.add_tool_call_item(&tool_call_id, item_id)?;
                    self.link_tool_to_inference_response(&tool_call_id);
                    self.sync_terminal_model_observation(&tool_call_id)?;
                }
            }
            ConversationItemKind::FunctionCallOutput
            | ConversationItemKind::CustomToolCallOutput => {
                if let Some(tool_call_id) = self.single_tool_for_model_visible_call(call_id)? {
                    self.add_tool_output_item(&tool_call_id, item_id)?;
                    self.sync_terminal_model_observation(&tool_call_id)?;
                }
            }
            // Non-tool item kinds never link to a tool call.
            ConversationItemKind::Message
            | ConversationItemKind::Reasoning
            | ConversationItemKind::CompactionMarker => {}
        }
        Ok(())
    }
    /// Resolves the owning thread id, falling back to the Codex turn's thread
    /// when the raw event omitted an explicit thread id.
    fn tool_thread_id(
        &self,
        thread_id: Option<String>,
        codex_turn_id: Option<&str>,
    ) -> Result<String> {
        if let Some(thread_id) = thread_id {
            return Ok(thread_id);
        }
        let Some(codex_turn_id) = codex_turn_id else {
            bail!("tool call start did not include thread or Codex turn context");
        };
        self.rollout
            .codex_turns
            .get(codex_turn_id)
            .map(|turn| turn.thread_id.clone())
            .with_context(|| {
                format!("tool call start referenced unknown Codex turn {codex_turn_id}")
            })
    }
    /// Ensures the thread exists and, when a turn id is given, that the turn
    /// both exists and belongs to that thread.
    fn validate_tool_turn(&self, thread_id: &str, codex_turn_id: Option<&str>) -> Result<()> {
        if !self.rollout.threads.contains_key(thread_id) {
            bail!("tool call start referenced unknown thread {thread_id}");
        }
        if let Some(codex_turn_id) = codex_turn_id {
            let Some(turn) = self.rollout.codex_turns.get(codex_turn_id) else {
                bail!("tool call start referenced unknown Codex turn {codex_turn_id}");
            };
            if turn.thread_id != thread_id {
                bail!(
                    "tool call start used thread {thread_id}, but Codex turn {codex_turn_id} \
                    belongs to {}",
                    turn.thread_id
                );
            }
        }
        Ok(())
    }
    /// Rejects a second tool call that claims an already-claimed model-visible call id.
    fn ensure_unique_model_visible_tool_call(
        &self,
        model_visible_call_id: Option<&str>,
        tool_call_id: &str,
    ) -> Result<()> {
        let Some(model_visible_call_id) = model_visible_call_id else {
            return Ok(());
        };
        if let Some(existing) = self.single_tool_for_model_visible_call(model_visible_call_id)?
            && existing != tool_call_id
        {
            bail!("duplicate tool call for model-visible call id {model_visible_call_id}");
        }
        Ok(())
    }
    /// Finds the unique tool call bound to a model-visible call id.
    ///
    /// Returns `Ok(None)` when no tool matches and errors when more than one does.
    fn single_tool_for_model_visible_call(
        &self,
        model_visible_call_id: &str,
    ) -> Result<Option<ToolCallId>> {
        let mut matching = self
            .rollout
            .tool_calls
            .values()
            .filter(|tool| tool.model_visible_call_id.as_deref() == Some(model_visible_call_id))
            .map(|tool| tool.tool_call_id.clone());
        let first = matching.next();
        if matching.next().is_some() {
            bail!("multiple tool calls matched model-visible call id {model_visible_call_id}");
        }
        Ok(first)
    }
    /// Collects ids of conversation items on `thread_id` that reference
    /// `call_id` and have one of the requested kinds.
    fn model_visible_tool_item_ids(
        &self,
        thread_id: &str,
        call_id: &str,
        kinds: &[ConversationItemKind],
    ) -> Vec<String> {
        self.rollout
            .conversation_items
            .values()
            .filter(|item| {
                item.thread_id == thread_id
                    && item.call_id.as_deref() == Some(call_id)
                    && kinds.contains(&item.kind)
            })
            .map(|item| item.item_id.clone())
            .collect::<Vec<_>>()
    }
    /// Links a model-visible call item onto the tool call, deduplicating repeats.
    fn add_tool_call_item(&mut self, tool_call_id: &str, item_id: &str) -> Result<()> {
        let Some(tool_call) = self.rollout.tool_calls.get_mut(tool_call_id) else {
            bail!("tool call {tool_call_id} disappeared during conversation linking");
        };
        push_unique(&mut tool_call.model_visible_call_item_ids, item_id);
        Ok(())
    }
    /// Links an output item onto the tool call and records the reverse
    /// ProducerRef edge on the conversation item.
    fn add_tool_output_item(&mut self, tool_call_id: &str, item_id: &str) -> Result<()> {
        let Some(tool_call) = self.rollout.tool_calls.get_mut(tool_call_id) else {
            bail!("tool call {tool_call_id} disappeared during output linking");
        };
        push_unique(&mut tool_call.model_visible_output_item_ids, item_id);
        let Some(item) = self.rollout.conversation_items.get_mut(item_id) else {
            bail!("conversation item {item_id} disappeared during output linking");
        };
        let producer = ProducerRef::Tool {
            tool_call_id: tool_call_id.to_string(),
        };
        if !item.produced_by.contains(&producer) {
            item.produced_by.push(producer);
        }
        Ok(())
    }
    /// Marks every inference call whose response items include one of this
    /// tool's call items as having started the tool. Best-effort: silently
    /// returns when the tool or its call items are not yet known.
    fn link_tool_to_inference_response(&mut self, tool_call_id: &str) {
        let Some(tool_call) = self.rollout.tool_calls.get(tool_call_id) else {
            return;
        };
        let call_item_ids = tool_call.model_visible_call_item_ids.clone();
        if call_item_ids.is_empty() {
            return;
        }
        for inference in self.rollout.inference_calls.values_mut() {
            if inference
                .response_item_ids
                .iter()
                .any(|item_id| call_item_ids.contains(item_id))
                && !inference
                    .tool_call_ids_started_by_response
                    .contains(&tool_call_id.to_string())
            {
                inference
                    .tool_call_ids_started_by_response
                    .push(tool_call_id.to_string());
            }
        }
    }
}
/// Appends `item_id` to `items` unless an equal entry is already present.
fn push_unique(items: &mut Vec<String>, item_id: &str) {
    let already_present = items.iter().any(|existing| existing == item_id);
    if !already_present {
        items.push(item_id.to_string());
    }
}

View File

@@ -0,0 +1,621 @@
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use codex_protocol::protocol::CollabAgentInteractionBeginEvent;
use codex_protocol::protocol::CollabAgentInteractionEndEvent;
use codex_protocol::protocol::CollabAgentSpawnEndEvent;
use codex_protocol::protocol::CollabCloseBeginEvent;
use codex_protocol::protocol::CollabCloseEndEvent;
use codex_protocol::protocol::InterAgentCommunication;
use super::super::TraceReducer;
use crate::model::ConversationItem;
use crate::model::ConversationItemKind;
use crate::model::ConversationPart;
use crate::model::ConversationRole;
use crate::model::InteractionEdge;
use crate::model::InteractionEdgeKind;
use crate::model::ToolCallKind;
use crate::model::TraceAnchor;
use crate::payload::RawPayloadRef;
/// Agent delivery edge waiting for the recipient-side conversation item.
///
/// Multi-agent v2 records the sender tool before the target thread necessarily
/// includes the delivered mailbox message in a model-visible request. The edge
/// stays pending so it can target that exact conversation item when possible.
pub(in crate::reducer) struct PendingAgentInteractionEdge {
    /// Stable identifier reused when the edge finally materializes.
    pub(in crate::reducer) edge_id: String,
    /// Delivery flavor (spawn, send-message, assign-task, agent-result, close).
    pub(in crate::reducer) kind: InteractionEdgeKind,
    /// Sender-side anchor of the delivery (tool call, conversation item, or thread).
    pub(in crate::reducer) source: TraceAnchor,
    /// Thread expected to surface the delivered message.
    pub(in crate::reducer) target_thread_id: String,
    /// Exact message text matched against the recipient-side conversation item.
    pub(in crate::reducer) message_content: String,
    /// Spawn-only fallback for children that fail before their task message is model-visible.
    pub(in crate::reducer) unresolved_spawn_thread_id: Option<String>,
    /// Earliest wall-clock start time observed for this delivery.
    pub(in crate::reducer) started_at_unix_ms: i64,
    /// Latest wall-clock end time, when the sending side has already finished.
    pub(in crate::reducer) ended_at_unix_ms: Option<i64>,
    /// Raw payload evidence preserved until the edge materializes.
    pub(in crate::reducer) carried_raw_payload_ids: Vec<String>,
}
/// Typed reducer input for a multi-agent v2 child completion notification.
///
/// Child results are observed outside the normal tool lifecycle, but they still
/// carry a parent-thread notification. This wrapper keeps the dispatcher from
/// passing a positional bundle of thread and turn ids.
pub(in crate::reducer) struct ObservedAgentResultEdge {
    /// Wall-clock time at which the completion notification was observed.
    pub(in crate::reducer) wall_time_unix_ms: i64,
    /// Stable identifier for the resulting interaction edge.
    pub(in crate::reducer) edge_id: String,
    /// Thread that produced the result.
    pub(in crate::reducer) child_thread_id: String,
    /// Child turn whose final assistant message anchors the edge source.
    pub(in crate::reducer) child_codex_turn_id: String,
    /// Thread that receives the completion notification.
    pub(in crate::reducer) parent_thread_id: String,
    /// Notification text expected to appear in the parent transcript.
    pub(in crate::reducer) message: String,
    /// Optional raw payload carried as evidence on the edge.
    pub(in crate::reducer) carried_payload: Option<RawPayloadRef>,
}
/// Builds the stable edge id for the spawn relationship between two threads.
pub(in crate::reducer) fn spawn_edge_id(parent_thread_id: &str, child_thread_id: &str) -> String {
    ["edge:spawn:", parent_thread_id, ":", child_thread_id].concat()
}
impl TraceReducer {
    /// Starts a multi-agent edge from a runtime begin payload, when the tool kind supports one.
    pub(super) fn start_agent_interaction_from_runtime(
        &mut self,
        tool_call_id: &str,
        runtime_payload: &RawPayloadRef,
    ) -> Result<()> {
        let kind = self
            .rollout
            .tool_calls
            .get(tool_call_id)
            .with_context(|| format!("agent edge referenced unknown tool call {tool_call_id}"))?
            .kind
            .clone();
        match kind {
            ToolCallKind::AssignAgentTask => {
                let payload: CollabAgentInteractionBeginEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.queue_message_agent_interaction(
                    tool_call_id,
                    InteractionEdgeKind::AssignAgentTask,
                    payload.receiver_thread_id.to_string(),
                    payload.prompt,
                    /*ended_at_unix_ms*/ None,
                )
            }
            ToolCallKind::SendMessage => {
                let payload: CollabAgentInteractionBeginEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.queue_message_agent_interaction(
                    tool_call_id,
                    InteractionEdgeKind::SendMessage,
                    payload.receiver_thread_id.to_string(),
                    payload.prompt,
                    /*ended_at_unix_ms*/ None,
                )
            }
            ToolCallKind::CloseAgent => {
                let payload: CollabCloseBeginEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.upsert_close_agent_interaction(
                    tool_call_id,
                    payload.receiver_thread_id.to_string(),
                    /*ended_at_unix_ms*/ None,
                )
            }
            ToolCallKind::ExecCommand
            | ToolCallKind::WriteStdin
            | ToolCallKind::ApplyPatch
            | ToolCallKind::Mcp { .. }
            | ToolCallKind::Web
            | ToolCallKind::ImageGeneration
            | ToolCallKind::SpawnAgent
            | ToolCallKind::WaitAgent
            | ToolCallKind::Other { .. } => Ok(()),
        }
    }
    /// Ends or enriches a multi-agent edge from a runtime end payload.
    pub(super) fn end_agent_interaction_from_runtime(
        &mut self,
        wall_time_unix_ms: i64,
        tool_call_id: &str,
        runtime_payload: &RawPayloadRef,
    ) -> Result<()> {
        // Use `get` rather than indexing: an unknown call id in a malformed
        // trace must surface as an error, not a panic, matching the begin-side
        // lookup above.
        let kind = self
            .rollout
            .tool_calls
            .get(tool_call_id)
            .with_context(|| format!("agent edge referenced unknown tool call {tool_call_id}"))?
            .kind
            .clone();
        match kind {
            ToolCallKind::SpawnAgent => {
                let payload: CollabAgentSpawnEndEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.end_spawn_agent_interaction(wall_time_unix_ms, tool_call_id, &payload)
            }
            ToolCallKind::AssignAgentTask => {
                let payload: CollabAgentInteractionEndEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.end_message_agent_interaction(
                    wall_time_unix_ms,
                    tool_call_id,
                    InteractionEdgeKind::AssignAgentTask,
                    &payload,
                )
            }
            ToolCallKind::SendMessage => {
                let payload: CollabAgentInteractionEndEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.end_message_agent_interaction(
                    wall_time_unix_ms,
                    tool_call_id,
                    InteractionEdgeKind::SendMessage,
                    &payload,
                )
            }
            ToolCallKind::CloseAgent => {
                let payload: CollabCloseEndEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.upsert_close_agent_interaction(
                    tool_call_id,
                    payload.receiver_thread_id.to_string(),
                    Some(wall_time_unix_ms),
                )
            }
            ToolCallKind::ExecCommand
            | ToolCallKind::WriteStdin
            | ToolCallKind::ApplyPatch
            | ToolCallKind::Mcp { .. }
            | ToolCallKind::Web
            | ToolCallKind::ImageGeneration
            | ToolCallKind::WaitAgent
            | ToolCallKind::Other { .. } => Ok(()),
        }
    }
    /// Adds the canonical tool result payload to an already reduced multi-agent edge.
    pub(super) fn attach_agent_interaction_tool_result(
        &mut self,
        tool_call_id: &str,
        result_payload: Option<&RawPayloadRef>,
    ) -> Result<()> {
        let Some(result_payload) = result_payload else {
            return Ok(());
        };
        if let Some(edge) = self
            .rollout
            .interaction_edges
            .values_mut()
            .find(|edge| tool_call_source_matches(&edge.source, tool_call_id))
        {
            push_unique(
                &mut edge.carried_raw_payload_ids,
                &result_payload.raw_payload_id,
            );
            return Ok(());
        }
        // Agent delivery edges intentionally wait for the recipient-side
        // conversation item. Tool end can arrive before that item is
        // reduced, so preserve the response payload on the pending edge rather
        // than dropping evidence until the delivery materializes.
        if let Some(pending) = self
            .pending_agent_interaction_edges
            .iter_mut()
            .find(|pending| tool_call_source_matches(&pending.source, tool_call_id))
        {
            push_unique(
                &mut pending.carried_raw_payload_ids,
                &result_payload.raw_payload_id,
            );
        }
        Ok(())
    }
    /// Queues or resolves the spawn edge once the end payload names the child thread.
    fn end_spawn_agent_interaction(
        &mut self,
        wall_time_unix_ms: i64,
        tool_call_id: &str,
        payload: &CollabAgentSpawnEndEvent,
    ) -> Result<()> {
        // A spawn that never produced a child thread has no delivery to record.
        let Some(child_thread_id) = payload.new_thread_id else {
            return Ok(());
        };
        // `get` + context instead of indexing so a malformed trace errors
        // instead of panicking.
        let started_at_unix_ms = self
            .rollout
            .tool_calls
            .get(tool_call_id)
            .with_context(|| format!("agent edge referenced unknown tool call {tool_call_id}"))?
            .execution
            .started_at_unix_ms;
        let child_thread_id = child_thread_id.to_string();
        let edge_id = spawn_edge_id(&payload.sender_thread_id.to_string(), &child_thread_id);
        self.queue_or_resolve_agent_interaction_edge(PendingAgentInteractionEdge {
            edge_id,
            kind: InteractionEdgeKind::SpawnAgent,
            source: TraceAnchor::ToolCall {
                tool_call_id: tool_call_id.to_string(),
            },
            target_thread_id: child_thread_id.clone(),
            message_content: payload.prompt.clone(),
            unresolved_spawn_thread_id: Some(child_thread_id),
            started_at_unix_ms,
            ended_at_unix_ms: Some(wall_time_unix_ms),
            carried_raw_payload_ids: self.agent_tool_payload_ids(tool_call_id)?,
        })
    }
    /// Finalizes a message-style edge (assign-task or send-message) from the end payload.
    fn end_message_agent_interaction(
        &mut self,
        wall_time_unix_ms: i64,
        tool_call_id: &str,
        edge_kind: InteractionEdgeKind,
        payload: &CollabAgentInteractionEndEvent,
    ) -> Result<()> {
        self.queue_message_agent_interaction(
            tool_call_id,
            edge_kind,
            payload.receiver_thread_id.to_string(),
            payload.prompt.clone(),
            Some(wall_time_unix_ms),
        )
    }
    /// Queues or resolves a message-delivery edge anchored on the given tool call.
    fn queue_message_agent_interaction(
        &mut self,
        tool_call_id: &str,
        kind: InteractionEdgeKind,
        target_thread_id: String,
        message_content: String,
        ended_at_unix_ms: Option<i64>,
    ) -> Result<()> {
        // `get` + context instead of indexing so a malformed trace errors
        // instead of panicking.
        let started_at_unix_ms = self
            .rollout
            .tool_calls
            .get(tool_call_id)
            .with_context(|| format!("agent edge referenced unknown tool call {tool_call_id}"))?
            .execution
            .started_at_unix_ms;
        self.queue_or_resolve_agent_interaction_edge(PendingAgentInteractionEdge {
            edge_id: tool_edge_id(tool_call_id),
            kind,
            source: TraceAnchor::ToolCall {
                tool_call_id: tool_call_id.to_string(),
            },
            target_thread_id,
            message_content,
            unresolved_spawn_thread_id: None,
            started_at_unix_ms,
            ended_at_unix_ms,
            carried_raw_payload_ids: self.agent_tool_payload_ids(tool_call_id)?,
        })
    }
    /// Collects the tool call's invocation, runtime, and result payload ids,
    /// deduplicated, in that order.
    fn agent_tool_payload_ids(&self, tool_call_id: &str) -> Result<Vec<String>> {
        let tool_call =
            self.rollout.tool_calls.get(tool_call_id).with_context(|| {
                format!("agent edge referenced unknown tool call {tool_call_id}")
            })?;
        let mut payload_ids = Vec::new();
        if let Some(payload_id) = &tool_call.raw_invocation_payload_id {
            push_unique(&mut payload_ids, payload_id);
        }
        for payload_id in &tool_call.raw_runtime_payload_ids {
            push_unique(&mut payload_ids, payload_id);
        }
        if let Some(payload_id) = &tool_call.raw_result_payload_id {
            push_unique(&mut payload_ids, payload_id);
        }
        Ok(payload_ids)
    }
    /// Creates or refreshes the close-agent edge targeting the named thread.
    fn upsert_close_agent_interaction(
        &mut self,
        tool_call_id: &str,
        target_thread_id: String,
        ended_at_unix_ms: Option<i64>,
    ) -> Result<()> {
        if !self.rollout.threads.contains_key(&target_thread_id) {
            // A failed close can name a thread that never participated in this
            // trace. Keep that evidence on the ToolCall raw payloads rather
            // than creating an anchor to a non-existent reduced object.
            return Ok(());
        }
        let started_at_unix_ms = self
            .rollout
            .tool_calls
            .get(tool_call_id)
            .with_context(|| format!("close edge referenced unknown tool call {tool_call_id}"))?
            .execution
            .started_at_unix_ms;
        let carried_raw_payload_ids = self.agent_tool_payload_ids(tool_call_id)?;
        self.upsert_interaction_edge(InteractionEdge {
            edge_id: tool_edge_id(tool_call_id),
            kind: InteractionEdgeKind::CloseAgent,
            source: TraceAnchor::ToolCall {
                tool_call_id: tool_call_id.to_string(),
            },
            target: TraceAnchor::Thread {
                thread_id: target_thread_id,
            },
            started_at_unix_ms,
            ended_at_unix_ms,
            carried_item_ids: Vec::new(),
            carried_raw_payload_ids,
        })
    }
    /// Queues or resolves the edge from a child completion to its parent notification.
    pub(in crate::reducer) fn queue_agent_result_interaction_edge(
        &mut self,
        observed: ObservedAgentResultEdge,
    ) -> Result<()> {
        let source = if let Some(source_item_id) = self.latest_assistant_message_item_for_turn(
            &observed.child_thread_id,
            &observed.child_codex_turn_id,
        ) {
            TraceAnchor::ConversationItem {
                item_id: source_item_id,
            }
        } else {
            // Child completion is delivered from AgentStatus, not from transcript
            // content. Failed or cancelled children can therefore notify the parent
            // without producing a final assistant message. Anchor those edges to
            // the child thread so the trace keeps the valid delivery instead of
            // inventing a missing conversation item.
            TraceAnchor::Thread {
                thread_id: observed.child_thread_id,
            }
        };
        self.queue_or_resolve_agent_interaction_edge(PendingAgentInteractionEdge {
            edge_id: observed.edge_id,
            kind: InteractionEdgeKind::AgentResult,
            source,
            target_thread_id: observed.parent_thread_id,
            message_content: observed.message,
            unresolved_spawn_thread_id: None,
            started_at_unix_ms: observed.wall_time_unix_ms,
            ended_at_unix_ms: Some(observed.wall_time_unix_ms),
            carried_raw_payload_ids: observed
                .carried_payload
                .map(|payload| vec![payload.raw_payload_id])
                .unwrap_or_default(),
        })
    }
    /// Resolves pending agent edges whose target is the newly reduced conversation item.
    pub(in crate::reducer) fn resolve_pending_agent_edges_for_item(
        &mut self,
        item_id: &str,
    ) -> Result<()> {
        let Some((thread_id, message_content)) = self.inter_agent_message_item(item_id) else {
            return Ok(());
        };
        let Some(pending_index) = self
            .pending_agent_interaction_edges
            .iter()
            .position(|pending| {
                pending.target_thread_id == thread_id && pending.message_content == message_content
            })
        else {
            return Ok(());
        };
        let pending = self.pending_agent_interaction_edges.remove(pending_index);
        self.upsert_agent_interaction_edge_for_item(pending, item_id.to_string())
    }
    /// Resolves the edge immediately when its delivered item already exists,
    /// otherwise merges it into (or appends it to) the pending queue.
    fn queue_or_resolve_agent_interaction_edge(
        &mut self,
        pending: PendingAgentInteractionEdge,
    ) -> Result<()> {
        if let Some(item_id) = self.find_unlinked_inter_agent_message_item(
            &pending.target_thread_id,
            &pending.message_content,
        ) {
            return self.upsert_agent_interaction_edge_for_item(pending, item_id);
        }
        if let Some(existing) = self
            .pending_agent_interaction_edges
            .iter_mut()
            .find(|existing| existing.edge_id == pending.edge_id)
        {
            if existing.kind != pending.kind
                || existing.source != pending.source
                || existing.target_thread_id != pending.target_thread_id
                || existing.message_content != pending.message_content
                || existing.unresolved_spawn_thread_id != pending.unresolved_spawn_thread_id
            {
                bail!(
                    "pending interaction edge {} was observed with conflicting delivery data",
                    pending.edge_id
                );
            }
            // Widen the observed window and merge payload evidence.
            existing.started_at_unix_ms =
                existing.started_at_unix_ms.min(pending.started_at_unix_ms);
            existing.ended_at_unix_ms = match (existing.ended_at_unix_ms, pending.ended_at_unix_ms)
            {
                (Some(existing_ended), Some(pending_ended)) => {
                    Some(existing_ended.max(pending_ended))
                }
                (None, ended) | (ended, None) => ended,
            };
            extend_unique(
                &mut existing.carried_raw_payload_ids,
                pending.carried_raw_payload_ids,
            );
            return Ok(());
        }
        self.pending_agent_interaction_edges.push(pending);
        Ok(())
    }
    /// Materializes unresolved spawn edges that have a valid child-thread fallback target.
    pub(in crate::reducer) fn resolve_pending_spawn_edge_fallbacks(&mut self) -> Result<()> {
        let pending_edges = std::mem::take(&mut self.pending_agent_interaction_edges);
        for pending in pending_edges {
            let Some(child_thread_id) = pending.unresolved_spawn_thread_id else {
                continue;
            };
            if pending.kind != InteractionEdgeKind::SpawnAgent {
                bail!(
                    "non-spawn interaction edge {} carried a spawn fallback target",
                    pending.edge_id
                );
            }
            if !self.rollout.threads.contains_key(&child_thread_id) {
                continue;
            }
            // Spawn normally resolves to the child task message because that is
            // where the delegated work first becomes model-visible. A child can
            // fail before that transcript item exists, but the spawned thread is
            // still real and the spawning tool still created it. Preserve that
            // relationship with the thread fallback instead of dropping the edge.
            self.upsert_interaction_edge(InteractionEdge {
                edge_id: pending.edge_id,
                kind: pending.kind,
                source: pending.source,
                target: TraceAnchor::Thread {
                    thread_id: child_thread_id,
                },
                started_at_unix_ms: pending.started_at_unix_ms,
                ended_at_unix_ms: pending.ended_at_unix_ms,
                carried_item_ids: Vec::new(),
                carried_raw_payload_ids: pending.carried_raw_payload_ids,
            })?;
        }
        Ok(())
    }
    /// Materializes a pending edge against the recipient-side conversation item.
    fn upsert_agent_interaction_edge_for_item(
        &mut self,
        pending: PendingAgentInteractionEdge,
        target_item_id: String,
    ) -> Result<()> {
        self.upsert_interaction_edge(InteractionEdge {
            edge_id: pending.edge_id,
            kind: pending.kind,
            source: pending.source,
            target: TraceAnchor::ConversationItem {
                item_id: target_item_id.clone(),
            },
            started_at_unix_ms: pending.started_at_unix_ms,
            ended_at_unix_ms: pending.ended_at_unix_ms,
            carried_item_ids: vec![target_item_id],
            carried_raw_payload_ids: pending.carried_raw_payload_ids,
        })
    }
    /// Inserts the edge, or merges timing and evidence into an existing edge
    /// with the same id; conflicting endpoints are an error.
    fn upsert_interaction_edge(&mut self, edge: InteractionEdge) -> Result<()> {
        if let Some(existing) = self.rollout.interaction_edges.get_mut(&edge.edge_id) {
            if existing.kind != edge.kind
                || existing.source != edge.source
                || existing.target != edge.target
            {
                bail!(
                    "interaction edge {} was observed with conflicting endpoints",
                    edge.edge_id
                );
            }
            existing.started_at_unix_ms = existing.started_at_unix_ms.min(edge.started_at_unix_ms);
            existing.ended_at_unix_ms = match (existing.ended_at_unix_ms, edge.ended_at_unix_ms) {
                (Some(existing_ended), Some(edge_ended)) => Some(existing_ended.max(edge_ended)),
                (None, ended) | (ended, None) => ended,
            };
            extend_unique(&mut existing.carried_item_ids, edge.carried_item_ids);
            extend_unique(
                &mut existing.carried_raw_payload_ids,
                edge.carried_raw_payload_ids,
            );
            return Ok(());
        }
        self.rollout
            .interaction_edges
            .insert(edge.edge_id.clone(), edge);
        Ok(())
    }
    /// Finds a delivered inter-agent message item in the thread that matches
    /// `message_content` and is not already the target of an edge.
    fn find_unlinked_inter_agent_message_item(
        &self,
        thread_id: &str,
        message_content: &str,
    ) -> Option<String> {
        self.rollout
            .threads
            .get(thread_id)?
            .conversation_item_ids
            .iter()
            .find(|item_id| {
                !self.is_interaction_edge_target_item(item_id)
                    && self
                        .inter_agent_message_item(item_id)
                        .is_some_and(|(_, content)| content == message_content)
            })
            .cloned()
    }
    /// Returns `(thread_id, content)` when the item is an inter-agent delivery
    /// addressed to its own thread's agent path.
    fn inter_agent_message_item(&self, item_id: &str) -> Option<(String, String)> {
        let item = self.rollout.conversation_items.get(item_id)?;
        let (recipient_agent_path, message_content) = inter_agent_message_fields(item)?;
        let thread = self.rollout.threads.get(&item.thread_id)?;
        if recipient_agent_path != thread.agent_path {
            return None;
        }
        Some((item.thread_id.clone(), message_content))
    }
    /// True when some reduced edge already targets this conversation item.
    fn is_interaction_edge_target_item(&self, item_id: &str) -> bool {
        self.rollout
            .interaction_edges
            .values()
            .any(|edge| matches!(&edge.target, TraceAnchor::ConversationItem { item_id: target } if target == item_id))
    }
    /// Most recently seen assistant message item for the given thread and turn, if any.
    fn latest_assistant_message_item_for_turn(
        &self,
        thread_id: &str,
        codex_turn_id: &str,
    ) -> Option<String> {
        self.rollout
            .conversation_items
            .values()
            .filter(|item| {
                item.thread_id == thread_id
                    && item.codex_turn_id.as_deref() == Some(codex_turn_id)
                    && item.role == ConversationRole::Assistant
                    && item.kind == ConversationItemKind::Message
            })
            .max_by_key(|item| item.first_seen_at_unix_ms)
            .map(|item| item.item_id.clone())
    }
}
/// Moves each element of `new_items` into `items`, skipping values already present.
fn extend_unique(items: &mut Vec<String>, new_items: Vec<String>) {
    for candidate in new_items {
        if !items.contains(&candidate) {
            items.push(candidate);
        }
    }
}
/// Builds the stable edge id owned by a single tool call.
fn tool_edge_id(tool_call_id: &str) -> String {
    ["edge:tool:", tool_call_id].concat()
}
/// True when `anchor` is the `ToolCall` anchor for exactly `tool_call_id`.
fn tool_call_source_matches(anchor: &TraceAnchor, tool_call_id: &str) -> bool {
    match anchor {
        TraceAnchor::ToolCall { tool_call_id: source } => source == tool_call_id,
        _ => false,
    }
}
/// Appends `item` to `items` unless an equal entry already exists.
fn push_unique(items: &mut Vec<String>, item: &str) {
    let already_present = items.iter().any(|existing| existing == item);
    if !already_present {
        items.push(item.to_owned());
    }
}
/// Extracts `(recipient_agent_path, content)` from an inter-agent mailbox delivery.
fn inter_agent_message_fields(item: &ConversationItem) -> Option<(String, String)> {
    // Multi-agent v2 injects mailbox deliveries as assistant messages whose
    // text is serialized `InterAgentCommunication`. Treat only that exact
    // transport shape as an edge target; ordinary assistant JSON must not be
    // mistaken for cross-thread delivery.
    let is_assistant_message =
        item.role == ConversationRole::Assistant && item.kind == ConversationItemKind::Message;
    if !is_assistant_message {
        return None;
    }
    let text = match item.body.parts.as_slice() {
        [ConversationPart::Text { text }] => text,
        _ => return None,
    };
    serde_json::from_str::<InterAgentCommunication>(text)
        .ok()
        .map(|communication| (communication.recipient.to_string(), communication.content))
}
#[cfg(test)]
#[path = "agents_tests.rs"]
mod tests;

View File

@@ -0,0 +1,717 @@
use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;
use crate::model::AgentOrigin;
use crate::model::ExecutionStatus;
use crate::model::InteractionEdgeKind;
use crate::model::RolloutStatus;
use crate::model::ToolCallKind;
use crate::model::ToolCallSummary;
use crate::model::TraceAnchor;
use crate::payload::RawPayloadKind;
use crate::payload::RawPayloadRef;
use crate::raw_event::RawToolCallRequester;
use crate::raw_event::RawTraceEventPayload;
use crate::reducer::test_support::append_completed_inference;
use crate::reducer::test_support::append_inference_request;
use crate::reducer::test_support::create_started_agent_writer;
use crate::reducer::test_support::message;
use crate::reducer::test_support::start_agent_turn;
use crate::reducer::test_support::start_thread;
use crate::reducer::test_support::start_turn_for_thread;
use crate::reducer::test_support::trace_context_for_agent;
use crate::reducer::test_support::trace_context_for_thread;
use crate::replay_bundle;
use crate::writer::TraceWriter;
#[test]
fn child_thread_metadata_creates_spawn_origin_without_delivery_edge() -> anyhow::Result<()> {
    // Session metadata alone must establish the child's spawned origin; the
    // spawn delivery edge is deliberately absent until the recipient-side
    // conversation item is reduced.
    let temp = TempDir::new()?;
    let writer = TraceWriter::create(
        temp.path(),
        "trace-1".to_string(),
        "rollout-1".to_string(),
        "019d0000-0000-7000-8000-000000000002".to_string(),
    )?;
    let metadata = writer.write_json_payload(
        RawPayloadKind::SessionMetadata,
        &json!({
            "nickname": "James",
            "agent_role": "explorer",
            "task_name": "repo_file_counter",
            "model": "gpt-test",
            "session_source": {
                "subagent": {
                    "thread_spawn": {
                        "parent_thread_id": "019d0000-0000-7000-8000-000000000001",
                        "agent_path": "/root/repo_file_counter",
                        "agent_nickname": "James",
                        "agent_role": "explorer"
                    }
                }
            }
        }),
    )?;
    writer.append(RawTraceEventPayload::ThreadStarted {
        thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
        agent_path: "/root/repo_file_counter".to_string(),
        metadata_payload: Some(metadata),
    })?;
    let replayed = replay_bundle(temp.path())?;
    let thread = &replayed.threads["019d0000-0000-7000-8000-000000000002"];
    assert_eq!(thread.nickname, Some("James".to_string()));
    assert_eq!(thread.default_model, Some("gpt-test".to_string()));
    assert_eq!(
        thread.origin,
        AgentOrigin::Spawned {
            parent_thread_id: "019d0000-0000-7000-8000-000000000001".to_string(),
            spawn_edge_id: "edge:spawn:019d0000-0000-7000-8000-000000000001:019d0000-0000-7000-8000-000000000002".to_string(),
            task_name: "repo_file_counter".to_string(),
            agent_role: "explorer".to_string(),
        }
    );
    assert!(
        !replayed.interaction_edges.contains_key(
            "edge:spawn:019d0000-0000-7000-8000-000000000001:019d0000-0000-7000-8000-000000000002"
        ),
        "spawn metadata identifies the child, but the delivery edge waits for the recipient \
         conversation item"
    );
    Ok(())
}
#[test]
fn spawn_runtime_payload_targets_delivered_child_message() -> anyhow::Result<()> {
    // Happy path: spawn edge targets the exact child-side ConversationItem and
    // carries every raw payload from the tool lifecycle.
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_agent_turn(&writer, "turn-1")?;
    let spawn_payloads = append_spawn_agent_tool_lifecycle(&writer, "turn-1")?;
    // Then record the child-side model-visible task message. This is the
    // preferred target because it pinpoints where the delegated work entered
    // the child timeline.
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/repo_file_counter",
    )?;
    start_turn_for_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
    )?;
    let delivered = inter_agent_message(
        "/root",
        "/root/repo_file_counter",
        "count",
        /*trigger_turn*/ true,
    );
    append_inference_request(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
        "inference-child-1",
        vec![message("assistant", &delivered)],
    )?;
    let replayed = replay_bundle(temp.path())?;
    let edge = &replayed.interaction_edges["edge:spawn:019d0000-0000-7000-8000-000000000001:019d0000-0000-7000-8000-000000000002"];
    assert_eq!(edge.kind, InteractionEdgeKind::SpawnAgent);
    assert_eq!(
        edge.source,
        TraceAnchor::ToolCall {
            tool_call_id: "call-spawn".to_string()
        }
    );
    let target_item_id = target_conversation_item_id(&edge.target);
    assert_eq!(edge.carried_item_ids, vec![target_item_id.clone()]);
    assert_eq!(
        replayed.conversation_items[target_item_id].thread_id,
        "019d0000-0000-7000-8000-000000000002"
    );
    // Evidence order mirrors agent_tool_payload_ids: invocation, runtime, result.
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![
            spawn_payloads.invocation.raw_payload_id,
            spawn_payloads.begin.raw_payload_id,
            spawn_payloads.end.raw_payload_id,
            spawn_payloads.result.raw_payload_id,
        ]
    );
    Ok(())
}
#[test]
fn spawn_runtime_payload_falls_back_to_child_thread_without_delivery_item() -> anyhow::Result<()> {
    // Fallback path: when the child never surfaces the task message, the spawn
    // edge anchors to the child Thread instead of a ConversationItem.
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_agent_turn(&writer, "turn-1")?;
    let spawn_payloads = append_spawn_agent_tool_lifecycle(&writer, "turn-1")?;
    // Deliberately start the child thread without appending an inference
    // request containing the inter-agent task message. This reproduces the
    // failure path where the child aborts before the reducer can target the
    // precise child-side ConversationItem.
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/repo_file_counter",
    )?;
    let replayed = replay_bundle(temp.path())?;
    let edge = &replayed.interaction_edges["edge:spawn:019d0000-0000-7000-8000-000000000001:019d0000-0000-7000-8000-000000000002"];
    assert_eq!(edge.kind, InteractionEdgeKind::SpawnAgent);
    assert_eq!(
        edge.source,
        TraceAnchor::ToolCall {
            tool_call_id: "call-spawn".to_string()
        }
    );
    assert_eq!(
        edge.target,
        TraceAnchor::Thread {
            thread_id: "019d0000-0000-7000-8000-000000000002".to_string()
        }
    );
    // No transcript item carried the task, so the fallback edge should not
    // claim one. The raw payloads still preserve the tool evidence.
    assert!(edge.carried_item_ids.is_empty());
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![
            spawn_payloads.invocation.raw_payload_id,
            spawn_payloads.begin.raw_payload_id,
            spawn_payloads.end.raw_payload_id,
            spawn_payloads.result.raw_payload_id,
        ]
    );
    Ok(())
}
#[test]
fn send_message_runtime_payload_targets_delivered_child_message() -> anyhow::Result<()> {
    // A send_message tool call should produce an edge:tool:* SendMessage edge
    // that targets the recipient-side ConversationItem carrying the message.
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_agent_turn(&writer, "turn-1")?;
    let invocation_payload = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "send_message",
            "payload": {
                "type": "function",
                "arguments": "{\"target\":\"/root/child\",\"message\":\"hello\"}"
            }
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "call-send".to_string(),
            model_visible_call_id: Some("call-send".to_string()),
            code_mode_runtime_tool_id: None,
            requester: RawToolCallRequester::Model,
            kind: ToolCallKind::SendMessage,
            summary: ToolCallSummary::Generic {
                label: "send_message".to_string(),
                input_preview: None,
                output_preview: None,
            },
            invocation_payload: Some(invocation_payload),
        },
    )?;
    let begin_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-send",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "receiver_thread_id": "019d0000-0000-7000-8000-000000000002",
            "prompt": "hello",
            "status": "running"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "call-send".to_string(),
            runtime_payload: begin_payload,
        },
    )?;
    let end_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-send",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "receiver_thread_id": "019d0000-0000-7000-8000-000000000002",
            "prompt": "hello",
            "status": "running"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeEnded {
            tool_call_id: "call-send".to_string(),
            status: ExecutionStatus::Completed,
            runtime_payload: end_payload,
        },
    )?;
    // Record the recipient thread and the delivered mailbox message so the
    // pending edge can resolve to the precise conversation item.
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/child",
    )?;
    start_turn_for_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
    )?;
    let delivered =
        inter_agent_message("/root", "/root/child", "hello", /*trigger_turn*/ false);
    append_inference_request(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
        "inference-child-1",
        vec![message("assistant", &delivered)],
    )?;
    let replayed = replay_bundle(temp.path())?;
    let edge = &replayed.interaction_edges["edge:tool:call-send"];
    assert_eq!(edge.kind, InteractionEdgeKind::SendMessage);
    assert_eq!(
        edge.source,
        TraceAnchor::ToolCall {
            tool_call_id: "call-send".to_string()
        }
    );
    let target_item_id = target_conversation_item_id(&edge.target);
    assert_eq!(edge.carried_item_ids, vec![target_item_id.clone()]);
    assert_eq!(
        replayed.conversation_items[target_item_id].thread_id,
        "019d0000-0000-7000-8000-000000000002"
    );
    assert!(edge.ended_at_unix_ms.is_some());
    Ok(())
}
#[test]
fn close_agent_runtime_payload_targets_thread() -> anyhow::Result<()> {
    // close_agent always anchors to the target Thread (never a conversation
    // item) and carries the full invocation/runtime/result payload evidence.
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/child",
    )?;
    start_agent_turn(&writer, "turn-1")?;
    let invocation_payload = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "close_agent",
            "payload": {
                "type": "function",
                "arguments": r#"{"target":"/root/child"}"#
            }
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "call-close".to_string(),
            model_visible_call_id: Some("call-close".to_string()),
            code_mode_runtime_tool_id: None,
            requester: RawToolCallRequester::Model,
            kind: ToolCallKind::CloseAgent,
            summary: ToolCallSummary::Generic {
                label: "close_agent".to_string(),
                input_preview: None,
                output_preview: None,
            },
            invocation_payload: Some(invocation_payload.clone()),
        },
    )?;
    let begin_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-close",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "receiver_thread_id": "019d0000-0000-7000-8000-000000000002"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "call-close".to_string(),
            runtime_payload: begin_payload.clone(),
        },
    )?;
    let end_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-close",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "receiver_thread_id": "019d0000-0000-7000-8000-000000000002",
            "receiver_agent_nickname": "Scout",
            "receiver_agent_role": "explorer",
            "status": "running"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeEnded {
            tool_call_id: "call-close".to_string(),
            status: ExecutionStatus::Completed,
            runtime_payload: end_payload.clone(),
        },
    )?;
    let result_payload = writer.write_json_payload(
        RawPayloadKind::ToolResult,
        &json!({"previous_status": "running"}),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "call-close".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: Some(result_payload.clone()),
        },
    )?;
    writer.append(RawTraceEventPayload::ThreadEnded {
        thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
        status: RolloutStatus::Completed,
    })?;
    let replayed = replay_bundle(temp.path())?;
    let edge = &replayed.interaction_edges["edge:tool:call-close"];
    assert_eq!(edge.kind, InteractionEdgeKind::CloseAgent);
    assert_eq!(
        edge.source,
        TraceAnchor::ToolCall {
            tool_call_id: "call-close".to_string()
        }
    );
    assert_eq!(
        edge.target,
        TraceAnchor::Thread {
            thread_id: "019d0000-0000-7000-8000-000000000002".to_string()
        }
    );
    assert!(edge.carried_item_ids.is_empty());
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![
            invocation_payload.raw_payload_id,
            begin_payload.raw_payload_id,
            end_payload.raw_payload_id,
            result_payload.raw_payload_id,
        ]
    );
    // Closing a child completes its thread but leaves the rollout running.
    assert_eq!(
        replayed.threads["019d0000-0000-7000-8000-000000000002"]
            .execution
            .status,
        ExecutionStatus::Completed
    );
    assert_eq!(replayed.status, RolloutStatus::Running);
    Ok(())
}
#[test]
fn agent_result_edge_links_child_result_to_parent_notification() -> anyhow::Result<()> {
    // An AgentResult edge should run from the child's final assistant message
    // to the parent-side conversation item carrying the notification.
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/child",
    )?;
    start_turn_for_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
    )?;
    append_completed_inference(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
        "inference-child-1",
        vec![message("assistant", "task")],
        vec![message("assistant", "done")],
    )?;
    let notification = "<subagent_notification>{\"agent_path\":\"/root/child\",\"status\":{\"completed\":\"done\"}}</subagent_notification>";
    let carried_payload = writer.write_json_payload(
        RawPayloadKind::AgentResult,
        &json!({
            "child_agent_path": "/root/child",
            "message": notification,
            "status": {"completed": "done"}
        }),
    )?;
    writer.append_with_context(
        trace_context_for_thread("019d0000-0000-7000-8000-000000000002", "turn-child-1"),
        RawTraceEventPayload::AgentResultObserved {
            edge_id: "edge:agent_result:thread-child:turn-child-1:thread-root".to_string(),
            child_thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
            child_codex_turn_id: "turn-child-1".to_string(),
            parent_thread_id: "019d0000-0000-7000-8000-000000000001".to_string(),
            message: notification.to_string(),
            carried_payload: Some(carried_payload.clone()),
        },
    )?;
    // Parent turn then surfaces the notification as a model-visible message.
    start_agent_turn(&writer, "turn-root-1")?;
    let delivered = inter_agent_message(
        "/root/child",
        "/root",
        notification,
        /*trigger_turn*/ false,
    );
    append_inference_request(
        &writer,
        "019d0000-0000-7000-8000-000000000001",
        "turn-root-1",
        "inference-root-1",
        vec![message("assistant", &delivered)],
    )?;
    let replayed = replay_bundle(temp.path())?;
    let edge =
        &replayed.interaction_edges["edge:agent_result:thread-child:turn-child-1:thread-root"];
    assert_eq!(edge.kind, InteractionEdgeKind::AgentResult);
    let TraceAnchor::ConversationItem {
        item_id: source_item_id,
    } = &edge.source
    else {
        panic!("expected child result conversation item source");
    };
    assert_eq!(
        text_body(&replayed.conversation_items[source_item_id]),
        "done"
    );
    let target_item_id = target_conversation_item_id(&edge.target);
    assert_eq!(
        replayed.conversation_items[target_item_id].thread_id,
        "019d0000-0000-7000-8000-000000000001"
    );
    assert_eq!(edge.carried_item_ids, vec![target_item_id.clone()]);
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![carried_payload.raw_payload_id]
    );
    Ok(())
}
#[test]
fn agent_result_edge_falls_back_to_child_thread_without_result_message() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    // The child thread and turn exist, but there is intentionally no completed
    // assistant message for this turn. Failed child tasks can still notify the
    // parent through AgentStatus, so the result edge must not require a final
    // transcript item from the child.
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/child",
    )?;
    start_turn_for_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
    )?;
    let notification = r#"<subagent_notification>{"agent_path":"/root/child","status":{"failed":"boom"}}</subagent_notification>"#;
    let carried_payload = writer.write_json_payload(
        RawPayloadKind::AgentResult,
        &json!({
            "child_agent_path": "/root/child",
            "message": notification,
            "status": {"failed": "boom"}
        }),
    )?;
    writer.append_with_context(
        trace_context_for_thread("019d0000-0000-7000-8000-000000000002", "turn-child-1"),
        RawTraceEventPayload::AgentResultObserved {
            edge_id: "edge:agent_result:thread-child:turn-child-1:thread-root".to_string(),
            child_thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
            child_codex_turn_id: "turn-child-1".to_string(),
            parent_thread_id: "019d0000-0000-7000-8000-000000000001".to_string(),
            message: notification.to_string(),
            carried_payload: Some(carried_payload.clone()),
        },
    )?;
    // The parent does receive the failure notification as a model-visible
    // mailbox item. The target should remain that precise parent-side
    // ConversationItem even though the source falls back to the child thread.
    start_agent_turn(&writer, "turn-root-1")?;
    let delivered = inter_agent_message(
        "/root/child",
        "/root",
        notification,
        /*trigger_turn*/ false,
    );
    append_inference_request(
        &writer,
        "019d0000-0000-7000-8000-000000000001",
        "turn-root-1",
        "inference-root-1",
        vec![message("assistant", &delivered)],
    )?;
    let replayed = replay_bundle(temp.path())?;
    let edge =
        &replayed.interaction_edges["edge:agent_result:thread-child:turn-child-1:thread-root"];
    assert_eq!(edge.kind, InteractionEdgeKind::AgentResult);
    // With no child assistant message, the source anchors to the thread itself.
    assert_eq!(
        edge.source,
        TraceAnchor::Thread {
            thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
        }
    );
    let target_item_id = target_conversation_item_id(&edge.target);
    assert_eq!(
        replayed.conversation_items[target_item_id].thread_id,
        "019d0000-0000-7000-8000-000000000001"
    );
    assert_eq!(edge.carried_item_ids, vec![target_item_id.clone()]);
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![carried_payload.raw_payload_id]
    );
    Ok(())
}
/// Raw payload references written during a spawn_agent tool lifecycle, so the
/// spawn tests can assert that edges carry every canonical payload.
struct SpawnAgentToolPayloads {
    // Canonical dispatch invocation (tool name + function arguments).
    invocation: RawPayloadRef,
    // Runtime-begin event payload.
    begin: RawPayloadRef,
    // Runtime-end event payload (includes the new child thread id).
    end: RawPayloadRef,
    // Final tool result payload.
    result: RawPayloadRef,
}
/// Appends a full spawn_agent tool lifecycle (start, runtime begin/end, end)
/// for `turn_id`, returning the payload refs written at each step.
fn append_spawn_agent_tool_lifecycle(
    writer: &TraceWriter,
    turn_id: &str,
) -> anyhow::Result<SpawnAgentToolPayloads> {
    // Keep the parent-side tool lifecycle in one place so the spawn tests can
    // focus on the child-side event that decides the edge target.
    let invocation = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "spawn_agent",
            "payload": {
                "type": "function",
                "arguments": r#"{"task_name":"repo_file_counter","message":"count"}"#
            }
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent(turn_id),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "call-spawn".to_string(),
            model_visible_call_id: Some("call-spawn".to_string()),
            code_mode_runtime_tool_id: None,
            requester: RawToolCallRequester::Model,
            kind: ToolCallKind::SpawnAgent,
            summary: ToolCallSummary::Generic {
                label: "spawn_agent".to_string(),
                input_preview: None,
                output_preview: None,
            },
            invocation_payload: Some(invocation.clone()),
        },
    )?;
    // Runtime begin: the child thread id is not yet known at this point.
    let begin = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-spawn",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "prompt": "count"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent(turn_id),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "call-spawn".to_string(),
            runtime_payload: begin.clone(),
        },
    )?;
    // Runtime end: reports the spawned child thread id.
    let end = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-spawn",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "new_thread_id": "019d0000-0000-7000-8000-000000000002",
            "prompt": "count",
            "model": "gpt-test",
            "reasoning_effort": "medium",
            "status": "running"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent(turn_id),
        RawTraceEventPayload::ToolCallRuntimeEnded {
            tool_call_id: "call-spawn".to_string(),
            status: ExecutionStatus::Completed,
            runtime_payload: end.clone(),
        },
    )?;
    let result = writer.write_json_payload(
        RawPayloadKind::ToolResult,
        &json!({"task_name": "/root/repo_file_counter"}),
    )?;
    writer.append_with_context(
        trace_context_for_agent(turn_id),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "call-spawn".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: Some(result.clone()),
        },
    )?;
    Ok(SpawnAgentToolPayloads {
        invocation,
        begin,
        end,
        result,
    })
}
/// Builds the serialized JSON envelope used for messages exchanged between
/// agents in these tests (author, recipient, content, and turn trigger flag).
fn inter_agent_message(author: &str, recipient: &str, content: &str, trigger_turn: bool) -> String {
    let envelope = json!({
        "author": author,
        "recipient": recipient,
        "other_recipients": [],
        "content": content,
        "trigger_turn": trigger_turn,
    });
    envelope.to_string()
}
/// Unwraps an edge target anchor that tests expect to be a conversation item,
/// returning the referenced item id. Panics on any other anchor kind.
fn target_conversation_item_id(anchor: &TraceAnchor) -> &String {
    match anchor {
        TraceAnchor::ConversationItem { item_id } => item_id,
        _ => panic!("expected conversation item target"),
    }
}
/// Returns the text of a conversation item whose body holds exactly one text
/// part; panics if the body has any other shape.
fn text_body(item: &crate::model::ConversationItem) -> &str {
    match item.body.parts.as_slice() {
        [crate::model::ConversationPart::Text { text }] => text,
        _ => panic!("expected single text part"),
    }
}

View File

@@ -0,0 +1,606 @@
//! Terminal reduction for exec-like tool calls.
//!
//! The raw trace records terminal activity as normal tool lifecycle events.
//! Protocol-backed exec events carry `ExecCommand*` payloads with the richest
//! runtime details. Direct tools without protocol observations, such as
//! `write_stdin`, can still form a terminal row from the canonical dispatch
//! invocation/result payloads when those payloads carry the session join key.
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use serde::Deserialize;
use serde_json::Value as JsonValue;
use super::push_unique;
use crate::model::ExecutionStatus;
use crate::model::ExecutionWindow;
use crate::model::TerminalModelObservation;
use crate::model::TerminalObservationSource;
use crate::model::TerminalOperation;
use crate::model::TerminalOperationId;
use crate::model::TerminalOperationKind;
use crate::model::TerminalRequest;
use crate::model::TerminalResult;
use crate::model::TerminalSession;
use crate::model::ToolCallKind;
use crate::payload::RawPayloadRef;
use crate::raw_event::RawEventSeq;
use crate::reducer::TraceReducer;
impl TraceReducer {
    /// Starts a terminal operation from a canonical dispatch invocation payload.
    ///
    /// This is currently needed for direct tools such as write-stdin that do not
    /// emit a richer protocol runtime-begin event with the terminal join key.
    /// Returns `Ok(None)` for non-write-stdin kinds or when the invocation
    /// payload is absent.
    pub(in crate::reducer) fn start_terminal_operation_from_invocation(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: &str,
        tool_call_id: &str,
        kind: &ToolCallKind,
        invocation_payload: Option<&RawPayloadRef>,
    ) -> Result<Option<TerminalOperationId>> {
        if !matches!(kind, ToolCallKind::WriteStdin) {
            return Ok(None);
        }
        let operation_kind = TerminalOperationKind::WriteStdin;
        let Some(invocation_payload) = invocation_payload else {
            // Payload writes are best-effort in the live recorder. If the
            // canonical invocation is missing, keep the ToolCall but avoid
            // fabricating a lossy terminal row.
            return Ok(None);
        };
        let payload = self.read_payload_json(invocation_payload)?;
        let request = parse_dispatch_terminal_request(payload).with_context(|| {
            format!(
                "parse terminal invocation payload {} as dispatch payload",
                invocation_payload.raw_payload_id
            )
        })?;
        self.insert_terminal_operation(TerminalOperationStart {
            seq,
            wall_time_unix_ms,
            thread_id,
            tool_call_id,
            operation_kind,
            raw_payload: invocation_payload,
            request,
        })
    }
    /// Starts a terminal operation from a protocol runtime-begin payload.
    ///
    /// Returns `Ok(None)` for tool kinds that do not map to a terminal
    /// operation (see `terminal_operation_kind`).
    pub(in crate::reducer) fn start_terminal_operation_from_runtime(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: &str,
        tool_call_id: &str,
        kind: &ToolCallKind,
        runtime_payload: &RawPayloadRef,
    ) -> Result<Option<TerminalOperationId>> {
        let Some(operation_kind) = terminal_operation_kind(kind) else {
            return Ok(None);
        };
        let payload = self.read_payload_json(runtime_payload)?;
        let payload: ExecCommandBeginPayload =
            serde_json::from_value(payload).with_context(|| {
                format!(
                    "parse terminal runtime start payload {}",
                    runtime_payload.raw_payload_id
                )
            })?;
        let request = parse_protocol_terminal_request(payload, &operation_kind);
        self.insert_terminal_operation(TerminalOperationStart {
            seq,
            wall_time_unix_ms,
            thread_id,
            tool_call_id,
            operation_kind,
            raw_payload: runtime_payload,
            request,
        })
    }
    /// Records a new running terminal operation and, when the start payload
    /// already carries a process id, joins it to (or creates) its session.
    fn insert_terminal_operation(
        &mut self,
        start: TerminalOperationStart<'_>,
    ) -> Result<Option<TerminalOperationId>> {
        let operation_id = self.next_terminal_operation_id();
        let ParsedTerminalRequest {
            terminal_id,
            request,
        } = start.request;
        self.rollout.terminal_operations.insert(
            operation_id.clone(),
            TerminalOperation {
                operation_id: operation_id.clone(),
                terminal_id: terminal_id.clone(),
                tool_call_id: start.tool_call_id.to_string(),
                kind: start.operation_kind,
                execution: ExecutionWindow {
                    started_at_unix_ms: start.wall_time_unix_ms,
                    started_seq: start.seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                request,
                result: None,
                model_observations: Vec::new(),
                raw_payload_ids: vec![start.raw_payload.raw_payload_id.clone()],
            },
        );
        if let Some(terminal_id) = terminal_id {
            self.ensure_terminal_session(
                start.thread_id,
                &terminal_id,
                &operation_id,
                start.wall_time_unix_ms,
                start.seq,
            )?;
        }
        Ok(Some(operation_id))
    }
    /// Completes the terminal operation associated with a tool call, if one exists.
    ///
    /// Non-terminal tools flow through the same generic tool lifecycle, so callers
    /// may invoke this unconditionally and receive Ok for unrelated tool kinds.
    pub(in crate::reducer) fn end_terminal_operation(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: &str,
        operation_id: &str,
        status: ExecutionStatus,
        response_payload: Option<&RawPayloadRef>,
    ) -> Result<()> {
        // Read the kind first (immutable borrow) so the response payload can be
        // parsed before mutating the operation below.
        let Some(operation_kind) = self
            .rollout
            .terminal_operations
            .get(operation_id)
            .map(|operation| operation.kind.clone())
        else {
            bail!("terminal end referenced unknown operation {operation_id}");
        };
        let response = response_payload
            .map(|payload| {
                let value = self.read_payload_json(payload)?;
                let response = parse_terminal_response_payload(
                    value,
                    &operation_kind,
                    &payload.raw_payload_id,
                )?;
                Ok::<_, anyhow::Error>((payload.raw_payload_id.clone(), response))
            })
            .transpose()?;
        let (terminal_id, started_at_unix_ms, started_seq) = {
            let Some(operation) = self.rollout.terminal_operations.get_mut(operation_id) else {
                bail!("terminal end referenced unknown operation {operation_id}");
            };
            operation.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
            operation.execution.ended_seq = Some(seq);
            operation.execution.status = status;
            if let Some((raw_payload_id, response)) = response {
                push_unique(&mut operation.raw_payload_ids, &raw_payload_id);
                // If begin and end both report a process id they must name the
                // same terminal. If begin omitted it, the end event completes
                // the session join key for this operation.
                match (&operation.terminal_id, response.terminal_id.as_deref()) {
                    (Some(existing), Some(process_id)) if existing != process_id => {
                        bail!(
                            "terminal operation {operation_id} changed process id from \
                             {existing} to {process_id}"
                        );
                    }
                    (None, Some(process_id)) => {
                        operation.terminal_id = Some(process_id.to_string());
                    }
                    (Some(_), Some(_)) | (Some(_), None) | (None, None) => {}
                }
                operation.result = Some(response.result);
            }
            (
                operation.terminal_id.clone(),
                operation.execution.started_at_unix_ms,
                operation.execution.started_seq,
            )
        };
        // The session may only become joinable now, if the end event was the
        // first to report a process id.
        if let Some(terminal_id) = terminal_id {
            self.ensure_terminal_session(
                thread_id,
                &terminal_id,
                operation_id,
                started_at_unix_ms,
                started_seq,
            )?;
        }
        Ok(())
    }
    /// Creates the terminal session on first sight of `terminal_id` and records
    /// `operation_id` as belonging to it. Fails if the session already exists
    /// on a different thread.
    fn ensure_terminal_session(
        &mut self,
        thread_id: &str,
        terminal_id: &str,
        operation_id: &str,
        started_at_unix_ms: i64,
        started_seq: RawEventSeq,
    ) -> Result<()> {
        if !self.rollout.terminal_sessions.contains_key(terminal_id) {
            self.rollout.terminal_sessions.insert(
                terminal_id.to_string(),
                TerminalSession {
                    terminal_id: terminal_id.to_string(),
                    thread_id: thread_id.to_string(),
                    created_by_operation_id: operation_id.to_string(),
                    operation_ids: Vec::new(),
                    execution: ExecutionWindow {
                        started_at_unix_ms,
                        started_seq,
                        // Current raw events do not report a terminal/session
                        // shutdown boundary, so the session remains open even
                        // after individual operations complete.
                        ended_at_unix_ms: None,
                        ended_seq: None,
                        status: ExecutionStatus::Running,
                    },
                },
            );
        }
        let Some(session) = self.rollout.terminal_sessions.get_mut(terminal_id) else {
            bail!("terminal session {terminal_id} disappeared during reduction");
        };
        if session.thread_id != thread_id {
            bail!(
                "terminal session {terminal_id} belongs to thread {}, not {thread_id}",
                session.thread_id
            );
        }
        push_unique(&mut session.operation_ids, operation_id);
        Ok(())
    }
    /// Mirrors model-visible tool items onto the terminal observation view.
    ///
    /// Runtime terminal rows are useful on their own, but the model-visible call
    /// and output item ids let viewers jump between transcript and terminal timelines.
    pub(in crate::reducer) fn sync_terminal_model_observation(
        &mut self,
        tool_call_id: &str,
    ) -> Result<()> {
        let Some(tool_call) = self.rollout.tool_calls.get(tool_call_id) else {
            bail!("tool call {tool_call_id} disappeared during terminal observation linking");
        };
        let Some(operation_id) = tool_call.terminal_operation_id.clone() else {
            // Not a terminal-backed tool call; nothing to mirror.
            return Ok(());
        };
        let call_item_ids = tool_call.model_visible_call_item_ids.clone();
        let output_item_ids = tool_call.model_visible_output_item_ids.clone();
        if call_item_ids.is_empty() && output_item_ids.is_empty() {
            return Ok(());
        }
        let Some(operation) = self.rollout.terminal_operations.get_mut(&operation_id) else {
            bail!("terminal operation {operation_id} disappeared during observation linking");
        };
        // A terminal result and a model-visible tool output are intentionally
        // separate: the former is what the runtime saw, the latter is what later
        // inference payloads prove was shown back to the model.
        if let Some(observation) = operation
            .model_observations
            .iter_mut()
            .find(|observation| observation.source == TerminalObservationSource::DirectToolCall)
        {
            observation.call_item_ids = call_item_ids;
            observation.output_item_ids = output_item_ids;
        } else {
            operation.model_observations.push(TerminalModelObservation {
                call_item_ids,
                output_item_ids,
                source: TerminalObservationSource::DirectToolCall,
            });
        }
        Ok(())
    }
    /// Allocates the next sequential terminal operation id
    /// (`terminal_operation:<ordinal>`).
    fn next_terminal_operation_id(&mut self) -> TerminalOperationId {
        let ordinal = self.next_terminal_operation_ordinal;
        self.next_terminal_operation_ordinal += 1;
        format!("terminal_operation:{ordinal}")
    }
}
/// Maps a tool-call kind onto the terminal operation kind it projects to.
///
/// Only exec-like tools produce terminal rows; every other kind returns `None`
/// and flows through the generic tool lifecycle instead. The match is kept
/// exhaustive (no `_` arm) so adding a `ToolCallKind` variant forces an
/// explicit decision here.
fn terminal_operation_kind(kind: &ToolCallKind) -> Option<TerminalOperationKind> {
    match kind {
        ToolCallKind::ExecCommand => Some(TerminalOperationKind::ExecCommand),
        ToolCallKind::WriteStdin => Some(TerminalOperationKind::WriteStdin),
        ToolCallKind::ApplyPatch
        | ToolCallKind::Mcp { .. }
        | ToolCallKind::Web
        | ToolCallKind::ImageGeneration
        | ToolCallKind::SpawnAgent
        | ToolCallKind::AssignAgentTask
        | ToolCallKind::SendMessage
        | ToolCallKind::WaitAgent
        | ToolCallKind::CloseAgent
        | ToolCallKind::Other { .. } => None,
    }
}
/// Everything needed to record the start of a terminal operation, bundled so
/// both the invocation and runtime start paths share one insertion routine.
struct TerminalOperationStart<'a> {
    seq: RawEventSeq,
    wall_time_unix_ms: i64,
    thread_id: &'a str,
    tool_call_id: &'a str,
    operation_kind: TerminalOperationKind,
    // The raw payload the operation was parsed from; recorded on the row.
    raw_payload: &'a RawPayloadRef,
    request: ParsedTerminalRequest,
}
/// A parsed terminal request plus the session join key, when the start payload
/// already carried one.
struct ParsedTerminalRequest {
    terminal_id: Option<String>,
    request: TerminalRequest,
}
/// A parsed terminal response plus the session join key, when the end payload
/// carried one.
struct ParsedTerminalResponse {
    terminal_id: Option<String>,
    result: TerminalResult,
}
/// Builds a `ParsedTerminalRequest` from a protocol `ExecCommand*` begin payload.
///
/// Startup/poll paths usually include a process id at begin time, but plain
/// exec starts may only learn it in the matching end event, so `terminal_id`
/// may stay `None` here and be completed later.
fn parse_protocol_terminal_request(
    payload: ExecCommandBeginPayload,
    operation_kind: &TerminalOperationKind,
) -> ParsedTerminalRequest {
    // `payload` is consumed by value and `process_id` is not read again, so it
    // can be moved instead of cloned.
    let terminal_id = payload.process_id;
    let request = match operation_kind {
        TerminalOperationKind::ExecCommand => {
            // Join for display before the command vector is moved into the request.
            let display_command = payload.command.join(" ");
            TerminalRequest::ExecCommand {
                display_command,
                command: payload.command,
                cwd: payload.cwd,
                // Protocol begin events do not carry client-side output limits.
                yield_time_ms: None,
                max_output_tokens: None,
            }
        }
        TerminalOperationKind::WriteStdin => TerminalRequest::WriteStdin {
            stdin: payload.interaction_input.unwrap_or_default(),
            yield_time_ms: None,
            max_output_tokens: None,
        },
    };
    ParsedTerminalRequest {
        terminal_id,
        request,
    }
}
fn parse_dispatch_terminal_request(value: JsonValue) -> Result<ParsedTerminalRequest> {
let payload: DispatchedToolTraceRequestPayload = serde_json::from_value(value)?;
if payload.tool_name != "write_stdin" {
bail!(
"dispatch terminal request is for {}, not write_stdin",
payload.tool_name
);
}
if payload.payload.kind != "function" {
bail!(
"write_stdin dispatch payload used unsupported {} payload",
payload.payload.kind
);
}
let arguments = payload
.payload
.arguments
.context("write_stdin dispatch payload omitted function arguments")?;
let args: DispatchedWriteStdinArgs = serde_json::from_str(&arguments)
.context("parse write_stdin dispatch function arguments")?;
let terminal_id = terminal_id_from_json(&args.session_id)
.context("write_stdin dispatch payload omitted session_id")?;
Ok(ParsedTerminalRequest {
terminal_id: Some(terminal_id),
request: TerminalRequest::WriteStdin {
stdin: args.chars,
yield_time_ms: args.yield_time_ms,
max_output_tokens: args.max_output_tokens,
},
})
}
/// Parses a terminal end payload according to the operation kind.
///
/// Exec operations always carry protocol `ExecCommandEnd` payloads. write_stdin
/// may end with either a protocol payload or a canonical dispatch payload, so
/// the protocol shape is tried first and the dispatch shape is the fallback;
/// the error reported on failure includes both parse attempts.
fn parse_terminal_response_payload(
    value: JsonValue,
    operation_kind: &TerminalOperationKind,
    raw_payload_id: &str,
) -> Result<ParsedTerminalResponse> {
    match operation_kind {
        TerminalOperationKind::ExecCommand => {
            let payload = serde_json::from_value::<ExecCommandEndPayload>(value)
                .with_context(|| format!("parse exec terminal response {raw_payload_id}"))?;
            Ok(parse_protocol_terminal_response(payload))
        }
        TerminalOperationKind::WriteStdin => {
            // Clone so the dispatch fallback can reuse the value after a failed
            // protocol parse.
            match serde_json::from_value::<ExecCommandEndPayload>(value.clone()) {
                Ok(payload) => Ok(parse_protocol_terminal_response(payload)),
                Err(protocol_err) => parse_dispatch_terminal_response(value).with_context(|| {
                    format!(
                        "parse write_stdin terminal response {raw_payload_id} as protocol payload \
                         ({protocol_err}) or dispatch payload"
                    )
                }),
            }
        }
    }
}
fn parse_protocol_terminal_response(payload: ExecCommandEndPayload) -> ParsedTerminalResponse {
ParsedTerminalResponse {
terminal_id: payload.process_id,
result: TerminalResult {
exit_code: Some(payload.exit_code),
stdout: payload.stdout,
stderr: payload.stderr,
formatted_output: Some(payload.formatted_output),
original_token_count: None,
chunk_id: None,
},
}
}
fn parse_dispatch_terminal_response(value: JsonValue) -> Result<ParsedTerminalResponse> {
let payload: DispatchedToolTraceResponsePayload = serde_json::from_value(value)?;
let result = match payload {
DispatchedToolTraceResponsePayload::DirectResponse { response_item } => {
let output = response_item
.get("output")
.and_then(json_text_content)
.unwrap_or_else(|| response_item.to_string());
TerminalResult {
exit_code: None,
stdout: output.clone(),
stderr: String::new(),
formatted_output: Some(output),
original_token_count: None,
chunk_id: None,
}
}
DispatchedToolTraceResponsePayload::CodeModeResponse { value } => {
// Code-mode returns the JavaScript-facing tool value, not the text
// shown to the model. For write_stdin that value is the structured
// unified-exec result, so keep ToolCall.raw_result_payload_id as the
// raw boundary while projecting terminal-specific fields here.
parse_code_mode_exec_result(value)
}
DispatchedToolTraceResponsePayload::Error { error } => TerminalResult {
exit_code: None,
stdout: String::new(),
stderr: error.clone(),
formatted_output: Some(error),
original_token_count: None,
chunk_id: None,
},
};
Ok(ParsedTerminalResponse {
terminal_id: None,
result,
})
}
/// Best-effort projection of a code-mode exec value into a terminal result.
///
/// Values matching the structured unified-exec shape keep their exit code and
/// token metadata; anything else degrades to a plain text rendering.
fn parse_code_mode_exec_result(value: JsonValue) -> TerminalResult {
    if let Ok(parsed) = serde_json::from_value::<CodeModeExecResult>(value.clone()) {
        return TerminalResult {
            exit_code: parsed.exit_code,
            stdout: parsed.output.clone(),
            stderr: String::new(),
            formatted_output: Some(parsed.output),
            original_token_count: parsed.original_token_count,
            chunk_id: parsed.chunk_id,
        };
    }
    let output = json_text_content(&value).unwrap_or_else(|| value.to_string());
    TerminalResult {
        exit_code: None,
        stdout: output.clone(),
        stderr: String::new(),
        formatted_output: Some(output),
        original_token_count: None,
        chunk_id: None,
    }
}
/// Extracts human-readable text from a JSON value.
///
/// Strings pass through, arrays are read as lists of `{ "text": … }` items
/// joined with newlines (an empty join yields `None`), `null` yields `None`,
/// and any other value is rendered as compact JSON.
fn json_text_content(value: &JsonValue) -> Option<String> {
    match value {
        JsonValue::Null => None,
        JsonValue::String(text) => Some(text.clone()),
        JsonValue::Array(items) => {
            let mut parts: Vec<&str> = Vec::new();
            for item in items {
                if let Some(text) = item.get("text").and_then(JsonValue::as_str) {
                    parts.push(text);
                }
            }
            let joined = parts.join("\n");
            if joined.is_empty() { None } else { Some(joined) }
        }
        other => Some(other.to_string()),
    }
}
/// Normalizes a JSON session id into a terminal id string.
///
/// Non-empty strings and numbers are accepted; empty strings and every other
/// JSON shape yield `None`.
fn terminal_id_from_json(value: &JsonValue) -> Option<String> {
    if let Some(id) = value.as_str() {
        return (!id.is_empty()).then(|| id.to_string());
    }
    if let JsonValue::Number(number) = value {
        return Some(number.to_string());
    }
    None
}
/// Protocol exec runtime-begin payload; `process_id` may be absent for plain
/// exec starts that only learn it at end time.
#[derive(Deserialize)]
struct ExecCommandBeginPayload {
    process_id: Option<String>,
    command: Vec<String>,
    cwd: String,
    // Present for write_stdin-style interactions with a running process.
    interaction_input: Option<String>,
}
/// Protocol exec runtime-end payload carrying the full output streams.
#[derive(Deserialize)]
struct ExecCommandEndPayload {
    process_id: Option<String>,
    stdout: String,
    stderr: String,
    exit_code: i32,
    formatted_output: String,
}
/// Canonical dispatch invocation payload (tool name plus typed payload).
#[derive(Deserialize)]
struct DispatchedToolTraceRequestPayload {
    tool_name: String,
    payload: DispatchedToolPayload,
}
/// Typed dispatch payload; only `"function"` payloads are supported here.
#[derive(Deserialize)]
struct DispatchedToolPayload {
    #[serde(rename = "type")]
    kind: String,
    // JSON-encoded function arguments string, when the payload is a function call.
    arguments: Option<String>,
}
/// Function arguments of a write_stdin dispatch call.
#[derive(Deserialize)]
struct DispatchedWriteStdinArgs {
    // String or number session id; normalized by `terminal_id_from_json`.
    session_id: JsonValue,
    #[serde(default)]
    chars: String,
    yield_time_ms: Option<u64>,
    max_output_tokens: Option<usize>,
}
/// Canonical dispatch result payload variants, tagged by `type`.
#[derive(Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
enum DispatchedToolTraceResponsePayload {
    DirectResponse { response_item: JsonValue },
    CodeModeResponse { value: JsonValue },
    Error { error: String },
}
/// Structured unified-exec result shape returned by code-mode tools.
#[derive(Deserialize)]
struct CodeModeExecResult {
    chunk_id: Option<String>,
    exit_code: Option<i32>,
    original_token_count: Option<usize>,
    output: String,
}
#[cfg(test)]
#[path = "terminal_tests.rs"]
mod tests;

View File

@@ -0,0 +1,580 @@
use pretty_assertions::assert_eq;
use serde_json::json;
use tempfile::TempDir;
use crate::model::ExecutionStatus;
use crate::model::ExecutionWindow;
use crate::model::TerminalModelObservation;
use crate::model::TerminalObservationSource;
use crate::model::TerminalOperation;
use crate::model::TerminalOperationKind;
use crate::model::TerminalRequest;
use crate::model::TerminalResult;
use crate::model::TerminalSession;
use crate::model::ToolCallKind;
use crate::model::ToolCallSummary;
use crate::payload::RawPayloadKind;
use crate::raw_event::RawTraceEventPayload;
use crate::reducer::test_support::create_started_writer;
use crate::reducer::test_support::generic_summary;
use crate::reducer::test_support::message;
use crate::reducer::test_support::start_turn;
use crate::reducer::test_support::trace_context;
use crate::replay_bundle;
use crate::writer::TraceWriter;
#[test]
fn exec_tool_reduces_to_terminal_operation_and_session() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    append_inference_with_tool_call(&writer)?;
    // Full exec lifecycle: invocation, runtime begin/end, canonical result.
    let invocation_payload = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "exec_command",
            "tool_namespace": null,
            "payload": {
                "type": "function",
                "arguments": "{\"cmd\":\"cargo test\"}"
            }
        }),
    )?;
    let invocation_payload_id = invocation_payload.raw_payload_id.clone();
    let _tool_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-1".to_string(),
            model_visible_call_id: Some("call-1".to_string()),
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::ExecCommand,
            summary: generic_summary("exec_command"),
            invocation_payload: Some(invocation_payload),
        },
    )?;
    // Runtime begin omits the process id on purpose: plain exec starts only
    // learn it from the end event.
    let runtime_start_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "tool-1",
            "turn_id": "turn-1",
            "command": ["cargo", "test"],
            "cwd": "/repo"
        }),
    )?;
    let runtime_start_payload_id = runtime_start_payload.raw_payload_id.clone();
    let runtime_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "tool-1".to_string(),
            runtime_payload: runtime_start_payload,
        },
    )?;
    // Runtime end supplies the process id ("pty-1"), completing the session join.
    let runtime_end_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "tool-1",
            "process_id": "pty-1",
            "turn_id": "turn-1",
            "command": ["cargo", "test"],
            "cwd": "/repo",
            "stdout": "ok\n",
            "stderr": "",
            "exit_code": 0,
            "formatted_output": "ok\n",
            "status": "completed"
        }),
    )?;
    let runtime_end_payload_id = runtime_end_payload.raw_payload_id.clone();
    let runtime_end = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeEnded {
            tool_call_id: "tool-1".to_string(),
            status: ExecutionStatus::Completed,
            runtime_payload: runtime_end_payload,
        },
    )?;
    let result_payload = writer.write_json_payload(
        RawPayloadKind::ToolResult,
        &json!({
            "type": "direct_response",
            "response_item": {
                "type": "function_call_output",
                "call_id": "call-1",
                "output": "ok\n"
            }
        }),
    )?;
    let result_payload_id = result_payload.raw_payload_id.clone();
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "tool-1".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: Some(result_payload),
        },
    )?;
    // Second turn proves the tool output became model-visible.
    start_turn(&writer, "turn-2")?;
    append_followup_with_tool_output(&writer)?;
    let rollout = replay_bundle(temp.path())?;
    let operation_id = "terminal_operation:1".to_string();
    let output_item_id = rollout.inference_calls["inference-2"]
        .request_item_ids
        .last()
        .expect("tool output item")
        .clone();
    assert_eq!(
        rollout.tool_calls["tool-1"].terminal_operation_id,
        Some(operation_id.clone()),
    );
    assert_eq!(
        rollout.tool_calls["tool-1"].raw_invocation_payload_id,
        Some(invocation_payload_id),
    );
    assert_eq!(
        rollout.tool_calls["tool-1"].raw_result_payload_id,
        Some(result_payload_id),
    );
    assert_eq!(
        rollout.tool_calls["tool-1"].raw_runtime_payload_ids,
        vec![
            runtime_start_payload_id.clone(),
            runtime_end_payload_id.clone()
        ],
    );
    assert_eq!(
        rollout.tool_calls["tool-1"].summary,
        ToolCallSummary::Terminal {
            operation_id: operation_id.clone(),
        },
    );
    // The whole terminal operation row, including the model observation that
    // links transcript items back to this terminal run.
    assert_eq!(
        rollout.terminal_operations[&operation_id],
        TerminalOperation {
            operation_id: operation_id.clone(),
            terminal_id: Some("pty-1".to_string()),
            tool_call_id: "tool-1".to_string(),
            kind: TerminalOperationKind::ExecCommand,
            execution: ExecutionWindow {
                started_at_unix_ms: runtime_start.wall_time_unix_ms,
                started_seq: runtime_start.seq,
                ended_at_unix_ms: Some(runtime_end.wall_time_unix_ms),
                ended_seq: Some(runtime_end.seq),
                status: ExecutionStatus::Completed,
            },
            request: TerminalRequest::ExecCommand {
                command: vec!["cargo".to_string(), "test".to_string()],
                display_command: "cargo test".to_string(),
                cwd: "/repo".to_string(),
                yield_time_ms: None,
                max_output_tokens: None,
            },
            result: Some(TerminalResult {
                exit_code: Some(0),
                stdout: "ok\n".to_string(),
                stderr: String::new(),
                formatted_output: Some("ok\n".to_string()),
                original_token_count: None,
                chunk_id: None,
            }),
            model_observations: vec![TerminalModelObservation {
                call_item_ids: rollout.inference_calls["inference-1"]
                    .response_item_ids
                    .clone(),
                output_item_ids: vec![output_item_id],
                source: TerminalObservationSource::DirectToolCall,
            }],
            raw_payload_ids: vec![runtime_start_payload_id, runtime_end_payload_id],
        },
    );
    // The session stays open: raw events carry no terminal shutdown boundary.
    assert_eq!(
        rollout.terminal_sessions["pty-1"],
        TerminalSession {
            terminal_id: "pty-1".to_string(),
            thread_id: "thread-root".to_string(),
            created_by_operation_id: operation_id.clone(),
            operation_ids: vec![operation_id],
            execution: ExecutionWindow {
                started_at_unix_ms: runtime_start.wall_time_unix_ms,
                started_seq: runtime_start.seq,
                ended_at_unix_ms: None,
                ended_seq: None,
                status: ExecutionStatus::Running,
            },
        },
    );
    Ok(())
}
#[test]
fn write_stdin_operation_reuses_existing_terminal_session() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    // First operation: exec startup whose begin payload already carries the
    // process id, creating session "pty-1".
    let startup_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "tool-start",
            "process_id": "pty-1",
            "turn_id": "turn-1",
            "command": ["bash"],
            "cwd": "/repo"
        }),
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-start".to_string(),
            model_visible_call_id: None,
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::ExecCommand,
            summary: generic_summary("exec_command"),
            invocation_payload: None,
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "tool-start".to_string(),
            runtime_payload: startup_payload,
        },
    )?;
    // Second operation: write_stdin against the same process id should attach
    // to the existing session rather than create a new one.
    let stdin_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "tool-stdin",
            "process_id": "pty-1",
            "turn_id": "turn-1",
            "command": ["bash"],
            "cwd": "/repo",
            "interaction_input": "echo hi\n"
        }),
    )?;
    let _stdin_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-stdin".to_string(),
            model_visible_call_id: None,
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::WriteStdin,
            summary: generic_summary("write_stdin"),
            invocation_payload: None,
        },
    )?;
    let stdin_runtime_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "tool-stdin".to_string(),
            runtime_payload: stdin_payload,
        },
    )?;
    let rollout = replay_bundle(temp.path())?;
    let startup_operation_id = "terminal_operation:1".to_string();
    let stdin_operation_id = "terminal_operation:2".to_string();
    // Both operations joined the one session, in observation order.
    assert_eq!(
        rollout.terminal_sessions["pty-1"].operation_ids,
        vec![startup_operation_id, stdin_operation_id.clone()],
    );
    // The stdin operation never ended, so it remains running with no result.
    assert_eq!(
        rollout.terminal_operations[&stdin_operation_id],
        TerminalOperation {
            operation_id: stdin_operation_id.clone(),
            terminal_id: Some("pty-1".to_string()),
            tool_call_id: "tool-stdin".to_string(),
            kind: TerminalOperationKind::WriteStdin,
            execution: ExecutionWindow {
                started_at_unix_ms: stdin_runtime_start.wall_time_unix_ms,
                started_seq: stdin_runtime_start.seq,
                ended_at_unix_ms: None,
                ended_seq: None,
                status: ExecutionStatus::Running,
            },
            request: TerminalRequest::WriteStdin {
                stdin: "echo hi\n".to_string(),
                yield_time_ms: None,
                max_output_tokens: None,
            },
            result: None,
            model_observations: Vec::new(),
            raw_payload_ids: vec!["raw_payload:2".to_string()],
        },
    );
    Ok(())
}
#[test]
fn dispatch_write_stdin_payload_reduces_to_terminal_operation() -> anyhow::Result<()> {
    // A model-dispatched `write_stdin` call recorded via invocation/result
    // payloads plus start/end events should reduce to one terminal operation,
    // with the session keyed by the numeric `session_id` argument.
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    // Raw invocation payload: tool arguments arrive JSON-encoded as a string
    // in `payload.arguments`, mirroring a function-call invocation.
    let request_payload = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "write_stdin",
            "tool_namespace": null,
            "payload": {
                "type": "function",
                "arguments": json!({
                    "session_id": 123,
                    "chars": "echo hi\n",
                    "yield_time_ms": 250,
                    "max_output_tokens": 2000
                }).to_string()
            }
        }),
    )?;
    let request_payload_id = request_payload.raw_payload_id.clone();
    let tool_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-stdin".to_string(),
            model_visible_call_id: Some("call-stdin".to_string()),
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::WriteStdin,
            summary: generic_summary("write_stdin"),
            invocation_payload: Some(request_payload),
        },
    )?;
    // Direct (non-code-mode) tool result carrying the terminal output.
    let response_payload = writer.write_json_payload(
        RawPayloadKind::ToolResult,
        &json!({
            "type": "direct_response",
            "response_item": {
                "type": "function_call_output",
                "call_id": "call-stdin",
                "output": "hi\n"
            }
        }),
    )?;
    let response_payload_id = response_payload.raw_payload_id.clone();
    let tool_end = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "tool-stdin".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: Some(response_payload),
        },
    )?;
    let rollout = replay_bundle(temp.path())?;
    // First terminal operation minted during replay.
    let operation_id = "terminal_operation:1".to_string();
    // The tool call links to the operation both through the dedicated id field
    // and through its summary projection.
    assert_eq!(
        rollout.tool_calls["tool-stdin"].terminal_operation_id,
        Some(operation_id.clone()),
    );
    assert_eq!(
        rollout.tool_calls["tool-stdin"].summary,
        ToolCallSummary::Terminal {
            operation_id: operation_id.clone(),
        },
    );
    // The operation's execution window spans the start/end event timestamps,
    // its request echoes the parsed arguments, and its result carries the raw
    // output string.
    assert_eq!(
        rollout.terminal_operations[&operation_id],
        TerminalOperation {
            operation_id: operation_id.clone(),
            terminal_id: Some("123".to_string()),
            tool_call_id: "tool-stdin".to_string(),
            kind: TerminalOperationKind::WriteStdin,
            execution: ExecutionWindow {
                started_at_unix_ms: tool_start.wall_time_unix_ms,
                started_seq: tool_start.seq,
                ended_at_unix_ms: Some(tool_end.wall_time_unix_ms),
                ended_seq: Some(tool_end.seq),
                status: ExecutionStatus::Completed,
            },
            request: TerminalRequest::WriteStdin {
                stdin: "echo hi\n".to_string(),
                yield_time_ms: Some(250),
                max_output_tokens: Some(2000),
            },
            result: Some(TerminalResult {
                exit_code: None,
                stdout: "hi\n".to_string(),
                stderr: String::new(),
                formatted_output: Some("hi\n".to_string()),
                original_token_count: None,
                chunk_id: None,
            }),
            model_observations: Vec::new(),
            raw_payload_ids: vec![request_payload_id, response_payload_id],
        },
    );
    // The write also creates terminal session "123", which stays Running
    // because no session-ending event was recorded.
    assert_eq!(
        rollout.terminal_sessions["123"],
        TerminalSession {
            terminal_id: "123".to_string(),
            thread_id: "thread-root".to_string(),
            created_by_operation_id: operation_id.clone(),
            operation_ids: vec![operation_id],
            execution: ExecutionWindow {
                started_at_unix_ms: tool_start.wall_time_unix_ms,
                started_seq: tool_start.seq,
                ended_at_unix_ms: None,
                ended_seq: None,
                status: ExecutionStatus::Running,
            },
        },
    );
    Ok(())
}
#[test]
fn code_mode_write_stdin_result_projects_structured_exec_fields() -> anyhow::Result<()> {
    // When `write_stdin` is invoked from a code cell, the result arrives as a
    // structured `code_mode_response`; replay should project its exec fields
    // (exit code, token count, chunk id) into the terminal operation result.
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    let request_payload = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "write_stdin",
            "tool_namespace": null,
            "payload": {
                "type": "function",
                "arguments": json!({
                    "session_id": 456,
                    "chars": "",
                    "yield_time_ms": 1000,
                    "max_output_tokens": 4000
                }).to_string()
            }
        }),
    )?;
    // Structured code-mode result, unlike the plain-string direct response.
    let response_payload = writer.write_json_payload(
        RawPayloadKind::ToolResult,
        &json!({
            "type": "code_mode_response",
            "value": {
                "chunk_id": "abc123",
                "wall_time_seconds": 1.25,
                "exit_code": 0,
                "original_token_count": 3,
                "output": "done\n"
            }
        }),
    )?;
    // The tool call is attributed to a running code cell rather than directly
    // to the model.
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodeCellStarted {
            runtime_cell_id: "cell-1".to_string(),
            model_visible_call_id: "call-code".to_string(),
            source_js: "await tools.write_stdin({ chars: '' })".to_string(),
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-stdin".to_string(),
            model_visible_call_id: None,
            code_mode_runtime_tool_id: Some("runtime-tool-1".to_string()),
            requester: crate::raw_event::RawToolCallRequester::CodeCell {
                runtime_cell_id: "cell-1".to_string(),
            },
            kind: ToolCallKind::WriteStdin,
            summary: generic_summary("write_stdin"),
            invocation_payload: Some(request_payload),
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "tool-stdin".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: Some(response_payload),
        },
    )?;
    let rollout = replay_bundle(temp.path())?;
    // Structured fields from the code-mode `value` object surface verbatim in
    // the reduced terminal result.
    assert_eq!(
        rollout.terminal_operations["terminal_operation:1"].result,
        Some(TerminalResult {
            exit_code: Some(0),
            stdout: "done\n".to_string(),
            stderr: String::new(),
            formatted_output: Some("done\n".to_string()),
            original_token_count: Some(3),
            chunk_id: Some("abc123".to_string()),
        }),
    );
    Ok(())
}
/// Records a full inference round-trip on `turn-1` whose response requests a
/// single `exec_command` tool call identified as `call-1`.
fn append_inference_with_tool_call(writer: &TraceWriter) -> anyhow::Result<()> {
    let request_ref = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "run tests")]
        }),
    )?;
    let started = RawTraceEventPayload::InferenceStarted {
        inference_call_id: "inference-1".to_string(),
        thread_id: "thread-root".to_string(),
        codex_turn_id: "turn-1".to_string(),
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        request_payload: request_ref,
    };
    writer.append(started)?;
    let response_ref = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "function_call",
                "name": "exec_command",
                "arguments": "{\"cmd\":\"cargo test\"}",
                "call_id": "call-1"
            }]
        }),
    )?;
    let completed = RawTraceEventPayload::InferenceCompleted {
        inference_call_id: "inference-1".to_string(),
        response_id: Some("resp-1".to_string()),
        response_payload: response_ref,
    };
    writer.append(completed)?;
    Ok(())
}
/// Records a follow-up inference request on `turn-2` that feeds the output of
/// tool call `call-1` back to the model; the inference call is intentionally
/// left without a completion event.
fn append_followup_with_tool_output(writer: &TraceWriter) -> anyhow::Result<()> {
    let followup_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "previous_response_id": "resp-1",
            "input": [{
                "type": "function_call_output",
                "call_id": "call-1",
                "output": "ok\n"
            }]
        }),
    )?;
    let started = RawTraceEventPayload::InferenceStarted {
        inference_call_id: "inference-2".to_string(),
        thread_id: "thread-root".to_string(),
        codex_turn_id: "turn-2".to_string(),
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        request_payload: followup_request,
    };
    writer.append(started)?;
    Ok(())
}

View File

@@ -0,0 +1,264 @@
//! Hot-path trace bundle writer.
use std::fs::File;
use std::fs::OpenOptions;
use std::io::BufWriter;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Mutex;
use std::sync::MutexGuard;
use std::sync::PoisonError;
use std::time::SystemTime;
use std::time::UNIX_EPOCH;
use anyhow::Context;
use anyhow::Result;
use serde::Serialize;
use crate::bundle::MANIFEST_FILE_NAME;
use crate::bundle::PAYLOADS_DIR_NAME;
use crate::bundle::RAW_EVENT_LOG_FILE_NAME;
use crate::bundle::TraceBundleManifest;
use crate::model::AgentThreadId;
use crate::payload::RawPayloadKind;
use crate::payload::RawPayloadRef;
use crate::raw_event::RAW_TRACE_EVENT_SCHEMA_VERSION;
use crate::raw_event::RawTraceEvent;
use crate::raw_event::RawTraceEventContext;
use crate::raw_event::RawTraceEventPayload;
/// Local trace bundle writer.
///
/// The writer appends raw events and writes payload files. It does not keep a
/// reduced `RolloutTrace` in memory; replay is owned by the reducer.
///
/// All methods take `&self`; the mutex serializes concurrent callers so an
/// event's `seq` always matches its position in the append-only log.
#[derive(Debug)]
pub struct TraceWriter {
    // Single lock guards the counters and the buffered event log together.
    inner: Mutex<TraceWriterInner>,
}
/// Mutable writer state, always accessed under [`TraceWriter`]'s mutex.
#[derive(Debug)]
struct TraceWriterInner {
    // Manifest written at bundle creation; its `rollout_id` is copied into
    // every appended event.
    manifest: TraceBundleManifest,
    // Path of the bundle's payload directory (`<bundle_dir>/payloads`).
    payloads_dir: PathBuf,
    // Append-only JSONL raw-event log; flushed after every appended event.
    event_log: BufWriter<File>,
    // Next event sequence number (starts at 1).
    next_seq: u64,
    // Next payload ordinal (starts at 1); used for both ids and file names.
    next_payload_ordinal: u64,
}
impl TraceWriter {
    /// Creates a trace bundle directory and writes its manifest.
    ///
    /// Layout: `<bundle_dir>/<MANIFEST_FILE_NAME>`, a `payloads/` directory,
    /// and an append-only raw-event log. The log is opened in append mode, so
    /// creating a writer over an existing bundle directory extends the log
    /// rather than truncating it.
    ///
    /// # Errors
    /// Fails if the payload directory, manifest file, or event log cannot be
    /// created.
    pub fn create(
        bundle_dir: impl AsRef<Path>,
        trace_id: String,
        rollout_id: String,
        root_thread_id: AgentThreadId,
    ) -> Result<Self> {
        let bundle_dir = bundle_dir.as_ref().to_path_buf();
        let payloads_dir = bundle_dir.join(PAYLOADS_DIR_NAME);
        std::fs::create_dir_all(&payloads_dir)
            .with_context(|| format!("create trace payload dir {}", payloads_dir.display()))?;
        let started_at_unix_ms = unix_time_ms();
        let manifest =
            TraceBundleManifest::new(trace_id, rollout_id, root_thread_id, started_at_unix_ms);
        write_json_file(&bundle_dir.join(MANIFEST_FILE_NAME), &manifest)?;
        let event_log_path = bundle_dir.join(RAW_EVENT_LOG_FILE_NAME);
        let event_log = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&event_log_path)
            .with_context(|| format!("open trace event log {}", event_log_path.display()))?;
        Ok(Self {
            inner: Mutex::new(TraceWriterInner {
                manifest,
                payloads_dir,
                event_log: BufWriter::new(event_log),
                next_seq: 1,
                next_payload_ordinal: 1,
            }),
        })
    }

    /// Writes a JSON payload file and returns its reduced-state reference.
    ///
    /// Payload ids form the contiguous sequence `raw_payload:1`,
    /// `raw_payload:2`, … with matching `payloads/<n>.json` files.
    ///
    /// # Errors
    /// Fails if the payload file cannot be created or serialized. The ordinal
    /// is only consumed after a successful write, so a failed attempt leaves
    /// no gap in the sequence; a retry reuses the ordinal, and `File::create`
    /// truncates any partial file left behind by the failure.
    pub fn write_json_payload(
        &self,
        kind: RawPayloadKind,
        value: &impl Serialize,
    ) -> Result<RawPayloadRef> {
        let mut inner = self.lock_inner();
        let ordinal = inner.next_payload_ordinal;
        let raw_payload_id = format!("raw_payload:{ordinal}");
        let relative_path = format!("{PAYLOADS_DIR_NAME}/{ordinal}.json");
        let absolute_path = inner.payloads_dir.join(format!("{ordinal}.json"));
        // Payload files are created before the event that references them. A
        // replay interrupted after an event is appended should never point at a
        // payload file that the writer planned but had not written yet.
        write_json_file(&absolute_path, value)?;
        // Bump the counter only after the write succeeded so failed attempts
        // do not burn ordinals.
        inner.next_payload_ordinal = ordinal + 1;
        Ok(RawPayloadRef {
            raw_payload_id,
            kind,
            path: relative_path,
        })
    }

    /// Appends one raw event with no extra envelope context.
    ///
    /// Equivalent to [`Self::append_with_context`] with a default (empty)
    /// thread/turn context.
    pub fn append(&self, payload: RawTraceEventPayload) -> Result<RawTraceEvent> {
        self.append_with_context(RawTraceEventContext::default(), payload)
    }

    /// Appends one raw event with explicit thread/turn context.
    ///
    /// The event is stamped with the next sequence number and the current wall
    /// time, written as one JSONL line, and flushed immediately so the log
    /// survives an abrupt process exit.
    ///
    /// # Errors
    /// Fails if the event cannot be serialized or written. Unlike payload
    /// ordinals, the sequence number is consumed up front: a failed write may
    /// have left a partial line in the log, and reusing the seq could produce
    /// two events claiming the same position.
    pub fn append_with_context(
        &self,
        context: RawTraceEventContext,
        payload: RawTraceEventPayload,
    ) -> Result<RawTraceEvent> {
        let mut inner = self.lock_inner();
        let event = RawTraceEvent {
            schema_version: RAW_TRACE_EVENT_SCHEMA_VERSION,
            seq: inner.next_seq,
            wall_time_unix_ms: unix_time_ms(),
            rollout_id: inner.manifest.rollout_id.clone(),
            thread_id: context.thread_id,
            codex_turn_id: context.codex_turn_id,
            payload,
        };
        inner.next_seq += 1;
        serde_json::to_writer(&mut inner.event_log, &event)?;
        inner.event_log.write_all(b"\n")?;
        inner.event_log.flush()?;
        Ok(event)
    }

    /// Locks the writer state, recovering from a poisoned mutex.
    fn lock_inner(&self) -> MutexGuard<'_, TraceWriterInner> {
        // Preserve the event log after a panic in tracing code. Dropping the
        // writer would lose subsequent diagnostic events in exactly the session
        // we are trying to debug.
        self.inner.lock().unwrap_or_else(PoisonError::into_inner)
    }
}
fn write_json_file(path: &Path, value: &impl Serialize) -> Result<()> {
let file = File::create(path).with_context(|| format!("create {}", path.display()))?;
serde_json::to_writer_pretty(file, value)
.with_context(|| format!("write JSON {}", path.display()))
}
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// A clock before the epoch yields 0, and a millisecond count too large for
/// `i64` saturates to `i64::MAX`, so the function never fails.
pub(crate) fn unix_time_ms() -> i64 {
    let millis = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_millis())
        .unwrap_or(0);
    i64::try_from(millis).unwrap_or(i64::MAX)
}
#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;
    use serde_json::json;
    use tempfile::TempDir;
    use crate::model::ExecutionStatus;
    use crate::model::RolloutStatus;
    use crate::payload::RawPayloadKind;
    use crate::raw_event::RawTraceEventPayload;
    use crate::replay_bundle;
    use crate::writer::TraceWriter;
    #[test]
    fn writer_records_payload_refs_and_replays_rollout_status() -> anyhow::Result<()> {
        // End-to-end smoke test: write a minimal rollout (thread, turn, one
        // inference round-trip) through the writer, then replay the bundle and
        // check the reduced state references the payload files we wrote.
        let temp = TempDir::new()?;
        let writer = TraceWriter::create(
            temp.path(),
            "trace-1".to_string(),
            "rollout-1".to_string(),
            "thread-root".to_string(),
        )?;
        writer.append(RawTraceEventPayload::RolloutStarted {
            trace_id: "trace-1".to_string(),
            root_thread_id: "thread-root".to_string(),
        })?;
        // First payload written -> expected to land at payloads/1.json.
        let metadata_payload = writer.write_json_payload(
            RawPayloadKind::ProtocolEvent,
            &json!({
                "source": "test",
                "model": "gpt-test",
            }),
        )?;
        writer.append(RawTraceEventPayload::ThreadStarted {
            thread_id: "thread-root".to_string(),
            agent_path: "/root".to_string(),
            metadata_payload: Some(metadata_payload.clone()),
        })?;
        writer.append(RawTraceEventPayload::CodexTurnStarted {
            codex_turn_id: "turn-1".to_string(),
            thread_id: "thread-root".to_string(),
        })?;
        let inference_request = writer.write_json_payload(
            RawPayloadKind::InferenceRequest,
            &json!({
                "model": "gpt-test",
                "input": [{
                    "type": "message",
                    "role": "user",
                    "content": [{"type": "input_text", "text": "hello"}]
                }],
            }),
        )?;
        writer.append(RawTraceEventPayload::InferenceStarted {
            inference_call_id: "inference-1".to_string(),
            thread_id: "thread-root".to_string(),
            codex_turn_id: "turn-1".to_string(),
            model: "gpt-test".to_string(),
            provider_name: "test-provider".to_string(),
            request_payload: inference_request.clone(),
        })?;
        let inference_response = writer.write_json_payload(
            RawPayloadKind::InferenceResponse,
            &json!({
                "response_id": "resp-1",
                "output_items": [],
            }),
        )?;
        writer.append(RawTraceEventPayload::InferenceCompleted {
            inference_call_id: "inference-1".to_string(),
            response_id: Some("resp-1".to_string()),
            response_payload: inference_response.clone(),
        })?;
        writer.append(RawTraceEventPayload::CodexTurnEnded {
            codex_turn_id: "turn-1".to_string(),
            status: ExecutionStatus::Completed,
        })?;
        writer.append(RawTraceEventPayload::RolloutEnded {
            status: RolloutStatus::Completed,
        })?;
        // Replay straight from the on-disk bundle; nothing is kept in memory
        // by the writer.
        let rollout = replay_bundle(temp.path())?;
        assert_eq!(rollout.status, RolloutStatus::Completed);
        assert_eq!(rollout.root_thread_id, "thread-root");
        assert_eq!(rollout.threads["thread-root"].agent_path, "/root");
        assert_eq!(rollout.codex_turns["turn-1"].thread_id, "thread-root");
        assert_eq!(
            rollout.codex_turns["turn-1"].execution.status,
            ExecutionStatus::Completed,
        );
        // Reduced inference call must point at the exact payload refs the
        // writer handed out.
        assert_eq!(
            rollout.inference_calls["inference-1"].raw_request_payload_id,
            inference_request.raw_payload_id,
        );
        assert_eq!(
            rollout.inference_calls["inference-1"].raw_response_payload_id,
            Some(inference_response.raw_payload_id),
        );
        // Payload ordinals are 1-based and map directly to file names.
        assert_eq!(
            rollout.raw_payloads[&metadata_payload.raw_payload_id].path,
            "payloads/1.json"
        );
        Ok(())
    }
}