mirror of
https://github.com/openai/codex.git
synced 2026-05-09 13:52:41 +00:00
Compare commits
8 Commits
owen/sqlit
...
codex/roll
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8630f1f242 | ||
|
|
70fb10676a | ||
|
|
d364b9651e | ||
|
|
55d7e431f3 | ||
|
|
c657343888 | ||
|
|
2abf9dcd4e | ||
|
|
856ef510c2 | ||
|
|
eff93282cf |
14
codex-rs/Cargo.lock
generated
14
codex-rs/Cargo.lock
generated
@@ -1690,6 +1690,7 @@ dependencies = [
|
||||
"codex-protocol",
|
||||
"codex-responses-api-proxy",
|
||||
"codex-rmcp-client",
|
||||
"codex-rollout-trace",
|
||||
"codex-sandboxing",
|
||||
"codex-state",
|
||||
"codex-stdio-to-uds",
|
||||
@@ -1932,6 +1933,7 @@ dependencies = [
|
||||
"codex-response-debug-context",
|
||||
"codex-rmcp-client",
|
||||
"codex-rollout",
|
||||
"codex-rollout-trace",
|
||||
"codex-sandboxing",
|
||||
"codex-secrets",
|
||||
"codex-shell-command",
|
||||
@@ -2705,6 +2707,18 @@ dependencies = [
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-rollout-trace"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"codex-protocol",
|
||||
"pretty_assertions",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-sandboxing"
|
||||
version = "0.0.0"
|
||||
|
||||
@@ -51,6 +51,7 @@ members = [
|
||||
"protocol",
|
||||
"realtime-webrtc",
|
||||
"rollout",
|
||||
"rollout-trace",
|
||||
"rmcp-client",
|
||||
"responses-api-proxy",
|
||||
"response-debug-context",
|
||||
@@ -159,6 +160,7 @@ codex-responses-api-proxy = { path = "responses-api-proxy" }
|
||||
codex-response-debug-context = { path = "response-debug-context" }
|
||||
codex-rmcp-client = { path = "rmcp-client" }
|
||||
codex-rollout = { path = "rollout" }
|
||||
codex-rollout-trace = { path = "rollout-trace" }
|
||||
codex-sandboxing = { path = "sandboxing" }
|
||||
codex-secrets = { path = "secrets" }
|
||||
codex-shell-command = { path = "shell-command" }
|
||||
|
||||
@@ -40,6 +40,7 @@ codex-mcp-server = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
codex-responses-api-proxy = { workspace = true }
|
||||
codex-rmcp-client = { workspace = true }
|
||||
codex-rollout-trace = { workspace = true }
|
||||
codex-sandboxing = { workspace = true }
|
||||
codex-state = { workspace = true }
|
||||
codex-stdio-to-uds = { workspace = true }
|
||||
|
||||
@@ -22,6 +22,8 @@ use codex_exec::Command as ExecCommand;
|
||||
use codex_exec::ReviewArgs;
|
||||
use codex_execpolicy::ExecPolicyCheckCommand;
|
||||
use codex_responses_api_proxy::Args as ResponsesApiProxyArgs;
|
||||
use codex_rollout_trace::REDUCED_STATE_FILE_NAME;
|
||||
use codex_rollout_trace::replay_bundle;
|
||||
use codex_state::StateRuntime;
|
||||
use codex_state::state_db_path;
|
||||
use codex_tui::AppExitInfo;
|
||||
@@ -190,6 +192,9 @@ enum DebugSubcommand {
|
||||
/// Render the model-visible prompt input list as JSON.
|
||||
PromptInput(DebugPromptInputCommand),
|
||||
|
||||
/// Replay a rollout trace bundle and write reduced state JSON.
|
||||
TraceReduce(DebugTraceReduceCommand),
|
||||
|
||||
/// Internal: reset local memory state for a fresh start.
|
||||
#[clap(hide = true)]
|
||||
ClearMemories,
|
||||
@@ -224,6 +229,17 @@ struct DebugPromptInputCommand {
|
||||
images: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct DebugTraceReduceCommand {
|
||||
/// Trace bundle directory containing manifest.json and trace.jsonl.
|
||||
#[arg(value_name = "TRACE_BUNDLE")]
|
||||
trace_bundle: PathBuf,
|
||||
|
||||
/// Output path for reduced RolloutTrace JSON. Defaults to TRACE_BUNDLE/state.json.
|
||||
#[arg(long = "output", short = 'o', value_name = "FILE")]
|
||||
output: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct ResumeCommand {
|
||||
/// Conversation/session id (UUID) or thread name. UUIDs take precedence if it parses.
|
||||
@@ -991,6 +1007,14 @@ async fn cli_main(arg0_paths: Arg0DispatchPaths) -> anyhow::Result<()> {
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
DebugSubcommand::TraceReduce(cmd) => {
|
||||
reject_remote_mode_for_subcommand(
|
||||
root_remote.as_deref(),
|
||||
root_remote_auth_token_env.as_deref(),
|
||||
"debug trace-reduce",
|
||||
)?;
|
||||
run_debug_trace_reduce_command(cmd).await?;
|
||||
}
|
||||
DebugSubcommand::ClearMemories => {
|
||||
reject_remote_mode_for_subcommand(
|
||||
root_remote.as_deref(),
|
||||
@@ -1192,6 +1216,19 @@ fn maybe_print_under_development_feature_warning(
|
||||
);
|
||||
}
|
||||
|
||||
async fn run_debug_trace_reduce_command(cmd: DebugTraceReduceCommand) -> anyhow::Result<()> {
|
||||
let output = cmd
|
||||
.output
|
||||
.unwrap_or_else(|| cmd.trace_bundle.join(REDUCED_STATE_FILE_NAME));
|
||||
|
||||
let trace = replay_bundle(&cmd.trace_bundle)?;
|
||||
let reduced_json = serde_json::to_vec_pretty(&trace)?;
|
||||
tokio::fs::write(&output, reduced_json).await?;
|
||||
println!("{}", output.display());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn run_debug_prompt_input_command(
|
||||
cmd: DebugPromptInputCommand,
|
||||
root_config_overrides: CliConfigOverrides,
|
||||
|
||||
@@ -23,7 +23,9 @@ pub use runtime::DEFAULT_WAIT_YIELD_TIME_MS;
|
||||
pub use runtime::ExecuteRequest;
|
||||
pub use runtime::RuntimeResponse;
|
||||
pub use runtime::WaitRequest;
|
||||
pub use runtime::WaitResponse;
|
||||
pub use service::CodeModeService;
|
||||
pub use service::CodeModeToolInvocation;
|
||||
pub use service::CodeModeTurnHost;
|
||||
pub use service::CodeModeTurnWorker;
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ use std::sync::mpsc as std_mpsc;
|
||||
use std::thread;
|
||||
|
||||
use codex_protocol::ToolName;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value as JsonValue;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
@@ -25,6 +26,12 @@ const EXIT_SENTINEL: &str = "__codex_code_mode_exit__";
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ExecuteRequest {
|
||||
/// Runtime cell id to use for this execution.
|
||||
///
|
||||
/// Hosts that need to trace work before JavaScript starts can allocate an id
|
||||
/// first and pass it here. `None` keeps the service-owned allocation path
|
||||
/// for callers that only need the id once a runtime response is returned.
|
||||
pub cell_id: Option<String>,
|
||||
pub tool_call_id: String,
|
||||
pub enabled_tools: Vec<ToolDefinition>,
|
||||
pub source: String,
|
||||
@@ -41,6 +48,33 @@ pub struct WaitRequest {
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum WaitResponse {
|
||||
/// The requested cell was live when the wait command was accepted.
|
||||
///
|
||||
/// Non-yielding responses from this variant are terminal lifecycle points
|
||||
/// for the matching code cell.
|
||||
Cell(RuntimeResponse),
|
||||
/// The requested cell was not live, so the response is only the result of
|
||||
/// the `wait` tool call. It must not be treated as a code-cell lifecycle
|
||||
/// event because there is no cell to complete.
|
||||
MissingCell(RuntimeResponse),
|
||||
}
|
||||
|
||||
impl WaitResponse {
|
||||
pub fn into_runtime_response(self) -> RuntimeResponse {
|
||||
match self {
|
||||
WaitResponse::Cell(response) | WaitResponse::MissingCell(response) => response,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn runtime_response(&self) -> &RuntimeResponse {
|
||||
match self {
|
||||
WaitResponse::Cell(response) | WaitResponse::MissingCell(response) => response,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize)]
|
||||
pub enum RuntimeResponse {
|
||||
Yielded {
|
||||
cell_id: String,
|
||||
@@ -331,6 +365,7 @@ mod tests {
|
||||
|
||||
fn execute_request(source: &str) -> ExecuteRequest {
|
||||
ExecuteRequest {
|
||||
cell_id: None,
|
||||
tool_call_id: "call_1".to_string(),
|
||||
enabled_tools: Vec::new(),
|
||||
source: source.to_string(),
|
||||
|
||||
@@ -21,14 +21,26 @@ use crate::runtime::RuntimeEvent;
|
||||
use crate::runtime::RuntimeResponse;
|
||||
use crate::runtime::TurnMessage;
|
||||
use crate::runtime::WaitRequest;
|
||||
use crate::runtime::WaitResponse;
|
||||
use crate::runtime::spawn_runtime;
|
||||
|
||||
/// Nested tool request emitted by one code-mode cell.
|
||||
///
|
||||
/// Code mode owns the per-cell runtime id. Hosts should preserve it for
|
||||
/// provenance/debugging, but should still assign their own runtime tool call id
|
||||
/// if their tool-call graph requires globally unique ids.
|
||||
pub struct CodeModeToolInvocation {
|
||||
pub cell_id: String,
|
||||
pub runtime_tool_call_id: String,
|
||||
pub tool_name: ToolName,
|
||||
pub input: Option<JsonValue>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait CodeModeTurnHost: Send + Sync {
|
||||
async fn invoke_tool(
|
||||
&self,
|
||||
tool_name: ToolName,
|
||||
input: Option<JsonValue>,
|
||||
invocation: CodeModeToolInvocation,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> Result<JsonValue, String>;
|
||||
|
||||
@@ -76,24 +88,44 @@ impl CodeModeService {
|
||||
*self.inner.stored_values.lock().await = values;
|
||||
}
|
||||
|
||||
pub async fn execute(&self, request: ExecuteRequest) -> Result<RuntimeResponse, String> {
|
||||
let cell_id = self
|
||||
.inner
|
||||
/// Reserves the runtime cell id for a future `execute` request.
|
||||
///
|
||||
/// The runtime can issue nested tool calls before the first `execute`
|
||||
/// response is returned. Hosts that need a parent trace object for those
|
||||
/// nested calls should allocate the cell id up front and pass it back on the
|
||||
/// `ExecuteRequest`.
|
||||
pub fn allocate_cell_id(&self) -> String {
|
||||
self.inner
|
||||
.next_cell_id
|
||||
.fetch_add(1, Ordering::Relaxed)
|
||||
.to_string();
|
||||
.to_string()
|
||||
}
|
||||
|
||||
pub async fn execute(&self, request: ExecuteRequest) -> Result<RuntimeResponse, String> {
|
||||
let cell_id = request
|
||||
.cell_id
|
||||
.clone()
|
||||
.unwrap_or_else(|| self.allocate_cell_id());
|
||||
let mut sessions = self.inner.sessions.lock().await;
|
||||
if sessions.contains_key(&cell_id) {
|
||||
return Err(format!("exec cell {cell_id} already exists"));
|
||||
}
|
||||
|
||||
// Keep the session registry locked through insertion so a caller-owned
|
||||
// cell id cannot race with another execute and replace a live runtime.
|
||||
let (event_tx, event_rx) = mpsc::unbounded_channel();
|
||||
let (runtime_tx, runtime_terminate_handle) = spawn_runtime(request.clone(), event_tx)?;
|
||||
let (control_tx, control_rx) = mpsc::unbounded_channel();
|
||||
let (response_tx, response_rx) = oneshot::channel();
|
||||
|
||||
self.inner.sessions.lock().await.insert(
|
||||
sessions.insert(
|
||||
cell_id.clone(),
|
||||
SessionHandle {
|
||||
control_tx: control_tx.clone(),
|
||||
runtime_tx: runtime_tx.clone(),
|
||||
},
|
||||
);
|
||||
drop(sessions);
|
||||
|
||||
tokio::spawn(run_session_control(
|
||||
Arc::clone(&self.inner),
|
||||
@@ -113,7 +145,7 @@ impl CodeModeService {
|
||||
.map_err(|_| "exec runtime ended unexpectedly".to_string())
|
||||
}
|
||||
|
||||
pub async fn wait(&self, request: WaitRequest) -> Result<RuntimeResponse, String> {
|
||||
pub async fn wait(&self, request: WaitRequest) -> Result<WaitResponse, String> {
|
||||
let cell_id = request.cell_id.clone();
|
||||
let handle = self
|
||||
.inner
|
||||
@@ -123,7 +155,7 @@ impl CodeModeService {
|
||||
.get(&request.cell_id)
|
||||
.cloned();
|
||||
let Some(handle) = handle else {
|
||||
return Ok(missing_cell_response(cell_id));
|
||||
return Ok(WaitResponse::MissingCell(missing_cell_response(cell_id)));
|
||||
};
|
||||
let (response_tx, response_rx) = oneshot::channel();
|
||||
let control_message = if request.terminate {
|
||||
@@ -135,11 +167,13 @@ impl CodeModeService {
|
||||
}
|
||||
};
|
||||
if handle.control_tx.send(control_message).is_err() {
|
||||
return Ok(missing_cell_response(cell_id));
|
||||
return Ok(WaitResponse::MissingCell(missing_cell_response(cell_id)));
|
||||
}
|
||||
match response_rx.await {
|
||||
Ok(response) => Ok(response),
|
||||
Err(_) => Ok(missing_cell_response(request.cell_id)),
|
||||
Ok(response) => Ok(WaitResponse::Cell(response)),
|
||||
Err(_) => Ok(WaitResponse::MissingCell(missing_cell_response(
|
||||
request.cell_id,
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -181,9 +215,14 @@ impl CodeModeService {
|
||||
let host = Arc::clone(&host);
|
||||
let inner = Arc::clone(&inner);
|
||||
tokio::spawn(async move {
|
||||
let response = host
|
||||
.invoke_tool(name, input, CancellationToken::new())
|
||||
.await;
|
||||
let invocation = CodeModeToolInvocation {
|
||||
cell_id: cell_id.clone(),
|
||||
runtime_tool_call_id: id.clone(),
|
||||
tool_name: name,
|
||||
input,
|
||||
};
|
||||
let response =
|
||||
host.invoke_tool(invocation, CancellationToken::new()).await;
|
||||
let runtime_tx = inner
|
||||
.sessions
|
||||
.lock()
|
||||
@@ -482,6 +521,8 @@ mod tests {
|
||||
use super::RuntimeResponse;
|
||||
use super::SessionControlCommand;
|
||||
use super::SessionControlContext;
|
||||
use super::WaitRequest;
|
||||
use super::WaitResponse;
|
||||
use super::run_session_control;
|
||||
use crate::FunctionCallOutputContentItem;
|
||||
use crate::runtime::ExecuteRequest;
|
||||
@@ -490,6 +531,7 @@ mod tests {
|
||||
|
||||
fn execute_request(source: &str) -> ExecuteRequest {
|
||||
ExecuteRequest {
|
||||
cell_id: None,
|
||||
tool_call_id: "call_1".to_string(),
|
||||
enabled_tools: Vec::new(),
|
||||
source: source.to_string(),
|
||||
@@ -832,6 +874,30 @@ image({
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn wait_reports_missing_cell_separately_from_runtime_results() {
|
||||
let service = CodeModeService::new();
|
||||
|
||||
let response = service
|
||||
.wait(WaitRequest {
|
||||
cell_id: "missing".to_string(),
|
||||
yield_time_ms: 1,
|
||||
terminate: false,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
response,
|
||||
WaitResponse::MissingCell(RuntimeResponse::Result {
|
||||
cell_id: "missing".to_string(),
|
||||
content_items: Vec::new(),
|
||||
stored_values: HashMap::new(),
|
||||
error_text: Some("exec cell missing not found".to_string()),
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn terminate_waits_for_runtime_shutdown_before_responding() {
|
||||
let inner = test_inner();
|
||||
|
||||
@@ -53,6 +53,7 @@ codex-plugin = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
codex-response-debug-context = { workspace = true }
|
||||
codex-rollout = { workspace = true }
|
||||
codex-rollout-trace = { workspace = true }
|
||||
codex-rmcp-client = { workspace = true }
|
||||
codex-sandboxing = { workspace = true }
|
||||
codex-state = { workspace = true }
|
||||
|
||||
@@ -77,6 +77,9 @@ use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use codex_protocol::protocol::W3cTraceContext;
|
||||
use codex_rollout_trace::CompactionTraceContext;
|
||||
use codex_rollout_trace::InferenceTraceAttempt;
|
||||
use codex_rollout_trace::InferenceTraceContext;
|
||||
use codex_tools::create_tools_json_for_responses_api;
|
||||
use eventsource_stream::Event;
|
||||
use eventsource_stream::EventStreamError;
|
||||
@@ -408,6 +411,7 @@ impl ModelClient {
|
||||
effort: Option<ReasoningEffortConfig>,
|
||||
summary: ReasoningSummaryConfig,
|
||||
session_telemetry: &SessionTelemetry,
|
||||
compaction_trace: &CompactionTraceContext,
|
||||
) -> Result<Vec<ResponseItem>> {
|
||||
if prompt.input.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
@@ -462,10 +466,18 @@ impl ModelClient {
|
||||
extra_headers.extend(build_conversation_headers(Some(
|
||||
self.state.conversation_id.to_string(),
|
||||
)));
|
||||
client
|
||||
.compact_input(&payload, extra_headers)
|
||||
.await
|
||||
.map_err(map_api_error)
|
||||
let trace_attempt = compaction_trace.start_attempt(&payload);
|
||||
match client.compact_input(&payload, extra_headers).await {
|
||||
Ok(output) => {
|
||||
trace_attempt.record_completed(&output);
|
||||
Ok(output)
|
||||
}
|
||||
Err(err) => {
|
||||
let err = map_api_error(err);
|
||||
trace_attempt.record_failed(&err);
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn create_realtime_call_with_headers(
|
||||
@@ -1140,6 +1152,7 @@ impl ModelClientSession {
|
||||
summary: ReasoningSummaryConfig,
|
||||
service_tier: Option<ServiceTier>,
|
||||
turn_metadata_header: Option<&str>,
|
||||
inference_trace: &InferenceTraceContext,
|
||||
) -> Result<ResponseStream> {
|
||||
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
|
||||
warn!(path, "Streaming from fixture");
|
||||
@@ -1148,7 +1161,11 @@ impl ModelClientSession {
|
||||
self.client.state.provider.stream_idle_timeout(),
|
||||
)
|
||||
.map_err(map_api_error)?;
|
||||
let (stream, _last_request_rx) = map_response_stream(stream, session_telemetry.clone());
|
||||
let (stream, _last_request_rx) = map_response_stream(
|
||||
stream,
|
||||
session_telemetry.clone(),
|
||||
InferenceTraceAttempt::disabled(),
|
||||
);
|
||||
return Ok(stream);
|
||||
}
|
||||
|
||||
@@ -1182,6 +1199,8 @@ impl ModelClientSession {
|
||||
summary,
|
||||
service_tier,
|
||||
)?;
|
||||
let inference_trace_attempt = inference_trace.start_attempt();
|
||||
inference_trace_attempt.record_started(&request);
|
||||
let client = ApiResponsesClient::new(
|
||||
transport,
|
||||
client_setup.api_provider,
|
||||
@@ -1192,12 +1211,17 @@ impl ModelClientSession {
|
||||
|
||||
match stream_result {
|
||||
Ok(stream) => {
|
||||
let (stream, _) = map_response_stream(stream, session_telemetry.clone());
|
||||
let (stream, _) = map_response_stream(
|
||||
stream,
|
||||
session_telemetry.clone(),
|
||||
inference_trace_attempt,
|
||||
);
|
||||
return Ok(stream);
|
||||
}
|
||||
Err(ApiError::Transport(
|
||||
unauthorized_transport @ TransportError::Http { status, .. },
|
||||
)) if status == StatusCode::UNAUTHORIZED => {
|
||||
inference_trace_attempt.record_failed(&unauthorized_transport);
|
||||
pending_retry = PendingUnauthorizedRetry::from_recovery(
|
||||
handle_unauthorized(
|
||||
unauthorized_transport,
|
||||
@@ -1208,7 +1232,11 @@ impl ModelClientSession {
|
||||
);
|
||||
continue;
|
||||
}
|
||||
Err(err) => return Err(map_api_error(err)),
|
||||
Err(err) => {
|
||||
let err = map_api_error(err);
|
||||
inference_trace_attempt.record_failed(&err);
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1239,6 +1267,7 @@ impl ModelClientSession {
|
||||
turn_metadata_header: Option<&str>,
|
||||
warmup: bool,
|
||||
request_trace: Option<W3cTraceContext>,
|
||||
inference_trace: &InferenceTraceContext,
|
||||
) -> Result<WebsocketStreamOutcome> {
|
||||
let auth_manager = self.client.state.auth_manager.clone();
|
||||
|
||||
@@ -1313,17 +1342,35 @@ impl ModelClientSession {
|
||||
|
||||
let ws_request = self.prepare_websocket_request(ws_payload, &request);
|
||||
self.websocket_session.last_request = Some(request);
|
||||
let inference_trace_attempt = if warmup {
|
||||
// Prewarm sends `generate=false`; it is connection setup, not a
|
||||
// model inference attempt that should appear in rollout traces.
|
||||
InferenceTraceAttempt::disabled()
|
||||
} else {
|
||||
inference_trace.start_attempt()
|
||||
};
|
||||
inference_trace_attempt.record_started(&ws_request);
|
||||
let stream_result = self.websocket_session.connection.as_ref().ok_or_else(|| {
|
||||
map_api_error(ApiError::Stream(
|
||||
"websocket connection is unavailable".to_string(),
|
||||
))
|
||||
})?;
|
||||
let stream_result = stream_result
|
||||
let stream_result = match stream_result
|
||||
.stream_request(ws_request, self.websocket_session.connection_reused())
|
||||
.await
|
||||
.map_err(map_api_error)?;
|
||||
let (stream, last_request_rx) =
|
||||
map_response_stream(stream_result, session_telemetry.clone());
|
||||
{
|
||||
Ok(stream_result) => stream_result,
|
||||
Err(err) => {
|
||||
let err = map_api_error(err);
|
||||
inference_trace_attempt.record_failed(&err);
|
||||
return Err(err);
|
||||
}
|
||||
};
|
||||
let (stream, last_request_rx) = map_response_stream(
|
||||
stream_result,
|
||||
session_telemetry.clone(),
|
||||
inference_trace_attempt,
|
||||
);
|
||||
self.websocket_session.last_response_rx = Some(last_request_rx);
|
||||
return Ok(WebsocketStreamOutcome::Stream(stream));
|
||||
}
|
||||
@@ -1382,6 +1429,7 @@ impl ModelClientSession {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let disabled_trace = InferenceTraceContext::disabled();
|
||||
match self
|
||||
.stream_responses_websocket(
|
||||
prompt,
|
||||
@@ -1393,6 +1441,7 @@ impl ModelClientSession {
|
||||
turn_metadata_header,
|
||||
/*warmup*/ true,
|
||||
current_span_w3c_trace_context(),
|
||||
&disabled_trace,
|
||||
)
|
||||
.await
|
||||
{
|
||||
@@ -1416,12 +1465,11 @@ impl ModelClientSession {
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// Streams a single model request within the current turn.
|
||||
/// Streams a single model request without rollout tracing.
|
||||
///
|
||||
/// The caller is responsible for passing per-turn settings explicitly (model selection,
|
||||
/// reasoning settings, telemetry context, and turn metadata). This method will prefer the
|
||||
/// Responses WebSocket transport when the provider supports it and it remains healthy, and will
|
||||
/// fall back to the HTTP Responses API transport otherwise.
|
||||
/// This is the public client API. It routes through the same transport code
|
||||
/// as traced Codex turns, but supplies a disabled trace context so tracing
|
||||
/// does not leak into callers that only need model streaming.
|
||||
pub async fn stream(
|
||||
&mut self,
|
||||
prompt: &Prompt,
|
||||
@@ -1431,6 +1479,37 @@ impl ModelClientSession {
|
||||
summary: ReasoningSummaryConfig,
|
||||
service_tier: Option<ServiceTier>,
|
||||
turn_metadata_header: Option<&str>,
|
||||
) -> Result<ResponseStream> {
|
||||
let disabled_trace = InferenceTraceContext::disabled();
|
||||
self.stream_with_trace(
|
||||
prompt,
|
||||
model_info,
|
||||
session_telemetry,
|
||||
effort,
|
||||
summary,
|
||||
service_tier,
|
||||
turn_metadata_header,
|
||||
&disabled_trace,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
/// Streams a model request with an explicit rollout trace context.
|
||||
///
|
||||
/// The context may be enabled or disabled. Transport code records against it
|
||||
/// unconditionally so HTTP, WebSocket, retry, and fallback paths do not need
|
||||
/// separate trace/no-trace branches.
|
||||
pub(crate) async fn stream_with_trace(
|
||||
&mut self,
|
||||
prompt: &Prompt,
|
||||
model_info: &ModelInfo,
|
||||
session_telemetry: &SessionTelemetry,
|
||||
effort: Option<ReasoningEffortConfig>,
|
||||
summary: ReasoningSummaryConfig,
|
||||
service_tier: Option<ServiceTier>,
|
||||
turn_metadata_header: Option<&str>,
|
||||
inference_trace: &InferenceTraceContext,
|
||||
) -> Result<ResponseStream> {
|
||||
let wire_api = self.client.state.provider.wire_api;
|
||||
match wire_api {
|
||||
@@ -1448,6 +1527,7 @@ impl ModelClientSession {
|
||||
turn_metadata_header,
|
||||
/*warmup*/ false,
|
||||
request_trace,
|
||||
inference_trace,
|
||||
)
|
||||
.await?
|
||||
{
|
||||
@@ -1466,6 +1546,7 @@ impl ModelClientSession {
|
||||
summary,
|
||||
service_tier,
|
||||
turn_metadata_header,
|
||||
inference_trace,
|
||||
)
|
||||
.await
|
||||
}
|
||||
@@ -1561,6 +1642,7 @@ fn parent_thread_id_header_value(session_source: &SessionSource) -> Option<Strin
|
||||
fn map_response_stream<S>(
|
||||
api_stream: S,
|
||||
session_telemetry: SessionTelemetry,
|
||||
inference_trace_attempt: InferenceTraceAttempt,
|
||||
) -> (ResponseStream, oneshot::Receiver<LastResponse>)
|
||||
where
|
||||
S: futures::Stream<Item = std::result::Result<ResponseEvent, ApiError>>
|
||||
@@ -1601,6 +1683,11 @@ where
|
||||
usage.total_tokens,
|
||||
);
|
||||
}
|
||||
inference_trace_attempt.record_completed(
|
||||
&response_id,
|
||||
&token_usage,
|
||||
&items_added,
|
||||
);
|
||||
if let Some(sender) = tx_last_response.take() {
|
||||
let _ = sender.send(LastResponse {
|
||||
response_id: response_id.clone(),
|
||||
@@ -1625,6 +1712,7 @@ where
|
||||
}
|
||||
Err(err) => {
|
||||
let mapped = map_api_error(err);
|
||||
inference_trace_attempt.record_failed(&mapped);
|
||||
if !logged_error {
|
||||
session_telemetry.see_event_completed_failed(&mapped);
|
||||
logged_error = true;
|
||||
|
||||
@@ -299,6 +299,8 @@ use crate::rollout::RolloutRecorderParams;
|
||||
use crate::rollout::map_session_init_error;
|
||||
use crate::rollout::metadata;
|
||||
use crate::rollout::policy::EventPersistenceMode;
|
||||
use crate::rollout_trace::RolloutTraceRecorder;
|
||||
use crate::rollout_trace::ThreadStartedTraceMetadata;
|
||||
use crate::session_startup_prewarm::SessionStartupPrewarmHandle;
|
||||
use crate::shell;
|
||||
use crate::shell_snapshot::ShellSnapshot;
|
||||
@@ -440,6 +442,7 @@ pub(crate) struct CodexSpawnArgs {
|
||||
pub(crate) metrics_service_name: Option<String>,
|
||||
pub(crate) inherited_shell_snapshot: Option<Arc<ShellSnapshot>>,
|
||||
pub(crate) inherited_exec_policy: Option<Arc<ExecPolicyManager>>,
|
||||
pub(crate) inherited_rollout_trace: Option<RolloutTraceRecorder>,
|
||||
pub(crate) user_shell_override: Option<shell::Shell>,
|
||||
pub(crate) parent_trace: Option<W3cTraceContext>,
|
||||
pub(crate) analytics_events_client: Option<AnalyticsEventsClient>,
|
||||
@@ -494,6 +497,7 @@ impl Codex {
|
||||
inherited_shell_snapshot,
|
||||
user_shell_override,
|
||||
inherited_exec_policy,
|
||||
inherited_rollout_trace,
|
||||
parent_trace: _,
|
||||
analytics_events_client,
|
||||
} = args;
|
||||
@@ -690,6 +694,7 @@ impl Codex {
|
||||
agent_control,
|
||||
environment,
|
||||
analytics_events_client,
|
||||
inherited_rollout_trace,
|
||||
)
|
||||
.await
|
||||
.map_err(|e| {
|
||||
@@ -1697,6 +1702,7 @@ impl Session {
|
||||
agent_control: AgentControl,
|
||||
environment: Option<Arc<Environment>>,
|
||||
analytics_events_client: Option<AnalyticsEventsClient>,
|
||||
inherited_rollout_trace: Option<RolloutTraceRecorder>,
|
||||
) -> anyhow::Result<Arc<Self>> {
|
||||
debug!(
|
||||
"Configuring session: model={}; provider={:?}",
|
||||
@@ -1833,6 +1839,40 @@ impl Session {
|
||||
let rollout_path = rollout_recorder
|
||||
.as_ref()
|
||||
.map(|rec| rec.rollout_path().to_path_buf());
|
||||
let trace_agent_path = session_configuration
|
||||
.session_source
|
||||
.get_agent_path()
|
||||
.unwrap_or_else(codex_protocol::AgentPath::root);
|
||||
let trace_task_name =
|
||||
(!trace_agent_path.is_root()).then(|| trace_agent_path.name().to_string());
|
||||
let trace_metadata = ThreadStartedTraceMetadata {
|
||||
thread_id: conversation_id.to_string(),
|
||||
agent_path: trace_agent_path.to_string(),
|
||||
task_name: trace_task_name,
|
||||
nickname: session_configuration.session_source.get_nickname(),
|
||||
agent_role: session_configuration.session_source.get_agent_role(),
|
||||
session_source: session_configuration.session_source.clone(),
|
||||
cwd: session_configuration.cwd.to_path_buf(),
|
||||
rollout_path: rollout_path.clone(),
|
||||
model: session_configuration.collaboration_mode.model().to_string(),
|
||||
provider_name: config.model_provider_id.clone(),
|
||||
approval_policy: session_configuration.approval_policy.value().to_string(),
|
||||
sandbox_policy: format!("{:?}", session_configuration.sandbox_policy.get()),
|
||||
};
|
||||
let rollout_trace = if let Some(rollout_trace) = inherited_rollout_trace {
|
||||
rollout_trace.record_thread_started(trace_metadata);
|
||||
Some(rollout_trace)
|
||||
} else if matches!(
|
||||
session_configuration.session_source,
|
||||
SessionSource::SubAgent(SubAgentSource::ThreadSpawn { .. })
|
||||
) {
|
||||
// Spawned child threads are part of their root rollout tree. If
|
||||
// the parent had no trace recorder, do not create an orphan child
|
||||
// bundle that looks like an independent rollout.
|
||||
None
|
||||
} else {
|
||||
RolloutTraceRecorder::maybe_create(conversation_id, trace_metadata)
|
||||
};
|
||||
|
||||
let mut post_session_configured_events = Vec::<Event>::new();
|
||||
|
||||
@@ -2107,6 +2147,7 @@ impl Session {
|
||||
analytics_events_client,
|
||||
hooks,
|
||||
rollout: Mutex::new(rollout_recorder),
|
||||
rollout_trace,
|
||||
user_shell: Arc::new(default_shell),
|
||||
agent_identity_manager: Arc::new(AgentIdentityManager::new(
|
||||
config.as_ref(),
|
||||
@@ -2873,6 +2914,18 @@ impl Session {
|
||||
/// Persist the event to rollout and send it to clients.
|
||||
pub(crate) async fn send_event(&self, turn_context: &TurnContext, msg: EventMsg) {
|
||||
let legacy_source = msg.clone();
|
||||
if let Some(rollout_trace) = &self.services.rollout_trace {
|
||||
rollout_trace.record_codex_turn_event(
|
||||
self.conversation_id.to_string(),
|
||||
&turn_context.sub_id,
|
||||
&legacy_source,
|
||||
);
|
||||
rollout_trace.record_tool_call_event(
|
||||
self.conversation_id.to_string(),
|
||||
turn_context.sub_id.clone(),
|
||||
&legacy_source,
|
||||
);
|
||||
}
|
||||
let event = Event {
|
||||
id: turn_context.sub_id.clone(),
|
||||
msg,
|
||||
@@ -2925,13 +2978,19 @@ impl Session {
|
||||
return;
|
||||
}
|
||||
|
||||
self.forward_child_completion_to_parent(*parent_thread_id, child_agent_path, status)
|
||||
.await;
|
||||
self.forward_child_completion_to_parent(
|
||||
turn_context,
|
||||
*parent_thread_id,
|
||||
child_agent_path,
|
||||
status,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Sends the standard completion envelope from a spawned MultiAgentV2 child to its parent.
|
||||
async fn forward_child_completion_to_parent(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
parent_thread_id: ThreadId,
|
||||
child_agent_path: &codex_protocol::AgentPath,
|
||||
status: AgentStatus,
|
||||
@@ -2949,9 +3008,19 @@ impl Session {
|
||||
child_agent_path.clone(),
|
||||
parent_agent_path,
|
||||
Vec::new(),
|
||||
message,
|
||||
message.clone(),
|
||||
/*trigger_turn*/ false,
|
||||
);
|
||||
if let Some(rollout_trace) = &self.services.rollout_trace {
|
||||
rollout_trace.record_agent_result_interaction(
|
||||
self.conversation_id.to_string(),
|
||||
turn_context.sub_id.clone(),
|
||||
parent_thread_id.to_string(),
|
||||
child_agent_path.as_str(),
|
||||
&message,
|
||||
&status,
|
||||
);
|
||||
}
|
||||
if let Err(err) = self
|
||||
.services
|
||||
.agent_control
|
||||
@@ -2990,6 +3059,9 @@ impl Session {
|
||||
// Persist the event into rollout (recorder filters as needed)
|
||||
let rollout_items = vec![RolloutItem::EventMsg(event.msg.clone())];
|
||||
self.persist_rollout_items(&rollout_items).await;
|
||||
if let Some(rollout_trace) = &self.services.rollout_trace {
|
||||
rollout_trace.record_protocol_event(&event.msg);
|
||||
}
|
||||
self.deliver_event_raw(event).await;
|
||||
}
|
||||
|
||||
@@ -5887,6 +5959,12 @@ mod handlers {
|
||||
msg: EventMsg::ShutdownComplete,
|
||||
};
|
||||
sess.send_event_raw(event).await;
|
||||
if let Some(rollout_trace) = &sess.services.rollout_trace {
|
||||
rollout_trace.record_thread_ended(
|
||||
sess.conversation_id.to_string(),
|
||||
codex_rollout_trace::RolloutStatus::Completed,
|
||||
);
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
@@ -7843,8 +7921,19 @@ async fn try_run_sampling_request(
|
||||
auth_mode = sess.services.auth_manager.auth_mode(),
|
||||
features = sess.features.enabled_features(),
|
||||
);
|
||||
let inference_trace = sess.services.rollout_trace.as_ref().map_or_else(
|
||||
codex_rollout_trace::InferenceTraceContext::disabled,
|
||||
|trace| {
|
||||
trace.inference_trace_context(
|
||||
sess.conversation_id.to_string(),
|
||||
turn_context.sub_id.clone(),
|
||||
turn_context.model_info.slug.clone(),
|
||||
turn_context.provider.name.clone(),
|
||||
)
|
||||
},
|
||||
);
|
||||
let mut stream = client_session
|
||||
.stream(
|
||||
.stream_with_trace(
|
||||
prompt,
|
||||
&turn_context.model_info,
|
||||
&turn_context.session_telemetry,
|
||||
@@ -7852,6 +7941,7 @@ async fn try_run_sampling_request(
|
||||
turn_context.reasoning_summary,
|
||||
turn_context.config.service_tier,
|
||||
turn_metadata_header,
|
||||
&inference_trace,
|
||||
)
|
||||
.instrument(trace_span!("stream_request"))
|
||||
.or_cancel(&cancellation_token)
|
||||
|
||||
@@ -95,6 +95,7 @@ pub(crate) async fn run_codex_thread_interactive(
|
||||
inherited_shell_snapshot: None,
|
||||
user_shell_override: None,
|
||||
inherited_exec_policy: Some(Arc::clone(&parent_session.services.exec_policy)),
|
||||
inherited_rollout_trace: None,
|
||||
parent_trace: None,
|
||||
analytics_events_client: Some(parent_session.services.analytics_events_client.clone()),
|
||||
}))
|
||||
|
||||
@@ -2776,6 +2776,7 @@ async fn session_new_fails_when_zsh_fork_enabled_without_zsh_path() {
|
||||
.expect("create environment"),
|
||||
)),
|
||||
/*analytics_events_client*/ None,
|
||||
/*inherited_rollout_trace*/ None,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -2898,6 +2899,7 @@ pub(crate) async fn make_session_and_context() -> (Session, TurnContext) {
|
||||
..HooksConfig::default()
|
||||
}),
|
||||
rollout: Mutex::new(None),
|
||||
rollout_trace: None,
|
||||
user_shell: Arc::new(default_user_shell()),
|
||||
agent_identity_manager: Arc::new(crate::agent_identity::AgentIdentityManager::new(
|
||||
config.as_ref(),
|
||||
@@ -3758,6 +3760,7 @@ pub(crate) async fn make_session_and_context_with_dynamic_tools_and_rx(
|
||||
..HooksConfig::default()
|
||||
}),
|
||||
rollout: Mutex::new(None),
|
||||
rollout_trace: None,
|
||||
user_shell: Arc::new(default_user_shell()),
|
||||
agent_identity_manager: Arc::new(crate::agent_identity::AgentIdentityManager::new(
|
||||
config.as_ref(),
|
||||
@@ -5720,6 +5723,7 @@ async fn rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
tracker: Arc::clone(&turn_diff_tracker),
|
||||
call_id,
|
||||
tool_name: codex_tools::ToolName::plain(tool_name),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": params.command.clone(),
|
||||
@@ -5798,6 +5802,7 @@ async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request()
|
||||
tracker: Arc::clone(&tracker),
|
||||
call_id: "exec-call".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("exec_command"),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"cmd": "echo hi",
|
||||
|
||||
@@ -145,6 +145,7 @@ async fn guardian_allows_shell_additional_permissions_requests_past_policy_valid
|
||||
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: "test-call".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("shell"),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": params.command.clone(),
|
||||
@@ -211,6 +212,7 @@ async fn guardian_allows_unified_exec_additional_permissions_requests_past_polic
|
||||
tracker: Arc::clone(&tracker),
|
||||
call_id: "exec-call".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("exec_command"),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"cmd": "echo hi",
|
||||
@@ -324,6 +326,7 @@ async fn shell_handler_allows_sticky_turn_permissions_without_inline_request_per
|
||||
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: "sticky-turn-grant".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("shell"),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": [
|
||||
@@ -449,6 +452,7 @@ async fn guardian_subagent_does_not_inherit_parent_exec_policy_rules() {
|
||||
metrics_service_name: None,
|
||||
inherited_shell_snapshot: None,
|
||||
inherited_exec_policy: Some(Arc::new(parent_exec_policy)),
|
||||
inherited_rollout_trace: None,
|
||||
user_shell_override: None,
|
||||
parent_trace: None,
|
||||
analytics_events_client: None,
|
||||
|
||||
@@ -13,6 +13,7 @@ use crate::context_manager::ContextManager;
|
||||
use crate::context_manager::TotalTokenUsageBreakdown;
|
||||
use crate::context_manager::estimate_response_item_model_visible_bytes;
|
||||
use crate::context_manager::is_codex_generated_item;
|
||||
use crate::rollout_trace::CompactionCheckpointTracePayload;
|
||||
use codex_analytics::CompactionImplementation;
|
||||
use codex_analytics::CompactionPhase;
|
||||
use codex_analytics::CompactionReason;
|
||||
@@ -26,6 +27,7 @@ use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::CompactedItem;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::TurnStartedEvent;
|
||||
use codex_rollout_trace::CompactionTraceContext;
|
||||
use futures::TryFutureExt;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::error;
|
||||
@@ -114,7 +116,11 @@ async fn run_remote_compact_task_inner_impl(
|
||||
turn_context: &Arc<TurnContext>,
|
||||
initial_context_injection: InitialContextInjection,
|
||||
) -> CodexResult<()> {
|
||||
let compaction_item = TurnItem::ContextCompaction(ContextCompactionItem::new());
|
||||
let context_compaction_item = ContextCompactionItem::new();
|
||||
// Use the UI compaction item ID as the trace compaction ID so protocol lifecycle events,
|
||||
// endpoint attempts, and the installed history checkpoint all have one join key.
|
||||
let compaction_id = context_compaction_item.id.clone();
|
||||
let compaction_item = TurnItem::ContextCompaction(context_compaction_item);
|
||||
sess.emit_turn_item_started(turn_context, &compaction_item)
|
||||
.await;
|
||||
let mut history = sess.clone_history().await;
|
||||
@@ -131,6 +137,10 @@ async fn run_remote_compact_task_inner_impl(
|
||||
"trimmed history items before remote compaction"
|
||||
);
|
||||
}
|
||||
// This is the history selected for remote compaction, after any trimming required to fit the
|
||||
// compact endpoint. The checkpoint below records it separately from the next sampling request,
|
||||
// whose prompt will repeat current developer/context prefix items.
|
||||
let trace_input_history = history.raw_items().to_vec();
|
||||
// Required to keep `/undo` available after compaction
|
||||
let ghost_snapshots: Vec<ResponseItem> = history
|
||||
.raw_items()
|
||||
@@ -157,6 +167,21 @@ async fn run_remote_compact_task_inner_impl(
|
||||
personality: turn_context.personality,
|
||||
output_schema: None,
|
||||
};
|
||||
// Remote compaction is the only compaction shape rollout tracing supports. The trace context
|
||||
// records the exact `/responses/compact` request and response; normal sampling requests remain
|
||||
// traced through the inference path.
|
||||
let compaction_trace = sess.services.rollout_trace.as_ref().map_or_else(
|
||||
CompactionTraceContext::disabled,
|
||||
|trace| {
|
||||
trace.compaction_trace_context(
|
||||
sess.conversation_id.to_string(),
|
||||
turn_context.sub_id.clone(),
|
||||
compaction_id.clone(),
|
||||
turn_context.model_info.slug.clone(),
|
||||
turn_context.provider.name.clone(),
|
||||
)
|
||||
},
|
||||
);
|
||||
|
||||
let mut new_history = sess
|
||||
.services
|
||||
@@ -167,6 +192,7 @@ async fn run_remote_compact_task_inner_impl(
|
||||
turn_context.reasoning_effort,
|
||||
turn_context.reasoning_summary,
|
||||
&turn_context.session_telemetry,
|
||||
&compaction_trace,
|
||||
)
|
||||
.or_else(|err| async {
|
||||
let total_usage_breakdown = sess.get_total_token_usage_breakdown().await;
|
||||
@@ -200,6 +226,20 @@ async fn run_remote_compact_task_inner_impl(
|
||||
message: String::new(),
|
||||
replacement_history: Some(new_history.clone()),
|
||||
};
|
||||
if let Some(trace) = sess.services.rollout_trace.as_ref() {
|
||||
// Install is the semantic boundary where the compact endpoint's output becomes live
|
||||
// thread history. Keep it distinct from the later inference request so the reducer can
|
||||
// still represent repeated developer/context prefix items exactly as the model saw them.
|
||||
trace.record_compaction_installed(
|
||||
sess.conversation_id.to_string(),
|
||||
turn_context.sub_id.clone(),
|
||||
compaction_id,
|
||||
&CompactionCheckpointTracePayload {
|
||||
input_history: &trace_input_history,
|
||||
replacement_history: &new_history,
|
||||
},
|
||||
);
|
||||
}
|
||||
sess.replace_compacted_history(new_history, reference_context_item, compacted_item)
|
||||
.await;
|
||||
sess.recompute_token_usage(turn_context).await;
|
||||
|
||||
@@ -137,6 +137,7 @@ pub use project_doc::LOCAL_PROJECT_DOC_FILENAME;
|
||||
pub use project_doc::discover_project_doc_paths;
|
||||
pub use project_doc::read_project_docs;
|
||||
mod rollout;
|
||||
mod rollout_trace;
|
||||
pub(crate) mod safety;
|
||||
mod session_rollout_init_error;
|
||||
pub mod shell;
|
||||
|
||||
886
codex-rs/core/src/rollout_trace.rs
Normal file
886
codex-rs/core/src/rollout_trace.rs
Normal file
@@ -0,0 +1,886 @@
|
||||
//! Opt-in producer for the rollout trace bundle.
|
||||
//!
|
||||
//! This module is the deliberately thin bridge from `codex-core` into
|
||||
//! `codex-rollout-trace`. Core emits raw observations; the trace crate's
|
||||
//! offline reducer owns the semantic graph.
|
||||
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::agent::AgentStatus;
|
||||
use crate::tools::context::ToolCallSource;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::ExecCommandSource;
|
||||
use codex_protocol::protocol::ExecCommandStatus;
|
||||
use codex_protocol::protocol::PatchApplyStatus;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::TurnAbortReason;
|
||||
use codex_rollout_trace::AgentThreadId;
|
||||
use codex_rollout_trace::CodeCellRuntimeStatus;
|
||||
use codex_rollout_trace::CodeModeRuntimeToolId;
|
||||
use codex_rollout_trace::CompactionTraceContext;
|
||||
use codex_rollout_trace::ExecutionStatus;
|
||||
use codex_rollout_trace::InferenceTraceContext;
|
||||
use codex_rollout_trace::ModelVisibleCallId;
|
||||
use codex_rollout_trace::RawPayloadKind;
|
||||
use codex_rollout_trace::RawPayloadRef;
|
||||
use codex_rollout_trace::RawToolCallRequester;
|
||||
use codex_rollout_trace::RawTraceEventContext;
|
||||
use codex_rollout_trace::RawTraceEventPayload;
|
||||
use codex_rollout_trace::RolloutStatus;
|
||||
use codex_rollout_trace::ToolCallKind;
|
||||
use codex_rollout_trace::ToolCallSummary;
|
||||
use codex_rollout_trace::TraceWriter;
|
||||
use serde::Serialize;
|
||||
use tracing::debug;
|
||||
use tracing::warn;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Environment variable that enables local trace-bundle recording.
|
||||
///
|
||||
/// The value is a root directory. Each independent root session gets one child
|
||||
/// bundle directory. Spawned child threads share their root session's bundle so
|
||||
/// one reduced `state.json` describes the whole multi-agent rollout tree.
|
||||
pub(crate) const CODEX_ROLLOUT_TRACE_ROOT_ENV: &str = "CODEX_ROLLOUT_TRACE_ROOT";
|
||||
|
||||
/// Lightweight handle stored in `SessionServices`.
|
||||
///
|
||||
/// Cloning the handle is cheap; all sequencing and file ownership remains
|
||||
/// inside `TraceWriter`.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct RolloutTraceRecorder {
|
||||
writer: Arc<TraceWriter>,
|
||||
root_thread_id: AgentThreadId,
|
||||
}
|
||||
|
||||
/// Metadata captured once at thread/session start.
|
||||
///
|
||||
/// This payload is intentionally operational rather than reduced: it is a raw
|
||||
/// payload that later reducers can mine as the reduced thread model evolves.
|
||||
#[derive(Serialize)]
|
||||
pub(crate) struct ThreadStartedTraceMetadata {
|
||||
pub(crate) thread_id: String,
|
||||
pub(crate) agent_path: String,
|
||||
pub(crate) task_name: Option<String>,
|
||||
pub(crate) nickname: Option<String>,
|
||||
pub(crate) agent_role: Option<String>,
|
||||
pub(crate) session_source: SessionSource,
|
||||
pub(crate) cwd: PathBuf,
|
||||
pub(crate) rollout_path: Option<PathBuf>,
|
||||
pub(crate) model: String,
|
||||
pub(crate) provider_name: String,
|
||||
pub(crate) approval_policy: String,
|
||||
pub(crate) sandbox_policy: String,
|
||||
}
|
||||
|
||||
/// History replacement checkpoint persisted when compaction installs new live history.
|
||||
///
|
||||
/// The checkpoint keeps compaction separate from ordinary sampling snapshots:
|
||||
/// `input_history` is the live thread history selected for compaction, while
|
||||
/// `replacement_history` is what future prompts may carry after the checkpoint.
|
||||
#[derive(Serialize)]
|
||||
pub(crate) struct CompactionCheckpointTracePayload<'a> {
|
||||
pub(crate) input_history: &'a [ResponseItem],
|
||||
pub(crate) replacement_history: &'a [ResponseItem],
|
||||
}
|
||||
|
||||
/// Raw invocation payload for the canonical Codex tool boundary.
|
||||
///
|
||||
/// Protocol events may add runtime detail later, but this envelope preserves
|
||||
/// the caller-facing request for both direct model calls and code-mode nested
|
||||
/// calls.
|
||||
#[derive(Serialize)]
|
||||
struct DispatchedToolTraceRequest<'a> {
|
||||
tool_name: &'a str,
|
||||
tool_namespace: Option<&'a str>,
|
||||
payload: serde_json::Value,
|
||||
}
|
||||
|
||||
/// Raw response payload for dispatch-level tool trace events.
|
||||
#[derive(Serialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
enum DispatchedToolTraceResponse<'a> {
|
||||
DirectResponse {
|
||||
response_item: &'a ResponseInputItem,
|
||||
},
|
||||
CodeModeResponse {
|
||||
value: serde_json::Value,
|
||||
},
|
||||
Error {
|
||||
error: &'a str,
|
||||
},
|
||||
}
|
||||
|
||||
/// Raw code-mode response captured at the runtime boundary.
|
||||
///
|
||||
/// The reducer keeps the graph small and uses this payload as evidence for
|
||||
/// future viewers that need exact content items or stored-value details.
|
||||
#[derive(Serialize)]
|
||||
struct CodeCellResponseTracePayload<'a> {
|
||||
response: &'a codex_code_mode::RuntimeResponse,
|
||||
}
|
||||
|
||||
/// Trace-only payload for the notification a finished child sends back to its parent.
|
||||
#[derive(Serialize)]
|
||||
struct AgentResultTracePayload<'a> {
|
||||
child_agent_path: &'a str,
|
||||
message: &'a str,
|
||||
status: &'a AgentStatus,
|
||||
}
|
||||
|
||||
impl RolloutTraceRecorder {
|
||||
/// Creates and starts a trace bundle if `CODEX_ROLLOUT_TRACE_ROOT` is set.
|
||||
///
|
||||
/// Trace startup is best-effort. A tracing failure must not make the Codex
|
||||
/// session unusable, because traces are diagnostic and can be enabled while
|
||||
/// debugging unrelated production failures.
|
||||
pub(crate) fn maybe_create(
|
||||
thread_id: ThreadId,
|
||||
metadata: ThreadStartedTraceMetadata,
|
||||
) -> Option<Self> {
|
||||
let root = std::env::var_os(CODEX_ROLLOUT_TRACE_ROOT_ENV)?;
|
||||
let root = PathBuf::from(root);
|
||||
match Self::create_in_root(root.as_path(), thread_id, metadata) {
|
||||
Ok(recorder) => Some(recorder),
|
||||
Err(err) => {
|
||||
warn!("failed to initialize rollout trace recorder: {err:#}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn create_in_root(
|
||||
root: &Path,
|
||||
thread_id: ThreadId,
|
||||
metadata: ThreadStartedTraceMetadata,
|
||||
) -> anyhow::Result<Self> {
|
||||
let trace_id = Uuid::new_v4().to_string();
|
||||
let thread_id = thread_id.to_string();
|
||||
let bundle_dir = root.join(format!("trace-{trace_id}-{thread_id}"));
|
||||
let writer = TraceWriter::create(
|
||||
&bundle_dir,
|
||||
trace_id.clone(),
|
||||
thread_id.clone(),
|
||||
thread_id.clone(),
|
||||
)?;
|
||||
let recorder = Self {
|
||||
writer: Arc::new(writer),
|
||||
root_thread_id: thread_id.clone(),
|
||||
};
|
||||
|
||||
recorder.append_best_effort(RawTraceEventPayload::RolloutStarted {
|
||||
trace_id,
|
||||
root_thread_id: thread_id,
|
||||
});
|
||||
|
||||
recorder.record_thread_started(metadata);
|
||||
|
||||
debug!("recording rollout trace at {}", bundle_dir.display());
|
||||
Ok(recorder)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn create_in_root_for_test(
|
||||
root: &Path,
|
||||
thread_id: ThreadId,
|
||||
metadata: ThreadStartedTraceMetadata,
|
||||
) -> anyhow::Result<Self> {
|
||||
Self::create_in_root(root, thread_id, metadata)
|
||||
}
|
||||
|
||||
/// Wraps selected UI/protocol events in the trace bundle.
|
||||
///
|
||||
/// We intentionally skip high-volume stream deltas here. Inference/tool
|
||||
/// hooks emit typed raw events; protocol wrappers are debug breadcrumbs, not
|
||||
/// the canonical transcript.
|
||||
pub(crate) fn record_protocol_event(&self, event: &EventMsg) {
|
||||
let Some(event_type) = wrapped_protocol_event_type(event) else {
|
||||
return;
|
||||
};
|
||||
let event_payload =
|
||||
match self.write_json_payload_best_effort(RawPayloadKind::ProtocolEvent, event) {
|
||||
Some(event_payload) => event_payload,
|
||||
None => return,
|
||||
};
|
||||
self.append_best_effort(RawTraceEventPayload::ProtocolEventObserved {
|
||||
event_type: event_type.to_string(),
|
||||
event_payload,
|
||||
});
|
||||
}
|
||||
|
||||
/// Emits the lifecycle event and metadata for one thread in this rollout tree.
|
||||
///
|
||||
/// Root sessions call this immediately after `RolloutStarted`; spawned
|
||||
/// child sessions call it on the inherited recorder. Keeping children in
|
||||
/// the root bundle preserves one raw payload namespace and one reduced
|
||||
/// `RolloutTrace` for the whole multi-agent task.
|
||||
pub(crate) fn record_thread_started(&self, metadata: ThreadStartedTraceMetadata) {
|
||||
let metadata_payload =
|
||||
self.write_json_payload_best_effort(RawPayloadKind::SessionMetadata, &metadata);
|
||||
self.append_best_effort(RawTraceEventPayload::ThreadStarted {
|
||||
thread_id: metadata.thread_id,
|
||||
agent_path: metadata.agent_path,
|
||||
metadata_payload,
|
||||
});
|
||||
}
|
||||
|
||||
/// Emits typed turn lifecycle events from the UI/protocol lifecycle.
|
||||
pub(crate) fn record_codex_turn_event(
|
||||
&self,
|
||||
thread_id: AgentThreadId,
|
||||
default_turn_id: &str,
|
||||
event: &EventMsg,
|
||||
) {
|
||||
match event {
|
||||
EventMsg::TurnStarted(event) => {
|
||||
self.append_with_context_best_effort(
|
||||
thread_id.clone(),
|
||||
event.turn_id.clone(),
|
||||
RawTraceEventPayload::CodexTurnStarted {
|
||||
codex_turn_id: event.turn_id.clone(),
|
||||
thread_id,
|
||||
},
|
||||
);
|
||||
}
|
||||
EventMsg::TurnComplete(event) => {
|
||||
self.append_with_context_best_effort(
|
||||
thread_id,
|
||||
event.turn_id.clone(),
|
||||
RawTraceEventPayload::CodexTurnEnded {
|
||||
codex_turn_id: event.turn_id.clone(),
|
||||
status: ExecutionStatus::Completed,
|
||||
},
|
||||
);
|
||||
}
|
||||
EventMsg::TurnAborted(event) => {
|
||||
let turn_id = event
|
||||
.turn_id
|
||||
.clone()
|
||||
.unwrap_or_else(|| default_turn_id.to_string());
|
||||
self.append_with_context_best_effort(
|
||||
thread_id,
|
||||
turn_id.clone(),
|
||||
RawTraceEventPayload::CodexTurnEnded {
|
||||
codex_turn_id: turn_id,
|
||||
status: execution_status_for_abort_reason(&event.reason),
|
||||
},
|
||||
);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits typed runtime tool events from existing protocol lifecycle events.
|
||||
///
|
||||
/// The protocol event stays separate from the caller-facing invocation and
|
||||
/// result payloads. Reducers attach it to `ToolCall.raw_runtime_payload_ids`
|
||||
/// and can also use it to build richer objects such as terminal operations.
|
||||
pub(crate) fn record_tool_call_event(
|
||||
&self,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: String,
|
||||
event: &EventMsg,
|
||||
) {
|
||||
let Some(payload) = self.tool_call_trace_payload(event) else {
|
||||
return;
|
||||
};
|
||||
self.append_with_context_best_effort(thread_id, codex_turn_id, payload);
|
||||
}
|
||||
|
||||
/// Emits the parent runtime object for one model-authored code-mode cell.
|
||||
///
|
||||
/// This must run before JavaScript starts because the runtime can request
|
||||
/// nested tools before the initial custom-tool response is available.
|
||||
pub(crate) fn record_code_cell_started(
|
||||
&self,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: String,
|
||||
runtime_cell_id: &str,
|
||||
model_visible_call_id: &str,
|
||||
source_js: &str,
|
||||
) {
|
||||
self.append_with_context_best_effort(
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
RawTraceEventPayload::CodeCellStarted {
|
||||
runtime_cell_id: runtime_cell_id.to_string(),
|
||||
model_visible_call_id: model_visible_call_id.to_string(),
|
||||
source_js: source_js.to_string(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Emits the first response returned by the public code-mode `exec` tool.
|
||||
///
|
||||
/// A yielded response returns control to the model while the cell keeps
|
||||
/// running. A terminal response is followed by `CodeCellEnded` so the
|
||||
/// reducer can distinguish "first model-visible output" from runtime end.
|
||||
pub(crate) fn record_code_cell_initial_response(
|
||||
&self,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: String,
|
||||
response: &codex_code_mode::RuntimeResponse,
|
||||
) {
|
||||
let response_payload = self.code_cell_response_payload(response);
|
||||
self.append_with_context_best_effort(
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
RawTraceEventPayload::CodeCellInitialResponse {
|
||||
runtime_cell_id: code_cell_runtime_id(response).to_string(),
|
||||
status: code_cell_status_for_runtime_response(response),
|
||||
response_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Emits the terminal lifecycle point for a code-mode cell.
|
||||
pub(crate) fn record_code_cell_ended(
|
||||
&self,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: String,
|
||||
response: &codex_code_mode::RuntimeResponse,
|
||||
) {
|
||||
let response_payload = self.code_cell_response_payload(response);
|
||||
self.append_with_context_best_effort(
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
RawTraceEventPayload::CodeCellEnded {
|
||||
runtime_cell_id: code_cell_runtime_id(response).to_string(),
|
||||
status: code_cell_status_for_runtime_response(response),
|
||||
response_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Emits a generic lifecycle start for direct/code-mode tools without a
|
||||
/// richer protocol-backed lifecycle.
|
||||
///
|
||||
/// The registry calls this after it has resolved a concrete handler. At that
|
||||
/// point we know the tool call is valid, but we are still before
|
||||
/// approval/pre-use hooks, so blocked tools are represented as failed tool
|
||||
/// executions instead of disappearing from the trace.
|
||||
pub(crate) fn record_dispatched_tool_call_started(&self, invocation: &ToolInvocation) {
|
||||
let request = DispatchedToolTraceRequest {
|
||||
tool_name: invocation.tool_name.name.as_str(),
|
||||
tool_namespace: invocation.tool_name.namespace.as_deref(),
|
||||
payload: dispatched_tool_payload(&invocation.payload),
|
||||
};
|
||||
let request_payload =
|
||||
self.write_json_payload_best_effort(RawPayloadKind::ToolInvocation, &request);
|
||||
let (model_visible_call_id, code_mode_runtime_tool_id, requester) =
|
||||
dispatched_tool_requester_fields(invocation);
|
||||
|
||||
self.append_with_context_best_effort(
|
||||
invocation.session.conversation_id.to_string(),
|
||||
invocation.turn.sub_id.clone(),
|
||||
RawTraceEventPayload::ToolCallStarted {
|
||||
tool_call_id: invocation.call_id.clone(),
|
||||
model_visible_call_id,
|
||||
code_mode_runtime_tool_id,
|
||||
requester,
|
||||
kind: dispatched_tool_kind(invocation),
|
||||
summary: ToolCallSummary::Generic {
|
||||
label: dispatched_tool_label(invocation),
|
||||
input_preview: Some(truncate_preview(&invocation.payload.log_payload())),
|
||||
output_preview: None,
|
||||
},
|
||||
invocation_payload: request_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Emits the caller-facing result for a dispatch-level tool lifecycle.
|
||||
pub(crate) fn record_dispatched_tool_call_ended(
|
||||
&self,
|
||||
invocation: &ToolInvocation,
|
||||
status: ExecutionStatus,
|
||||
result: &dyn ToolOutput,
|
||||
response_call_id: &str,
|
||||
tool_payload: &ToolPayload,
|
||||
) {
|
||||
let direct_response_item;
|
||||
let response = match invocation.source {
|
||||
ToolCallSource::Direct => {
|
||||
direct_response_item = result.to_response_item(response_call_id, tool_payload);
|
||||
DispatchedToolTraceResponse::DirectResponse {
|
||||
response_item: &direct_response_item,
|
||||
}
|
||||
}
|
||||
ToolCallSource::CodeMode { .. } => DispatchedToolTraceResponse::CodeModeResponse {
|
||||
value: result.code_mode_result(tool_payload),
|
||||
},
|
||||
ToolCallSource::JsRepl => return,
|
||||
};
|
||||
self.append_dispatched_tool_call_ended(invocation, status, &response);
|
||||
}
|
||||
|
||||
/// Emits a failed end event for a dispatch-level tool lifecycle.
|
||||
pub(crate) fn record_dispatched_tool_call_failed(
|
||||
&self,
|
||||
invocation: &ToolInvocation,
|
||||
error: &str,
|
||||
) {
|
||||
let response = DispatchedToolTraceResponse::Error { error };
|
||||
self.append_dispatched_tool_call_ended(invocation, ExecutionStatus::Failed, &response);
|
||||
}
|
||||
|
||||
fn append_dispatched_tool_call_ended(
|
||||
&self,
|
||||
invocation: &ToolInvocation,
|
||||
status: ExecutionStatus,
|
||||
response: &DispatchedToolTraceResponse<'_>,
|
||||
) {
|
||||
let response_payload =
|
||||
self.write_json_payload_best_effort(RawPayloadKind::ToolResult, response);
|
||||
self.append_with_context_best_effort(
|
||||
invocation.session.conversation_id.to_string(),
|
||||
invocation.turn.sub_id.clone(),
|
||||
RawTraceEventPayload::ToolCallEnded {
|
||||
tool_call_id: invocation.call_id.clone(),
|
||||
status,
|
||||
result_payload: response_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Builds reusable inference trace context for one Codex turn.
|
||||
///
|
||||
/// The returned context is intentionally not "an inference call" yet.
|
||||
/// Transport code owns retry/fallback attempts and calls `start_attempt`
|
||||
/// only after it has built the concrete request payload for that attempt.
|
||||
pub(crate) fn inference_trace_context(
|
||||
&self,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: String,
|
||||
model: String,
|
||||
provider_name: String,
|
||||
) -> InferenceTraceContext {
|
||||
InferenceTraceContext::enabled(
|
||||
Arc::clone(&self.writer),
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
model,
|
||||
provider_name,
|
||||
)
|
||||
}
|
||||
|
||||
/// Builds remote-compaction trace context for one checkpoint.
|
||||
///
|
||||
/// Rollout tracing currently has a first-class checkpoint model only for remote compaction.
|
||||
/// The compact endpoint is a model-facing request whose output replaces live history, so it
|
||||
/// needs both request/response attempt events and a later checkpoint event when processed
|
||||
/// replacement history is installed.
|
||||
pub(crate) fn compaction_trace_context(
|
||||
&self,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: String,
|
||||
compaction_id: String,
|
||||
model: String,
|
||||
provider_name: String,
|
||||
) -> CompactionTraceContext {
|
||||
CompactionTraceContext::enabled(
|
||||
Arc::clone(&self.writer),
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
compaction_id,
|
||||
model,
|
||||
provider_name,
|
||||
)
|
||||
}
|
||||
|
||||
/// Emits the checkpoint where remote-compacted history replaces live thread history.
|
||||
///
|
||||
/// This checkpoint is deliberately separate from the compact endpoint response: Codex filters
|
||||
/// and reinjects context before replacement history becomes live. The reducer uses this event
|
||||
/// to connect the pre-compaction history to the processed replacement items without treating
|
||||
/// repeated developer/context prefix items as part of the replacement itself.
|
||||
pub(crate) fn record_compaction_installed(
|
||||
&self,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: String,
|
||||
compaction_id: String,
|
||||
checkpoint: &CompactionCheckpointTracePayload<'_>,
|
||||
) {
|
||||
let Some(checkpoint_payload) =
|
||||
self.write_json_payload_best_effort(RawPayloadKind::CompactionCheckpoint, checkpoint)
|
||||
else {
|
||||
return;
|
||||
};
|
||||
self.append_with_context_best_effort(
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
RawTraceEventPayload::CompactionInstalled {
|
||||
compaction_id,
|
||||
checkpoint_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Emits the v2 child-to-parent completion message as an explicit graph edge.
|
||||
///
|
||||
/// This notification is not a tool call in the child: it is runtime delivery
|
||||
/// from the completed child turn into the parent's mailbox. Without a
|
||||
/// trace-owned edge the viewer would have to infer the relationship from a
|
||||
/// later parent prompt snapshot, which loses the runtime timing and source.
|
||||
pub(crate) fn record_agent_result_interaction(
|
||||
&self,
|
||||
child_thread_id: AgentThreadId,
|
||||
child_codex_turn_id: String,
|
||||
parent_thread_id: AgentThreadId,
|
||||
child_agent_path: &str,
|
||||
message: &str,
|
||||
status: &AgentStatus,
|
||||
) {
|
||||
let carried_payload = self.write_json_payload_best_effort(
|
||||
RawPayloadKind::AgentResult,
|
||||
&AgentResultTracePayload {
|
||||
child_agent_path,
|
||||
message,
|
||||
status,
|
||||
},
|
||||
);
|
||||
self.append_with_context_best_effort(
|
||||
child_thread_id.clone(),
|
||||
child_codex_turn_id.clone(),
|
||||
RawTraceEventPayload::AgentResultObserved {
|
||||
edge_id: format!(
|
||||
"edge:agent_result:{child_thread_id}:{child_codex_turn_id}:{parent_thread_id}"
|
||||
),
|
||||
child_thread_id,
|
||||
child_codex_turn_id,
|
||||
parent_thread_id,
|
||||
message: message.to_string(),
|
||||
carried_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Emits terminal trace events for graceful session shutdown.
|
||||
///
|
||||
/// Child agent sessions share their root recorder, so ending a child thread
|
||||
/// must not close the whole rollout. Only the root thread's shutdown emits
|
||||
/// `RolloutEnded`.
|
||||
pub(crate) fn record_thread_ended(&self, thread_id: AgentThreadId, status: RolloutStatus) {
|
||||
self.append_best_effort(RawTraceEventPayload::ThreadEnded {
|
||||
thread_id: thread_id.clone(),
|
||||
status: status.clone(),
|
||||
});
|
||||
if thread_id == self.root_thread_id {
|
||||
self.append_best_effort(RawTraceEventPayload::RolloutEnded { status });
|
||||
}
|
||||
}
|
||||
|
||||
fn write_json_payload_best_effort(
|
||||
&self,
|
||||
kind: RawPayloadKind,
|
||||
payload: &impl Serialize,
|
||||
) -> Option<codex_rollout_trace::RawPayloadRef> {
|
||||
match self.writer.write_json_payload(kind, payload) {
|
||||
Ok(payload_ref) => Some(payload_ref),
|
||||
Err(err) => {
|
||||
warn!("failed to write rollout trace payload: {err:#}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn append_best_effort(&self, payload: RawTraceEventPayload) {
|
||||
if let Err(err) = self.writer.append(payload) {
|
||||
warn!("failed to append rollout trace event: {err:#}");
|
||||
}
|
||||
}
|
||||
|
||||
fn append_with_context_best_effort(
|
||||
&self,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: String,
|
||||
payload: RawTraceEventPayload,
|
||||
) {
|
||||
let context = RawTraceEventContext {
|
||||
thread_id: Some(thread_id),
|
||||
codex_turn_id: Some(codex_turn_id),
|
||||
};
|
||||
if let Err(err) = self.writer.append_with_context(context, payload) {
|
||||
warn!("failed to append rollout trace event: {err:#}");
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_call_trace_payload(&self, event: &EventMsg) -> Option<RawTraceEventPayload> {
|
||||
match event {
|
||||
EventMsg::ExecCommandBegin(event) if event.source != ExecCommandSource::UserShell => {
|
||||
self.tool_runtime_started_payload(&event.call_id, event)
|
||||
}
|
||||
EventMsg::ExecCommandEnd(event) if event.source != ExecCommandSource::UserShell => self
|
||||
.tool_runtime_ended_payload(
|
||||
&event.call_id,
|
||||
execution_status_for_exec_status(&event.status),
|
||||
event,
|
||||
),
|
||||
EventMsg::PatchApplyBegin(event) => {
|
||||
self.tool_runtime_started_payload(&event.call_id, event)
|
||||
}
|
||||
EventMsg::PatchApplyEnd(event) => self.tool_runtime_ended_payload(
|
||||
&event.call_id,
|
||||
execution_status_for_patch_status(&event.status),
|
||||
event,
|
||||
),
|
||||
EventMsg::McpToolCallBegin(event) => {
|
||||
self.tool_runtime_started_payload(&event.call_id, event)
|
||||
}
|
||||
EventMsg::McpToolCallEnd(event) => self.tool_runtime_ended_payload(
|
||||
&event.call_id,
|
||||
if event.result.is_ok() {
|
||||
ExecutionStatus::Completed
|
||||
} else {
|
||||
ExecutionStatus::Failed
|
||||
},
|
||||
event,
|
||||
),
|
||||
EventMsg::CollabAgentSpawnBegin(event) => {
|
||||
self.tool_runtime_started_payload(&event.call_id, event)
|
||||
}
|
||||
EventMsg::CollabAgentSpawnEnd(event) => self.tool_runtime_ended_payload(
|
||||
&event.call_id,
|
||||
if event.new_thread_id.is_some() {
|
||||
ExecutionStatus::Completed
|
||||
} else {
|
||||
ExecutionStatus::Failed
|
||||
},
|
||||
event,
|
||||
),
|
||||
EventMsg::CollabAgentInteractionBegin(event) => {
|
||||
self.tool_runtime_started_payload(&event.call_id, event)
|
||||
}
|
||||
EventMsg::CollabAgentInteractionEnd(event) => {
|
||||
self.tool_runtime_ended_payload(&event.call_id, ExecutionStatus::Completed, event)
|
||||
}
|
||||
EventMsg::CollabWaitingBegin(event) => {
|
||||
self.tool_runtime_started_payload(&event.call_id, event)
|
||||
}
|
||||
EventMsg::CollabWaitingEnd(event) => {
|
||||
self.tool_runtime_ended_payload(&event.call_id, ExecutionStatus::Completed, event)
|
||||
}
|
||||
EventMsg::CollabCloseBegin(event) => {
|
||||
self.tool_runtime_started_payload(&event.call_id, event)
|
||||
}
|
||||
EventMsg::CollabCloseEnd(event) => {
|
||||
self.tool_runtime_ended_payload(&event.call_id, ExecutionStatus::Completed, event)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_runtime_started_payload(
|
||||
&self,
|
||||
tool_call_id: &str,
|
||||
event: &impl Serialize,
|
||||
) -> Option<RawTraceEventPayload> {
|
||||
let runtime_payload =
|
||||
self.write_json_payload_best_effort(RawPayloadKind::ToolRuntimeEvent, event)?;
|
||||
Some(RawTraceEventPayload::ToolCallRuntimeStarted {
|
||||
tool_call_id: tool_call_id.to_string(),
|
||||
runtime_payload,
|
||||
})
|
||||
}
|
||||
|
||||
fn tool_runtime_ended_payload(
|
||||
&self,
|
||||
tool_call_id: &str,
|
||||
status: ExecutionStatus,
|
||||
event: &impl Serialize,
|
||||
) -> Option<RawTraceEventPayload> {
|
||||
let runtime_payload =
|
||||
self.write_json_payload_best_effort(RawPayloadKind::ToolRuntimeEvent, event)?;
|
||||
Some(RawTraceEventPayload::ToolCallRuntimeEnded {
|
||||
tool_call_id: tool_call_id.to_string(),
|
||||
status,
|
||||
runtime_payload,
|
||||
})
|
||||
}
|
||||
|
||||
fn code_cell_response_payload(
|
||||
&self,
|
||||
response: &codex_code_mode::RuntimeResponse,
|
||||
) -> Option<RawPayloadRef> {
|
||||
self.write_json_payload_best_effort(
|
||||
RawPayloadKind::ToolResult,
|
||||
&CodeCellResponseTracePayload { response },
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn execution_status_for_abort_reason(reason: &TurnAbortReason) -> ExecutionStatus {
|
||||
match reason {
|
||||
TurnAbortReason::Interrupted | TurnAbortReason::Replaced | TurnAbortReason::ReviewEnded => {
|
||||
ExecutionStatus::Cancelled
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn execution_status_for_exec_status(status: &ExecCommandStatus) -> ExecutionStatus {
|
||||
match status {
|
||||
ExecCommandStatus::Completed => ExecutionStatus::Completed,
|
||||
ExecCommandStatus::Failed => ExecutionStatus::Failed,
|
||||
ExecCommandStatus::Declined => ExecutionStatus::Cancelled,
|
||||
}
|
||||
}
|
||||
|
||||
fn execution_status_for_patch_status(status: &PatchApplyStatus) -> ExecutionStatus {
|
||||
match status {
|
||||
PatchApplyStatus::Completed => ExecutionStatus::Completed,
|
||||
PatchApplyStatus::Failed => ExecutionStatus::Failed,
|
||||
PatchApplyStatus::Declined => ExecutionStatus::Cancelled,
|
||||
}
|
||||
}
|
||||
|
||||
fn code_cell_runtime_id(response: &codex_code_mode::RuntimeResponse) -> &str {
|
||||
match response {
|
||||
codex_code_mode::RuntimeResponse::Yielded { cell_id, .. }
|
||||
| codex_code_mode::RuntimeResponse::Terminated { cell_id, .. }
|
||||
| codex_code_mode::RuntimeResponse::Result { cell_id, .. } => cell_id,
|
||||
}
|
||||
}
|
||||
|
||||
fn code_cell_status_for_runtime_response(
|
||||
response: &codex_code_mode::RuntimeResponse,
|
||||
) -> CodeCellRuntimeStatus {
|
||||
match response {
|
||||
codex_code_mode::RuntimeResponse::Yielded { .. } => CodeCellRuntimeStatus::Yielded,
|
||||
codex_code_mode::RuntimeResponse::Terminated { .. } => CodeCellRuntimeStatus::Terminated,
|
||||
codex_code_mode::RuntimeResponse::Result { error_text, .. } => {
|
||||
if error_text.is_some() {
|
||||
CodeCellRuntimeStatus::Failed
|
||||
} else {
|
||||
CodeCellRuntimeStatus::Completed
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn dispatched_tool_requester_fields(
|
||||
invocation: &ToolInvocation,
|
||||
) -> (
|
||||
Option<ModelVisibleCallId>,
|
||||
Option<CodeModeRuntimeToolId>,
|
||||
RawToolCallRequester,
|
||||
) {
|
||||
match &invocation.source {
|
||||
ToolCallSource::Direct => (
|
||||
Some(invocation.call_id.clone()),
|
||||
None,
|
||||
RawToolCallRequester::Model,
|
||||
),
|
||||
ToolCallSource::CodeMode {
|
||||
cell_id,
|
||||
runtime_tool_call_id,
|
||||
} => (
|
||||
None,
|
||||
Some(runtime_tool_call_id.clone()),
|
||||
RawToolCallRequester::CodeCell {
|
||||
runtime_cell_id: cell_id.clone(),
|
||||
},
|
||||
),
|
||||
ToolCallSource::JsRepl => (None, None, RawToolCallRequester::Model),
|
||||
}
|
||||
}
|
||||
|
||||
fn dispatched_tool_kind(invocation: &ToolInvocation) -> ToolCallKind {
|
||||
if let ToolPayload::Mcp { server, tool, .. } = &invocation.payload {
|
||||
return ToolCallKind::Mcp {
|
||||
server: server.clone(),
|
||||
tool: tool.clone(),
|
||||
};
|
||||
}
|
||||
|
||||
match invocation.tool_name.name.as_str() {
|
||||
"exec_command" | "local_shell" | "shell" | "shell_command" => ToolCallKind::ExecCommand,
|
||||
"write_stdin" => ToolCallKind::WriteStdin,
|
||||
"apply_patch" => ToolCallKind::ApplyPatch,
|
||||
"web_search" | "web_search_preview" => ToolCallKind::Web,
|
||||
"image_generation" | "image_query" => ToolCallKind::ImageGeneration,
|
||||
"spawn_agent" => ToolCallKind::SpawnAgent,
|
||||
"send_message" => ToolCallKind::SendMessage,
|
||||
"followup_task" => ToolCallKind::AssignAgentTask,
|
||||
"wait_agent" => ToolCallKind::WaitAgent,
|
||||
"close_agent" => ToolCallKind::CloseAgent,
|
||||
other => ToolCallKind::Other {
|
||||
name: other.to_string(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn dispatched_tool_label(invocation: &ToolInvocation) -> String {
|
||||
if let ToolPayload::Mcp { server, tool, .. } = &invocation.payload {
|
||||
return format!("mcp:{server}:{tool}");
|
||||
}
|
||||
|
||||
invocation.tool_name.to_string()
|
||||
}
|
||||
|
||||
fn dispatched_tool_payload(payload: &ToolPayload) -> serde_json::Value {
|
||||
match payload {
|
||||
ToolPayload::Function { arguments } => serde_json::json!({
|
||||
"type": "function",
|
||||
"arguments": arguments,
|
||||
}),
|
||||
ToolPayload::ToolSearch { arguments } => serde_json::json!({
|
||||
"type": "tool_search",
|
||||
"arguments": arguments,
|
||||
}),
|
||||
ToolPayload::Custom { input } => serde_json::json!({
|
||||
"type": "custom",
|
||||
"input": input,
|
||||
}),
|
||||
ToolPayload::LocalShell { params } => serde_json::json!({
|
||||
"type": "local_shell",
|
||||
"command": params.command,
|
||||
"workdir": params.workdir,
|
||||
"timeout_ms": params.timeout_ms,
|
||||
}),
|
||||
ToolPayload::Mcp {
|
||||
server,
|
||||
tool,
|
||||
raw_arguments,
|
||||
} => serde_json::json!({
|
||||
"type": "mcp",
|
||||
"server": server,
|
||||
"tool": tool,
|
||||
"raw_arguments": raw_arguments,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn truncate_preview(value: &str) -> String {
|
||||
const MAX_PREVIEW_CHARS: usize = 160;
|
||||
let mut preview = value.chars().take(MAX_PREVIEW_CHARS).collect::<String>();
|
||||
if value.chars().count() > MAX_PREVIEW_CHARS {
|
||||
preview.push_str("...");
|
||||
}
|
||||
preview
|
||||
}
|
||||
|
||||
fn wrapped_protocol_event_type(event: &EventMsg) -> Option<&'static str> {
|
||||
match event {
|
||||
EventMsg::SessionConfigured(_) => Some("session_configured"),
|
||||
EventMsg::TurnStarted(_) => Some("turn_started"),
|
||||
EventMsg::TurnComplete(_) => Some("turn_complete"),
|
||||
EventMsg::TurnAborted(_) => Some("turn_aborted"),
|
||||
EventMsg::ThreadNameUpdated(_) => Some("thread_name_updated"),
|
||||
EventMsg::ThreadRolledBack(_) => Some("thread_rolled_back"),
|
||||
EventMsg::Error(_) => Some("error"),
|
||||
EventMsg::Warning(_) => Some("warning"),
|
||||
EventMsg::ShutdownComplete => Some("shutdown_complete"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "rollout_trace_tests.rs"]
|
||||
mod tests;
|
||||
163
codex-rs/core/src/rollout_trace_tests.rs
Normal file
163
codex-rs/core/src/rollout_trace_tests.rs
Normal file
@@ -0,0 +1,163 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_protocol::AgentPath;
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use codex_rollout_trace::ExecutionStatus;
|
||||
use codex_rollout_trace::RawTraceEventPayload;
|
||||
use codex_rollout_trace::RolloutStatus;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn create_in_root_writes_replayable_lifecycle_events() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let thread_id = ThreadId::new();
|
||||
let recorder = RolloutTraceRecorder::create_in_root(
|
||||
temp.path(),
|
||||
thread_id,
|
||||
ThreadStartedTraceMetadata {
|
||||
thread_id: thread_id.to_string(),
|
||||
agent_path: "/root".to_string(),
|
||||
task_name: None,
|
||||
nickname: None,
|
||||
agent_role: None,
|
||||
session_source: SessionSource::Exec,
|
||||
cwd: PathBuf::from("/workspace"),
|
||||
rollout_path: Some(PathBuf::from("/tmp/rollout.jsonl")),
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
approval_policy: "never".to_string(),
|
||||
sandbox_policy: format!("{:?}", SandboxPolicy::DangerFullAccess),
|
||||
},
|
||||
)
|
||||
.expect("trace recorder");
|
||||
|
||||
recorder.record_thread_ended(thread_id.to_string(), RolloutStatus::Completed);
|
||||
|
||||
let bundle_dir = single_bundle_dir(temp.path())?;
|
||||
let replayed = codex_rollout_trace::replay_bundle(&bundle_dir)?;
|
||||
|
||||
assert_eq!(replayed.status, RolloutStatus::Completed);
|
||||
assert_eq!(replayed.root_thread_id, thread_id.to_string());
|
||||
assert_eq!(replayed.threads[&thread_id.to_string()].agent_path, "/root");
|
||||
assert_eq!(replayed.raw_payloads.len(), 1);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spawned_thread_start_appends_to_root_bundle() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let root_thread_id = ThreadId::new();
|
||||
let child_thread_id = ThreadId::new();
|
||||
let recorder = RolloutTraceRecorder::create_in_root(
|
||||
temp.path(),
|
||||
root_thread_id,
|
||||
minimal_metadata(root_thread_id),
|
||||
)
|
||||
.expect("trace recorder");
|
||||
|
||||
recorder.record_thread_started(ThreadStartedTraceMetadata {
|
||||
thread_id: child_thread_id.to_string(),
|
||||
agent_path: "/root/repo_file_counter".to_string(),
|
||||
task_name: Some("repo_file_counter".to_string()),
|
||||
nickname: Some("Kepler".to_string()),
|
||||
agent_role: Some("worker".to_string()),
|
||||
session_source: SessionSource::SubAgent(SubAgentSource::ThreadSpawn {
|
||||
parent_thread_id: root_thread_id,
|
||||
depth: 1,
|
||||
agent_path: Some(
|
||||
AgentPath::try_from("/root/repo_file_counter").map_err(anyhow::Error::msg)?,
|
||||
),
|
||||
agent_nickname: Some("Kepler".to_string()),
|
||||
agent_role: Some("worker".to_string()),
|
||||
}),
|
||||
cwd: PathBuf::from("/workspace"),
|
||||
rollout_path: Some(PathBuf::from("/tmp/child-rollout.jsonl")),
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
approval_policy: "never".to_string(),
|
||||
sandbox_policy: format!("{:?}", SandboxPolicy::DangerFullAccess),
|
||||
});
|
||||
recorder.record_thread_ended(child_thread_id.to_string(), RolloutStatus::Completed);
|
||||
|
||||
let bundle_dir = single_bundle_dir(temp.path())?;
|
||||
let replayed = codex_rollout_trace::replay_bundle(&bundle_dir)?;
|
||||
|
||||
assert_eq!(fs::read_dir(temp.path())?.count(), 1);
|
||||
assert_eq!(replayed.threads.len(), 2);
|
||||
assert_eq!(
|
||||
replayed.threads[&child_thread_id.to_string()].agent_path,
|
||||
"/root/repo_file_counter"
|
||||
);
|
||||
assert_eq!(replayed.status, RolloutStatus::Running);
|
||||
assert_eq!(
|
||||
replayed.threads[&child_thread_id.to_string()]
|
||||
.execution
|
||||
.status,
|
||||
ExecutionStatus::Completed
|
||||
);
|
||||
assert_eq!(replayed.raw_payloads.len(), 2);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn protocol_wrapper_records_selected_events_as_raw_payloads() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let thread_id = ThreadId::new();
|
||||
let recorder =
|
||||
RolloutTraceRecorder::create_in_root(temp.path(), thread_id, minimal_metadata(thread_id))
|
||||
.expect("trace recorder");
|
||||
|
||||
recorder.record_protocol_event(&EventMsg::ShutdownComplete);
|
||||
|
||||
let event_log = fs::read_to_string(single_bundle_dir(temp.path())?.join("trace.jsonl"))?;
|
||||
let protocol_event_seen = event_log.lines().any(|line| {
|
||||
let event: codex_rollout_trace::RawTraceEvent =
|
||||
serde_json::from_str(line).expect("raw trace event");
|
||||
matches!(
|
||||
event.payload,
|
||||
RawTraceEventPayload::ProtocolEventObserved {
|
||||
event_type,
|
||||
..
|
||||
} if event_type == "shutdown_complete"
|
||||
)
|
||||
});
|
||||
|
||||
assert!(protocol_event_seen);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn minimal_metadata(thread_id: ThreadId) -> ThreadStartedTraceMetadata {
|
||||
ThreadStartedTraceMetadata {
|
||||
thread_id: thread_id.to_string(),
|
||||
agent_path: "/root".to_string(),
|
||||
task_name: None,
|
||||
nickname: None,
|
||||
agent_role: None,
|
||||
session_source: SessionSource::Exec,
|
||||
cwd: PathBuf::from("/workspace"),
|
||||
rollout_path: None,
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
approval_policy: "never".to_string(),
|
||||
sandbox_policy: "danger-full-access".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn single_bundle_dir(root: &Path) -> anyhow::Result<PathBuf> {
|
||||
let mut entries = fs::read_dir(root)?
|
||||
.map(|entry| entry.map(|entry| entry.path()))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
entries.sort();
|
||||
assert_eq!(entries.len(), 1);
|
||||
Ok(entries.remove(0))
|
||||
}
|
||||
@@ -11,6 +11,7 @@ use crate::exec_policy::ExecPolicyManager;
|
||||
use crate::guardian::GuardianRejection;
|
||||
use crate::mcp::McpManager;
|
||||
use crate::plugins::PluginsManager;
|
||||
use crate::rollout_trace::RolloutTraceRecorder;
|
||||
use crate::skills_watcher::SkillsWatcher;
|
||||
use crate::tools::code_mode::CodeModeService;
|
||||
use crate::tools::network_approval::NetworkApprovalService;
|
||||
@@ -42,6 +43,7 @@ pub(crate) struct SessionServices {
|
||||
pub(crate) analytics_events_client: AnalyticsEventsClient,
|
||||
pub(crate) hooks: Hooks,
|
||||
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
|
||||
pub(crate) rollout_trace: Option<RolloutTraceRecorder>,
|
||||
pub(crate) user_shell: Arc<crate::shell::Shell>,
|
||||
pub(crate) agent_identity_manager: Arc<AgentIdentityManager>,
|
||||
pub(crate) shell_snapshot_tx: watch::Sender<Option<Arc<crate::shell_snapshot::ShellSnapshot>>>,
|
||||
|
||||
@@ -41,6 +41,7 @@ use codex_protocol::protocol::Op;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::SessionConfiguredEvent;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::SubAgentSource;
|
||||
use codex_protocol::protocol::TurnAbortReason;
|
||||
use codex_protocol::protocol::TurnAbortedEvent;
|
||||
use codex_protocol::protocol::W3cTraceContext;
|
||||
@@ -925,6 +926,9 @@ impl ThreadManagerState {
|
||||
}
|
||||
Some(_) | None => crate::file_watcher::WatchRegistration::default(),
|
||||
};
|
||||
let inherited_rollout_trace = self
|
||||
.inherited_rollout_trace_for_source(&session_source)
|
||||
.await;
|
||||
let CodexSpawnOk {
|
||||
codex, thread_id, ..
|
||||
} = Codex::spawn(CodexSpawnArgs {
|
||||
@@ -944,6 +948,7 @@ impl ThreadManagerState {
|
||||
metrics_service_name,
|
||||
inherited_shell_snapshot,
|
||||
inherited_exec_policy,
|
||||
inherited_rollout_trace,
|
||||
user_shell_override,
|
||||
parent_trace,
|
||||
analytics_events_client: self.analytics_events_client.clone(),
|
||||
@@ -988,6 +993,24 @@ impl ThreadManagerState {
|
||||
pub(crate) fn notify_thread_created(&self, thread_id: ThreadId) {
|
||||
let _ = self.thread_created_tx.send(thread_id);
|
||||
}
|
||||
|
||||
async fn inherited_rollout_trace_for_source(
|
||||
&self,
|
||||
session_source: &SessionSource,
|
||||
) -> Option<crate::rollout_trace::RolloutTraceRecorder> {
|
||||
// Only v2 thread-spawn children inherit a recorder. Independent
|
||||
// top-level threads still create their own rollout bundles.
|
||||
let SessionSource::SubAgent(SubAgentSource::ThreadSpawn {
|
||||
parent_thread_id, ..
|
||||
}) = session_source
|
||||
else {
|
||||
return None;
|
||||
};
|
||||
self.get_thread(*parent_thread_id)
|
||||
.await
|
||||
.ok()
|
||||
.and_then(|thread| thread.codex.session.services.rollout_trace.clone())
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a fork snapshot cut strictly before the nth user message (0-based).
|
||||
|
||||
@@ -30,12 +30,25 @@ impl CodeModeExecuteHandler {
|
||||
.code_mode_service
|
||||
.stored_values()
|
||||
.await;
|
||||
// Allocate before starting V8 so the trace can create the parent
|
||||
// CodeCell before model-authored JavaScript issues nested tool calls.
|
||||
let runtime_cell_id = exec.session.services.code_mode_service.allocate_cell_id();
|
||||
if let Some(trace) = &exec.session.services.rollout_trace {
|
||||
trace.record_code_cell_started(
|
||||
exec.session.conversation_id.to_string(),
|
||||
exec.turn.sub_id.clone(),
|
||||
&runtime_cell_id,
|
||||
&call_id,
|
||||
&args.code,
|
||||
);
|
||||
}
|
||||
let started_at = std::time::Instant::now();
|
||||
let response = exec
|
||||
.session
|
||||
.services
|
||||
.code_mode_service
|
||||
.execute(codex_code_mode::ExecuteRequest {
|
||||
cell_id: Some(runtime_cell_id.clone()),
|
||||
tool_call_id: call_id,
|
||||
enabled_tools,
|
||||
source: args.code,
|
||||
@@ -45,6 +58,23 @@ impl CodeModeExecuteHandler {
|
||||
})
|
||||
.await
|
||||
.map_err(FunctionCallError::RespondToModel)?;
|
||||
if let Some(trace) = &exec.session.services.rollout_trace {
|
||||
// The initial response is the model-visible custom-tool return.
|
||||
// Yielded cells keep running, so terminal lifecycle is only emitted
|
||||
// here when the first response also ended the runtime.
|
||||
trace.record_code_cell_initial_response(
|
||||
exec.session.conversation_id.to_string(),
|
||||
exec.turn.sub_id.clone(),
|
||||
&response,
|
||||
);
|
||||
if !matches!(response, codex_code_mode::RuntimeResponse::Yielded { .. }) {
|
||||
trace.record_code_cell_ended(
|
||||
exec.session.conversation_id.to_string(),
|
||||
exec.turn.sub_id.clone(),
|
||||
&response,
|
||||
);
|
||||
}
|
||||
}
|
||||
handle_runtime_response(&exec, response, args.max_output_tokens, started_at)
|
||||
.await
|
||||
.map_err(FunctionCallError::RespondToModel)
|
||||
@@ -62,6 +92,15 @@ impl ToolHandler for CodeModeExecuteHandler {
|
||||
matches!(payload, ToolPayload::Custom { .. })
|
||||
}
|
||||
|
||||
fn uses_first_class_trace_object(&self, invocation: &ToolInvocation) -> bool {
|
||||
// `exec` is represented by the first-class CodeCell lifecycle. The
|
||||
// dispatch-level ToolCall event would duplicate the same runtime
|
||||
// boundary as a less precise object.
|
||||
matches!(invocation.payload, ToolPayload::Custom { .. })
|
||||
&& invocation.tool_name.namespace.is_none()
|
||||
&& invocation.tool_name.name == PUBLIC_TOOL_NAME
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<Self::Output, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session,
|
||||
|
||||
@@ -7,6 +7,7 @@ use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_code_mode::CodeModeToolInvocation;
|
||||
use codex_code_mode::CodeModeTurnHost;
|
||||
use codex_code_mode::RuntimeResponse;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
@@ -73,6 +74,10 @@ impl CodeModeService {
|
||||
self.inner.replace_stored_values(values).await;
|
||||
}
|
||||
|
||||
pub(crate) fn allocate_cell_id(&self) -> String {
|
||||
self.inner.allocate_cell_id()
|
||||
}
|
||||
|
||||
pub(crate) async fn execute(
|
||||
&self,
|
||||
request: codex_code_mode::ExecuteRequest,
|
||||
@@ -83,7 +88,7 @@ impl CodeModeService {
|
||||
pub(crate) async fn wait(
|
||||
&self,
|
||||
request: codex_code_mode::WaitRequest,
|
||||
) -> Result<RuntimeResponse, String> {
|
||||
) -> Result<codex_code_mode::WaitResponse, String> {
|
||||
self.inner.wait(request).await
|
||||
}
|
||||
|
||||
@@ -118,15 +123,13 @@ struct CoreTurnHost {
|
||||
impl CodeModeTurnHost for CoreTurnHost {
|
||||
async fn invoke_tool(
|
||||
&self,
|
||||
tool_name: ToolName,
|
||||
input: Option<JsonValue>,
|
||||
invocation: CodeModeToolInvocation,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> Result<JsonValue, String> {
|
||||
call_nested_tool(
|
||||
self.exec.clone(),
|
||||
self.tool_runtime.clone(),
|
||||
tool_name,
|
||||
input,
|
||||
invocation,
|
||||
cancellation_token,
|
||||
)
|
||||
.await
|
||||
@@ -298,10 +301,15 @@ async fn build_nested_router(exec: &ExecContext) -> ToolRouter {
|
||||
async fn call_nested_tool(
|
||||
exec: ExecContext,
|
||||
tool_runtime: ToolCallRuntime,
|
||||
tool_name: ToolName,
|
||||
input: Option<JsonValue>,
|
||||
invocation: CodeModeToolInvocation,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> Result<JsonValue, FunctionCallError> {
|
||||
let CodeModeToolInvocation {
|
||||
cell_id,
|
||||
runtime_tool_call_id,
|
||||
tool_name,
|
||||
input,
|
||||
} = invocation;
|
||||
if tool_name.namespace.is_none() && tool_name.name == PUBLIC_TOOL_NAME {
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"{PUBLIC_TOOL_NAME} cannot invoke itself"
|
||||
@@ -335,7 +343,14 @@ async fn call_nested_tool(
|
||||
payload,
|
||||
};
|
||||
let result = tool_runtime
|
||||
.handle_tool_call_with_source(call, ToolCallSource::CodeMode, cancellation_token)
|
||||
.handle_tool_call_with_source(
|
||||
call,
|
||||
ToolCallSource::CodeMode {
|
||||
cell_id,
|
||||
runtime_tool_call_id,
|
||||
},
|
||||
cancellation_token,
|
||||
)
|
||||
.await?;
|
||||
Ok(result.code_mode_result())
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ impl ToolHandler for CodeModeWaitHandler {
|
||||
let args: ExecWaitArgs = parse_arguments(&arguments)?;
|
||||
let exec = ExecContext { session, turn };
|
||||
let started_at = std::time::Instant::now();
|
||||
let response = exec
|
||||
let wait_response = exec
|
||||
.session
|
||||
.services
|
||||
.code_mode_service
|
||||
@@ -72,9 +72,28 @@ impl ToolHandler for CodeModeWaitHandler {
|
||||
})
|
||||
.await
|
||||
.map_err(FunctionCallError::RespondToModel)?;
|
||||
handle_runtime_response(&exec, response, args.max_tokens, started_at)
|
||||
.await
|
||||
.map_err(FunctionCallError::RespondToModel)
|
||||
let response = wait_response.runtime_response();
|
||||
if matches!(&wait_response, codex_code_mode::WaitResponse::Cell(_))
|
||||
&& !matches!(response, codex_code_mode::RuntimeResponse::Yielded { .. })
|
||||
&& let Some(trace) = &exec.session.services.rollout_trace
|
||||
{
|
||||
// Only a live-cell wait can close a CodeCell. A missing
|
||||
// cell is still an ordinary `wait` tool result, but there
|
||||
// is no runtime object for the reducer to complete.
|
||||
trace.record_code_cell_ended(
|
||||
exec.session.conversation_id.to_string(),
|
||||
exec.turn.sub_id.clone(),
|
||||
response,
|
||||
);
|
||||
}
|
||||
handle_runtime_response(
|
||||
&exec,
|
||||
wait_response.into_runtime_response(),
|
||||
args.max_tokens,
|
||||
started_at,
|
||||
)
|
||||
.await
|
||||
.map_err(FunctionCallError::RespondToModel)
|
||||
}
|
||||
_ => Err(FunctionCallError::RespondToModel(format!(
|
||||
"{WAIT_TOOL_NAME} expects JSON arguments"
|
||||
|
||||
@@ -28,11 +28,18 @@ use tokio::sync::Mutex;
|
||||
|
||||
pub type SharedTurnDiffTracker = Arc<Mutex<TurnDiffTracker>>;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum ToolCallSource {
|
||||
Direct,
|
||||
JsRepl,
|
||||
CodeMode,
|
||||
CodeMode {
|
||||
/// Runtime cell that issued the nested tool request.
|
||||
cell_id: String,
|
||||
/// Code-mode's per-cell tool invocation id. This is useful for
|
||||
/// debugging the JS/runtime bridge, but it is not the Codex tool call id
|
||||
/// because the runtime id only needs to be unique within one cell.
|
||||
runtime_tool_call_id: String,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -42,6 +49,7 @@ pub struct ToolInvocation {
|
||||
pub tracker: SharedTurnDiffTracker,
|
||||
pub call_id: String,
|
||||
pub tool_name: ToolName,
|
||||
pub source: ToolCallSource,
|
||||
pub payload: ToolPayload,
|
||||
}
|
||||
|
||||
|
||||
@@ -70,6 +70,7 @@ fn invocation(
|
||||
tracker: Arc::new(Mutex::new(TurnDiffTracker::default())),
|
||||
call_id: "call-1".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain(tool_name),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ async fn multi_agent_v2_request_user_input_rejects_subagent_threads() {
|
||||
tracker: Arc::new(Mutex::new(TurnDiffTracker::default())),
|
||||
call_id: "call-1".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain(REQUEST_USER_INPUT_TOOL_NAME),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: json!({
|
||||
"questions": [{
|
||||
|
||||
@@ -228,6 +228,7 @@ async fn shell_pre_tool_use_payload_uses_joined_command() {
|
||||
tracker: Arc::new(Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: "call-41".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("shell"),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload,
|
||||
}),
|
||||
Some(crate::tools::registry::PreToolUsePayload {
|
||||
@@ -253,6 +254,7 @@ async fn shell_command_pre_tool_use_payload_uses_raw_command() {
|
||||
tracker: Arc::new(Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: "call-42".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("shell_command"),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload,
|
||||
}),
|
||||
Some(crate::tools::registry::PreToolUsePayload {
|
||||
|
||||
@@ -213,6 +213,7 @@ async fn exec_command_pre_tool_use_payload_uses_raw_command() {
|
||||
tracker: Arc::new(Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: "call-43".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("exec_command"),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload,
|
||||
}),
|
||||
Some(crate::tools::registry::PreToolUsePayload {
|
||||
@@ -236,6 +237,7 @@ async fn exec_command_pre_tool_use_payload_skips_write_stdin() {
|
||||
tracker: Arc::new(Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: "call-44".to_string(),
|
||||
tool_name: codex_tools::ToolName::plain("write_stdin"),
|
||||
source: crate::tools::context::ToolCallSource::Direct,
|
||||
payload,
|
||||
}),
|
||||
None
|
||||
|
||||
@@ -8,8 +8,10 @@ use crate::hook_runtime::record_additional_contexts;
|
||||
use crate::hook_runtime::run_post_tool_use_hooks;
|
||||
use crate::hook_runtime::run_pre_tool_use_hooks;
|
||||
use crate::memories::usage::emit_metric_for_tool_read;
|
||||
use crate::rollout_trace::RolloutTraceRecorder;
|
||||
use crate::sandbox_tags::sandbox_tag;
|
||||
use crate::tools::context::FunctionToolOutput;
|
||||
use crate::tools::context::ToolCallSource;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
@@ -22,6 +24,7 @@ use codex_hooks::HookToolInputLocalShell;
|
||||
use codex_hooks::HookToolKind;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_rollout_trace::ExecutionStatus;
|
||||
use codex_tools::ConfiguredToolSpec;
|
||||
use codex_tools::ToolName;
|
||||
use codex_tools::ToolSpec;
|
||||
@@ -74,6 +77,16 @@ pub trait ToolHandler: Send + Sync {
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns `true` when this handler is represented by a trace object other
|
||||
/// than `ToolCall`.
|
||||
///
|
||||
/// Protocol events are runtime observations on the `ToolCall`; they do not
|
||||
/// suppress the canonical tool boundary. The public code-mode `exec` tool is
|
||||
/// the exception because `CodeCell` owns that model-visible boundary.
|
||||
fn uses_first_class_trace_object(&self, _invocation: &ToolInvocation) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Perform the actual [ToolInvocation] and returns a [ToolOutput] containing
|
||||
/// the final output to return to the model.
|
||||
fn handle(
|
||||
@@ -132,6 +145,8 @@ trait AnyToolHandler: Send + Sync {
|
||||
result: &dyn ToolOutput,
|
||||
) -> Option<PostToolUsePayload>;
|
||||
|
||||
fn uses_first_class_trace_object(&self, invocation: &ToolInvocation) -> bool;
|
||||
|
||||
fn handle_any<'a>(
|
||||
&'a self,
|
||||
invocation: ToolInvocation,
|
||||
@@ -163,6 +178,10 @@ where
|
||||
ToolHandler::post_tool_use_payload(self, call_id, payload, result)
|
||||
}
|
||||
|
||||
fn uses_first_class_trace_object(&self, invocation: &ToolInvocation) -> bool {
|
||||
ToolHandler::uses_first_class_trace_object(self, invocation)
|
||||
}
|
||||
|
||||
fn handle_any<'a>(
|
||||
&'a self,
|
||||
invocation: ToolInvocation,
|
||||
@@ -184,6 +203,60 @@ pub struct ToolRegistry {
|
||||
handlers: HashMap<ToolName, Arc<dyn AnyToolHandler>>,
|
||||
}
|
||||
|
||||
/// No-op capable trace handle for one resolved tool dispatch.
|
||||
///
|
||||
/// The registry has several early-return paths after a handler is selected:
|
||||
/// pre-use hooks, handler execution, after-use hooks, and result status all
|
||||
/// affect the trace lifecycle. Keeping the trace eligibility and event writes
|
||||
/// behind this helper makes those paths say what happened instead of repeating
|
||||
/// the Direct/CodeMode/JsRepl/first-class-object policy at each branch.
|
||||
struct DispatchTrace {
|
||||
recorder: Option<RolloutTraceRecorder>,
|
||||
}
|
||||
|
||||
impl DispatchTrace {
|
||||
fn new(invocation: &ToolInvocation, handler: &dyn AnyToolHandler) -> Self {
|
||||
let should_trace = matches!(
|
||||
invocation.source,
|
||||
ToolCallSource::Direct | ToolCallSource::CodeMode { .. }
|
||||
) && !handler.uses_first_class_trace_object(invocation);
|
||||
|
||||
let recorder = should_trace
|
||||
.then(|| invocation.session.services.rollout_trace.clone())
|
||||
.flatten();
|
||||
Self { recorder }
|
||||
}
|
||||
|
||||
fn record_started(&self, invocation: &ToolInvocation) {
|
||||
if let Some(recorder) = &self.recorder {
|
||||
recorder.record_dispatched_tool_call_started(invocation);
|
||||
}
|
||||
}
|
||||
|
||||
fn record_completed(&self, invocation: &ToolInvocation, result: &AnyToolResult) {
|
||||
if let Some(recorder) = &self.recorder {
|
||||
let status = if result.result.success_for_logging() {
|
||||
ExecutionStatus::Completed
|
||||
} else {
|
||||
ExecutionStatus::Failed
|
||||
};
|
||||
recorder.record_dispatched_tool_call_ended(
|
||||
invocation,
|
||||
status,
|
||||
result.result.as_ref(),
|
||||
&result.call_id,
|
||||
&result.payload,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn record_failed(&self, invocation: &ToolInvocation, error: &FunctionCallError) {
|
||||
if let Some(recorder) = &self.recorder {
|
||||
recorder.record_dispatched_tool_call_failed(invocation, &error.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolRegistry {
|
||||
fn new(handlers: HashMap<ToolName, Arc<dyn AnyToolHandler>>) -> Self {
|
||||
Self { handlers }
|
||||
@@ -288,6 +361,9 @@ impl ToolRegistry {
|
||||
return Err(FunctionCallError::Fatal(message));
|
||||
}
|
||||
|
||||
let dispatch_trace = DispatchTrace::new(&invocation, handler.as_ref());
|
||||
dispatch_trace.record_started(&invocation);
|
||||
|
||||
if let Some(pre_tool_use_payload) = handler.pre_tool_use_payload(&invocation)
|
||||
&& let Some(reason) = run_pre_tool_use_hooks(
|
||||
&invocation.session,
|
||||
@@ -297,10 +373,12 @@ impl ToolRegistry {
|
||||
)
|
||||
.await
|
||||
{
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
let err = FunctionCallError::RespondToModel(format!(
|
||||
"Command blocked by PreToolUse hook: {reason}. Command: {}",
|
||||
pre_tool_use_payload.command
|
||||
)));
|
||||
));
|
||||
dispatch_trace.record_failed(&invocation, &err);
|
||||
return Err(err);
|
||||
}
|
||||
|
||||
let is_mutating = handler.is_mutating(&invocation).await;
|
||||
@@ -383,6 +461,7 @@ impl ToolRegistry {
|
||||
.await;
|
||||
|
||||
if let Some(err) = hook_abort_error {
|
||||
dispatch_trace.record_failed(&invocation, &err);
|
||||
return Err(err);
|
||||
}
|
||||
|
||||
@@ -422,9 +501,13 @@ impl ToolRegistry {
|
||||
let result = guard.take().ok_or_else(|| {
|
||||
FunctionCallError::Fatal("tool produced no output".to_string())
|
||||
})?;
|
||||
dispatch_trace.record_completed(&invocation, &result);
|
||||
Ok(result)
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
Err(err) => {
|
||||
dispatch_trace.record_failed(&invocation, &err);
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,25 @@
|
||||
use super::*;
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::rollout_trace::RolloutTraceRecorder;
|
||||
use crate::rollout_trace::ThreadStartedTraceMetadata;
|
||||
use crate::tools::code_mode::CodeModeWaitHandler;
|
||||
use crate::tools::code_mode::WAIT_TOOL_NAME;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_protocol::config_types::ModeKind;
|
||||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::TurnStartedEvent;
|
||||
use codex_rollout_trace::ToolCallRequester;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use tempfile::TempDir;
|
||||
|
||||
struct TestHandler;
|
||||
#[derive(Default)]
|
||||
struct TestHandler {
|
||||
first_class_trace_object: bool,
|
||||
}
|
||||
|
||||
impl ToolHandler for TestHandler {
|
||||
type Output = crate::tools::context::FunctionToolOutput;
|
||||
@@ -10,15 +28,22 @@ impl ToolHandler for TestHandler {
|
||||
ToolKind::Function
|
||||
}
|
||||
|
||||
fn uses_first_class_trace_object(&self, _invocation: &ToolInvocation) -> bool {
|
||||
self.first_class_trace_object
|
||||
}
|
||||
|
||||
async fn handle(&self, _invocation: ToolInvocation) -> Result<Self::Output, FunctionCallError> {
|
||||
unreachable!("test handler should not be invoked")
|
||||
Ok(crate::tools::context::FunctionToolOutput::from_text(
|
||||
"ok".to_string(),
|
||||
Some(true),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handler_looks_up_namespaced_aliases_explicitly() {
|
||||
let plain_handler = Arc::new(TestHandler) as Arc<dyn AnyToolHandler>;
|
||||
let namespaced_handler = Arc::new(TestHandler) as Arc<dyn AnyToolHandler>;
|
||||
let plain_handler = Arc::new(TestHandler::default()) as Arc<dyn AnyToolHandler>;
|
||||
let namespaced_handler = Arc::new(TestHandler::default()) as Arc<dyn AnyToolHandler>;
|
||||
let namespace = "mcp__codex_apps__gmail";
|
||||
let tool_name = "gmail_get_recent_emails";
|
||||
let plain_name = codex_tools::ToolName::plain(tool_name);
|
||||
@@ -49,3 +74,248 @@ fn handler_looks_up_namespaced_aliases_explicitly() {
|
||||
.is_some_and(|handler| Arc::ptr_eq(handler, &namespaced_handler))
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn dispatch_lifecycle_trace_records_direct_and_code_mode_requesters() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let (mut session, turn) = make_session_and_context().await;
|
||||
attach_test_trace(&mut session, &turn, temp.path())?;
|
||||
session
|
||||
.services
|
||||
.rollout_trace
|
||||
.as_ref()
|
||||
.expect("trace recorder")
|
||||
.record_code_cell_started(
|
||||
session.conversation_id.to_string(),
|
||||
turn.sub_id.clone(),
|
||||
"cell-1",
|
||||
"call-code",
|
||||
"await tools.test_tool({})",
|
||||
);
|
||||
|
||||
let registry = ToolRegistry::new(HashMap::from([(
|
||||
codex_tools::ToolName::plain("test_tool"),
|
||||
Arc::new(TestHandler::default()) as Arc<dyn AnyToolHandler>,
|
||||
)]));
|
||||
let session = Arc::new(session);
|
||||
let turn = Arc::new(turn);
|
||||
|
||||
registry
|
||||
.dispatch_any(test_invocation(
|
||||
Arc::clone(&session),
|
||||
Arc::clone(&turn),
|
||||
"direct-call",
|
||||
"test_tool",
|
||||
ToolCallSource::Direct,
|
||||
"{}",
|
||||
))
|
||||
.await?;
|
||||
registry
|
||||
.dispatch_any(test_invocation(
|
||||
session,
|
||||
turn,
|
||||
"code-mode-call",
|
||||
"test_tool",
|
||||
ToolCallSource::CodeMode {
|
||||
cell_id: "cell-1".to_string(),
|
||||
runtime_tool_call_id: "tool-1".to_string(),
|
||||
},
|
||||
"{}",
|
||||
))
|
||||
.await?;
|
||||
|
||||
let replayed = codex_rollout_trace::replay_bundle(single_bundle_dir(temp.path())?)?;
|
||||
assert_eq!(
|
||||
replayed.tool_calls["direct-call"].model_visible_call_id,
|
||||
Some("direct-call".to_string()),
|
||||
);
|
||||
assert_eq!(
|
||||
replayed.tool_calls["direct-call"].requester,
|
||||
ToolCallRequester::Model,
|
||||
);
|
||||
assert!(
|
||||
replayed.tool_calls["direct-call"]
|
||||
.raw_invocation_payload_id
|
||||
.is_some(),
|
||||
"dispatch tracing should keep the tool invocation payload",
|
||||
);
|
||||
assert!(
|
||||
replayed.tool_calls["direct-call"]
|
||||
.raw_result_payload_id
|
||||
.is_some(),
|
||||
"direct calls should keep the model-facing result payload",
|
||||
);
|
||||
assert_eq!(
|
||||
replayed.tool_calls["code-mode-call"].model_visible_call_id,
|
||||
None,
|
||||
);
|
||||
assert_eq!(
|
||||
replayed.tool_calls["code-mode-call"].code_mode_runtime_tool_id,
|
||||
Some("tool-1".to_string()),
|
||||
);
|
||||
assert_eq!(
|
||||
replayed.tool_calls["code-mode-call"].requester,
|
||||
ToolCallRequester::CodeCell {
|
||||
code_cell_id: "code_cell:call-code".to_string(),
|
||||
},
|
||||
);
|
||||
assert!(
|
||||
replayed.tool_calls["code-mode-call"]
|
||||
.raw_result_payload_id
|
||||
.is_some(),
|
||||
"code-mode calls should keep the result returned to JavaScript",
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn dispatch_lifecycle_trace_skips_noncanonical_boundaries() -> anyhow::Result<()> {
|
||||
assert_dispatch_trace_skips(
|
||||
Arc::new(TestHandler::default()) as Arc<dyn AnyToolHandler>,
|
||||
ToolCallSource::JsRepl,
|
||||
)
|
||||
.await?;
|
||||
assert_dispatch_trace_skips(
|
||||
Arc::new(TestHandler {
|
||||
first_class_trace_object: true,
|
||||
}) as Arc<dyn AnyToolHandler>,
|
||||
ToolCallSource::Direct,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn assert_dispatch_trace_skips(
|
||||
handler: Arc<dyn AnyToolHandler>,
|
||||
source: ToolCallSource,
|
||||
) -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let (mut session, turn) = make_session_and_context().await;
|
||||
attach_test_trace(&mut session, &turn, temp.path())?;
|
||||
|
||||
let registry = ToolRegistry::new(HashMap::from([(
|
||||
codex_tools::ToolName::plain("test_tool"),
|
||||
handler,
|
||||
)]));
|
||||
let session = Arc::new(session);
|
||||
let turn = Arc::new(turn);
|
||||
|
||||
registry
|
||||
.dispatch_any(test_invocation(
|
||||
session,
|
||||
turn,
|
||||
"skipped-call",
|
||||
"test_tool",
|
||||
source,
|
||||
"{}",
|
||||
))
|
||||
.await?;
|
||||
|
||||
let replayed = codex_rollout_trace::replay_bundle(single_bundle_dir(temp.path())?)?;
|
||||
assert_eq!(replayed.tool_calls, Default::default());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn missing_code_mode_wait_traces_only_the_wait_tool_call() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let (mut session, turn) = make_session_and_context().await;
|
||||
attach_test_trace(&mut session, &turn, temp.path())?;
|
||||
|
||||
let registry = ToolRegistry::new(HashMap::from([(
|
||||
codex_tools::ToolName::plain(WAIT_TOOL_NAME),
|
||||
Arc::new(CodeModeWaitHandler) as Arc<dyn AnyToolHandler>,
|
||||
)]));
|
||||
let session = Arc::new(session);
|
||||
let turn = Arc::new(turn);
|
||||
|
||||
registry
|
||||
.dispatch_any(test_invocation(
|
||||
session,
|
||||
turn,
|
||||
"wait-call",
|
||||
WAIT_TOOL_NAME,
|
||||
ToolCallSource::Direct,
|
||||
r#"{"cell_id":"noop","terminate":true}"#,
|
||||
))
|
||||
.await?;
|
||||
|
||||
let replayed = codex_rollout_trace::replay_bundle(single_bundle_dir(temp.path())?)?;
|
||||
assert_eq!(replayed.code_cells.len(), 0);
|
||||
assert!(
|
||||
replayed.tool_calls["wait-call"]
|
||||
.raw_result_payload_id
|
||||
.is_some()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn test_invocation(
|
||||
session: Arc<crate::codex::Session>,
|
||||
turn: Arc<crate::codex::TurnContext>,
|
||||
call_id: &str,
|
||||
tool_name: &str,
|
||||
source: ToolCallSource,
|
||||
arguments: &str,
|
||||
) -> ToolInvocation {
|
||||
ToolInvocation {
|
||||
session,
|
||||
turn,
|
||||
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: call_id.to_string(),
|
||||
tool_name: codex_tools::ToolName::plain(tool_name),
|
||||
source,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: arguments.to_string(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn attach_test_trace(
|
||||
session: &mut crate::codex::Session,
|
||||
turn: &crate::codex::TurnContext,
|
||||
root: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
let thread_id = session.conversation_id;
|
||||
let recorder = RolloutTraceRecorder::create_in_root_for_test(
|
||||
root,
|
||||
thread_id,
|
||||
ThreadStartedTraceMetadata {
|
||||
thread_id: thread_id.to_string(),
|
||||
agent_path: "/root".to_string(),
|
||||
task_name: None,
|
||||
nickname: None,
|
||||
agent_role: None,
|
||||
session_source: SessionSource::Exec,
|
||||
cwd: PathBuf::from("/workspace"),
|
||||
rollout_path: None,
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
approval_policy: "never".to_string(),
|
||||
sandbox_policy: "danger-full-access".to_string(),
|
||||
},
|
||||
)?;
|
||||
recorder.record_codex_turn_event(
|
||||
thread_id.to_string(),
|
||||
&turn.sub_id,
|
||||
&EventMsg::TurnStarted(TurnStartedEvent {
|
||||
turn_id: turn.sub_id.clone(),
|
||||
started_at: None,
|
||||
model_context_window: None,
|
||||
collaboration_mode_kind: ModeKind::default(),
|
||||
}),
|
||||
);
|
||||
session.services.rollout_trace = Some(recorder);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn single_bundle_dir(root: &Path) -> anyhow::Result<PathBuf> {
|
||||
let mut entries = fs::read_dir(root)?
|
||||
.map(|entry| entry.map(|entry| entry.path()))
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
entries.sort();
|
||||
assert_eq!(entries.len(), 1);
|
||||
Ok(entries.remove(0))
|
||||
}
|
||||
|
||||
@@ -271,7 +271,7 @@ impl ToolRouter {
|
||||
|
||||
let direct_js_repl_call = tool_name.namespace.is_none()
|
||||
&& matches!(tool_name.name.as_str(), "js_repl" | "js_repl_reset");
|
||||
if source == ToolCallSource::Direct
|
||||
if matches!(source, ToolCallSource::Direct)
|
||||
&& turn.tools_config.js_repl_tools_only
|
||||
&& !direct_js_repl_call
|
||||
{
|
||||
@@ -287,6 +287,7 @@ impl ToolRouter {
|
||||
tracker,
|
||||
call_id,
|
||||
tool_name,
|
||||
source,
|
||||
payload,
|
||||
};
|
||||
|
||||
|
||||
6
codex-rs/rollout-trace/BUILD.bazel
Normal file
6
codex-rs/rollout-trace/BUILD.bazel
Normal file
@@ -0,0 +1,6 @@
|
||||
load("//:defs.bzl", "codex_rust_crate")
|
||||
|
||||
codex_rust_crate(
|
||||
name = "rollout-trace",
|
||||
crate_name = "codex_rollout_trace",
|
||||
)
|
||||
23
codex-rs/rollout-trace/Cargo.toml
Normal file
23
codex-rs/rollout-trace/Cargo.toml
Normal file
@@ -0,0 +1,23 @@
|
||||
[package]
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
name = "codex-rollout-trace"
|
||||
version.workspace = true
|
||||
|
||||
[lib]
|
||||
doctest = false
|
||||
name = "codex_rollout_trace"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
200
codex-rs/rollout-trace/README.md
Normal file
200
codex-rs/rollout-trace/README.md
Normal file
@@ -0,0 +1,200 @@
|
||||
# Rollout Trace
|
||||
|
||||
Rollout tracing is an opt-in diagnostic path for understanding what happened
|
||||
during a Codex session. It records raw runtime evidence into a local bundle, then
|
||||
replays that bundle into a semantic graph that a debugger or UI can inspect.
|
||||
|
||||
The key design choice is: **observe first, interpret later**.
|
||||
|
||||
Hot-path Codex code does not try to build the final graph while the session is
|
||||
running. It writes ordered raw events and payload references. The offline reducer
|
||||
then decides which events became model-visible conversation, which events were
|
||||
runtime work, and how information moved between threads, tools, code cells, and
|
||||
terminal sessions.
|
||||
|
||||
## What This Gives Us
|
||||
|
||||
Rollout traces make failures debuggable when the normal transcript is not enough.
|
||||
They preserve enough evidence to answer questions like:
|
||||
|
||||
- Which model request produced this tool call?
|
||||
- Did this output come from the model-visible transcript, a code-mode runtime
|
||||
value, a terminal operation, or an agent notification?
|
||||
- Which code-mode `exec` cell issued a nested tool call?
|
||||
- Which terminal operation created or reused a running process?
|
||||
- Which multi-agent v2 tool call spawned, messaged, received from, or closed a
|
||||
child thread?
|
||||
|
||||
The reduced `state.json` is intentionally not just a transcript. It is a graph of
|
||||
model-visible conversation plus the runtime objects that explain how Codex got
|
||||
there.
|
||||
|
||||
## System Shape
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
subgraph Runtime["codex-core runtime"]
|
||||
Protocol["protocol lifecycle\nthread start/end, turn start/end"]
|
||||
Inference["inference + compaction\nrequests, responses, checkpoints"]
|
||||
Tools["tool dispatch\ndirect model tools + code-mode nested tools"]
|
||||
CodeMode["code-mode runtime\nexec cells, yields, waits, termination"]
|
||||
Terminal["terminal runtime\nexec_command / write_stdin operations"]
|
||||
Agents["multi_agent_v2\nspawn, task delivery, result, close"]
|
||||
end
|
||||
|
||||
Recorder["RolloutTraceRecorder\nthin best-effort producer"]
|
||||
Writer["TraceWriter\nassigns seq and writes payloads before events"]
|
||||
|
||||
subgraph Bundle["trace bundle"]
|
||||
Manifest["manifest.json\ntrace_id, rollout_id, root_thread_id"]
|
||||
Events["trace.jsonl\nordered raw event spine"]
|
||||
Payloads["payloads/*.json\nlarge raw evidence"]
|
||||
end
|
||||
|
||||
Reducer["replay_bundle\ndeterministic offline reducer"]
|
||||
|
||||
subgraph State["state.json"]
|
||||
Threads["threads + turns"]
|
||||
Conversation["conversation_items\nwhat the model saw"]
|
||||
RuntimeObjects["inference_calls, tool_calls,\ncode_cells, terminals, compactions"]
|
||||
Edges["interaction_edges\nspawn, task, result, close"]
|
||||
RawRefs["raw_payload refs"]
|
||||
end
|
||||
|
||||
Protocol --> Recorder
|
||||
Inference --> Recorder
|
||||
Tools --> Recorder
|
||||
CodeMode --> Recorder
|
||||
Terminal --> Recorder
|
||||
Agents --> Recorder
|
||||
|
||||
Recorder --> Writer
|
||||
Writer --> Manifest
|
||||
Writer --> Payloads
|
||||
Writer --> Events
|
||||
|
||||
Manifest --> Reducer
|
||||
Events --> Reducer
|
||||
Payloads --> Reducer
|
||||
|
||||
Reducer --> Threads
|
||||
Reducer --> Conversation
|
||||
Reducer --> RuntimeObjects
|
||||
Reducer --> Edges
|
||||
Reducer --> RawRefs
|
||||
```
|
||||
|
||||
The recorder is deliberately small. It is enabled by `CODEX_ROLLOUT_TRACE_ROOT`
|
||||
and must never make a Codex session fail just because tracing failed. Core emits
|
||||
raw observations; this crate owns the bundle schema, writer API, and reducer.
|
||||
|
||||
## Bundle Layout
|
||||
|
||||
A trace bundle contains:
|
||||
|
||||
- `manifest.json`: trace identity and bundle metadata.
|
||||
- `trace.jsonl`: append-only raw events ordered by writer-assigned `seq`.
|
||||
- `payloads/*.json`: raw requests, responses, tool inputs/results, runtime
|
||||
events, terminal output, compaction data, and protocol snapshots.
|
||||
- `state.json`: optional reducer output written by `codex debug trace-reduce`.
|
||||
|
||||
`trace_id` identifies this diagnostic artifact. `rollout_id` identifies the
|
||||
Codex rollout/session being observed. Keeping those separate lets us reason about
|
||||
the stored trace without confusing it with the product-level session identity.
|
||||
|
||||
To reduce a bundle:
|
||||
|
||||
```bash
|
||||
codex debug trace-reduce <trace-bundle>
|
||||
```
|
||||
|
||||
By default this writes `<trace-bundle>/state.json`.
|
||||
|
||||
## Raw Evidence vs Reduced Graph
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Model["model-visible payloads\nrequests and response output items"]
|
||||
Runtime["runtime observations\ntool dispatch, terminal output, code-mode JSON"]
|
||||
RawPayloads["payloads/*.json\nexact evidence"]
|
||||
Reducer["reducer"]
|
||||
Conversation["ConversationItem\nwhat the model saw"]
|
||||
ToolCall["ToolCall\nruntime tool boundary"]
|
||||
CodeCell["CodeCell\nmodel-authored exec cell"]
|
||||
TerminalOperation["TerminalOperation\ncommand/write/poll"]
|
||||
InteractionEdge["InteractionEdge\ninformation flow"]
|
||||
|
||||
Model --> RawPayloads
|
||||
Runtime --> RawPayloads
|
||||
RawPayloads --> Reducer
|
||||
|
||||
Reducer --> Conversation
|
||||
Reducer --> ToolCall
|
||||
Reducer --> CodeCell
|
||||
Reducer --> TerminalOperation
|
||||
Reducer --> InteractionEdge
|
||||
|
||||
CodeCell --> ToolCall
|
||||
ToolCall --> TerminalOperation
|
||||
ToolCall --> InteractionEdge
|
||||
Conversation --> InteractionEdge
|
||||
```
|
||||
|
||||
This distinction is the reason the model has both raw payload references and
|
||||
semantic objects. A code-mode nested tool call, for example, has JSON input and
|
||||
output at the JavaScript runtime boundary, but the model-visible transcript only
|
||||
contains the surrounding `exec` custom tool call and its eventual output.
|
||||
|
||||
The reducer keeps those facts separate:
|
||||
|
||||
- `ConversationItem` records what appeared in model-facing requests/responses.
|
||||
- `ToolCall`, `CodeCell`, `TerminalOperation`, `InferenceCall`, and
|
||||
`Compaction` record runtime/debug boundaries.
|
||||
- `InteractionEdge` records information flow between objects, such as a
|
||||
`spawn_agent` tool call delivering a task into a child thread.
|
||||
- `RawPayloadRef` points back to exact evidence when a viewer needs more detail
|
||||
than the reduced graph stores inline.
|
||||
|
||||
## Multi-Agent v2
|
||||
|
||||
Multi-agent v2 child threads share the root trace writer. That means one root
|
||||
bundle reduces into one graph containing the parent thread, child threads, and
|
||||
the edges between them.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
RootTool["root ToolCall\nspawn_agent / followup_task / send_message"]
|
||||
ChildInput["child ConversationItem\ninjected task/message"]
|
||||
ChildThread["child AgentThread"]
|
||||
ChildResult["child assistant ConversationItem\nresult message"]
|
||||
RootNotice["root ConversationItem\nsubagent notification"]
|
||||
CloseTool["root ToolCall\nclose_agent"]
|
||||
TargetThread["target AgentThread"]
|
||||
|
||||
RootTool -- "spawn/task edge" --> ChildInput
|
||||
ChildInput --> ChildThread
|
||||
ChildThread --> ChildResult
|
||||
ChildResult -- "agent_result edge" --> RootNotice
|
||||
CloseTool -- "close_agent edge" --> TargetThread
|
||||
```
|
||||
|
||||
Top-level independent threads still get independent bundles. Spawned child
|
||||
threads are different: they are part of the same rollout tree, so they belong in
|
||||
the same raw event log, payload directory, and reduced `state.json`.
|
||||
|
||||
## Reducer Invariants
|
||||
|
||||
The reducer is strict where the raw evidence should be self-consistent:
|
||||
|
||||
- raw events are replayed in `seq` order;
|
||||
- payload files must exist before events refer to them;
|
||||
- reduced object IDs are stable within one replay;
|
||||
- runtime events may be queued until the model-visible source or delivery target
|
||||
has been observed;
|
||||
- model-visible conversation is derived from model-facing payloads, not from
|
||||
runtime convenience output;
|
||||
- runtime payloads are evidence, not proof that the model saw the same bytes.
|
||||
|
||||
Those invariants let the reduced graph stay small while preserving a path back
|
||||
to the original evidence whenever a debugger needs to explain why an object or
|
||||
edge exists.
|
||||
49
codex-rs/rollout-trace/src/bundle.rs
Normal file
49
codex-rs/rollout-trace/src/bundle.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
//! Trace bundle manifest and local layout constants.
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::model::AgentThreadId;
|
||||
|
||||
pub(crate) const MANIFEST_FILE_NAME: &str = "manifest.json";
|
||||
pub(crate) const RAW_EVENT_LOG_FILE_NAME: &str = "trace.jsonl";
|
||||
pub(crate) const PAYLOADS_DIR_NAME: &str = "payloads";
|
||||
/// Conventional file name for a reducer-written `RolloutTrace` cache.
|
||||
pub const REDUCED_STATE_FILE_NAME: &str = "state.json";
|
||||
pub(crate) const TRACE_MANIFEST_SCHEMA_VERSION: u32 = 1;
|
||||
pub(crate) const REDUCED_TRACE_SCHEMA_VERSION: u32 = 1;
|
||||
|
||||
/// Manifest stored at the root of a trace bundle.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub(crate) struct TraceBundleManifest {
|
||||
pub(crate) schema_version: u32,
|
||||
pub(crate) trace_id: String,
|
||||
pub(crate) rollout_id: String,
|
||||
/// Root thread for the recorded rollout. Replay should fail rather than
|
||||
/// inventing a placeholder, because every reduced object is scoped back to
|
||||
/// this thread tree.
|
||||
pub(crate) root_thread_id: AgentThreadId,
|
||||
pub(crate) started_at_unix_ms: i64,
|
||||
pub(crate) raw_event_log: String,
|
||||
pub(crate) payloads_dir: String,
|
||||
}
|
||||
|
||||
impl TraceBundleManifest {
|
||||
/// Builds a manifest that uses the standard local bundle layout.
|
||||
pub(crate) fn new(
|
||||
trace_id: String,
|
||||
rollout_id: String,
|
||||
root_thread_id: AgentThreadId,
|
||||
started_at_unix_ms: i64,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema_version: TRACE_MANIFEST_SCHEMA_VERSION,
|
||||
trace_id,
|
||||
rollout_id,
|
||||
root_thread_id,
|
||||
started_at_unix_ms,
|
||||
raw_event_log: RAW_EVENT_LOG_FILE_NAME.to_string(),
|
||||
payloads_dir: PAYLOADS_DIR_NAME.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
225
codex-rs/rollout-trace/src/compaction.rs
Normal file
225
codex-rs/rollout-trace/src/compaction.rs
Normal file
@@ -0,0 +1,225 @@
|
||||
//! Hot-path helpers for recording upstream remote compaction attempts.
|
||||
//!
|
||||
//! Remote compaction is a model-facing request with a different semantic role
|
||||
//! from normal sampling. Keeping the no-op capable trace handle in this crate
|
||||
//! lets `codex-core` record exact endpoint payloads without owning trace schema
|
||||
//! details.
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value as JsonValue;
|
||||
|
||||
use crate::inference::trace_response_item_json;
|
||||
use crate::model::AgentThreadId;
|
||||
use crate::model::CodexTurnId;
|
||||
use crate::model::CompactionId;
|
||||
use crate::model::CompactionRequestId;
|
||||
use crate::payload::RawPayloadKind;
|
||||
use crate::raw_event::RawTraceEventContext;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
use crate::writer::TraceWriter;
|
||||
|
||||
static NEXT_COMPACTION_REQUEST: AtomicU64 = AtomicU64::new(1);
|
||||
|
||||
/// Turn-local remote compaction tracing context.
|
||||
///
|
||||
/// A compaction can retry its upstream request before installing one checkpoint. The context
|
||||
/// owns the stable checkpoint ID; each request attempt gets a separate request ID.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CompactionTraceContext {
|
||||
state: CompactionTraceContextState,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum CompactionTraceContextState {
|
||||
Disabled,
|
||||
Enabled(EnabledCompactionTraceContext),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct EnabledCompactionTraceContext {
|
||||
writer: Arc<TraceWriter>,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: CodexTurnId,
|
||||
compaction_id: CompactionId,
|
||||
model: String,
|
||||
provider_name: String,
|
||||
}
|
||||
|
||||
/// One upstream request attempt made while computing a compaction checkpoint.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CompactionTraceAttempt {
|
||||
state: CompactionTraceAttemptState,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum CompactionTraceAttemptState {
|
||||
Disabled,
|
||||
Enabled(EnabledCompactionTraceAttempt),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct EnabledCompactionTraceAttempt {
|
||||
context: EnabledCompactionTraceContext,
|
||||
compaction_request_id: CompactionRequestId,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct TracedCompactionCompleted {
|
||||
output_items: Vec<JsonValue>,
|
||||
}
|
||||
|
||||
impl CompactionTraceContext {
|
||||
/// Builds a context that accepts trace calls and records nothing.
|
||||
pub fn disabled() -> Self {
|
||||
Self {
|
||||
state: CompactionTraceContextState::Disabled,
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds an enabled context for upstream attempts that compute one checkpoint.
|
||||
pub fn enabled(
|
||||
writer: Arc<TraceWriter>,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: CodexTurnId,
|
||||
compaction_id: CompactionId,
|
||||
model: String,
|
||||
provider_name: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
state: CompactionTraceContextState::Enabled(EnabledCompactionTraceContext {
|
||||
writer,
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
compaction_id,
|
||||
model,
|
||||
provider_name,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Starts a new upstream attempt and records the exact compact endpoint request.
|
||||
pub fn start_attempt(&self, request: &impl Serialize) -> CompactionTraceAttempt {
|
||||
let CompactionTraceContextState::Enabled(context) = &self.state else {
|
||||
return CompactionTraceAttempt::disabled();
|
||||
};
|
||||
|
||||
let attempt = CompactionTraceAttempt {
|
||||
state: CompactionTraceAttemptState::Enabled(EnabledCompactionTraceAttempt {
|
||||
context: context.clone(),
|
||||
compaction_request_id: next_compaction_request_id(),
|
||||
}),
|
||||
};
|
||||
attempt.record_started(request);
|
||||
attempt
|
||||
}
|
||||
}
|
||||
|
||||
impl CompactionTraceAttempt {
|
||||
/// Builds an attempt that records nothing.
|
||||
fn disabled() -> Self {
|
||||
Self {
|
||||
state: CompactionTraceAttemptState::Disabled,
|
||||
}
|
||||
}
|
||||
|
||||
fn record_started(&self, request: &impl Serialize) {
|
||||
let CompactionTraceAttemptState::Enabled(attempt) = &self.state else {
|
||||
return;
|
||||
};
|
||||
let Some(request_payload) = write_json_payload_best_effort(
|
||||
&attempt.context.writer,
|
||||
RawPayloadKind::CompactionRequest,
|
||||
request,
|
||||
) else {
|
||||
return;
|
||||
};
|
||||
|
||||
append_with_context_best_effort(
|
||||
&attempt.context,
|
||||
RawTraceEventPayload::CompactionRequestStarted {
|
||||
compaction_id: attempt.context.compaction_id.clone(),
|
||||
compaction_request_id: attempt.compaction_request_id.clone(),
|
||||
thread_id: attempt.context.thread_id.clone(),
|
||||
codex_turn_id: attempt.context.codex_turn_id.clone(),
|
||||
model: attempt.context.model.clone(),
|
||||
provider_name: attempt.context.provider_name.clone(),
|
||||
request_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Records the non-streaming compact endpoint response payload.
|
||||
///
|
||||
/// Compaction responses use the same response-item preservation rules as
|
||||
/// inference streams: traces are evidence, while normal ResponseItem
|
||||
/// serialization is shaped for future request construction.
|
||||
pub fn record_completed(&self, output_items: &[ResponseItem]) {
|
||||
let response_payload = TracedCompactionCompleted {
|
||||
output_items: output_items.iter().map(trace_response_item_json).collect(),
|
||||
};
|
||||
let CompactionTraceAttemptState::Enabled(attempt) = &self.state else {
|
||||
return;
|
||||
};
|
||||
let Some(response_payload) = write_json_payload_best_effort(
|
||||
&attempt.context.writer,
|
||||
RawPayloadKind::CompactionResponse,
|
||||
&response_payload,
|
||||
) else {
|
||||
return;
|
||||
};
|
||||
|
||||
append_with_context_best_effort(
|
||||
&attempt.context,
|
||||
RawTraceEventPayload::CompactionRequestCompleted {
|
||||
compaction_id: attempt.context.compaction_id.clone(),
|
||||
compaction_request_id: attempt.compaction_request_id.clone(),
|
||||
response_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Records pre-response failures from the compact endpoint.
|
||||
pub fn record_failed(&self, error: impl Display) {
|
||||
let CompactionTraceAttemptState::Enabled(attempt) = &self.state else {
|
||||
return;
|
||||
};
|
||||
append_with_context_best_effort(
|
||||
&attempt.context,
|
||||
RawTraceEventPayload::CompactionRequestFailed {
|
||||
compaction_id: attempt.context.compaction_id.clone(),
|
||||
compaction_request_id: attempt.compaction_request_id.clone(),
|
||||
error: error.to_string(),
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn next_compaction_request_id() -> CompactionRequestId {
|
||||
let ordinal = NEXT_COMPACTION_REQUEST.fetch_add(1, Ordering::Relaxed);
|
||||
format!("compaction_request:{ordinal}")
|
||||
}
|
||||
|
||||
fn write_json_payload_best_effort(
|
||||
writer: &TraceWriter,
|
||||
kind: RawPayloadKind,
|
||||
payload: &impl Serialize,
|
||||
) -> Option<crate::RawPayloadRef> {
|
||||
writer.write_json_payload(kind, payload).ok()
|
||||
}
|
||||
|
||||
fn append_with_context_best_effort(
|
||||
context: &EnabledCompactionTraceContext,
|
||||
payload: RawTraceEventPayload,
|
||||
) {
|
||||
let event_context = RawTraceEventContext {
|
||||
thread_id: Some(context.thread_id.clone()),
|
||||
codex_turn_id: Some(context.codex_turn_id.clone()),
|
||||
};
|
||||
let _ = context.writer.append_with_context(event_context, payload);
|
||||
}
|
||||
369
codex-rs/rollout-trace/src/inference.rs
Normal file
369
codex-rs/rollout-trace/src/inference.rs
Normal file
@@ -0,0 +1,369 @@
|
||||
//! Hot-path helpers for recording upstream inference attempts.
|
||||
//!
|
||||
//! The model client should not need to know whether rollout tracing is enabled.
|
||||
//! A disabled context records nothing, which keeps one-shot HTTP calls,
|
||||
//! WebSocket reuse, and retry/fallback attempts on the same code path.
|
||||
|
||||
use std::fmt::Display;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicU64;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::protocol::TokenUsage;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value as JsonValue;
|
||||
|
||||
use crate::model::AgentThreadId;
|
||||
use crate::model::CodexTurnId;
|
||||
use crate::model::InferenceCallId;
|
||||
use crate::payload::RawPayloadKind;
|
||||
use crate::raw_event::RawTraceEventContext;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
use crate::writer::TraceWriter;
|
||||
|
||||
static NEXT_INFERENCE_ATTEMPT: AtomicU64 = AtomicU64::new(1);
|
||||
|
||||
/// Turn-local inference tracing context.
|
||||
///
|
||||
/// This is intentionally a no-op capable handle instead of an `Option` at each
|
||||
/// transport callsite. Whether tracing is enabled is a session concern; retry,
|
||||
/// fallback, and stream mapping code should always be able to say what happened
|
||||
/// without first branching on trace availability.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InferenceTraceContext {
|
||||
state: InferenceTraceContextState,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum InferenceTraceContextState {
|
||||
Disabled,
|
||||
Enabled(EnabledInferenceTraceContext),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct EnabledInferenceTraceContext {
|
||||
writer: Arc<TraceWriter>,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: CodexTurnId,
|
||||
model: String,
|
||||
provider_name: String,
|
||||
}
|
||||
|
||||
/// One concrete upstream request attempt.
|
||||
///
|
||||
/// A Codex turn can create multiple attempts when auth recovery retries the
|
||||
/// HTTP request or WebSocket setup falls back to HTTP. Completion is often
|
||||
/// observed after the client returns the response stream, so attempts are
|
||||
/// cloneable and self-contained.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InferenceTraceAttempt {
|
||||
state: InferenceTraceAttemptState,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum InferenceTraceAttemptState {
|
||||
Disabled,
|
||||
Enabled(EnabledInferenceTraceAttempt),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct EnabledInferenceTraceAttempt {
|
||||
context: EnabledInferenceTraceContext,
|
||||
inference_call_id: InferenceCallId,
|
||||
}
|
||||
|
||||
/// Non-delta response payload saved when a traced inference stream completes.
|
||||
///
|
||||
/// We intentionally record completed output items instead of every stream delta
|
||||
/// here. The raw stream can be added later as a separate payload class; this
|
||||
/// response summary gives the reducer stable response identity, usage, and
|
||||
/// model-visible output without duplicating high-volume text deltas.
|
||||
#[derive(Serialize)]
|
||||
struct TracedResponseStreamCompleted<'a> {
|
||||
response_id: &'a str,
|
||||
token_usage: &'a Option<TokenUsage>,
|
||||
output_items: Vec<JsonValue>,
|
||||
}
|
||||
|
||||
impl InferenceTraceContext {
|
||||
/// Builds a context that accepts trace calls and records nothing.
|
||||
pub fn disabled() -> Self {
|
||||
Self {
|
||||
state: InferenceTraceContextState::Disabled,
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds an enabled context for all upstream attempts made by one Codex turn.
|
||||
pub fn enabled(
|
||||
writer: Arc<TraceWriter>,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: CodexTurnId,
|
||||
model: String,
|
||||
provider_name: String,
|
||||
) -> Self {
|
||||
Self {
|
||||
state: InferenceTraceContextState::Enabled(EnabledInferenceTraceContext {
|
||||
writer,
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
model,
|
||||
provider_name,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Starts a new attempt after the concrete provider request has been built.
|
||||
pub fn start_attempt(&self) -> InferenceTraceAttempt {
|
||||
let InferenceTraceContextState::Enabled(context) = &self.state else {
|
||||
return InferenceTraceAttempt::disabled();
|
||||
};
|
||||
|
||||
InferenceTraceAttempt {
|
||||
state: InferenceTraceAttemptState::Enabled(EnabledInferenceTraceAttempt {
|
||||
context: context.clone(),
|
||||
inference_call_id: next_inference_call_id(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl InferenceTraceAttempt {
|
||||
/// Builds an attempt that records nothing.
|
||||
pub fn disabled() -> Self {
|
||||
Self {
|
||||
state: InferenceTraceAttemptState::Disabled,
|
||||
}
|
||||
}
|
||||
|
||||
/// Records the exact request object about to be sent to the model provider.
|
||||
pub fn record_started(&self, request: &impl Serialize) {
|
||||
let InferenceTraceAttemptState::Enabled(attempt) = &self.state else {
|
||||
return;
|
||||
};
|
||||
let Some(request_payload) = write_json_payload_best_effort(
|
||||
&attempt.context.writer,
|
||||
RawPayloadKind::InferenceRequest,
|
||||
request,
|
||||
) else {
|
||||
return;
|
||||
};
|
||||
|
||||
append_with_context_best_effort(
|
||||
&attempt.context,
|
||||
RawTraceEventPayload::InferenceStarted {
|
||||
inference_call_id: attempt.inference_call_id.clone(),
|
||||
thread_id: attempt.context.thread_id.clone(),
|
||||
codex_turn_id: attempt.context.codex_turn_id.clone(),
|
||||
model: attempt.context.model.clone(),
|
||||
provider_name: attempt.context.provider_name.clone(),
|
||||
request_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Records a bounded, non-streaming summary of the completed response stream.
|
||||
///
|
||||
/// The caller passes protocol-native response items so this crate owns the
|
||||
/// trace-specific serialization rules. That keeps codex-core focused on
|
||||
/// transport behavior while preserving trace evidence that normal request
|
||||
/// serialization intentionally omits.
|
||||
pub fn record_completed(
|
||||
&self,
|
||||
response_id: &str,
|
||||
token_usage: &Option<TokenUsage>,
|
||||
output_items: &[ResponseItem],
|
||||
) {
|
||||
let response_payload = TracedResponseStreamCompleted {
|
||||
response_id,
|
||||
token_usage,
|
||||
output_items: output_items.iter().map(trace_response_item_json).collect(),
|
||||
};
|
||||
let InferenceTraceAttemptState::Enabled(attempt) = &self.state else {
|
||||
return;
|
||||
};
|
||||
let Some(response_payload) = write_json_payload_best_effort(
|
||||
&attempt.context.writer,
|
||||
RawPayloadKind::InferenceResponse,
|
||||
&response_payload,
|
||||
) else {
|
||||
return;
|
||||
};
|
||||
|
||||
append_with_context_best_effort(
|
||||
&attempt.context,
|
||||
RawTraceEventPayload::InferenceCompleted {
|
||||
inference_call_id: attempt.inference_call_id.clone(),
|
||||
response_id: Some(response_id.to_string()),
|
||||
response_payload,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Records pre-response and mid-stream failures.
|
||||
pub fn record_failed(&self, error: impl Display) {
|
||||
let InferenceTraceAttemptState::Enabled(attempt) = &self.state else {
|
||||
return;
|
||||
};
|
||||
append_with_context_best_effort(
|
||||
&attempt.context,
|
||||
RawTraceEventPayload::InferenceFailed {
|
||||
inference_call_id: attempt.inference_call_id.clone(),
|
||||
error: error.to_string(),
|
||||
partial_response_payload: None,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Serializes a response item for trace evidence rather than future request construction.
|
||||
///
|
||||
/// The protocol serializer intentionally omits some readable reasoning content
|
||||
/// when shaping items for later model requests. Rollout traces need the item as
|
||||
/// Codex received it, so this helper restores that content in the raw payload.
|
||||
pub(crate) fn trace_response_item_json(item: &ResponseItem) -> JsonValue {
|
||||
let mut value = serde_json::to_value(item).unwrap_or_else(|err| {
|
||||
serde_json::json!({
|
||||
"serialization_error": err.to_string(),
|
||||
})
|
||||
});
|
||||
|
||||
if let ResponseItem::Reasoning {
|
||||
content: Some(content),
|
||||
..
|
||||
} = item
|
||||
&& let JsonValue::Object(object) = &mut value
|
||||
{
|
||||
object.insert(
|
||||
"content".to_string(),
|
||||
serde_json::to_value(content).unwrap_or_else(|err| {
|
||||
serde_json::json!({
|
||||
"serialization_error": err.to_string(),
|
||||
})
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
value
|
||||
}
|
||||
|
||||
fn next_inference_call_id() -> InferenceCallId {
|
||||
let ordinal = NEXT_INFERENCE_ATTEMPT.fetch_add(1, Ordering::Relaxed);
|
||||
format!("inference:{ordinal}")
|
||||
}
|
||||
|
||||
fn write_json_payload_best_effort(
|
||||
writer: &TraceWriter,
|
||||
kind: RawPayloadKind,
|
||||
payload: &impl Serialize,
|
||||
) -> Option<crate::RawPayloadRef> {
|
||||
writer.write_json_payload(kind, payload).ok()
|
||||
}
|
||||
|
||||
fn append_with_context_best_effort(
|
||||
context: &EnabledInferenceTraceContext,
|
||||
payload: RawTraceEventPayload,
|
||||
) {
|
||||
let event_context = RawTraceEventContext {
|
||||
thread_id: Some(context.thread_id.clone()),
|
||||
codex_turn_id: Some(context.codex_turn_id.clone()),
|
||||
};
|
||||
let _ = context.writer.append_with_context(event_context, payload);
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use codex_protocol::models::ReasoningItemContent;
    use codex_protocol::models::ReasoningItemReasoningSummary;
    use pretty_assertions::assert_eq;
    use serde_json::json;
    use tempfile::TempDir;

    use super::*;
    use crate::model::ExecutionStatus;
    use crate::replay_bundle;

    #[test]
    fn enabled_context_records_replayable_inference_attempt() -> anyhow::Result<()> {
        let bundle_dir = TempDir::new()?;
        let writer = Arc::new(TraceWriter::create(
            bundle_dir.path(),
            "trace-1".to_string(),
            "rollout-1".to_string(),
            "thread-root".to_string(),
        )?);

        // Seed the minimal thread/turn lifecycle events the reducer expects.
        writer.append(RawTraceEventPayload::ThreadStarted {
            thread_id: "thread-root".to_string(),
            agent_path: "/root".to_string(),
            metadata_payload: None,
        })?;
        writer.append(RawTraceEventPayload::CodexTurnStarted {
            codex_turn_id: "turn-1".to_string(),
            thread_id: "thread-root".to_string(),
        })?;

        let context = InferenceTraceContext::enabled(
            writer,
            "thread-root".to_string(),
            "turn-1".to_string(),
            "gpt-test".to_string(),
            "test-provider".to_string(),
        );

        let attempt = context.start_attempt();
        attempt.record_started(&json!({
            "model": "gpt-test",
            "input": [{
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "hello"}]
            }],
        }));
        attempt.record_completed("resp-1", &None, &[]);

        // Replay the bundle and check the single recorded call round-trips.
        let replayed = replay_bundle(bundle_dir.path())?;
        let inference = replayed
            .inference_calls
            .values()
            .next()
            .expect("recorded inference call");

        assert_eq!(replayed.inference_calls.len(), 1);
        assert_eq!(inference.thread_id, "thread-root");
        assert_eq!(inference.codex_turn_id, "turn-1");
        assert_eq!(inference.execution.status, ExecutionStatus::Completed);
        assert_eq!(inference.upstream_request_id, Some("resp-1".to_string()));
        assert_eq!(replayed.raw_payloads.len(), 2);

        Ok(())
    }

    #[test]
    fn traced_response_item_preserves_reasoning_content_omitted_by_normal_serializer() {
        let item = ResponseItem::Reasoning {
            id: "rs-1".to_string(),
            summary: vec![ReasoningItemReasoningSummary::SummaryText {
                text: "summary".to_string(),
            }],
            content: Some(vec![ReasoningItemContent::Text {
                text: "raw reasoning".to_string(),
            }]),
            encrypted_content: Some("encoded".to_string()),
        };

        let normal = serde_json::to_value(&item).expect("response item serializes");
        let traced = trace_response_item_json(&item);

        // The request-shaping serializer drops readable reasoning content...
        assert_eq!(normal.get("content"), None);
        // ...but the trace serializer restores it verbatim.
        assert_eq!(
            traced,
            json!({
                "type": "reasoning",
                "summary": [{"type": "summary_text", "text": "summary"}],
                "content": [{"type": "text", "text": "raw reasoning"}],
                "encrypted_content": "encoded",
            }),
        );
    }
}
|
||||
49
codex-rs/rollout-trace/src/lib.rs
Normal file
49
codex-rs/rollout-trace/src/lib.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
//! Trace bundle format, writer, and reducer for Codex rollouts.
|
||||
//!
|
||||
//! This crate owns the trace schema. Hot-path Codex code should depend on the
|
||||
//! small writer API here; semantic replay and viewer projections stay outside
|
||||
//! `codex-core`.
|
||||
//!
|
||||
//! See `README.md` for the system diagram and reducer model.
|
||||
|
||||
mod bundle;
|
||||
mod compaction;
|
||||
mod inference;
|
||||
mod model;
|
||||
mod payload;
|
||||
mod raw_event;
|
||||
mod reducer;
|
||||
mod writer;
|
||||
|
||||
/// Conventional reduced-state cache name written next to a raw trace bundle.
|
||||
pub use bundle::REDUCED_STATE_FILE_NAME;
|
||||
/// No-op-capable handle for recording remote-compaction requests.
|
||||
pub use compaction::CompactionTraceAttempt;
|
||||
/// Shared recorder context for a compaction checkpoint.
|
||||
pub use compaction::CompactionTraceContext;
|
||||
/// No-op-capable handle for recording one upstream inference attempt.
|
||||
pub use inference::InferenceTraceAttempt;
|
||||
/// Shared recorder context for inference attempts within one Codex turn.
|
||||
pub use inference::InferenceTraceContext;
|
||||
/// Public reduced trace model returned by replay.
|
||||
pub use model::*;
|
||||
/// Stable identifier for one raw payload inside a rollout bundle.
|
||||
pub use payload::RawPayloadId;
|
||||
/// Coarse role labels for raw payload files.
|
||||
pub use payload::RawPayloadKind;
|
||||
/// Reference to a raw request/response/log payload stored in the bundle.
|
||||
pub use payload::RawPayloadRef;
|
||||
/// Monotonic sequence number assigned by the raw trace writer.
|
||||
pub use raw_event::RawEventSeq;
|
||||
/// Runtime requester observed before semantic reduction.
|
||||
pub use raw_event::RawToolCallRequester;
|
||||
/// One append-only raw trace event from `trace.jsonl`.
|
||||
pub use raw_event::RawTraceEvent;
|
||||
/// Event-envelope context supplied by hot-path trace producers.
|
||||
pub use raw_event::RawTraceEventContext;
|
||||
/// Typed payload for one raw trace event.
|
||||
pub use raw_event::RawTraceEventPayload;
|
||||
/// Replay a raw trace bundle and write/read its reduced `RolloutTrace`.
|
||||
pub use reducer::replay_bundle;
|
||||
/// Append-only writer used by hot-path Codex instrumentation.
|
||||
pub use writer::TraceWriter;
|
||||
176
codex-rs/rollout-trace/src/model/conversation.rs
Normal file
176
codex-rs/rollout-trace/src/model/conversation.rs
Normal file
@@ -0,0 +1,176 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::payload::RawPayloadId;
|
||||
|
||||
use super::AgentThreadId;
|
||||
use super::CodeCellId;
|
||||
use super::CodexTurnId;
|
||||
use super::CompactionId;
|
||||
use super::ConversationItemId;
|
||||
use super::EdgeId;
|
||||
use super::InferenceCallId;
|
||||
use super::ModelVisibleCallId;
|
||||
use super::ToolCallId;
|
||||
use super::session::ExecutionWindow;
|
||||
|
||||
/// One logical transcript item or transcript boundary.
|
||||
///
|
||||
/// The reducer builds conversation items primarily from inference request and
|
||||
/// response payloads. Runtime objects can be listed in `produced_by`, but they
|
||||
/// must not rewrite what the item body says the model saw. Structural items,
|
||||
/// such as compaction markers, live in the same ordered list so conversation
|
||||
/// views can show where the live history changed.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ConversationItem {
|
||||
pub item_id: ConversationItemId,
|
||||
pub thread_id: AgentThreadId,
|
||||
/// Runtime activation that first introduced this item locally, when known.
|
||||
pub codex_turn_id: Option<CodexTurnId>,
|
||||
pub first_seen_at_unix_ms: i64,
|
||||
pub role: ConversationRole,
|
||||
/// Codex channel for assistant/tool content, when the item is channel-specific.
|
||||
pub channel: Option<ConversationChannel>,
|
||||
pub kind: ConversationItemKind,
|
||||
pub body: ConversationBody,
|
||||
/// Protocol/model `call_id` for function/custom tool call and output items.
|
||||
pub call_id: Option<ModelVisibleCallId>,
|
||||
/// Runtime or control-plane objects that caused this conversation item to exist.
|
||||
pub produced_by: Vec<ProducerRef>,
|
||||
}
|
||||
|
||||
/// Model-visible role assigned to a conversation item.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ConversationRole {
|
||||
System,
|
||||
Developer,
|
||||
User,
|
||||
Assistant,
|
||||
Tool,
|
||||
}
|
||||
|
||||
/// Codex channel for model-visible content.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ConversationChannel {
|
||||
Analysis,
|
||||
Commentary,
|
||||
Final,
|
||||
/// Remote compaction summaries are reintroduced as assistant summary-channel content.
|
||||
Summary,
|
||||
}
|
||||
|
||||
/// Responses item category after normalization into the reduced transcript.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ConversationItemKind {
|
||||
Message,
|
||||
Reasoning,
|
||||
FunctionCall,
|
||||
FunctionCallOutput,
|
||||
CustomToolCall,
|
||||
CustomToolCallOutput,
|
||||
/// Structural marker inserted where live history was replaced by compaction.
|
||||
CompactionMarker,
|
||||
}
|
||||
|
||||
/// Ordered content parts for a reduced conversation item.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ConversationBody {
|
||||
/// Renderable model-visible parts. Raw payload refs are used when the bytes
|
||||
/// are too large or too structured for the normal conversation path.
|
||||
pub parts: Vec<ConversationPart>,
|
||||
}
|
||||
|
||||
/// One model-visible part inside a conversation item.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum ConversationPart {
|
||||
Text {
|
||||
text: String,
|
||||
},
|
||||
/// A model-provided summary of content whose full form may also be present.
|
||||
///
|
||||
/// Reasoning summaries are not interchangeable with raw reasoning text:
|
||||
/// both can be present in one payload, and replay/debug tooling needs to
|
||||
/// preserve which representation the model actually returned.
|
||||
Summary {
|
||||
text: String,
|
||||
},
|
||||
/// Opaque model-visible content that is intentionally not decoded here.
|
||||
///
|
||||
/// Reasoning can be carried as `encrypted_content` with no readable text.
|
||||
/// Keeping that blob inline makes it part of item identity, unlike a raw
|
||||
/// payload reference whose ID changes every time the same item is replayed
|
||||
/// in a later inference request.
|
||||
Encoded {
|
||||
label: String,
|
||||
value: String,
|
||||
},
|
||||
/// Small JSON-ish body represented by a summary plus a raw ref.
|
||||
Json {
|
||||
summary: String,
|
||||
raw_payload_id: RawPayloadId,
|
||||
},
|
||||
Code {
|
||||
language: String,
|
||||
source: String,
|
||||
},
|
||||
/// Large or uncommon payload that should be lazy-loaded from details UI.
|
||||
PayloadRef {
|
||||
label: String,
|
||||
raw_payload_id: RawPayloadId,
|
||||
},
|
||||
}
|
||||
|
||||
/// Explanation for where a conversation item came from.
|
||||
///
|
||||
/// This is deliberately plural at the call site: a function output can be both
|
||||
/// model-visible conversation and the product of a runtime tool call.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum ProducerRef {
|
||||
UserInput,
|
||||
Inference { inference_call_id: InferenceCallId },
|
||||
Tool { tool_call_id: ToolCallId },
|
||||
CodeCell { code_cell_id: CodeCellId },
|
||||
InteractionEdge { edge_id: EdgeId },
|
||||
Compaction { compaction_id: CompactionId },
|
||||
Harness,
|
||||
}
|
||||
|
||||
/// One outbound inference request and its response metadata.
|
||||
///
|
||||
/// Full upstream request/response bodies live behind raw payload refs. The
|
||||
/// request/response item ID lists are the reduced, model-visible snapshot.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct InferenceCall {
|
||||
pub inference_call_id: InferenceCallId,
|
||||
pub thread_id: AgentThreadId,
|
||||
pub codex_turn_id: CodexTurnId,
|
||||
pub execution: ExecutionWindow,
|
||||
pub model: String,
|
||||
pub provider_name: String,
|
||||
/// Upstream request ID returned by HTTP/proxy/engine infrastructure.
|
||||
pub upstream_request_id: Option<String>,
|
||||
/// Complete ordered input snapshot sent with this request.
|
||||
pub request_item_ids: Vec<ConversationItemId>,
|
||||
/// Ordered output items produced by this response.
|
||||
pub response_item_ids: Vec<ConversationItemId>,
|
||||
/// Runtime tool calls whose model-visible call item came from this response.
|
||||
pub tool_call_ids_started_by_response: Vec<ToolCallId>,
|
||||
pub usage: Option<TokenUsage>,
|
||||
pub raw_request_payload_id: RawPayloadId,
|
||||
/// Full upstream response payload. `None` while running or after pre-stream failures.
|
||||
pub raw_response_payload_id: Option<RawPayloadId>,
|
||||
}
|
||||
|
||||
/// Token usage summary for one inference call.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct TokenUsage {
|
||||
pub input_tokens: u64,
|
||||
pub cached_input_tokens: u64,
|
||||
pub output_tokens: u64,
|
||||
pub reasoning_output_tokens: u64,
|
||||
}
|
||||
121
codex-rs/rollout-trace/src/model/mod.rs
Normal file
121
codex-rs/rollout-trace/src/model/mod.rs
Normal file
@@ -0,0 +1,121 @@
|
||||
//! Reduced rollout trace model.
|
||||
//!
|
||||
//! These types describe the deterministic replay output. They intentionally
|
||||
//! separate model-visible conversation from runtime/debug objects.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::payload::RawPayloadId;
|
||||
use crate::payload::RawPayloadRef;
|
||||
mod conversation;
|
||||
mod runtime;
|
||||
mod session;
|
||||
|
||||
pub use conversation::*;
|
||||
pub use runtime::*;
|
||||
pub use session::*;
|
||||
|
||||
/// Codex conversation/session UUID.
pub type AgentThreadId = String;
/// Stable multi-agent routing path such as `/root` or `/root/search_docs`.
pub type AgentPath = String;
/// Runtime submission/activation UUID. This is not a chat turn.
pub type CodexTurnId = String;
/// Reduced transcript item ID assigned by the trace reducer.
pub type ConversationItemId = String;
/// Local ID for one outbound upstream inference request.
pub type InferenceCallId = String;
/// Reducer-owned ID for one runtime tool-call object.
pub type ToolCallId = String;
/// Responses `call_id` / custom-tool call ID visible in inference payloads.
pub type ModelVisibleCallId = String;
/// Tool invocation ID assigned inside the code-mode JavaScript runtime.
pub type CodeModeRuntimeToolId = String;
/// Reducer-owned ID for one model-authored `exec` JavaScript cell.
pub type CodeCellId = String;
/// Process/session ID returned by Codex's terminal runtime.
pub type TerminalId = String;
/// Reducer-owned ID for one command/write/poll operation against a terminal.
pub type TerminalOperationId = String;
/// Reducer-owned ID for one installed conversation-history checkpoint.
pub type CompactionId = String;
/// Reducer-owned ID for one upstream request that computes a compaction.
pub type CompactionRequestId = String;
/// Reducer-owned ID for one information-flow edge.
pub type EdgeId = String;
/// Reducer-owned ID for request/log correlation metadata.
pub type CorrelationId = String;
|
||||
|
||||
/// Canonical reduced graph for one Codex rollout.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RolloutTrace {
|
||||
pub schema_version: u32,
|
||||
/// Unique identity for this trace capture.
|
||||
///
|
||||
/// `rollout_id` names the Codex rollout/session being observed. `trace_id`
|
||||
/// names the diagnostic artifact produced for that rollout, which keeps
|
||||
/// storage/replay identity separate from the product-level session identity.
|
||||
pub trace_id: String,
|
||||
/// CLI-visible rollout/run identity. Higher-level experiment/sample IDs wrap this object.
|
||||
pub rollout_id: String,
|
||||
pub started_at_unix_ms: i64,
|
||||
/// Wall-clock timestamp for terminal rollout status. `None` means running or partial trace.
|
||||
pub ended_at_unix_ms: Option<i64>,
|
||||
pub status: RolloutStatus,
|
||||
pub root_thread_id: AgentThreadId,
|
||||
pub threads: BTreeMap<AgentThreadId, AgentThread>,
|
||||
pub codex_turns: BTreeMap<CodexTurnId, CodexTurn>,
|
||||
pub conversation_items: BTreeMap<ConversationItemId, ConversationItem>,
|
||||
pub inference_calls: BTreeMap<InferenceCallId, InferenceCall>,
|
||||
/// Model-authored `exec` JavaScript cells keyed by reducer-owned cell ID.
|
||||
pub code_cells: BTreeMap<CodeCellId, CodeCell>,
|
||||
pub tool_calls: BTreeMap<ToolCallId, ToolCall>,
|
||||
/// Terminal runtime sessions keyed by process/session ID returned by the runtime.
|
||||
pub terminal_sessions: BTreeMap<TerminalId, TerminalSession>,
|
||||
/// Commands/writes/polls against terminals keyed by reducer-owned operation ID.
|
||||
pub terminal_operations: BTreeMap<TerminalOperationId, TerminalOperation>,
|
||||
/// Installed compaction checkpoints keyed by checkpoint ID.
|
||||
pub compactions: BTreeMap<CompactionId, Compaction>,
|
||||
/// Upstream remote compaction calls keyed by local request ID.
|
||||
pub compaction_requests: BTreeMap<CompactionRequestId, CompactionRequest>,
|
||||
/// Information-flow edges between threads, cells, tools, and runtime resources.
|
||||
pub interaction_edges: BTreeMap<EdgeId, InteractionEdge>,
|
||||
/// Raw JSON payloads keyed by raw-payload ID. Most point at files outside this object.
|
||||
pub raw_payloads: BTreeMap<RawPayloadId, RawPayloadRef>,
|
||||
}
|
||||
|
||||
impl RolloutTrace {
|
||||
/// Builds an empty reduced trace that a reducer can populate.
|
||||
pub(crate) fn new(
|
||||
schema_version: u32,
|
||||
trace_id: String,
|
||||
rollout_id: String,
|
||||
root_thread_id: AgentThreadId,
|
||||
started_at_unix_ms: i64,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema_version,
|
||||
trace_id,
|
||||
rollout_id,
|
||||
started_at_unix_ms,
|
||||
ended_at_unix_ms: None,
|
||||
status: RolloutStatus::Running,
|
||||
root_thread_id,
|
||||
threads: BTreeMap::new(),
|
||||
codex_turns: BTreeMap::new(),
|
||||
conversation_items: BTreeMap::new(),
|
||||
inference_calls: BTreeMap::new(),
|
||||
code_cells: BTreeMap::new(),
|
||||
tool_calls: BTreeMap::new(),
|
||||
terminal_sessions: BTreeMap::new(),
|
||||
terminal_operations: BTreeMap::new(),
|
||||
compactions: BTreeMap::new(),
|
||||
compaction_requests: BTreeMap::new(),
|
||||
interaction_edges: BTreeMap::new(),
|
||||
raw_payloads: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
331
codex-rs/rollout-trace/src/model/runtime.rs
Normal file
331
codex-rs/rollout-trace/src/model/runtime.rs
Normal file
@@ -0,0 +1,331 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::payload::RawPayloadId;
|
||||
use crate::raw_event::RawEventSeq;
|
||||
|
||||
use super::AgentPath;
|
||||
use super::AgentThreadId;
|
||||
use super::CodeCellId;
|
||||
use super::CodeModeRuntimeToolId;
|
||||
use super::CodexTurnId;
|
||||
use super::CompactionId;
|
||||
use super::CompactionRequestId;
|
||||
use super::ConversationItemId;
|
||||
use super::EdgeId;
|
||||
use super::ModelVisibleCallId;
|
||||
use super::TerminalId;
|
||||
use super::TerminalOperationId;
|
||||
use super::ToolCallId;
|
||||
use super::session::ExecutionWindow;
|
||||
|
||||
/// Runtime/debug object for one model-authored `exec` cell.
|
||||
///
|
||||
/// The JavaScript source and custom-tool outputs are still conversation items;
|
||||
/// this object tracks the code-mode runtime boundary and nested runtime work.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CodeCell {
|
||||
/// Reducer-owned graph id derived from the model-visible `exec` call id.
|
||||
/// Runtime cell ids are stored separately because they are only handles for
|
||||
/// later waits and nested code-mode tools.
|
||||
pub code_cell_id: CodeCellId,
|
||||
pub model_visible_call_id: ModelVisibleCallId,
|
||||
pub thread_id: AgentThreadId,
|
||||
pub codex_turn_id: CodexTurnId,
|
||||
/// Conversation item containing the model-authored JavaScript.
|
||||
pub source_item_id: ConversationItemId,
|
||||
pub output_item_ids: Vec<ConversationItemId>,
|
||||
/// Raw code-mode runtime/session id, useful when matching runtime payloads.
|
||||
pub runtime_cell_id: Option<String>,
|
||||
/// Full JS-cell runtime window; yielded cells can outlive the initial custom call.
|
||||
pub execution: ExecutionWindow,
|
||||
pub runtime_status: CodeCellRuntimeStatus,
|
||||
pub initial_response_at_unix_ms: Option<i64>,
|
||||
pub initial_response_seq: Option<RawEventSeq>,
|
||||
pub yielded_at_unix_ms: Option<i64>,
|
||||
pub yielded_seq: Option<RawEventSeq>,
|
||||
pub source_js: String,
|
||||
pub nested_tool_call_ids: Vec<ToolCallId>,
|
||||
pub wait_tool_call_ids: Vec<ToolCallId>,
|
||||
}
|
||||
|
||||
/// Code-mode runtime lifecycle.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum CodeCellRuntimeStatus {
|
||||
/// The `exec` request has been accepted but the runtime has not yet started user code.
|
||||
Starting,
|
||||
/// Runtime is executing JavaScript and has not yet yielded or terminated.
|
||||
Running,
|
||||
/// Initial `exec` returned while JavaScript kept running in the background.
|
||||
Yielded,
|
||||
/// Runtime reached a normal terminal result.
|
||||
Completed,
|
||||
/// Runtime reached an error terminal result.
|
||||
Failed,
|
||||
/// Runtime was explicitly terminated.
|
||||
Terminated,
|
||||
}
|
||||
|
||||
/// Installed conversation-history replacement boundary.
|
||||
///
|
||||
/// Duration-bearing upstream requests live in `CompactionRequest`. This object
|
||||
/// is the checkpoint where replacement history became the live thread history.
|
||||
/// The boundary marker and the model-visible summary are separate conversation
|
||||
/// items: the marker says where history was replaced, while the summary is part
|
||||
/// of `replacement_item_ids` when the compact endpoint returned one.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Compaction {
|
||||
pub compaction_id: CompactionId,
|
||||
pub thread_id: AgentThreadId,
|
||||
pub codex_turn_id: CodexTurnId,
|
||||
pub installed_at_unix_ms: i64,
|
||||
/// Structural conversation item marking where pre-compaction history ended.
|
||||
pub marker_item_id: ConversationItemId,
|
||||
/// Upstream compaction request attempts that contributed to this checkpoint.
|
||||
pub request_ids: Vec<CompactionRequestId>,
|
||||
/// Logical conversation items present immediately before replacement.
|
||||
pub input_item_ids: Vec<ConversationItemId>,
|
||||
/// Replacement conversation items installed by the checkpoint.
|
||||
pub replacement_item_ids: Vec<ConversationItemId>,
|
||||
}
|
||||
|
||||
/// One upstream remote request made while computing a compaction checkpoint.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CompactionRequest {
|
||||
pub compaction_request_id: CompactionRequestId,
|
||||
pub compaction_id: CompactionId,
|
||||
pub thread_id: AgentThreadId,
|
||||
pub codex_turn_id: CodexTurnId,
|
||||
pub execution: ExecutionWindow,
|
||||
pub model: String,
|
||||
pub provider_name: String,
|
||||
pub raw_request_payload_id: RawPayloadId,
|
||||
/// Full compaction response payload. `None` while running or after pre-response failures.
|
||||
pub raw_response_payload_id: Option<RawPayloadId>,
|
||||
}
|
||||
|
||||
/// Runtime operation requested by the model, a JS code cell, or Codex itself.
|
||||
///
|
||||
/// A `ToolCall` is not a chat transcript row. Model-visible call/output items
|
||||
/// link to it through `model_visible_*_item_ids`; runtime-only tools can have
|
||||
/// empty model-visible lists.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct ToolCall {
|
||||
pub tool_call_id: ToolCallId,
|
||||
/// Model-visible protocol call ID, if the model directly requested this tool.
|
||||
pub model_visible_call_id: Option<ModelVisibleCallId>,
|
||||
/// Code-mode runtime's internal tool invocation ID, if this call came from JS.
|
||||
pub code_mode_runtime_tool_id: Option<CodeModeRuntimeToolId>,
|
||||
pub thread_id: AgentThreadId,
|
||||
/// Runtime activation that started the tool. Background work may outlive this turn.
|
||||
pub started_by_codex_turn_id: Option<CodexTurnId>,
|
||||
pub execution: ExecutionWindow,
|
||||
pub requester: ToolCallRequester,
|
||||
pub kind: ToolCallKind,
|
||||
pub model_visible_call_item_ids: Vec<ConversationItemId>,
|
||||
pub model_visible_output_item_ids: Vec<ConversationItemId>,
|
||||
/// Terminal operation started by this tool, when the tool touched a terminal.
|
||||
pub terminal_operation_id: Option<TerminalOperationId>,
|
||||
pub summary: ToolCallSummary,
|
||||
/// Original invocation at the Codex tool boundary.
|
||||
///
|
||||
/// Direct model tools store the model's function/custom call payload here.
|
||||
/// Code-mode nested tools store the JSON call made by model-authored JS.
|
||||
/// Runtime protocol events are deliberately kept separate below because
|
||||
/// they describe how Codex executed the request, not what the caller sent.
|
||||
pub raw_invocation_payload_id: Option<RawPayloadId>,
|
||||
/// Result returned to the immediate requester.
|
||||
///
|
||||
/// For direct tools this is the tool output item returned to the model; for
|
||||
/// code-mode nested tools this is the value returned to JavaScript.
|
||||
pub raw_result_payload_id: Option<RawPayloadId>,
|
||||
/// Runtime/protocol payloads observed while executing the tool.
|
||||
///
|
||||
/// Examples include exec begin/end, patch begin/end, and MCP begin/end
|
||||
/// events. Reducers can use these to build richer runtime objects such as
|
||||
/// terminal operations without overwriting the canonical invocation/result.
|
||||
pub raw_runtime_payload_ids: Vec<RawPayloadId>,
|
||||
}
|
||||
|
||||
/// Requester of a runtime tool.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum ToolCallRequester {
|
||||
Model,
|
||||
/// Model-authored JavaScript requested the tool through code-mode.
|
||||
CodeCell {
|
||||
code_cell_id: CodeCellId,
|
||||
},
|
||||
}
|
||||
|
||||
/// Runtime tool category.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum ToolCallKind {
|
||||
ExecCommand,
|
||||
WriteStdin,
|
||||
ApplyPatch,
|
||||
Mcp {
|
||||
server: String,
|
||||
tool: String,
|
||||
},
|
||||
Web,
|
||||
ImageGeneration,
|
||||
SpawnAgent,
|
||||
AssignAgentTask,
|
||||
SendMessage,
|
||||
/// Multi-agent wait operation. Code-mode wait is modeled separately.
|
||||
WaitAgent,
|
||||
CloseAgent,
|
||||
Other {
|
||||
name: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Bounded card/list summary for a tool call.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum ToolCallSummary {
|
||||
/// Tool is summarized by its terminal operation.
|
||||
Terminal { operation_id: TerminalOperationId },
|
||||
Agent {
|
||||
target_agent_path: AgentPath,
|
||||
/// Task name/path segment when the operation creates or targets a task.
|
||||
task_name: Option<String>,
|
||||
message_preview: String,
|
||||
},
|
||||
WaitAgent {
|
||||
/// Wait target, when narrower than "any child".
|
||||
target_agent_path: Option<AgentPath>,
|
||||
timeout_ms: Option<u64>,
|
||||
},
|
||||
Generic {
|
||||
label: String,
|
||||
input_preview: Option<String>,
|
||||
output_preview: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Reusable terminal process/session returned by the runtime.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct TerminalSession {
|
||||
pub terminal_id: TerminalId,
|
||||
pub thread_id: AgentThreadId,
|
||||
pub created_by_operation_id: TerminalOperationId,
|
||||
pub operation_ids: Vec<TerminalOperationId>,
|
||||
/// Terminal lifetime. This can outlive the operation that created it.
|
||||
pub execution: ExecutionWindow,
|
||||
}
|
||||
|
||||
/// One command/write/poll operation against a terminal session.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct TerminalOperation {
|
||||
pub operation_id: TerminalOperationId,
|
||||
/// Runtime terminal/process ID. `None` is legal only while the operation that creates it is starting.
|
||||
pub terminal_id: Option<TerminalId>,
|
||||
pub tool_call_id: ToolCallId,
|
||||
pub kind: TerminalOperationKind,
|
||||
/// Operation execution window. This is not necessarily the terminal session lifetime.
|
||||
pub execution: ExecutionWindow,
|
||||
pub request: TerminalRequest,
|
||||
/// Runtime-observed terminal result. Model-visible output links through observations.
|
||||
pub result: Option<TerminalResult>,
|
||||
pub model_observations: Vec<TerminalModelObservation>,
|
||||
pub raw_payload_ids: Vec<RawPayloadId>,
|
||||
}
|
||||
|
||||
/// Terminal operation category.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum TerminalOperationKind {
|
||||
ExecCommand,
|
||||
WriteStdin,
|
||||
}
|
||||
|
||||
/// Terminal request summary.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum TerminalRequest {
|
||||
ExecCommand {
|
||||
command: Vec<String>,
|
||||
display_command: String,
|
||||
cwd: String,
|
||||
yield_time_ms: Option<u64>,
|
||||
max_output_tokens: Option<usize>,
|
||||
},
|
||||
/// Request to interact with an existing terminal.
|
||||
WriteStdin {
|
||||
/// Bytes/text sent to stdin. Empty string means poll/read without writing bytes.
|
||||
stdin: String,
|
||||
yield_time_ms: Option<u64>,
|
||||
max_output_tokens: Option<usize>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Terminal result observed by the runtime.
|
||||
///
|
||||
/// This is debugger/runtime output. It is not proof that the model saw the same
|
||||
/// bytes; link model-visible call/output items through `TerminalModelObservation`.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct TerminalResult {
|
||||
/// Process exit code. `None` if the process is still running or no exit status was produced.
|
||||
pub exit_code: Option<i32>,
|
||||
pub stdout: String,
|
||||
pub stderr: String,
|
||||
/// Tool runtime's formatted caller-facing output, when present.
|
||||
pub formatted_output: Option<String>,
|
||||
/// Token count before truncation, when the tool runtime reported it.
|
||||
pub original_token_count: Option<usize>,
|
||||
/// Streaming chunk ID, when this result was assembled from chunked terminal output.
|
||||
pub chunk_id: Option<String>,
|
||||
}
|
||||
|
||||
/// Conversation items that observed a terminal operation.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct TerminalModelObservation {
|
||||
pub call_item_ids: Vec<ConversationItemId>,
|
||||
pub output_item_ids: Vec<ConversationItemId>,
|
||||
pub source: TerminalObservationSource,
|
||||
}
|
||||
|
||||
/// Source of model-visible terminal observation.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum TerminalObservationSource {
|
||||
DirectToolCall,
|
||||
CodeCellOutput,
|
||||
}
|
||||
|
||||
/// Directed information-flow relationship between trace objects.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct InteractionEdge {
|
||||
pub edge_id: EdgeId,
|
||||
pub kind: InteractionEdgeKind,
|
||||
pub source: TraceAnchor,
|
||||
pub target: TraceAnchor,
|
||||
pub started_at_unix_ms: i64,
|
||||
pub ended_at_unix_ms: Option<i64>,
|
||||
pub carried_item_ids: Vec<ConversationItemId>,
|
||||
pub carried_raw_payload_ids: Vec<RawPayloadId>,
|
||||
}
|
||||
|
||||
/// Information-flow edge category.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum InteractionEdgeKind {
|
||||
SpawnAgent,
|
||||
AssignAgentTask,
|
||||
SendMessage,
|
||||
AgentResult,
|
||||
CloseAgent,
|
||||
}
|
||||
|
||||
/// Typed pointer to one stable reduced-trace object.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum TraceAnchor {
|
||||
ConversationItem { item_id: ConversationItemId },
|
||||
ToolCall { tool_call_id: ToolCallId },
|
||||
Thread { thread_id: AgentThreadId },
|
||||
}
|
||||
110
codex-rs/rollout-trace/src/model/session.rs
Normal file
110
codex-rs/rollout-trace/src/model/session.rs
Normal file
@@ -0,0 +1,110 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::raw_event::RawEventSeq;
|
||||
|
||||
use super::AgentPath;
|
||||
use super::AgentThreadId;
|
||||
use super::CodexTurnId;
|
||||
use super::ConversationItemId;
|
||||
use super::EdgeId;
|
||||
|
||||
/// Coarse terminal status for the rollout.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RolloutStatus {
|
||||
/// Writer has not seen a terminal rollout event.
|
||||
Running,
|
||||
/// Rollout ended normally.
|
||||
Completed,
|
||||
/// Rollout ended because an operation failed.
|
||||
Failed,
|
||||
/// Rollout was cancelled or otherwise stopped before normal completion.
|
||||
Aborted,
|
||||
}
|
||||
|
||||
/// One Codex thread/session participating in the rollout.
|
||||
///
|
||||
/// Threads are agents in the multi-agent sense, but the root interactive
|
||||
/// session is represented by the same object. Runtime objects live in top-level
|
||||
/// maps and point back to their owning thread; only transcript order is stored
|
||||
/// here because compaction/reconciliation makes it semantic.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct AgentThread {
|
||||
pub thread_id: AgentThreadId,
|
||||
/// Stable routing identity. Viewer/search should prefer this over nickname.
|
||||
pub agent_path: AgentPath,
|
||||
/// Presentation hint. It can collide and must not be used as identity.
|
||||
pub nickname: Option<String>,
|
||||
pub origin: AgentOrigin,
|
||||
/// Session lifecycle for this thread.
|
||||
///
|
||||
/// Child threads can end independently from the root rollout, for example
|
||||
/// after a parent calls `close_agent`. Keeping this on the thread prevents
|
||||
/// those shutdowns from being mistaken for whole-rollout completion.
|
||||
pub execution: ExecutionWindow,
|
||||
/// Configured model presentation hint. Individual inference calls carry the actual upstream model.
|
||||
pub default_model: Option<String>,
|
||||
/// Logical conversation items first observed for this thread, in transcript order.
|
||||
pub conversation_item_ids: Vec<ConversationItemId>,
|
||||
}
|
||||
|
||||
/// Provenance for a traced Codex thread.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum AgentOrigin {
|
||||
Root,
|
||||
Spawned {
|
||||
parent_thread_id: AgentThreadId,
|
||||
/// Interaction edge that carried the spawn task.
|
||||
spawn_edge_id: EdgeId,
|
||||
/// Stable path segment/task name selected by the parent/tool call.
|
||||
task_name: String,
|
||||
/// Selected agent role/type, for example `worker` or `explorer`.
|
||||
agent_role: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Runtime interval for a typed trace object.
|
||||
///
|
||||
/// Wall-clock timestamps are for display and latency. Sequence numbers are the
|
||||
/// causal ordering primitive and should be used to pair observations or break
|
||||
/// same-millisecond ties.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ExecutionWindow {
|
||||
pub started_at_unix_ms: i64,
|
||||
pub started_seq: RawEventSeq,
|
||||
pub ended_at_unix_ms: Option<i64>,
|
||||
pub ended_seq: Option<RawEventSeq>,
|
||||
pub status: ExecutionStatus,
|
||||
}
|
||||
|
||||
/// Coarse lifecycle status for a runtime object.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ExecutionStatus {
|
||||
/// Object is still live or the trace ended before its terminal event.
|
||||
Running,
|
||||
/// Object completed successfully.
|
||||
Completed,
|
||||
/// Object reached an error state.
|
||||
Failed,
|
||||
/// Object was cancelled by user/policy/runtime before completion.
|
||||
Cancelled,
|
||||
/// Object was aborted when its owner/runtime stopped.
|
||||
Aborted,
|
||||
}
|
||||
|
||||
/// One activation of the Codex runtime for one thread.
|
||||
///
|
||||
/// A Codex turn groups protocol/runtime work for one thread activation.
|
||||
/// It is not a user/assistant message pair; conversation belongs in
|
||||
/// `ConversationItem`.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CodexTurn {
|
||||
pub codex_turn_id: CodexTurnId,
|
||||
pub thread_id: AgentThreadId,
|
||||
pub execution: ExecutionWindow,
|
||||
/// Conversation items that directly triggered this activation, when known.
|
||||
pub input_item_ids: Vec<ConversationItemId>,
|
||||
}
|
||||
49
codex-rs/rollout-trace/src/payload.rs
Normal file
49
codex-rs/rollout-trace/src/payload.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
//! References to heavyweight trace payloads stored outside the reduced graph.
|
||||
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
/// Stable identifier for one raw payload inside a rollout bundle.
|
||||
pub type RawPayloadId = String;
|
||||
|
||||
/// Reference to a raw request/response/log payload.
|
||||
///
|
||||
/// `RolloutTrace` stores these references so normal timeline and conversation
|
||||
/// rendering does not require the browser or reducer output to inline every
|
||||
/// upstream request, tool response, or terminal log.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RawPayloadRef {
|
||||
pub raw_payload_id: RawPayloadId,
|
||||
/// Payload role. This lets details UI choose syntax highlighting and labels
|
||||
/// without opening the payload file first.
|
||||
pub kind: RawPayloadKind,
|
||||
/// Path relative to the trace bundle root.
|
||||
///
|
||||
/// The writer always materializes payloads as bundle-local files. Keeping
|
||||
/// this as a plain path avoids exposing storage modes we do not produce.
|
||||
pub path: String,
|
||||
}
|
||||
|
||||
/// Coarse role of a raw payload.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type", content = "value")]
|
||||
pub enum RawPayloadKind {
|
||||
InferenceRequest,
|
||||
/// Full upstream inference response or non-delta response stream summary.
|
||||
InferenceResponse,
|
||||
CompactionRequest,
|
||||
/// Trace-only checkpoint captured when processed replacement history is installed.
|
||||
CompactionCheckpoint,
|
||||
CompactionResponse,
|
||||
ToolInvocation,
|
||||
ToolResult,
|
||||
/// Raw runtime/protocol observation for an executing tool.
|
||||
ToolRuntimeEvent,
|
||||
/// Raw terminal runtime event or stream shard.
|
||||
TerminalRuntimeEvent,
|
||||
ProtocolEvent,
|
||||
/// One-shot metadata captured when a Codex session/thread starts.
|
||||
SessionMetadata,
|
||||
/// Runtime notification payload carried when a child agent reports back to its parent.
|
||||
AgentResult,
|
||||
}
|
||||
285
codex-rs/rollout-trace/src/raw_event.rs
Normal file
285
codex-rs/rollout-trace/src/raw_event.rs
Normal file
@@ -0,0 +1,285 @@
|
||||
//! Append-only raw trace events.
|
||||
|
||||
use crate::model::AgentThreadId;
|
||||
use crate::model::CodeCellRuntimeStatus;
|
||||
use crate::model::CodexTurnId;
|
||||
use crate::model::CompactionId;
|
||||
use crate::model::CompactionRequestId;
|
||||
use crate::model::EdgeId;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::InferenceCallId;
|
||||
use crate::model::ModelVisibleCallId;
|
||||
use crate::model::RolloutStatus;
|
||||
use crate::model::ToolCallId;
|
||||
use crate::model::ToolCallKind;
|
||||
use crate::model::ToolCallSummary;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
|
||||
/// Monotonic sequence number assigned by the raw trace writer.
|
||||
pub type RawEventSeq = u64;
|
||||
|
||||
/// Current raw event envelope schema version.
|
||||
pub(crate) const RAW_TRACE_EVENT_SCHEMA_VERSION: u32 = 1;
|
||||
|
||||
/// One append-only raw trace event.
|
||||
///
|
||||
/// Every event uses the same envelope so partial replay and corruption checks
|
||||
/// can run before the reducer understands the event-specific payload.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct RawTraceEvent {
|
||||
pub schema_version: u32,
|
||||
/// Contiguous writer-assigned order inside one rollout event log.
|
||||
pub seq: RawEventSeq,
|
||||
/// Unix wall-clock timestamp in milliseconds. Use for display/latency.
|
||||
pub wall_time_unix_ms: i64,
|
||||
pub rollout_id: String,
|
||||
pub thread_id: Option<AgentThreadId>,
|
||||
pub codex_turn_id: Option<CodexTurnId>,
|
||||
pub payload: RawTraceEventPayload,
|
||||
}
|
||||
|
||||
/// Writer-supplied context that appears in the raw event envelope.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq)]
|
||||
pub struct RawTraceEventContext {
|
||||
pub thread_id: Option<AgentThreadId>,
|
||||
pub codex_turn_id: Option<CodexTurnId>,
|
||||
}
|
||||
|
||||
/// Runtime requester as observed at the raw tool boundary.
|
||||
///
|
||||
/// This intentionally uses runtime-local identifiers. The reducer is the only
|
||||
/// place that maps these handles to graph identities such as `CodeCellId`.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum RawToolCallRequester {
|
||||
Model,
|
||||
CodeCell {
|
||||
/// Runtime-local code-mode cell handle.
|
||||
runtime_cell_id: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Typed payload for a raw trace event.
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case", tag = "type")]
|
||||
pub enum RawTraceEventPayload {
|
||||
RolloutStarted {
|
||||
trace_id: String,
|
||||
root_thread_id: AgentThreadId,
|
||||
},
|
||||
RolloutEnded {
|
||||
status: RolloutStatus,
|
||||
},
|
||||
ThreadStarted {
|
||||
thread_id: AgentThreadId,
|
||||
/// Stable agent path.
|
||||
agent_path: String,
|
||||
metadata_payload: Option<RawPayloadRef>,
|
||||
},
|
||||
ThreadEnded {
|
||||
thread_id: AgentThreadId,
|
||||
status: RolloutStatus,
|
||||
},
|
||||
CodexTurnStarted {
|
||||
codex_turn_id: CodexTurnId,
|
||||
thread_id: AgentThreadId,
|
||||
},
|
||||
CodexTurnEnded {
|
||||
codex_turn_id: CodexTurnId,
|
||||
status: ExecutionStatus,
|
||||
},
|
||||
InferenceStarted {
|
||||
inference_call_id: InferenceCallId,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: CodexTurnId,
|
||||
model: String,
|
||||
provider_name: String,
|
||||
request_payload: RawPayloadRef,
|
||||
},
|
||||
InferenceCompleted {
|
||||
inference_call_id: InferenceCallId,
|
||||
response_id: Option<String>,
|
||||
response_payload: RawPayloadRef,
|
||||
},
|
||||
InferenceFailed {
|
||||
inference_call_id: InferenceCallId,
|
||||
error: String,
|
||||
/// Partial response payload, when stream events arrived before failure.
|
||||
partial_response_payload: Option<RawPayloadRef>,
|
||||
},
|
||||
ToolCallStarted {
|
||||
tool_call_id: ToolCallId,
|
||||
/// Protocol/model call ID when this runtime call came from model output.
|
||||
model_visible_call_id: Option<String>,
|
||||
/// Code-mode runtime bridge ID when model-authored code issued this call.
|
||||
code_mode_runtime_tool_id: Option<String>,
|
||||
/// Runtime requester that caused this tool lifecycle.
|
||||
requester: RawToolCallRequester,
|
||||
kind: ToolCallKind,
|
||||
summary: ToolCallSummary,
|
||||
invocation_payload: Option<RawPayloadRef>,
|
||||
},
|
||||
ToolCallRuntimeStarted {
|
||||
tool_call_id: ToolCallId,
|
||||
/// Runtime/protocol observation for how Codex began executing the tool.
|
||||
runtime_payload: RawPayloadRef,
|
||||
},
|
||||
ToolCallRuntimeEnded {
|
||||
tool_call_id: ToolCallId,
|
||||
status: ExecutionStatus,
|
||||
/// Runtime/protocol observation for how Codex finished executing the tool.
|
||||
runtime_payload: RawPayloadRef,
|
||||
},
|
||||
ToolCallEnded {
|
||||
tool_call_id: ToolCallId,
|
||||
status: ExecutionStatus,
|
||||
result_payload: Option<RawPayloadRef>,
|
||||
},
|
||||
CodeCellStarted {
|
||||
/// Runtime-local handle allocated by code mode for waits and nested tools.
|
||||
runtime_cell_id: String,
|
||||
/// Custom tool call id on the model-visible `exec` item.
|
||||
model_visible_call_id: ModelVisibleCallId,
|
||||
/// JavaScript source after the public `exec` wrapper has been parsed.
|
||||
source_js: String,
|
||||
},
|
||||
CodeCellInitialResponse {
|
||||
/// Runtime-local handle, matching `CodeCellStarted`.
|
||||
runtime_cell_id: String,
|
||||
status: CodeCellRuntimeStatus,
|
||||
response_payload: Option<RawPayloadRef>,
|
||||
},
|
||||
CodeCellEnded {
|
||||
/// Runtime-local handle, matching `CodeCellStarted`.
|
||||
runtime_cell_id: String,
|
||||
status: CodeCellRuntimeStatus,
|
||||
response_payload: Option<RawPayloadRef>,
|
||||
},
|
||||
CompactionRequestStarted {
|
||||
compaction_id: CompactionId,
|
||||
compaction_request_id: CompactionRequestId,
|
||||
thread_id: AgentThreadId,
|
||||
codex_turn_id: CodexTurnId,
|
||||
model: String,
|
||||
provider_name: String,
|
||||
request_payload: RawPayloadRef,
|
||||
},
|
||||
CompactionRequestCompleted {
|
||||
compaction_id: CompactionId,
|
||||
compaction_request_id: CompactionRequestId,
|
||||
response_payload: RawPayloadRef,
|
||||
},
|
||||
CompactionRequestFailed {
|
||||
compaction_id: CompactionId,
|
||||
compaction_request_id: CompactionRequestId,
|
||||
error: String,
|
||||
},
|
||||
/// Checkpoint installation event for remote-compacted replacement history.
|
||||
CompactionInstalled {
|
||||
compaction_id: CompactionId,
|
||||
/// Trace-only checkpoint payload. Do not route this through public UI protocol.
|
||||
checkpoint_payload: RawPayloadRef,
|
||||
},
|
||||
/// Multi-agent v2 child-to-parent completion delivery.
|
||||
AgentResultObserved {
|
||||
edge_id: EdgeId,
|
||||
child_thread_id: AgentThreadId,
|
||||
child_codex_turn_id: CodexTurnId,
|
||||
parent_thread_id: AgentThreadId,
|
||||
message: String,
|
||||
/// Raw notification payload. This is evidence for the runtime delivery,
|
||||
/// not the parent-side model-visible item.
|
||||
carried_payload: Option<RawPayloadRef>,
|
||||
},
|
||||
/// Existing UI/protocol event wrapped into trace format.
|
||||
ProtocolEventObserved {
|
||||
event_type: String,
|
||||
event_payload: RawPayloadRef,
|
||||
},
|
||||
/// Structured payload for early instrumentation before a dedicated variant exists.
|
||||
Other {
|
||||
kind: String,
|
||||
summary: String,
|
||||
payloads: Vec<RawPayloadRef>,
|
||||
/// Small structured metadata. Large data belongs in `payloads`.
|
||||
metadata: Value,
|
||||
},
|
||||
}
|
||||
|
||||
impl RawTraceEventPayload {
|
||||
/// Raw payload refs that must exist before this raw event is appended.
|
||||
pub(crate) fn raw_payload_refs(&self) -> Vec<&RawPayloadRef> {
|
||||
match self {
|
||||
RawTraceEventPayload::RolloutStarted { .. }
|
||||
| RawTraceEventPayload::RolloutEnded { .. }
|
||||
| RawTraceEventPayload::ThreadEnded { .. }
|
||||
| RawTraceEventPayload::CodexTurnStarted { .. }
|
||||
| RawTraceEventPayload::CodexTurnEnded { .. }
|
||||
| RawTraceEventPayload::CompactionRequestFailed { .. }
|
||||
| RawTraceEventPayload::CodeCellStarted { .. }
|
||||
| RawTraceEventPayload::AgentResultObserved {
|
||||
carried_payload: None,
|
||||
..
|
||||
} => Vec::new(),
|
||||
RawTraceEventPayload::ThreadStarted {
|
||||
metadata_payload, ..
|
||||
} => metadata_payload.iter().collect(),
|
||||
RawTraceEventPayload::InferenceStarted {
|
||||
request_payload, ..
|
||||
}
|
||||
| RawTraceEventPayload::InferenceCompleted {
|
||||
response_payload: request_payload,
|
||||
..
|
||||
}
|
||||
| RawTraceEventPayload::CompactionRequestStarted {
|
||||
request_payload, ..
|
||||
}
|
||||
| RawTraceEventPayload::CompactionRequestCompleted {
|
||||
response_payload: request_payload,
|
||||
..
|
||||
}
|
||||
| RawTraceEventPayload::CompactionInstalled {
|
||||
checkpoint_payload: request_payload,
|
||||
..
|
||||
}
|
||||
| RawTraceEventPayload::ProtocolEventObserved {
|
||||
event_payload: request_payload,
|
||||
..
|
||||
} => vec![request_payload],
|
||||
RawTraceEventPayload::InferenceFailed {
|
||||
partial_response_payload,
|
||||
..
|
||||
}
|
||||
| RawTraceEventPayload::ToolCallStarted {
|
||||
invocation_payload: partial_response_payload,
|
||||
..
|
||||
}
|
||||
| RawTraceEventPayload::ToolCallEnded {
|
||||
result_payload: partial_response_payload,
|
||||
..
|
||||
}
|
||||
| RawTraceEventPayload::CodeCellInitialResponse {
|
||||
response_payload: partial_response_payload,
|
||||
..
|
||||
}
|
||||
| RawTraceEventPayload::CodeCellEnded {
|
||||
response_payload: partial_response_payload,
|
||||
..
|
||||
} => partial_response_payload.iter().collect(),
|
||||
RawTraceEventPayload::AgentResultObserved {
|
||||
carried_payload: Some(carried_payload),
|
||||
..
|
||||
} => vec![carried_payload],
|
||||
RawTraceEventPayload::ToolCallRuntimeStarted {
|
||||
runtime_payload, ..
|
||||
}
|
||||
| RawTraceEventPayload::ToolCallRuntimeEnded {
|
||||
runtime_payload, ..
|
||||
} => vec![runtime_payload],
|
||||
RawTraceEventPayload::Other { payloads, .. } => payloads.iter().collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
738
codex-rs/rollout-trace/src/reducer/code_cell.rs
Normal file
738
codex-rs/rollout-trace/src/reducer/code_cell.rs
Normal file
@@ -0,0 +1,738 @@
|
||||
//! Code-mode reduction.
|
||||
//!
|
||||
//! A code cell is the runtime parent for model-authored `exec`
|
||||
//! JavaScript. Nested tools, waits, and terminal operations hang off this
|
||||
//! object so viewers can inspect runtime work without flattening it into the
|
||||
//! model-visible conversation.
|
||||
//!
|
||||
//! The reducer has to reconcile two clocks:
|
||||
//! - model-visible items come from inference request/response payloads;
|
||||
//! - runtime work starts as soon as Codex dispatches the tool.
|
||||
//!
|
||||
//! In real traces `CodeCellStarted` can arrive before the inference completion
|
||||
//! payload that contains the `custom_tool_call` item. We therefore queue starts
|
||||
//! until their source conversation item exists, then attach runtime edges.
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::TraceReducer;
|
||||
use crate::model::CodeCell;
|
||||
use crate::model::CodeCellId;
|
||||
use crate::model::CodeCellRuntimeStatus;
|
||||
use crate::model::ConversationItemKind;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::ExecutionWindow;
|
||||
use crate::model::ProducerRef;
|
||||
use crate::model::ToolCallId;
|
||||
use crate::model::ToolCallRequester;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RawEventSeq;
|
||||
use crate::raw_event::RawToolCallRequester;
|
||||
|
||||
/// Runtime start payload for one model-authored code-mode exec call.
///
/// The reduced id is already derived from the model-visible call id before this
/// reaches the code-cell reducer, so the reducer can reconcile runtime lifecycle
/// events against a stable graph identity.
pub(super) struct StartedCodeCell {
    // Stable reduced identity, derived from the model-visible call id.
    pub(super) code_cell_id: CodeCellId,
    // Thread-local runtime handle used by later waits and nested tool calls.
    pub(super) runtime_cell_id: String,
    // The model-visible `exec` call that authored this cell.
    pub(super) model_visible_call_id: crate::model::ModelVisibleCallId,
    // The JavaScript source the model asked the runtime to execute.
    pub(super) source_js: String,
}
|
||||
|
||||
/// Queued code-cell start waiting for its model-visible source item.
///
/// Code execution can begin before inference stream completion records the
/// custom-tool call item that authored it. This wrapper keeps the original
/// event timing intact until that source item exists.
pub(super) struct PendingCodeCellStart {
    // Raw-event ordering of the original start, preserved for replay.
    pub(super) seq: RawEventSeq,
    // Wall-clock timestamp of the original start event.
    pub(super) wall_time_unix_ms: i64,
    // Thread that owns the cell.
    pub(super) thread_id: String,
    // Turn id as observed on the raw event; validated when the cell materializes.
    pub(super) codex_turn_id: Option<String>,
    // The start payload itself.
    pub(super) started: StartedCodeCell,
}
|
||||
|
||||
/// Lifecycle event observed before a queued code cell has materialized.
///
/// These events are replayed after the start is resolved so failed or very fast
/// cells do not lose runtime status while preserving source-item ownership.
pub(super) struct PendingCodeCellLifecycleEvent {
    // Raw-event ordering; pending events are kept sorted by this value.
    pub(super) seq: RawEventSeq,
    // Wall-clock timestamp of the original lifecycle event.
    pub(super) wall_time_unix_ms: i64,
    // Which lifecycle transition this event represents.
    pub(super) kind: PendingCodeCellLifecycleEventKind,
}
|
||||
|
||||
/// Runtime lifecycle transitions that can arrive while a code-cell start is queued.
pub(super) enum PendingCodeCellLifecycleEventKind {
    // First runtime response for the cell; carries the runtime handle and status.
    InitialResponse {
        runtime_cell_id: String,
        status: CodeCellRuntimeStatus,
    },
    // Terminal transition; carries the final runtime status.
    Ended {
        status: CodeCellRuntimeStatus,
    },
}
|
||||
|
||||
impl TraceReducer {
    /// Starts a code cell once its model-visible source item exists.
    ///
    /// Runtime events are allowed to arrive before stream completion has
    /// reduced the model output that requested `exec`. Queueing preserves the
    /// event order while still requiring every final `CodeCell` to point at the
    /// exact conversation item that authored its JavaScript.
    pub(super) fn start_or_queue_code_cell(&mut self, pending: PendingCodeCellStart) -> Result<()> {
        let code_cell_id = pending.started.code_cell_id.clone();
        if self
            .source_item_id_for_pending_code_cell(&pending)?
            .is_none()
        {
            // Source item not reduced yet: queue the start, rejecting duplicates
            // in either the finished map or the pending queue.
            if self.rollout.code_cells.contains_key(&code_cell_id)
                || self.pending_code_cell_starts.contains_key(&code_cell_id)
            {
                bail!("duplicate code cell start for {code_cell_id}");
            }
            self.pending_code_cell_starts.insert(code_cell_id, pending);
            return Ok(());
        }

        self.start_code_cell(pending)
    }

    /// Materializes any queued code-cell starts unlocked by newly reduced conversation items.
    ///
    /// This is called after inference and compaction conversation reduction,
    /// because those are the only paths that create model-visible items today.
    pub(super) fn flush_pending_code_cell_starts(&mut self) -> Result<()> {
        // Collect first, then remove: `start_code_cell` needs `&mut self`, so we
        // cannot mutate while iterating the pending map.
        let mut ready_ids = Vec::new();
        for (code_cell_id, pending) in &self.pending_code_cell_starts {
            if self
                .source_item_id_for_pending_code_cell(pending)?
                .is_some()
            {
                ready_ids.push(code_cell_id.clone());
            }
        }

        for code_cell_id in ready_ids {
            let Some(pending) = self.pending_code_cell_starts.remove(&code_cell_id) else {
                continue;
            };
            self.start_code_cell(pending)?;
        }
        Ok(())
    }

    /// Inserts the reduced `CodeCell` once source ownership can be proven.
    fn start_code_cell(&mut self, pending: PendingCodeCellStart) -> Result<()> {
        let PendingCodeCellStart {
            seq,
            wall_time_unix_ms,
            thread_id,
            codex_turn_id,
            started,
        } = pending;
        if self.rollout.code_cells.contains_key(&started.code_cell_id) {
            bail!("duplicate code cell start for {}", started.code_cell_id);
        }

        let Some(codex_turn_id) = codex_turn_id else {
            bail!(
                "code cell start {} did not include a Codex turn id",
                started.code_cell_id
            );
        };
        self.validate_code_cell_turn(&thread_id, &codex_turn_id)?;

        let source_item_id = self.source_item_id_for_code_cell_start(
            &thread_id,
            &started.code_cell_id,
            &started.model_visible_call_id,
        )?;
        let output_item_ids = self.model_visible_code_cell_item_ids(
            &thread_id,
            &started.model_visible_call_id,
            ConversationItemKind::CustomToolCallOutput,
        );
        // Runtime events may also have arrived while the start was queued.
        // Seed these reverse links from already-reduced tool calls so replay is
        // order-insensitive within the known trace causality.
        let requester = ToolCallRequester::CodeCell {
            code_cell_id: started.code_cell_id.clone(),
        };
        let nested_tool_call_ids = self
            .rollout
            .tool_calls
            .values()
            .filter(|tool_call| tool_call.requester == requester)
            .map(|tool_call| tool_call.tool_call_id.clone())
            .collect();

        self.rollout.code_cells.insert(
            started.code_cell_id.clone(),
            CodeCell {
                code_cell_id: started.code_cell_id.clone(),
                model_visible_call_id: started.model_visible_call_id,
                thread_id: thread_id.clone(),
                codex_turn_id,
                source_item_id,
                output_item_ids: output_item_ids.clone(),
                runtime_cell_id: Some(started.runtime_cell_id),
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                runtime_status: CodeCellRuntimeStatus::Starting,
                initial_response_at_unix_ms: None,
                initial_response_seq: None,
                yielded_at_unix_ms: None,
                yielded_seq: None,
                source_js: started.source_js,
                nested_tool_call_ids,
                wait_tool_call_ids: Vec::new(),
            },
        );

        // NOTE(review): result is discarded — presumably this only validates that
        // the thread exists / is mutable; confirm `thread_mut`'s side effects.
        self.thread_mut(&thread_id)?;

        for item_id in output_item_ids {
            self.add_code_cell_output_item(&started.code_cell_id, &item_id)?;
        }
        // Replay any lifecycle events buffered while the start was queued.
        self.flush_pending_code_cell_lifecycle_events(&started.code_cell_id)?;

        Ok(())
    }

    /// Returns the source item if the model-visible `exec` call has been reduced.
    fn source_item_id_for_pending_code_cell(
        &self,
        pending: &PendingCodeCellStart,
    ) -> Result<Option<String>> {
        Ok(self
            .model_visible_code_cell_item_ids(
                &pending.thread_id,
                &pending.started.model_visible_call_id,
                ConversationItemKind::CustomToolCall,
            )
            .into_iter()
            .next())
    }

    /// Records the runtime's first response for a code cell, or waits for its source item.
    ///
    /// Code-mode execution can start and fail before the inference response payload
    /// that introduced the model-visible `exec` call has been reduced. In that
    /// case the cell start is already pending; keep the lifecycle event beside it
    /// instead of weakening the invariant that every reduced cell has a source
    /// conversation item.
    pub(super) fn record_or_queue_code_cell_initial_response(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        code_cell_id: CodeCellId,
        runtime_cell_id: String,
        status: CodeCellRuntimeStatus,
    ) -> Result<()> {
        if !self.rollout.code_cells.contains_key(&code_cell_id) {
            if self.pending_code_cell_starts.contains_key(&code_cell_id) {
                self.queue_code_cell_lifecycle_event(
                    code_cell_id,
                    PendingCodeCellLifecycleEvent {
                        seq,
                        wall_time_unix_ms,
                        kind: PendingCodeCellLifecycleEventKind::InitialResponse {
                            runtime_cell_id,
                            status,
                        },
                    },
                );
                return Ok(());
            }
            // Neither reduced nor pending: strict replay rejects the event.
            bail!("code cell initial response referenced unknown cell {code_cell_id}");
        }
        self.record_code_cell_initial_response(
            seq,
            wall_time_unix_ms,
            code_cell_id,
            runtime_cell_id,
            status,
        )
    }

    /// Applies the runtime's first response to an already-reduced cell.
    ///
    /// The initial-response markers are only stamped once (first observation
    /// wins); `Yielded` additionally records the yield markers. The runtime
    /// status itself always takes the latest value.
    fn record_code_cell_initial_response(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        code_cell_id: CodeCellId,
        runtime_cell_id: String,
        status: CodeCellRuntimeStatus,
    ) -> Result<()> {
        let Some(cell) = self.rollout.code_cells.get_mut(&code_cell_id) else {
            bail!("code cell initial response referenced unknown cell {code_cell_id}");
        };

        cell.runtime_cell_id = Some(runtime_cell_id);
        if cell.initial_response_at_unix_ms.is_none() {
            cell.initial_response_at_unix_ms = Some(wall_time_unix_ms);
            cell.initial_response_seq = Some(seq);
        }
        if status == CodeCellRuntimeStatus::Yielded {
            cell.yielded_at_unix_ms = Some(wall_time_unix_ms);
            cell.yielded_seq = Some(seq);
        }
        cell.runtime_status = status;
        Ok(())
    }

    /// Ends a code cell, or waits until its queued start can materialize.
    ///
    /// This mirrors `record_or_queue_code_cell_initial_response`: the reducer is
    /// strict about unknown cells, but a cell whose start is pending on the
    /// model-visible source item is known and just needs its lifecycle replayed
    /// after the source item appears.
    pub(super) fn end_or_queue_code_cell(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        code_cell_id: CodeCellId,
        status: CodeCellRuntimeStatus,
    ) -> Result<()> {
        if !self.rollout.code_cells.contains_key(&code_cell_id) {
            if self.pending_code_cell_starts.contains_key(&code_cell_id) {
                self.queue_code_cell_lifecycle_event(
                    code_cell_id,
                    PendingCodeCellLifecycleEvent {
                        seq,
                        wall_time_unix_ms,
                        kind: PendingCodeCellLifecycleEventKind::Ended { status },
                    },
                );
                return Ok(());
            }
            bail!("code cell end referenced unknown cell {code_cell_id}");
        }
        self.end_code_cell(seq, wall_time_unix_ms, code_cell_id, status)
    }

    /// Closes a reduced cell: backfills missing initial-response markers,
    /// stamps the execution-window end, and derives the execution status from
    /// the runtime status.
    fn end_code_cell(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        code_cell_id: CodeCellId,
        status: CodeCellRuntimeStatus,
    ) -> Result<()> {
        let Some(cell) = self.rollout.code_cells.get_mut(&code_cell_id) else {
            bail!("code cell end referenced unknown cell {code_cell_id}");
        };

        // A cell that ended without ever reporting an initial response (e.g. a
        // fast failure) still gets first-response markers from the end event.
        if cell.initial_response_at_unix_ms.is_none() {
            cell.initial_response_at_unix_ms = Some(wall_time_unix_ms);
            cell.initial_response_seq = Some(seq);
        }
        cell.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
        cell.execution.ended_seq = Some(seq);
        cell.execution.status = execution_status_for_code_cell(&status);
        cell.runtime_status = status;
        Ok(())
    }

    /// Closes unfinished code cells when their owning turn is interrupted.
    ///
    /// A yielded code cell can outlive a completed turn and be resumed by a
    /// later `wait`, so normal turn completion must not imply cell completion.
    /// Cancellation/failure is different: the model-visible JS frame has been
    /// abandoned even if nested terminal work reports late runtime events. In
    /// that case leaving the cell `running` makes a completed trace look live.
    pub(super) fn terminate_running_code_cells_for_turn_end(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        codex_turn_id: &str,
        turn_status: &ExecutionStatus,
    ) -> Result<()> {
        let runtime_status = match turn_status {
            ExecutionStatus::Running | ExecutionStatus::Completed => return Ok(()),
            ExecutionStatus::Failed => CodeCellRuntimeStatus::Failed,
            ExecutionStatus::Cancelled | ExecutionStatus::Aborted => {
                CodeCellRuntimeStatus::Terminated
            }
        };
        // Collect ids first: `end_code_cell` needs `&mut self`.
        let code_cell_ids: Vec<_> = self
            .rollout
            .code_cells
            .values()
            .filter(|cell| {
                cell.codex_turn_id == codex_turn_id
                    && cell.execution.status == ExecutionStatus::Running
            })
            .map(|cell| cell.code_cell_id.clone())
            .collect();

        for code_cell_id in code_cell_ids {
            self.end_code_cell(seq, wall_time_unix_ms, code_cell_id, runtime_status.clone())?;
        }
        Ok(())
    }

    /// Buffers a lifecycle event for a cell whose start is still queued.
    ///
    /// The per-cell buffer is kept sorted by raw event sequence so replay in
    /// `flush_pending_code_cell_lifecycle_events` is deterministic.
    fn queue_code_cell_lifecycle_event(
        &mut self,
        code_cell_id: CodeCellId,
        event: PendingCodeCellLifecycleEvent,
    ) {
        let events = self
            .pending_code_cell_lifecycle_events
            .entry(code_cell_id)
            .or_default();
        events.push(event);
        events.sort_by_key(|event| event.seq);
    }

    /// Replays buffered lifecycle events once the cell has materialized.
    fn flush_pending_code_cell_lifecycle_events(&mut self, code_cell_id: &str) -> Result<()> {
        let Some(events) = self.pending_code_cell_lifecycle_events.remove(code_cell_id) else {
            return Ok(());
        };
        for event in events {
            match event.kind {
                PendingCodeCellLifecycleEventKind::InitialResponse {
                    runtime_cell_id,
                    status,
                } => self.record_code_cell_initial_response(
                    event.seq,
                    event.wall_time_unix_ms,
                    code_cell_id.to_string(),
                    runtime_cell_id,
                    status,
                )?,
                PendingCodeCellLifecycleEventKind::Ended { status } => self.end_code_cell(
                    event.seq,
                    event.wall_time_unix_ms,
                    code_cell_id.to_string(),
                    status,
                )?,
            }
        }
        Ok(())
    }

    /// Links a nested tool call back to its parent code cell.
    ///
    /// If the parent cell is still queued, the link is recovered later from already
    /// reduced tool calls when the cell materializes.
    pub(super) fn link_tool_call_to_code_cell(
        &mut self,
        tool_call_id: &ToolCallId,
        requester: &ToolCallRequester,
    ) -> Result<()> {
        let ToolCallRequester::CodeCell { code_cell_id } = requester else {
            return Ok(());
        };
        let Some(cell) = self.rollout.code_cells.get_mut(code_cell_id) else {
            // The cell start may still be queued behind the inference payload
            // that contains its model-visible source item. `start_code_cell`
            // backfills these already-reduced nested calls once the source
            // ownership can be proven.
            return Ok(());
        };
        push_unique(&mut cell.nested_tool_call_ids, tool_call_id);
        Ok(())
    }

    /// Records that a model-visible wait call is waiting on a runtime code cell.
    ///
    /// Wait calls are not nested JavaScript tools, so the relationship is inferred
    /// from the runtime cell id inside the function arguments.
    pub(super) fn link_wait_tool_call_from_request_payload(
        &mut self,
        thread_id: &str,
        tool_call_id: &ToolCallId,
        request_payload: Option<&RawPayloadRef>,
    ) -> Result<()> {
        let Some(request_payload) = request_payload else {
            return Ok(());
        };
        let payload = self.read_payload_json(request_payload)?;
        if payload.get("tool_name").and_then(Value::as_str) != Some("wait") {
            return Ok(());
        }
        // `wait` is a normal model-visible function call, not a nested JS tool
        // request. The only stable edge back to the code cell is the runtime
        // `cell_id` inside the function arguments.
        let Some(arguments) = payload
            .get("payload")
            .and_then(|payload| payload.get("arguments"))
            .and_then(Value::as_str)
        else {
            bail!(
                "wait tool request payload {} did not contain function arguments",
                request_payload.raw_payload_id
            );
        };
        let arguments: Value = serde_json::from_str(arguments).with_context(|| {
            format!(
                "wait tool request payload {} had invalid JSON arguments",
                request_payload.raw_payload_id
            )
        })?;
        let Some(runtime_cell_id) = arguments.get("cell_id").and_then(Value::as_str) else {
            bail!(
                "wait tool request payload {} did not contain cell_id",
                request_payload.raw_payload_id
            );
        };
        let Some(code_cell_id) =
            self.code_cell_id_for_runtime_cell_id_if_known(thread_id, runtime_cell_id)
        else {
            return Ok(());
        };
        let Some(cell) = self.rollout.code_cells.get_mut(&code_cell_id) else {
            return Ok(());
        };
        push_unique(&mut cell.wait_tool_call_ids, tool_call_id);
        Ok(())
    }

    /// Attaches a later-observed model-visible output item to its code cell.
    ///
    /// This is used when an inference request carries a custom-tool output after
    /// the runtime cell already exists.
    pub(super) fn attach_model_visible_code_cell_item(
        &mut self,
        item_id: &str,
        call_id: Option<&str>,
        kind: &ConversationItemKind,
    ) -> Result<()> {
        let Some(call_id) = call_id else {
            return Ok(());
        };
        if *kind != ConversationItemKind::CustomToolCallOutput {
            return Ok(());
        }
        // The output item can be observed after the CodeCell was created, e.g.
        // when a later inference request carries the custom-tool result back to
        // the model. Add the reverse ProducerRef at that later observation
        // point instead of copying runtime bytes into the conversation model.
        let code_cell_id = self.reduced_code_cell_id_for_model_visible_call(call_id);
        if !self.rollout.code_cells.contains_key(&code_cell_id) {
            return Ok(());
        }
        self.add_code_cell_output_item(&code_cell_id, item_id)
    }

    /// Resolves the owning thread for a code-cell runtime event.
    ///
    /// Runtime events should carry a thread id, but older/raw paths may only have
    /// the turn id. The fallback keeps replay strict while avoiding duplicate logic
    /// in every code-cell event arm.
    pub(super) fn code_cell_event_thread_id(
        &self,
        thread_id: Option<String>,
        codex_turn_id: Option<&str>,
        runtime_cell_id: &str,
        event_name: &str,
    ) -> Result<String> {
        if let Some(thread_id) = thread_id {
            return Ok(thread_id);
        }
        let Some(codex_turn_id) = codex_turn_id else {
            bail!("{event_name} {runtime_cell_id} did not include a thread id");
        };
        self.rollout
            .codex_turns
            .get(codex_turn_id)
            .map(|turn| turn.thread_id.clone())
            .with_context(|| {
                format!(
                    "{event_name} {runtime_cell_id} referenced unknown Codex turn {codex_turn_id}"
                )
            })
    }

    /// Derives the stable reduced code-cell id from the model-visible exec call id.
    pub(super) fn reduced_code_cell_id_for_model_visible_call(
        &self,
        model_visible_call_id: &str,
    ) -> CodeCellId {
        // The model-visible `exec` call is the durable source identity. The
        // runtime `cell_id` is only a thread-local handle used for later waits
        // and nested tool calls.
        format!("code_cell:{model_visible_call_id}")
    }

    /// Records the thread-local runtime cell id to reduced code-cell id mapping.
    ///
    /// Runtime ids can repeat across threads, so callers must provide the owning
    /// thread id when creating or resolving this bridge.
    pub(super) fn record_runtime_code_cell_id(
        &mut self,
        thread_id: &str,
        runtime_cell_id: &str,
        code_cell_id: &str,
    ) -> Result<()> {
        let key = runtime_code_cell_key(thread_id, runtime_cell_id);
        if let Some(existing) = self.code_cell_ids_by_runtime.get(&key) {
            // Re-recording the same mapping is idempotent; conflicting
            // mappings are a replay error.
            if existing == code_cell_id {
                return Ok(());
            }
            bail!(
                "runtime code cell {runtime_cell_id} in thread {thread_id} mapped to both \
                 {existing} and {code_cell_id}"
            );
        }
        self.code_cell_ids_by_runtime
            .insert(key, code_cell_id.to_string());
        Ok(())
    }

    /// Resolves a runtime cell id to the reduced code-cell id for the given thread.
    pub(super) fn code_cell_id_for_runtime_cell_id(
        &self,
        thread_id: &str,
        runtime_cell_id: &str,
        event_name: &str,
    ) -> Result<CodeCellId> {
        self.code_cell_id_for_runtime_cell_id_if_known(thread_id, runtime_cell_id)
            .with_context(|| {
                format!(
                    "{event_name} referenced unknown runtime cell {runtime_cell_id} \
                     in thread {thread_id}"
                )
            })
    }

    /// Non-failing lookup of the runtime-id-to-reduced-id bridge.
    fn code_cell_id_for_runtime_cell_id_if_known(
        &self,
        thread_id: &str,
        runtime_cell_id: &str,
    ) -> Option<CodeCellId> {
        self.code_cell_ids_by_runtime
            .get(&runtime_code_cell_key(thread_id, runtime_cell_id))
            .cloned()
    }

    /// Converts a raw tool requester into the reduced graph requester.
    ///
    /// Code-mode tool requests arrive with a runtime cell id, so this method is
    /// the boundary that turns that runtime handle into a stable code-cell anchor.
    pub(super) fn reduce_tool_call_requester(
        &self,
        thread_id: &str,
        requester: RawToolCallRequester,
    ) -> Result<ToolCallRequester> {
        match requester {
            RawToolCallRequester::Model => Ok(ToolCallRequester::Model),
            RawToolCallRequester::CodeCell { runtime_cell_id } => Ok(ToolCallRequester::CodeCell {
                code_cell_id: self.code_cell_id_for_runtime_cell_id(
                    thread_id,
                    &runtime_cell_id,
                    "code-mode nested tool",
                )?,
            }),
        }
    }

    /// Checks that the cell's thread exists and that the turn belongs to it.
    fn validate_code_cell_turn(&self, thread_id: &str, codex_turn_id: &str) -> Result<()> {
        if !self.rollout.threads.contains_key(thread_id) {
            bail!("code cell start referenced unknown thread {thread_id}");
        }
        let Some(turn) = self.rollout.codex_turns.get(codex_turn_id) else {
            bail!("code cell start referenced unknown Codex turn {codex_turn_id}");
        };
        if turn.thread_id != thread_id {
            bail!(
                "code cell start used thread {thread_id}, but Codex turn {codex_turn_id} belongs \
                 to {}",
                turn.thread_id
            );
        }
        Ok(())
    }

    /// Collects reduced conversation-item ids matching a thread, call id, and kind.
    fn model_visible_code_cell_item_ids(
        &self,
        thread_id: &str,
        call_id: &str,
        kind: ConversationItemKind,
    ) -> Vec<String> {
        self.rollout
            .conversation_items
            .values()
            .filter(|item| {
                item.thread_id == thread_id
                    && item.call_id.as_deref() == Some(call_id)
                    && item.kind == kind
            })
            .map(|item| item.item_id.clone())
            .collect()
    }

    /// Finds the custom-tool-call item that authored the cell, failing if absent.
    fn source_item_id_for_code_cell_start(
        &self,
        thread_id: &str,
        code_cell_id: &str,
        model_visible_call_id: &str,
    ) -> Result<String> {
        self.model_visible_code_cell_item_ids(
            thread_id,
            model_visible_call_id,
            ConversationItemKind::CustomToolCall,
        )
        .into_iter()
        .next()
        .with_context(|| {
            format!(
                "code cell {code_cell_id} referenced model-visible call {model_visible_call_id}, \
                 but no custom tool call item was observed"
            )
        })
    }

    /// Records an output item on the cell and the reverse `ProducerRef` on the item.
    fn add_code_cell_output_item(&mut self, code_cell_id: &str, item_id: &str) -> Result<()> {
        let Some(cell) = self.rollout.code_cells.get_mut(code_cell_id) else {
            bail!("code cell {code_cell_id} disappeared during output linking");
        };
        push_unique(&mut cell.output_item_ids, item_id);

        let Some(item) = self.rollout.conversation_items.get_mut(item_id) else {
            bail!("conversation item {item_id} disappeared during code-cell output linking");
        };
        let producer = ProducerRef::CodeCell {
            code_cell_id: code_cell_id.to_string(),
        };
        if !item.produced_by.contains(&producer) {
            item.produced_by.push(producer);
        }
        Ok(())
    }
}
|
||||
|
||||
fn execution_status_for_code_cell(status: &CodeCellRuntimeStatus) -> ExecutionStatus {
|
||||
match status {
|
||||
CodeCellRuntimeStatus::Starting
|
||||
| CodeCellRuntimeStatus::Running
|
||||
| CodeCellRuntimeStatus::Yielded => ExecutionStatus::Running,
|
||||
CodeCellRuntimeStatus::Completed => ExecutionStatus::Completed,
|
||||
CodeCellRuntimeStatus::Failed => ExecutionStatus::Failed,
|
||||
CodeCellRuntimeStatus::Terminated => ExecutionStatus::Cancelled,
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends `item_id` to `items` unless an equal entry is already present.
fn push_unique(items: &mut Vec<String>, item_id: &str) {
    let is_new = items.iter().all(|existing| existing != item_id);
    if is_new {
        items.push(item_id.to_owned());
    }
}
|
||||
|
||||
/// Builds the composite map key for the thread-scoped runtime-cell-id bridge.
///
/// Runtime cell ids are only unique within a thread, so the key pairs the two.
fn runtime_code_cell_key(thread_id: &str, runtime_cell_id: &str) -> (String, String) {
    (String::from(thread_id), String::from(runtime_cell_id))
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "code_cell_tests.rs"]
|
||||
mod tests;
|
||||
423
codex-rs/rollout-trace/src/reducer/code_cell_tests.rs
Normal file
423
codex-rs/rollout-trace/src/reducer/code_cell_tests.rs
Normal file
@@ -0,0 +1,423 @@
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::model::CodeCellRuntimeStatus;
|
||||
use crate::model::ConversationItemKind;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::ProducerRef;
|
||||
use crate::model::ToolCallKind;
|
||||
use crate::model::ToolCallSummary;
|
||||
use crate::payload::RawPayloadKind;
|
||||
use crate::raw_event::RawToolCallRequester;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
use crate::reducer::test_support::create_started_writer;
|
||||
use crate::reducer::test_support::message;
|
||||
use crate::reducer::test_support::start_turn;
|
||||
use crate::reducer::test_support::start_turn_for_thread;
|
||||
use crate::reducer::test_support::trace_context;
|
||||
use crate::reducer::test_support::trace_context_for_thread;
|
||||
use crate::replay_bundle;
|
||||
|
||||
#[test]
// Full happy-path lifecycle: a code cell started before its authoring
// inference response is reduced, then linked to a nested tool call, a
// model-visible `wait`, and its custom-tool output item across two turns.
fn code_cell_lifecycle_links_nested_tools_waits_and_outputs() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "count files")]
        }),
    )?;
    writer.append(RawTraceEventPayload::InferenceStarted {
        inference_call_id: "inference-1".to_string(),
        thread_id: "thread-root".to_string(),
        codex_turn_id: "turn-1".to_string(),
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        request_payload: request,
    })?;
    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "custom_tool_call",
                "name": "exec",
                "call_id": "call-code",
                "input": "text('hi')"
            }]
        }),
    )?;
    // Runtime tool dispatch starts before the stream-completion hook has
    // reduced the model response that requested `exec`.
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodeCellStarted {
            runtime_cell_id: "1".to_string(),
            model_visible_call_id: "call-code".to_string(),
            source_js: "text('hi')".to_string(),
        },
    )?;
    writer.append(RawTraceEventPayload::InferenceCompleted {
        inference_call_id: "inference-1".to_string(),
        response_id: Some("resp-1".to_string()),
        response_payload: response,
    })?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CodeCellInitialResponse {
            runtime_cell_id: "1".to_string(),
            status: CodeCellRuntimeStatus::Yielded,
            response_payload: None,
        },
    )?;
    // A nested tool call requested by the cell's JavaScript, not by the model.
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "nested-tool-1".to_string(),
            model_visible_call_id: None,
            code_mode_runtime_tool_id: Some("tool-1".to_string()),
            requester: RawToolCallRequester::CodeCell {
                runtime_cell_id: "1".to_string(),
            },
            kind: ToolCallKind::ExecCommand,
            summary: ToolCallSummary::Generic {
                label: "exec_command".to_string(),
                input_preview: Some("pwd".to_string()),
                output_preview: None,
            },
            invocation_payload: None,
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "nested-tool-1".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: None,
        },
    )?;

    start_turn(&writer, "turn-2")?;
    // The follow-up request carries the custom-tool output back to the model,
    // which is the later observation point that links the output item.
    let followup = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "previous_response_id": "resp-1",
            "input": [{
                "type": "custom_tool_call_output",
                "call_id": "call-code",
                "output": "Script running with cell ID 1"
            }]
        }),
    )?;
    writer.append(RawTraceEventPayload::InferenceStarted {
        inference_call_id: "inference-2".to_string(),
        thread_id: "thread-root".to_string(),
        codex_turn_id: "turn-2".to_string(),
        model: "gpt-test".to_string(),
        provider_name: "test-provider".to_string(),
        request_payload: followup,
    })?;
    // A `wait` is a plain model-visible function call; only the `cell_id`
    // inside its JSON arguments ties it back to the runtime cell.
    let wait_request = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "wait",
            "tool_namespace": null,
            "payload": {
                "type": "function",
                "arguments": "{\"cell_id\":\"1\"}"
            }
        }),
    )?;
    writer.append_with_context(
        trace_context("turn-2"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "wait-tool-1".to_string(),
            model_visible_call_id: Some("wait-call".to_string()),
            code_mode_runtime_tool_id: None,
            requester: RawToolCallRequester::Model,
            kind: ToolCallKind::Other {
                name: "wait".to_string(),
            },
            summary: ToolCallSummary::Generic {
                label: "wait".to_string(),
                input_preview: Some("{\"cell_id\":\"1\"}".to_string()),
                output_preview: None,
            },
            invocation_payload: Some(wait_request),
        },
    )?;
    writer.append_with_context(
        trace_context("turn-2"),
        RawTraceEventPayload::CodeCellEnded {
            runtime_cell_id: "1".to_string(),
            status: CodeCellRuntimeStatus::Completed,
            response_payload: None,
        },
    )?;

    let rollout = replay_bundle(temp.path())?;
    let code_cell_id = test_reduced_code_cell_id("call-code");
    let cell = &rollout.code_cells[&code_cell_id];
    let output_item_id = rollout.inference_calls["inference-2"]
        .request_item_ids
        .last()
        .expect("exec output item");

    assert_eq!(cell.thread_id, "thread-root");
    assert_eq!(cell.runtime_status, CodeCellRuntimeStatus::Completed);
    assert_eq!(cell.execution.status, ExecutionStatus::Completed);
    assert_eq!(cell.runtime_cell_id, Some("1".to_string()));
    assert_eq!(cell.nested_tool_call_ids, vec!["nested-tool-1"]);
    assert_eq!(cell.wait_tool_call_ids, vec!["wait-tool-1"]);
    assert_eq!(cell.output_item_ids, vec![output_item_id.clone()]);
    assert_eq!(
        rollout.conversation_items[output_item_id].produced_by,
        vec![ProducerRef::CodeCell {
            code_cell_id: code_cell_id.clone(),
        }]
    );
    assert_eq!(
        rollout.conversation_items[&cell.source_item_id].kind,
        ConversationItemKind::CustomToolCall,
    );

    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn fast_code_cell_lifecycle_waits_for_source_item() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let writer = create_started_writer(&temp)?;
|
||||
start_turn(&writer, "turn-1")?;
|
||||
|
||||
let request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"input": [message("user", "count files")]
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceStarted {
|
||||
inference_call_id: "inference-1".to_string(),
|
||||
thread_id: "thread-root".to_string(),
|
||||
codex_turn_id: "turn-1".to_string(),
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
request_payload: request,
|
||||
})?;
|
||||
writer.append_with_context(
|
||||
trace_context("turn-1"),
|
||||
RawTraceEventPayload::CodeCellStarted {
|
||||
runtime_cell_id: "1".to_string(),
|
||||
model_visible_call_id: "call-code".to_string(),
|
||||
source_js: "not valid js".to_string(),
|
||||
},
|
||||
)?;
|
||||
writer.append_with_context(
|
||||
trace_context("turn-1"),
|
||||
RawTraceEventPayload::CodeCellInitialResponse {
|
||||
runtime_cell_id: "1".to_string(),
|
||||
status: CodeCellRuntimeStatus::Failed,
|
||||
response_payload: None,
|
||||
},
|
||||
)?;
|
||||
writer.append_with_context(
|
||||
trace_context("turn-1"),
|
||||
RawTraceEventPayload::CodeCellEnded {
|
||||
runtime_cell_id: "1".to_string(),
|
||||
status: CodeCellRuntimeStatus::Failed,
|
||||
response_payload: None,
|
||||
},
|
||||
)?;
|
||||
let response = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceResponse,
|
||||
&json!({
|
||||
"response_id": "resp-1",
|
||||
"output_items": [{
|
||||
"type": "custom_tool_call",
|
||||
"name": "exec",
|
||||
"call_id": "call-code",
|
||||
"input": "not valid js"
|
||||
}]
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceCompleted {
|
||||
inference_call_id: "inference-1".to_string(),
|
||||
response_id: Some("resp-1".to_string()),
|
||||
response_payload: response,
|
||||
})?;
|
||||
|
||||
let rollout = replay_bundle(temp.path())?;
|
||||
let code_cell_id = test_reduced_code_cell_id("call-code");
|
||||
let cell = &rollout.code_cells[&code_cell_id];
|
||||
|
||||
assert_eq!(cell.thread_id, "thread-root");
|
||||
assert_eq!(cell.runtime_status, CodeCellRuntimeStatus::Failed);
|
||||
assert_eq!(cell.execution.status, ExecutionStatus::Failed);
|
||||
assert_eq!(cell.runtime_cell_id, Some("1".to_string()));
|
||||
assert_eq!(
|
||||
rollout.conversation_items[&cell.source_item_id].kind,
|
||||
ConversationItemKind::CustomToolCall,
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cancelled_turn_terminates_unfinished_code_cell() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let writer = create_started_writer(&temp)?;
|
||||
start_turn(&writer, "turn-1")?;
|
||||
|
||||
let request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"input": [message("user", "count files")]
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceStarted {
|
||||
inference_call_id: "inference-1".to_string(),
|
||||
thread_id: "thread-root".to_string(),
|
||||
codex_turn_id: "turn-1".to_string(),
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
request_payload: request,
|
||||
})?;
|
||||
let response = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceResponse,
|
||||
&json!({
|
||||
"response_id": "resp-1",
|
||||
"output_items": [{
|
||||
"type": "custom_tool_call",
|
||||
"name": "exec",
|
||||
"call_id": "call-code",
|
||||
"input": "await tools.exec_command({cmd: 'slow'});"
|
||||
}]
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceCompleted {
|
||||
inference_call_id: "inference-1".to_string(),
|
||||
response_id: Some("resp-1".to_string()),
|
||||
response_payload: response,
|
||||
})?;
|
||||
writer.append_with_context(
|
||||
trace_context("turn-1"),
|
||||
RawTraceEventPayload::CodeCellStarted {
|
||||
runtime_cell_id: "1".to_string(),
|
||||
model_visible_call_id: "call-code".to_string(),
|
||||
source_js: "await tools.exec_command({cmd: 'slow'});".to_string(),
|
||||
},
|
||||
)?;
|
||||
let turn_end = writer.append_with_context(
|
||||
trace_context("turn-1"),
|
||||
RawTraceEventPayload::CodexTurnEnded {
|
||||
codex_turn_id: "turn-1".to_string(),
|
||||
status: ExecutionStatus::Cancelled,
|
||||
},
|
||||
)?;
|
||||
|
||||
let rollout = replay_bundle(temp.path())?;
|
||||
let code_cell_id = test_reduced_code_cell_id("call-code");
|
||||
let cell = &rollout.code_cells[&code_cell_id];
|
||||
|
||||
assert_eq!(cell.runtime_status, CodeCellRuntimeStatus::Terminated);
|
||||
assert_eq!(cell.execution.status, ExecutionStatus::Cancelled);
|
||||
assert_eq!(cell.execution.ended_seq, Some(turn_end.seq));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn runtime_code_cell_ids_can_repeat_across_threads() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let writer = create_started_writer(&temp)?;
|
||||
writer.append(RawTraceEventPayload::ThreadStarted {
|
||||
thread_id: "thread-child".to_string(),
|
||||
agent_path: "/root/child".to_string(),
|
||||
metadata_payload: None,
|
||||
})?;
|
||||
start_turn_for_thread(&writer, "thread-root", "turn-root")?;
|
||||
start_turn_for_thread(&writer, "thread-child", "turn-child")?;
|
||||
|
||||
for (thread_id, turn_id, inference_call_id, call_id) in [
|
||||
("thread-root", "turn-root", "inference-root", "call-root"),
|
||||
(
|
||||
"thread-child",
|
||||
"turn-child",
|
||||
"inference-child",
|
||||
"call-child",
|
||||
),
|
||||
] {
|
||||
let request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"input": [message("user", "run code")]
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceStarted {
|
||||
inference_call_id: inference_call_id.to_string(),
|
||||
thread_id: thread_id.to_string(),
|
||||
codex_turn_id: turn_id.to_string(),
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
request_payload: request,
|
||||
})?;
|
||||
writer.append_with_context(
|
||||
trace_context_for_thread(thread_id, turn_id),
|
||||
RawTraceEventPayload::CodeCellStarted {
|
||||
runtime_cell_id: "1".to_string(),
|
||||
model_visible_call_id: call_id.to_string(),
|
||||
source_js: "text('hi')".to_string(),
|
||||
},
|
||||
)?;
|
||||
let response = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceResponse,
|
||||
&json!({
|
||||
"response_id": format!("resp-{thread_id}"),
|
||||
"output_items": [{
|
||||
"type": "custom_tool_call",
|
||||
"name": "exec",
|
||||
"call_id": call_id,
|
||||
"input": "text('hi')"
|
||||
}]
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceCompleted {
|
||||
inference_call_id: inference_call_id.to_string(),
|
||||
response_id: Some(format!("resp-{thread_id}")),
|
||||
response_payload: response,
|
||||
})?;
|
||||
writer.append_with_context(
|
||||
trace_context_for_thread(thread_id, turn_id),
|
||||
RawTraceEventPayload::CodeCellEnded {
|
||||
runtime_cell_id: "1".to_string(),
|
||||
status: CodeCellRuntimeStatus::Completed,
|
||||
response_payload: None,
|
||||
},
|
||||
)?;
|
||||
}
|
||||
|
||||
let rollout = replay_bundle(temp.path())?;
|
||||
let root_cell_id = test_reduced_code_cell_id("call-root");
|
||||
let child_cell_id = test_reduced_code_cell_id("call-child");
|
||||
|
||||
assert_eq!(rollout.code_cells[&root_cell_id].thread_id, "thread-root");
|
||||
assert_eq!(rollout.code_cells[&child_cell_id].thread_id, "thread-child");
|
||||
assert_eq!(
|
||||
rollout.code_cells[&root_cell_id].runtime_cell_id,
|
||||
Some("1".to_string())
|
||||
);
|
||||
assert_eq!(
|
||||
rollout.code_cells[&child_cell_id].runtime_cell_id,
|
||||
Some("1".to_string())
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn test_reduced_code_cell_id(model_visible_call_id: &str) -> String {
|
||||
format!("code_cell:{model_visible_call_id}")
|
||||
}
|
||||
183
codex-rs/rollout-trace/src/reducer/compaction.rs
Normal file
183
codex-rs/rollout-trace/src/reducer/compaction.rs
Normal file
@@ -0,0 +1,183 @@
|
||||
//! Reducer support for the remote compaction lifecycle.
|
||||
//!
|
||||
//! This module owns request/checkpoint bookkeeping. Conversation item reconciliation stays in
|
||||
//! `conversation` because it depends on the same normalization and reuse invariants as inference
|
||||
//! requests.
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
|
||||
use super::TraceReducer;
|
||||
use crate::model::Compaction;
|
||||
use crate::model::CompactionRequest;
|
||||
use crate::model::CompactionRequestId;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::ExecutionWindow;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RawEventSeq;
|
||||
|
||||
impl TraceReducer {
|
||||
/// Starts one upstream request attempt for a compaction operation.
|
||||
pub(super) fn start_compaction_request(
|
||||
&mut self,
|
||||
seq: RawEventSeq,
|
||||
wall_time_unix_ms: i64,
|
||||
started: StartedCompactionRequest,
|
||||
) -> Result<()> {
|
||||
if self
|
||||
.rollout
|
||||
.compaction_requests
|
||||
.contains_key(&started.compaction_request_id)
|
||||
{
|
||||
bail!(
|
||||
"duplicate compaction request start for {}",
|
||||
started.compaction_request_id
|
||||
);
|
||||
}
|
||||
self.thread_mut(&started.thread_id)?;
|
||||
let Some(turn) = self.rollout.codex_turns.get(&started.codex_turn_id) else {
|
||||
bail!(
|
||||
"compaction request {} referenced unknown codex turn {}",
|
||||
started.compaction_request_id,
|
||||
started.codex_turn_id
|
||||
);
|
||||
};
|
||||
if turn.thread_id != started.thread_id {
|
||||
bail!(
|
||||
"compaction request {} used thread {}, but codex turn {} belongs to {}",
|
||||
started.compaction_request_id,
|
||||
started.thread_id,
|
||||
started.codex_turn_id,
|
||||
turn.thread_id
|
||||
);
|
||||
}
|
||||
|
||||
self.rollout.compaction_requests.insert(
|
||||
started.compaction_request_id.clone(),
|
||||
CompactionRequest {
|
||||
compaction_request_id: started.compaction_request_id,
|
||||
compaction_id: started.compaction_id,
|
||||
thread_id: started.thread_id,
|
||||
codex_turn_id: started.codex_turn_id,
|
||||
execution: ExecutionWindow {
|
||||
started_at_unix_ms: wall_time_unix_ms,
|
||||
started_seq: seq,
|
||||
ended_at_unix_ms: None,
|
||||
ended_seq: None,
|
||||
status: ExecutionStatus::Running,
|
||||
},
|
||||
model: started.model,
|
||||
provider_name: started.provider_name,
|
||||
raw_request_payload_id: started.request_payload.raw_payload_id,
|
||||
raw_response_payload_id: None,
|
||||
},
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Completes an upstream compaction request attempt without modifying conversation history.
|
||||
///
|
||||
/// The request/response payloads are evidence for the remote call. The live
|
||||
/// conversation changes only when a separate install event provides the checkpoint.
|
||||
pub(super) fn complete_compaction_request(
|
||||
&mut self,
|
||||
seq: RawEventSeq,
|
||||
wall_time_unix_ms: i64,
|
||||
compaction_id: String,
|
||||
compaction_request_id: CompactionRequestId,
|
||||
status: ExecutionStatus,
|
||||
response_payload: Option<RawPayloadRef>,
|
||||
) -> Result<()> {
|
||||
let Some(request) = self
|
||||
.rollout
|
||||
.compaction_requests
|
||||
.get_mut(&compaction_request_id)
|
||||
else {
|
||||
bail!(
|
||||
"compaction request completion referenced unknown request {compaction_request_id}"
|
||||
);
|
||||
};
|
||||
if request.compaction_id != compaction_id {
|
||||
bail!(
|
||||
"compaction request {compaction_request_id} completion used compaction {compaction_id}, but start used {}",
|
||||
request.compaction_id
|
||||
);
|
||||
}
|
||||
request.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
|
||||
request.execution.ended_seq = Some(seq);
|
||||
request.execution.status = status;
|
||||
request.raw_response_payload_id = response_payload.map(|payload| payload.raw_payload_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Installs a compaction checkpoint into the reduced conversation graph.
|
||||
///
|
||||
/// This is the semantic boundary where replacement history becomes the live
|
||||
/// thread history; request attempts alone do not imply that change.
|
||||
pub(super) fn reduce_compaction_installed_event(
|
||||
&mut self,
|
||||
wall_time_unix_ms: i64,
|
||||
thread_id: String,
|
||||
codex_turn_id: String,
|
||||
compaction_id: String,
|
||||
checkpoint_payload: RawPayloadRef,
|
||||
) -> Result<()> {
|
||||
if self.rollout.compactions.contains_key(&compaction_id) {
|
||||
bail!("duplicate compaction install for {compaction_id}");
|
||||
}
|
||||
self.thread_mut(&thread_id)?;
|
||||
let Some(turn) = self.rollout.codex_turns.get(&codex_turn_id) else {
|
||||
bail!(
|
||||
"compaction install {compaction_id} referenced unknown codex turn {codex_turn_id}"
|
||||
);
|
||||
};
|
||||
if turn.thread_id != thread_id {
|
||||
bail!(
|
||||
"compaction install {compaction_id} used thread {thread_id}, but codex turn {codex_turn_id} belongs to {}",
|
||||
turn.thread_id
|
||||
);
|
||||
}
|
||||
let checkpoint = self.reduce_compaction_checkpoint(
|
||||
wall_time_unix_ms,
|
||||
&thread_id,
|
||||
codex_turn_id.as_str(),
|
||||
&compaction_id,
|
||||
&checkpoint_payload,
|
||||
)?;
|
||||
let request_ids = self
|
||||
.rollout
|
||||
.compaction_requests
|
||||
.values()
|
||||
.filter(|request| request.compaction_id == compaction_id)
|
||||
.map(|request| request.compaction_request_id.clone())
|
||||
.collect();
|
||||
|
||||
self.pending_compaction_replacement_item_ids
|
||||
.insert(thread_id.clone(), checkpoint.replacement_item_ids.clone());
|
||||
self.rollout.compactions.insert(
|
||||
compaction_id.clone(),
|
||||
Compaction {
|
||||
compaction_id,
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
installed_at_unix_ms: wall_time_unix_ms,
|
||||
marker_item_id: checkpoint.marker_item_id,
|
||||
request_ids,
|
||||
input_item_ids: checkpoint.input_item_ids,
|
||||
replacement_item_ids: checkpoint.replacement_item_ids,
|
||||
},
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Raw compaction-request start fields after dispatch has stripped the event envelope.
|
||||
pub(super) struct StartedCompactionRequest {
|
||||
pub(super) compaction_id: String,
|
||||
pub(super) compaction_request_id: String,
|
||||
pub(super) thread_id: String,
|
||||
pub(super) codex_turn_id: String,
|
||||
pub(super) model: String,
|
||||
pub(super) provider_name: String,
|
||||
pub(super) request_payload: RawPayloadRef,
|
||||
}
|
||||
700
codex-rs/rollout-trace/src/reducer/conversation.rs
Normal file
700
codex-rs/rollout-trace/src/reducer/conversation.rs
Normal file
@@ -0,0 +1,700 @@
|
||||
//! Conversation reduction from model-facing payload snapshots.
|
||||
//!
|
||||
//! Inference request inputs and response outputs are both part of the logical
|
||||
//! conversation because they are the payloads exchanged with the model. Runtime
|
||||
//! observations, such as local tool output, stay outside the transcript until a
|
||||
//! later model-facing payload carries their content.
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
use serde_json::Value;
|
||||
|
||||
use self::normalize::NormalizedConversationItem;
|
||||
use super::TraceReducer;
|
||||
use crate::model::CompactionId;
|
||||
use crate::model::ConversationBody;
|
||||
use crate::model::ConversationItem;
|
||||
use crate::model::ConversationItemKind;
|
||||
use crate::model::ConversationPart;
|
||||
use crate::model::ConversationRole;
|
||||
use crate::model::InferenceCallId;
|
||||
use crate::model::ProducerRef;
|
||||
use crate::payload::RawPayloadRef;
|
||||
|
||||
mod normalize;
|
||||
|
||||
impl TraceReducer {
|
||||
/// Reduces an inference request input snapshot into model-visible conversation items.
|
||||
///
|
||||
/// Request snapshots are reconciled by position against the previous model-visible
|
||||
/// snapshot for the thread so repeated history reuses ids while newly inserted
|
||||
/// items remain distinct.
|
||||
pub(super) fn reduce_inference_request(
|
||||
&mut self,
|
||||
wall_time_unix_ms: i64,
|
||||
inference_call_id: &InferenceCallId,
|
||||
thread_id: &str,
|
||||
codex_turn_id: &str,
|
||||
request_payload: &RawPayloadRef,
|
||||
) -> Result<Vec<String>> {
|
||||
let payload = self.read_payload_json(request_payload)?;
|
||||
let Some(input) = payload.get("input") else {
|
||||
bail!(
|
||||
"inference request payload {} did not contain input",
|
||||
request_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
let Some(request_items) = input.as_array() else {
|
||||
bail!(
|
||||
"inference request payload {} had non-array input",
|
||||
request_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
|
||||
let items = normalize::normalize_model_items(request_items, request_payload)?;
|
||||
|
||||
let previous_response_id = payload.get("previous_response_id").and_then(Value::as_str);
|
||||
// After compaction, the next full request is compared against the installed replacement
|
||||
// history, not the pre-compaction prompt. Any repeated developer/context prefix that Codex
|
||||
// reinjects must therefore become a fresh post-compaction conversation item.
|
||||
let post_compaction_snapshot = if previous_response_id.is_none() {
|
||||
self.pending_compaction_replacement_item_ids
|
||||
.get(thread_id)
|
||||
.cloned()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let request_item_ids = if let Some(previous_response_id) = previous_response_id {
|
||||
// Streaming follow-up requests can send only the new input plus a
|
||||
// `previous_response_id`. The trace model still exposes the full
|
||||
// model-visible input, so rebuild the omitted prefix from the
|
||||
// previous request and response before reducing this delta.
|
||||
let previous_items = self
|
||||
.rollout
|
||||
.inference_calls
|
||||
.values()
|
||||
.find(|inference| {
|
||||
inference.thread_id == thread_id
|
||||
&& inference.upstream_request_id.as_deref() == Some(previous_response_id)
|
||||
})
|
||||
.map(|inference| {
|
||||
let mut ids = inference.request_item_ids.clone();
|
||||
ids.extend(inference.response_item_ids.clone());
|
||||
ids
|
||||
});
|
||||
let Some(mut item_ids) = previous_items else {
|
||||
bail!(
|
||||
"incremental inference request {inference_call_id} referenced unknown previous_response_id {previous_response_id}"
|
||||
);
|
||||
};
|
||||
let delta_item_ids = self.reconcile_conversation_items(
|
||||
items,
|
||||
ReconcileItems {
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
wall_time_unix_ms,
|
||||
produced_by: Vec::new(),
|
||||
start_index: item_ids.len(),
|
||||
mode: ReconcileMode::AppendOnly,
|
||||
snapshot_override: None,
|
||||
},
|
||||
)?;
|
||||
item_ids.extend(delta_item_ids);
|
||||
item_ids
|
||||
} else {
|
||||
self.reconcile_conversation_items(
|
||||
items,
|
||||
ReconcileItems {
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
wall_time_unix_ms,
|
||||
produced_by: Vec::new(),
|
||||
start_index: 0,
|
||||
mode: ReconcileMode::FullSnapshot,
|
||||
snapshot_override: post_compaction_snapshot.as_deref(),
|
||||
},
|
||||
)?
|
||||
};
|
||||
|
||||
self.append_thread_conversation_items(thread_id, &request_item_ids)?;
|
||||
if post_compaction_snapshot.is_some() {
|
||||
self.pending_compaction_replacement_item_ids
|
||||
.remove(thread_id);
|
||||
}
|
||||
self.thread_conversation_snapshots
|
||||
.insert(thread_id.to_string(), request_item_ids.clone());
|
||||
Ok(request_item_ids)
|
||||
}
|
||||
|
||||
/// Reduces an inference response payload into conversation items produced by the call.
|
||||
pub(super) fn reduce_inference_response(
|
||||
&mut self,
|
||||
wall_time_unix_ms: i64,
|
||||
inference_call_id: &InferenceCallId,
|
||||
response_payload: &RawPayloadRef,
|
||||
) -> Result<Vec<String>> {
|
||||
let payload = self.read_payload_json(response_payload)?;
|
||||
let Some(output_items) = payload.get("output_items").and_then(Value::as_array) else {
|
||||
bail!(
|
||||
"inference response payload {} did not contain output_items",
|
||||
response_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
|
||||
let Some((thread_id, codex_turn_id)) = self
|
||||
.rollout
|
||||
.inference_calls
|
||||
.get(inference_call_id)
|
||||
.map(|inference| (inference.thread_id.clone(), inference.codex_turn_id.clone()))
|
||||
else {
|
||||
bail!("inference response referenced unknown call {inference_call_id}");
|
||||
};
|
||||
|
||||
let items = normalize::normalize_model_items(output_items, response_payload)?;
|
||||
// Response output is appended immediately: it was produced by the model,
|
||||
// so it is conversation even before a later request carries it forward.
|
||||
let append_at = self
|
||||
.thread_conversation_snapshots
|
||||
.get(&thread_id)
|
||||
.map_or(0, Vec::len);
|
||||
let response_item_ids = self.reconcile_conversation_items(
|
||||
items,
|
||||
ReconcileItems {
|
||||
thread_id: &thread_id,
|
||||
codex_turn_id: &codex_turn_id,
|
||||
wall_time_unix_ms,
|
||||
produced_by: vec![ProducerRef::Inference {
|
||||
inference_call_id: inference_call_id.clone(),
|
||||
}],
|
||||
start_index: append_at,
|
||||
mode: ReconcileMode::AppendOnly,
|
||||
snapshot_override: None,
|
||||
},
|
||||
)?;
|
||||
self.append_thread_conversation_items(&thread_id, &response_item_ids)?;
|
||||
self.thread_conversation_snapshots
|
||||
.entry(thread_id)
|
||||
.or_default()
|
||||
.extend(response_item_ids.clone());
|
||||
|
||||
if let Some(usage) = payload
|
||||
.get("token_usage")
|
||||
.and_then(normalize::token_usage_from_value)
|
||||
&& let Some(inference) = self.rollout.inference_calls.get_mut(inference_call_id)
|
||||
{
|
||||
inference.usage = Some(usage);
|
||||
}
|
||||
|
||||
Ok(response_item_ids)
|
||||
}
|
||||
|
||||
fn reconcile_conversation_items(
|
||||
&mut self,
|
||||
items: Vec<NormalizedConversationItem>,
|
||||
context: ReconcileItems<'_>,
|
||||
) -> Result<Vec<String>> {
|
||||
let previous_snapshot = context.snapshot_override.map_or_else(
|
||||
|| {
|
||||
self.thread_conversation_snapshots
|
||||
.get(context.thread_id)
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
},
|
||||
<[_]>::to_vec,
|
||||
);
|
||||
let mut item_ids = Vec::with_capacity(items.len());
|
||||
|
||||
for (offset, item) in items.into_iter().enumerate() {
|
||||
let index = context.start_index + offset;
|
||||
let tool_link_item = item.clone();
|
||||
self.ensure_call_id_consistency(context.thread_id, &item)?;
|
||||
self.ensure_reasoning_consistency(context.thread_id, &item)?;
|
||||
let item_id = if let Some(previous_item_id) = previous_snapshot.get(index) {
|
||||
if self.item_matches(previous_item_id, &item) {
|
||||
previous_item_id.clone()
|
||||
} else if matches!(context.mode, ReconcileMode::FullSnapshot) {
|
||||
self.find_matching_snapshot_item(&previous_snapshot, &item_ids, &item)
|
||||
.unwrap_or_else(|| {
|
||||
self.create_conversation_item(
|
||||
context.thread_id,
|
||||
Some(context.codex_turn_id.to_string()),
|
||||
context.wall_time_unix_ms,
|
||||
item,
|
||||
context.produced_by.clone(),
|
||||
)
|
||||
})
|
||||
} else {
|
||||
let codex_turn_id = context.codex_turn_id;
|
||||
let thread_id = context.thread_id;
|
||||
bail!(
|
||||
"model conversation mismatch while reducing turn {codex_turn_id} for \
|
||||
thread {thread_id} at item index {index}: existing item \
|
||||
{previous_item_id} does not match the current model payload item"
|
||||
);
|
||||
}
|
||||
} else if matches!(context.mode, ReconcileMode::FullSnapshot) {
|
||||
self.find_matching_snapshot_item(&previous_snapshot, &item_ids, &item)
|
||||
.unwrap_or_else(|| {
|
||||
self.create_conversation_item(
|
||||
context.thread_id,
|
||||
Some(context.codex_turn_id.to_string()),
|
||||
context.wall_time_unix_ms,
|
||||
item,
|
||||
context.produced_by.clone(),
|
||||
)
|
||||
})
|
||||
} else {
|
||||
self.create_conversation_item(
|
||||
context.thread_id,
|
||||
Some(context.codex_turn_id.to_string()),
|
||||
context.wall_time_unix_ms,
|
||||
item,
|
||||
context.produced_by.clone(),
|
||||
)
|
||||
};
|
||||
self.update_conversation_item_from_sighting(
|
||||
&item_id,
|
||||
&tool_link_item,
|
||||
&context.produced_by,
|
||||
)?;
|
||||
self.attach_model_visible_tool_item(
|
||||
&item_id,
|
||||
tool_link_item.call_id.as_deref(),
|
||||
&tool_link_item.kind,
|
||||
)?;
|
||||
self.attach_model_visible_code_cell_item(
|
||||
&item_id,
|
||||
tool_link_item.call_id.as_deref(),
|
||||
&tool_link_item.kind,
|
||||
)?;
|
||||
self.resolve_pending_agent_edges_for_item(&item_id)?;
|
||||
item_ids.push(item_id);
|
||||
}
|
||||
|
||||
self.flush_pending_code_cell_starts()?;
|
||||
Ok(item_ids)
|
||||
}
|
||||
|
||||
/// Reduces a compaction checkpoint payload into installed replacement history.
|
||||
///
|
||||
/// The returned ids let the compaction reducer record both the boundary marker
|
||||
/// and the snapshot that future full requests should reconcile against.
|
||||
pub(super) fn reduce_compaction_checkpoint(
|
||||
&mut self,
|
||||
wall_time_unix_ms: i64,
|
||||
thread_id: &str,
|
||||
codex_turn_id: &str,
|
||||
compaction_id: &CompactionId,
|
||||
checkpoint_payload: &RawPayloadRef,
|
||||
) -> Result<ReducedCompactionCheckpoint> {
|
||||
let payload = self.read_payload_json(checkpoint_payload)?;
|
||||
let input_history = required_array(&payload, "input_history", checkpoint_payload)?;
|
||||
let replacement_history =
|
||||
required_array(&payload, "replacement_history", checkpoint_payload)?;
|
||||
|
||||
let input_items = normalize::normalize_model_items(input_history, checkpoint_payload)?;
|
||||
let replacement_items =
|
||||
normalize::normalize_model_items(replacement_history, checkpoint_payload)?;
|
||||
let input_candidates = self
|
||||
.thread_conversation_snapshots
|
||||
.get(thread_id)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
let input_item_ids = self.reconcile_detached_conversation_items(
|
||||
input_items,
|
||||
DetachedReconcileItems {
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
wall_time_unix_ms,
|
||||
produced_by: Vec::new(),
|
||||
candidates: input_candidates,
|
||||
},
|
||||
)?;
|
||||
// A compaction checkpoint has two transcript effects. First, record the structural
|
||||
// boundary where old live history ended. Then append the replacement items, including
|
||||
// the provider-visible summary item if the compact endpoint returned one.
|
||||
let marker_item_id = self.create_conversation_item(
|
||||
thread_id,
|
||||
Some(codex_turn_id.to_string()),
|
||||
wall_time_unix_ms,
|
||||
NormalizedConversationItem {
|
||||
role: ConversationRole::Assistant,
|
||||
channel: None,
|
||||
kind: ConversationItemKind::CompactionMarker,
|
||||
// The summary is a separate model/provider-visible item. Keep the marker body
|
||||
// empty so transcript renderers cannot mistake the boundary for prompt content.
|
||||
body: ConversationBody { parts: Vec::new() },
|
||||
call_id: None,
|
||||
},
|
||||
vec![ProducerRef::Compaction {
|
||||
compaction_id: compaction_id.clone(),
|
||||
}],
|
||||
);
|
||||
let replacement_item_ids = self.reconcile_detached_conversation_items(
|
||||
replacement_items,
|
||||
DetachedReconcileItems {
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
wall_time_unix_ms,
|
||||
produced_by: vec![ProducerRef::Compaction {
|
||||
compaction_id: compaction_id.clone(),
|
||||
}],
|
||||
// Replacement history is a rewrite boundary. Even if the compact endpoint emits
|
||||
// text that matches old history, the installed item is a new post-compaction
|
||||
// conversation item and should not reuse a pre-compaction ID.
|
||||
candidates: Vec::new(),
|
||||
},
|
||||
)?;
|
||||
self.append_thread_conversation_items(thread_id, &input_item_ids)?;
|
||||
self.append_thread_conversation_items(thread_id, std::slice::from_ref(&marker_item_id))?;
|
||||
self.append_thread_conversation_items(thread_id, &replacement_item_ids)?;
|
||||
Ok(ReducedCompactionCheckpoint {
|
||||
input_item_ids,
|
||||
marker_item_id,
|
||||
replacement_item_ids,
|
||||
})
|
||||
}
|
||||
|
||||
fn reconcile_detached_conversation_items(
|
||||
&mut self,
|
||||
items: Vec<NormalizedConversationItem>,
|
||||
context: DetachedReconcileItems<'_>,
|
||||
) -> Result<Vec<String>> {
|
||||
let mut item_ids = Vec::with_capacity(items.len());
|
||||
|
||||
for item in items {
|
||||
let tool_link_item = item.clone();
|
||||
self.ensure_call_id_consistency(context.thread_id, &item)?;
|
||||
self.ensure_reasoning_consistency(context.thread_id, &item)?;
|
||||
let item_id = self
|
||||
.find_matching_snapshot_item(&context.candidates, &item_ids, &item)
|
||||
.unwrap_or_else(|| {
|
||||
self.create_conversation_item(
|
||||
context.thread_id,
|
||||
Some(context.codex_turn_id.to_string()),
|
||||
context.wall_time_unix_ms,
|
||||
item,
|
||||
context.produced_by.clone(),
|
||||
)
|
||||
});
|
||||
self.update_conversation_item_from_sighting(
|
||||
&item_id,
|
||||
&tool_link_item,
|
||||
&context.produced_by,
|
||||
)?;
|
||||
self.attach_model_visible_tool_item(
|
||||
&item_id,
|
||||
tool_link_item.call_id.as_deref(),
|
||||
&tool_link_item.kind,
|
||||
)?;
|
||||
self.attach_model_visible_code_cell_item(
|
||||
&item_id,
|
||||
tool_link_item.call_id.as_deref(),
|
||||
&tool_link_item.kind,
|
||||
)?;
|
||||
self.resolve_pending_agent_edges_for_item(&item_id)?;
|
||||
item_ids.push(item_id);
|
||||
}
|
||||
|
||||
self.flush_pending_code_cell_starts()?;
|
||||
Ok(item_ids)
|
||||
}
|
||||
|
||||
fn create_conversation_item(
|
||||
&mut self,
|
||||
thread_id: &str,
|
||||
codex_turn_id: Option<String>,
|
||||
first_seen_at_unix_ms: i64,
|
||||
item: NormalizedConversationItem,
|
||||
produced_by: Vec<ProducerRef>,
|
||||
) -> String {
|
||||
let item_id = self.next_conversation_item_id();
|
||||
self.rollout.conversation_items.insert(
|
||||
item_id.clone(),
|
||||
ConversationItem {
|
||||
item_id: item_id.clone(),
|
||||
thread_id: thread_id.to_string(),
|
||||
codex_turn_id,
|
||||
first_seen_at_unix_ms,
|
||||
role: item.role,
|
||||
channel: item.channel,
|
||||
kind: item.kind,
|
||||
body: item.body,
|
||||
call_id: item.call_id,
|
||||
produced_by,
|
||||
},
|
||||
);
|
||||
item_id
|
||||
}
|
||||
|
||||
fn update_conversation_item_from_sighting(
|
||||
&mut self,
|
||||
item_id: &str,
|
||||
normalized: &NormalizedConversationItem,
|
||||
produced_by: &[ProducerRef],
|
||||
) -> Result<()> {
|
||||
let Some(item) = self.rollout.conversation_items.get_mut(item_id) else {
|
||||
bail!("conversation item {item_id} was referenced before it was created");
|
||||
};
|
||||
|
||||
if item.kind == ConversationItemKind::Reasoning {
|
||||
merge_reasoning_body(&mut item.body, &normalized.body)?;
|
||||
}
|
||||
for producer in produced_by {
|
||||
if !item.produced_by.contains(producer) {
|
||||
item.produced_by.push(producer.clone());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn append_thread_conversation_items(
|
||||
&mut self,
|
||||
thread_id: &str,
|
||||
item_ids: &[String],
|
||||
) -> Result<()> {
|
||||
let thread = self.thread_mut(thread_id)?;
|
||||
for item_id in item_ids {
|
||||
if !thread.conversation_item_ids.contains(item_id) {
|
||||
thread.conversation_item_ids.push(item_id.clone());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn find_matching_snapshot_item(
|
||||
&self,
|
||||
previous_snapshot: &[String],
|
||||
used_item_ids: &[String],
|
||||
normalized: &NormalizedConversationItem,
|
||||
) -> Option<String> {
|
||||
previous_snapshot
|
||||
.iter()
|
||||
.find(|item_id| {
|
||||
!used_item_ids.contains(item_id) && self.item_matches(item_id, normalized)
|
||||
})
|
||||
.cloned()
|
||||
}
|
||||
|
||||
fn ensure_call_id_consistency(
|
||||
&self,
|
||||
thread_id: &str,
|
||||
normalized: &NormalizedConversationItem,
|
||||
) -> Result<()> {
|
||||
let Some(call_id) = normalized.call_id.as_deref() else {
|
||||
return Ok(());
|
||||
};
|
||||
for item in self.rollout.conversation_items.values() {
|
||||
if item.thread_id == thread_id
|
||||
&& item.call_id.as_deref() == Some(call_id)
|
||||
&& item.kind == normalized.kind
|
||||
&& !conversation_item_matches(item, normalized)
|
||||
{
|
||||
bail!("model-visible call id {call_id} was reused with different content");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn ensure_reasoning_consistency(
|
||||
&self,
|
||||
thread_id: &str,
|
||||
normalized: &NormalizedConversationItem,
|
||||
) -> Result<()> {
|
||||
if normalized.kind != ConversationItemKind::Reasoning {
|
||||
return Ok(());
|
||||
};
|
||||
let Some((label, value)) = reasoning_encoded_part(&normalized.body) else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
for item in self.rollout.conversation_items.values() {
|
||||
if item.thread_id == thread_id
|
||||
&& item.kind == ConversationItemKind::Reasoning
|
||||
&& item.channel == normalized.channel
|
||||
&& reasoning_encoded_part(&item.body) == Some((label, value))
|
||||
&& !reasoning_body_matches(&item.body, &normalized.body)
|
||||
{
|
||||
bail!("reasoning encrypted_content was reused with different readable content");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn item_matches(&self, item_id: &str, normalized: &NormalizedConversationItem) -> bool {
|
||||
let Some(item) = self.rollout.conversation_items.get(item_id) else {
|
||||
return false;
|
||||
};
|
||||
conversation_item_matches(item, normalized)
|
||||
}
|
||||
|
||||
fn next_conversation_item_id(&mut self) -> String {
|
||||
let ordinal = self.next_conversation_item_ordinal;
|
||||
self.next_conversation_item_ordinal += 1;
|
||||
format!("conversation_item:{ordinal}")
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum ReconcileMode {
|
||||
/// Full model requests are authoritative snapshots of the live context. The
|
||||
/// prompt builder can reorder already-observed items or replace history
|
||||
/// with synthetic summary messages, so item identity is "same content,
|
||||
/// reused at most once in this snapshot" rather than "same position only".
|
||||
FullSnapshot,
|
||||
/// Incremental request deltas and response outputs append to a known prefix.
|
||||
/// A mismatch at an occupied position means our reconstructed prefix is
|
||||
/// wrong and should fail replay.
|
||||
AppendOnly,
|
||||
}
|
||||
|
||||
struct ReconcileItems<'a> {
|
||||
thread_id: &'a str,
|
||||
codex_turn_id: &'a str,
|
||||
wall_time_unix_ms: i64,
|
||||
produced_by: Vec<ProducerRef>,
|
||||
start_index: usize,
|
||||
mode: ReconcileMode,
|
||||
snapshot_override: Option<&'a [String]>,
|
||||
}
|
||||
|
||||
struct DetachedReconcileItems<'a> {
|
||||
thread_id: &'a str,
|
||||
codex_turn_id: &'a str,
|
||||
wall_time_unix_ms: i64,
|
||||
produced_by: Vec<ProducerRef>,
|
||||
candidates: Vec<String>,
|
||||
}
|
||||
|
||||
/// Conversation ids produced when a compaction checkpoint is installed.
|
||||
///
|
||||
/// The marker item records the boundary, while replacement items are the live
|
||||
/// history that subsequent full requests should treat as their baseline.
|
||||
pub(super) struct ReducedCompactionCheckpoint {
|
||||
pub(super) input_item_ids: Vec<String>,
|
||||
pub(super) marker_item_id: String,
|
||||
pub(super) replacement_item_ids: Vec<String>,
|
||||
}
|
||||
|
||||
fn required_array<'a>(
|
||||
payload: &'a Value,
|
||||
key: &str,
|
||||
raw_payload: &RawPayloadRef,
|
||||
) -> Result<&'a Vec<Value>> {
|
||||
payload.get(key).and_then(Value::as_array).with_context(|| {
|
||||
format!(
|
||||
"compaction checkpoint payload {} did not contain array {key}",
|
||||
raw_payload.raw_payload_id
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn conversation_item_matches(
|
||||
item: &ConversationItem,
|
||||
normalized: &NormalizedConversationItem,
|
||||
) -> bool {
|
||||
let body_matches = if item.kind == ConversationItemKind::Reasoning
|
||||
&& normalized.kind == ConversationItemKind::Reasoning
|
||||
{
|
||||
reasoning_body_matches(&item.body, &normalized.body)
|
||||
} else {
|
||||
conversation_body_matches(&item.body, &normalized.body)
|
||||
};
|
||||
|
||||
item.role == normalized.role
|
||||
&& item.channel == normalized.channel
|
||||
&& item.kind == normalized.kind
|
||||
&& body_matches
|
||||
&& item.call_id == normalized.call_id
|
||||
}
|
||||
|
||||
fn conversation_body_matches(left: &ConversationBody, right: &ConversationBody) -> bool {
|
||||
left.parts.len() == right.parts.len()
|
||||
&& left
|
||||
.parts
|
||||
.iter()
|
||||
.zip(&right.parts)
|
||||
.all(|(left, right)| match (left, right) {
|
||||
(
|
||||
ConversationPart::Json {
|
||||
summary: left_summary,
|
||||
raw_payload_id: _,
|
||||
},
|
||||
ConversationPart::Json {
|
||||
summary: right_summary,
|
||||
raw_payload_id: _,
|
||||
},
|
||||
) => left_summary == right_summary,
|
||||
_ => left == right,
|
||||
})
|
||||
}
|
||||
|
||||
fn reasoning_body_matches(left: &ConversationBody, right: &ConversationBody) -> bool {
|
||||
if conversation_body_matches(left, right) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// The Responses API may return readable reasoning on completion, but later
|
||||
// request snapshots often replay only the encrypted blob. The blob is the
|
||||
// stable model-visible identity; readable text/summary is extra evidence
|
||||
// that must agree whenever both sides provide it.
|
||||
let Some(left_encoded) = reasoning_encoded_part(left) else {
|
||||
return false;
|
||||
};
|
||||
let Some(right_encoded) = reasoning_encoded_part(right) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
left_encoded == right_encoded && readable_reasoning_parts_match(left, right)
|
||||
}
|
||||
|
||||
fn merge_reasoning_body(
|
||||
existing: &mut ConversationBody,
|
||||
incoming: &ConversationBody,
|
||||
) -> Result<()> {
|
||||
if conversation_body_matches(existing, incoming) {
|
||||
return Ok(());
|
||||
}
|
||||
if !reasoning_body_matches(existing, incoming) {
|
||||
bail!("reasoning encrypted_content was reused with different readable content");
|
||||
}
|
||||
if readable_reasoning_parts(existing).is_empty()
|
||||
&& !readable_reasoning_parts(incoming).is_empty()
|
||||
{
|
||||
existing.parts = incoming.parts.clone();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn reasoning_encoded_part(body: &ConversationBody) -> Option<(&str, &str)> {
|
||||
body.parts.iter().find_map(|part| {
|
||||
if let ConversationPart::Encoded { label, value } = part {
|
||||
Some((label.as_str(), value.as_str()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn readable_reasoning_parts_match(left: &ConversationBody, right: &ConversationBody) -> bool {
|
||||
let left = readable_reasoning_parts(left);
|
||||
let right = readable_reasoning_parts(right);
|
||||
left.is_empty() || right.is_empty() || left == right
|
||||
}
|
||||
|
||||
fn readable_reasoning_parts(body: &ConversationBody) -> Vec<&ConversationPart> {
|
||||
body.parts
|
||||
.iter()
|
||||
.filter(|part| {
|
||||
matches!(
|
||||
part,
|
||||
ConversationPart::Text { .. } | ConversationPart::Summary { .. }
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "conversation_tests.rs"]
|
||||
mod tests;
|
||||
446
codex-rs/rollout-trace/src/reducer/conversation/normalize.rs
Normal file
446
codex-rs/rollout-trace/src/reducer/conversation/normalize.rs
Normal file
@@ -0,0 +1,446 @@
|
||||
//! Normalization from Responses-shaped JSON items into conversation item data.
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::model::ConversationBody;
|
||||
use crate::model::ConversationChannel;
|
||||
use crate::model::ConversationItemKind;
|
||||
use crate::model::ConversationPart;
|
||||
use crate::model::ConversationRole;
|
||||
use crate::model::TokenUsage;
|
||||
use crate::payload::RawPayloadRef;
|
||||
|
||||
/// Conversation fields parsed from one Responses item before trace identity.
|
||||
///
|
||||
/// IDs and provenance are assigned after positional reconciliation. Keeping the
|
||||
/// normalized data separate from `ConversationItem` makes reuse vs insertion a
|
||||
/// single reducer decision instead of something the parser has to know about.
|
||||
#[derive(Clone)]
|
||||
pub(super) struct NormalizedConversationItem {
|
||||
pub(super) role: ConversationRole,
|
||||
pub(super) channel: Option<ConversationChannel>,
|
||||
pub(super) kind: ConversationItemKind,
|
||||
pub(super) body: ConversationBody,
|
||||
pub(super) call_id: Option<String>,
|
||||
}
|
||||
|
||||
pub(super) fn normalize_model_items(
|
||||
items: &[Value],
|
||||
raw_payload: &RawPayloadRef,
|
||||
) -> Result<Vec<NormalizedConversationItem>> {
|
||||
let mut normalized_items = Vec::new();
|
||||
for item in items {
|
||||
normalized_items.push(normalize_model_item(item, raw_payload)?);
|
||||
}
|
||||
Ok(normalized_items)
|
||||
}
|
||||
|
||||
pub(super) fn token_usage_from_value(value: &Value) -> Option<TokenUsage> {
|
||||
Some(TokenUsage {
|
||||
input_tokens: u64_field(value, "input_tokens")?,
|
||||
cached_input_tokens: u64_field(value, "cached_input_tokens")?,
|
||||
output_tokens: u64_field(value, "output_tokens")?,
|
||||
reasoning_output_tokens: u64_field(value, "reasoning_output_tokens")?,
|
||||
})
|
||||
}
|
||||
|
||||
fn normalize_model_item(
|
||||
item: &Value,
|
||||
raw_payload: &RawPayloadRef,
|
||||
) -> Result<NormalizedConversationItem> {
|
||||
let Some(item_type) = item.get("type").and_then(Value::as_str) else {
|
||||
bail!(
|
||||
"model item in payload {} did not contain a string type",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
match item_type {
|
||||
"message" => normalize_message_item(item, raw_payload),
|
||||
"reasoning" => normalize_reasoning_item(item, raw_payload),
|
||||
"function_call" => Ok(NormalizedConversationItem {
|
||||
role: ConversationRole::Assistant,
|
||||
channel: Some(ConversationChannel::Commentary),
|
||||
kind: ConversationItemKind::FunctionCall,
|
||||
body: raw_text_or_json_body(item.get("arguments"), raw_payload),
|
||||
call_id: item
|
||||
.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.map(ToString::to_string),
|
||||
}),
|
||||
"function_call_output" => Ok(NormalizedConversationItem {
|
||||
role: ConversationRole::Tool,
|
||||
channel: Some(ConversationChannel::Commentary),
|
||||
kind: ConversationItemKind::FunctionCallOutput,
|
||||
body: tool_output_body(item.get("output"), raw_payload),
|
||||
call_id: item
|
||||
.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.map(ToString::to_string),
|
||||
}),
|
||||
"custom_tool_call" => Ok(NormalizedConversationItem {
|
||||
role: ConversationRole::Assistant,
|
||||
channel: Some(ConversationChannel::Commentary),
|
||||
kind: ConversationItemKind::CustomToolCall,
|
||||
body: custom_tool_call_body(item, raw_payload),
|
||||
call_id: item
|
||||
.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.map(ToString::to_string),
|
||||
}),
|
||||
"custom_tool_call_output" => Ok(NormalizedConversationItem {
|
||||
role: ConversationRole::Tool,
|
||||
channel: Some(ConversationChannel::Commentary),
|
||||
kind: ConversationItemKind::CustomToolCallOutput,
|
||||
body: tool_output_body(item.get("output"), raw_payload),
|
||||
call_id: item
|
||||
.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.map(ToString::to_string),
|
||||
}),
|
||||
"tool_search_call" | "web_search_call" | "image_generation_call" | "local_shell_call" => {
|
||||
Ok(NormalizedConversationItem {
|
||||
role: ConversationRole::Assistant,
|
||||
channel: Some(ConversationChannel::Commentary),
|
||||
kind: ConversationItemKind::FunctionCall,
|
||||
body: json_body(item, raw_payload),
|
||||
call_id: item
|
||||
.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.map(ToString::to_string),
|
||||
})
|
||||
}
|
||||
"tool_search_output" | "mcp_tool_call_output" => Ok(NormalizedConversationItem {
|
||||
role: ConversationRole::Tool,
|
||||
channel: Some(ConversationChannel::Commentary),
|
||||
kind: ConversationItemKind::FunctionCallOutput,
|
||||
body: json_body(item, raw_payload),
|
||||
call_id: item
|
||||
.get("call_id")
|
||||
.and_then(Value::as_str)
|
||||
.map(ToString::to_string),
|
||||
}),
|
||||
"compaction" | "compaction_summary" => Ok(NormalizedConversationItem {
|
||||
role: ConversationRole::Assistant,
|
||||
channel: Some(ConversationChannel::Summary),
|
||||
kind: ConversationItemKind::Message,
|
||||
body: compaction_body(item, raw_payload)?,
|
||||
call_id: None,
|
||||
}),
|
||||
_ => bail!(
|
||||
"unsupported model item type {item_type} in payload {}",
|
||||
raw_payload.raw_payload_id
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn normalize_message_item(
|
||||
item: &Value,
|
||||
raw_payload: &RawPayloadRef,
|
||||
) -> Result<NormalizedConversationItem> {
|
||||
let Some(role) = item.get("role").and_then(Value::as_str) else {
|
||||
bail!(
|
||||
"message item in payload {} did not contain a string role",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
let Some(role) = role_from_str(role) else {
|
||||
bail!(
|
||||
"unsupported message role {role} in payload {}",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
Ok(NormalizedConversationItem {
|
||||
role,
|
||||
channel: item
|
||||
.get("phase")
|
||||
.and_then(Value::as_str)
|
||||
.and_then(channel_from_phase),
|
||||
kind: ConversationItemKind::Message,
|
||||
body: ConversationBody {
|
||||
parts: content_parts(item.get("content"), raw_payload),
|
||||
},
|
||||
call_id: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn normalize_reasoning_item(
|
||||
item: &Value,
|
||||
raw_payload: &RawPayloadRef,
|
||||
) -> Result<NormalizedConversationItem> {
|
||||
let mut parts = Vec::new();
|
||||
append_reasoning_parts(
|
||||
item,
|
||||
"content",
|
||||
ReasoningPartKind::Content,
|
||||
raw_payload,
|
||||
&mut parts,
|
||||
)?;
|
||||
append_reasoning_parts(
|
||||
item,
|
||||
"summary",
|
||||
ReasoningPartKind::Summary,
|
||||
raw_payload,
|
||||
&mut parts,
|
||||
)?;
|
||||
|
||||
if let Some(encrypted_content) = item.get("encrypted_content") {
|
||||
let encrypted_content = match encrypted_content {
|
||||
Value::Null => None,
|
||||
Value::String(encrypted_content) => Some(encrypted_content),
|
||||
_ => {
|
||||
bail!(
|
||||
"reasoning item in payload {} had non-string encrypted_content",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
}
|
||||
};
|
||||
if let Some(encrypted_content) = encrypted_content {
|
||||
parts.push(ConversationPart::Encoded {
|
||||
label: "encrypted_content".to_string(),
|
||||
value: encrypted_content.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if parts.is_empty() {
|
||||
bail!(
|
||||
"reasoning item in payload {} contained no content, summary, or encrypted_content",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
}
|
||||
|
||||
Ok(NormalizedConversationItem {
|
||||
role: ConversationRole::Assistant,
|
||||
channel: Some(ConversationChannel::Analysis),
|
||||
kind: ConversationItemKind::Reasoning,
|
||||
body: ConversationBody { parts },
|
||||
call_id: None,
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
enum ReasoningPartKind {
|
||||
Content,
|
||||
Summary,
|
||||
}
|
||||
|
||||
fn append_reasoning_parts(
|
||||
item: &Value,
|
||||
key: &str,
|
||||
kind: ReasoningPartKind,
|
||||
raw_payload: &RawPayloadRef,
|
||||
parts: &mut Vec<ConversationPart>,
|
||||
) -> Result<()> {
|
||||
let Some(items) = item.get(key) else {
|
||||
return Ok(());
|
||||
};
|
||||
if matches!((kind, items), (ReasoningPartKind::Content, Value::Null)) {
|
||||
return Ok(());
|
||||
}
|
||||
let Some(items) = items.as_array() else {
|
||||
bail!(
|
||||
"reasoning item in payload {} had non-array {key}",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
|
||||
for content_item in items {
|
||||
let Some(item_type) = content_item.get("type").and_then(Value::as_str) else {
|
||||
bail!(
|
||||
"reasoning item in payload {} had {key} entry without string type",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
let expected_type = match kind {
|
||||
ReasoningPartKind::Content => {
|
||||
if !matches!(item_type, "reasoning_text" | "text") {
|
||||
bail!(
|
||||
"reasoning item in payload {} had unsupported content type {item_type}",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
}
|
||||
"content"
|
||||
}
|
||||
ReasoningPartKind::Summary => {
|
||||
if item_type != "summary_text" {
|
||||
bail!(
|
||||
"reasoning item in payload {} had unsupported summary type {item_type}",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
}
|
||||
"summary"
|
||||
}
|
||||
};
|
||||
|
||||
let Some(text) = content_item.get("text").and_then(Value::as_str) else {
|
||||
bail!(
|
||||
"reasoning item in payload {} had {expected_type} entry without string text",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
match kind {
|
||||
ReasoningPartKind::Content => parts.push(ConversationPart::Text {
|
||||
text: text.to_string(),
|
||||
}),
|
||||
ReasoningPartKind::Summary => parts.push(ConversationPart::Summary {
|
||||
text: text.to_string(),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn role_from_str(role: &str) -> Option<ConversationRole> {
|
||||
match role {
|
||||
"system" => Some(ConversationRole::System),
|
||||
"developer" => Some(ConversationRole::Developer),
|
||||
"user" => Some(ConversationRole::User),
|
||||
"assistant" => Some(ConversationRole::Assistant),
|
||||
"tool" => Some(ConversationRole::Tool),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn channel_from_phase(phase: &str) -> Option<ConversationChannel> {
|
||||
match phase {
|
||||
"commentary" => Some(ConversationChannel::Commentary),
|
||||
"final_answer" => Some(ConversationChannel::Final),
|
||||
"summary" => Some(ConversationChannel::Summary),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn content_parts(content: Option<&Value>, raw_payload: &RawPayloadRef) -> Vec<ConversationPart> {
|
||||
let Some(content) = content.and_then(Value::as_array) else {
|
||||
return vec![payload_ref_part("content", raw_payload)];
|
||||
};
|
||||
|
||||
let mut parts = Vec::new();
|
||||
for part in content {
|
||||
match part.get("type").and_then(Value::as_str) {
|
||||
Some("input_text" | "output_text" | "text") => {
|
||||
if let Some(text) = part.get("text").and_then(Value::as_str) {
|
||||
parts.push(ConversationPart::Text {
|
||||
text: text.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
Some("input_image") => parts.push(payload_ref_part("input_image", raw_payload)),
|
||||
Some(other) => parts.push(payload_ref_part(other, raw_payload)),
|
||||
None => parts.push(payload_ref_part("content", raw_payload)),
|
||||
}
|
||||
}
|
||||
|
||||
if parts.is_empty() {
|
||||
parts.push(payload_ref_part("empty_content", raw_payload));
|
||||
}
|
||||
parts
|
||||
}
|
||||
|
||||
fn custom_tool_call_body(item: &Value, raw_payload: &RawPayloadRef) -> ConversationBody {
|
||||
let Some(input) = item.get("input").and_then(Value::as_str) else {
|
||||
return json_body(item, raw_payload);
|
||||
};
|
||||
if item.get("name").and_then(Value::as_str) == Some("exec") {
|
||||
ConversationBody {
|
||||
parts: vec![ConversationPart::Code {
|
||||
language: "javascript".to_string(),
|
||||
source: input.to_string(),
|
||||
}],
|
||||
}
|
||||
} else {
|
||||
ConversationBody {
|
||||
parts: vec![ConversationPart::Text {
|
||||
text: input.to_string(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn raw_text_or_json_body(value: Option<&Value>, raw_payload: &RawPayloadRef) -> ConversationBody {
|
||||
match value {
|
||||
Some(Value::String(text)) => {
|
||||
if let Ok(json) = serde_json::from_str::<Value>(text) {
|
||||
json_body(&json, raw_payload)
|
||||
} else {
|
||||
ConversationBody {
|
||||
parts: vec![ConversationPart::Text { text: text.clone() }],
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(value) => json_body(value, raw_payload),
|
||||
None => ConversationBody {
|
||||
parts: vec![payload_ref_part("payload", raw_payload)],
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_output_body(output: Option<&Value>, raw_payload: &RawPayloadRef) -> ConversationBody {
|
||||
match output {
|
||||
Some(Value::String(text)) => ConversationBody {
|
||||
parts: vec![ConversationPart::Text { text: text.clone() }],
|
||||
},
|
||||
Some(Value::Array(_)) => ConversationBody {
|
||||
parts: content_parts(output, raw_payload),
|
||||
},
|
||||
Some(value) => json_body(value, raw_payload),
|
||||
None => ConversationBody {
|
||||
parts: vec![payload_ref_part("tool_output", raw_payload)],
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn compaction_body(item: &Value, raw_payload: &RawPayloadRef) -> Result<ConversationBody> {
|
||||
let Some(encrypted_content) = item.get("encrypted_content").and_then(Value::as_str) else {
|
||||
bail!(
|
||||
"compaction item in payload {} did not contain string encrypted_content",
|
||||
raw_payload.raw_payload_id
|
||||
);
|
||||
};
|
||||
// `type: "compaction"` is the remote-compaction summary that later re-enters model requests.
|
||||
// The structural "history was cut here" marker is inserted separately when the checkpoint is
|
||||
// installed; payload refs are observation-local, so the encoded summary itself is identity.
|
||||
Ok(ConversationBody {
|
||||
parts: vec![ConversationPart::Encoded {
|
||||
label: "encrypted_content".to_string(),
|
||||
value: encrypted_content.to_string(),
|
||||
}],
|
||||
})
|
||||
}
|
||||
|
||||
fn json_body(value: &Value, raw_payload: &RawPayloadRef) -> ConversationBody {
|
||||
ConversationBody {
|
||||
parts: vec![ConversationPart::Json {
|
||||
summary: summarize_json(value),
|
||||
raw_payload_id: raw_payload.raw_payload_id.clone(),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
fn payload_ref_part(label: &str, raw_payload: &RawPayloadRef) -> ConversationPart {
|
||||
ConversationPart::PayloadRef {
|
||||
label: label.to_string(),
|
||||
raw_payload_id: raw_payload.raw_payload_id.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn summarize_json(value: &Value) -> String {
|
||||
const MAX_JSON_SUMMARY_LEN: usize = 240;
|
||||
let mut summary =
|
||||
serde_json::to_string(value).unwrap_or_else(|_| "<unserializable json>".to_string());
|
||||
if summary.len() > MAX_JSON_SUMMARY_LEN {
|
||||
summary.truncate(MAX_JSON_SUMMARY_LEN);
|
||||
summary.push_str("...");
|
||||
}
|
||||
summary
|
||||
}
|
||||
|
||||
fn u64_field(value: &Value, field: &str) -> Option<u64> {
|
||||
value
|
||||
.get(field)
|
||||
.and_then(Value::as_i64)
|
||||
.map(|value| value.max(0) as u64)
|
||||
}
|
||||
808
codex-rs/rollout-trace/src/reducer/conversation_tests.rs
Normal file
808
codex-rs/rollout-trace/src/reducer/conversation_tests.rs
Normal file
@@ -0,0 +1,808 @@
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::model::ConversationChannel;
|
||||
use crate::model::ConversationItemKind;
|
||||
use crate::model::ConversationPart;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::ProducerRef;
|
||||
use crate::model::ToolCallKind;
|
||||
use crate::model::ToolCallSummary;
|
||||
use crate::payload::RawPayloadKind;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
use crate::reducer::test_support::append_inference_completion;
|
||||
use crate::reducer::test_support::append_inference_start;
|
||||
use crate::reducer::test_support::create_started_writer;
|
||||
use crate::reducer::test_support::expect_replay_error;
|
||||
use crate::reducer::test_support::message;
|
||||
use crate::reducer::test_support::start_turn;
|
||||
use crate::reducer::test_support::trace_context;
|
||||
use crate::replay_bundle;
|
||||
|
||||
#[test]
|
||||
fn request_snapshots_reuse_history_without_deduping_new_identical_items() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let writer = create_started_writer(&temp)?;
|
||||
start_turn(&writer, "turn-1")?;
|
||||
|
||||
let first_request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"input": [message("user", "ok")]
|
||||
}),
|
||||
)?;
|
||||
append_inference_start(&writer, "inference-1", "turn-1", first_request)?;
|
||||
start_turn(&writer, "turn-2")?;
|
||||
|
||||
let second_request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"input": [
|
||||
message("user", "ok"),
|
||||
message("assistant", "ack"),
|
||||
message("user", "ok")
|
||||
]
|
||||
}),
|
||||
)?;
|
||||
append_inference_start(&writer, "inference-2", "turn-2", second_request)?;
|
||||
|
||||
let rollout = replay_bundle(temp.path())?;
|
||||
let first = &rollout.inference_calls["inference-1"].request_item_ids;
|
||||
let second = &rollout.inference_calls["inference-2"].request_item_ids;
|
||||
|
||||
assert_eq!(first.len(), 1);
|
||||
assert_eq!(second.len(), 3);
|
||||
assert_eq!(second[0], first[0]);
|
||||
assert_ne!(second[2], first[0]);
|
||||
assert_eq!(rollout.conversation_items.len(), 3);
|
||||
assert_eq!(
|
||||
rollout.threads["thread-root"].conversation_item_ids,
|
||||
*second
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn response_outputs_enter_thread_conversation_on_completion() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let writer = create_started_writer(&temp)?;
|
||||
start_turn(&writer, "turn-1")?;
|
||||
|
||||
let request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"input": [message("user", "run tests")]
|
||||
}),
|
||||
)?;
|
||||
append_inference_start(&writer, "inference-1", "turn-1", request)?;
|
||||
|
||||
let response = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceResponse,
|
||||
&json!({
|
||||
"response_id": "resp-1",
|
||||
"output_items": [
|
||||
{
|
||||
"type": "message",
|
||||
"role": "assistant",
|
||||
"content": [{"type": "output_text", "text": "tests passed"}]
|
||||
}
|
||||
]
|
||||
}),
|
||||
)?;
|
||||
append_inference_completion(&writer, "inference-1", "resp-1", response)?;
|
||||
|
||||
let rollout = replay_bundle(temp.path())?;
|
||||
let inference = &rollout.inference_calls["inference-1"];
|
||||
let mut expected_thread_items = inference.request_item_ids.clone();
|
||||
expected_thread_items.extend(inference.response_item_ids.clone());
|
||||
|
||||
assert_eq!(inference.response_item_ids.len(), 1);
|
||||
assert_eq!(
|
||||
rollout.threads["thread-root"].conversation_item_ids,
|
||||
expected_thread_items,
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn later_full_request_reuses_prior_json_tool_call_by_position() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let writer = create_started_writer(&temp)?;
|
||||
start_turn(&writer, "turn-1")?;
|
||||
|
||||
let request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"input": [message("user", "run tests")]
|
||||
}),
|
||||
)?;
|
||||
append_inference_start(&writer, "inference-1", "turn-1", request)?;
|
||||
|
||||
let response = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceResponse,
|
||||
&json!({
|
||||
"response_id": "resp-1",
|
||||
"output_items": [{
|
||||
"type": "function_call",
|
||||
"name": "shell",
|
||||
"arguments": "{\"cmd\":\"cargo test\"}",
|
||||
"call_id": "call-1"
|
||||
}]
|
||||
}),
|
||||
)?;
|
||||
append_inference_completion(&writer, "inference-1", "resp-1", response)?;
|
||||
start_turn(&writer, "turn-2")?;
|
||||
|
||||
let next_request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"input": [
|
||||
message("user", "run tests"),
|
||||
{
|
||||
"type": "function_call",
|
||||
"name": "shell",
|
||||
"arguments": "{\"cmd\":\"cargo test\"}",
|
||||
"call_id": "call-1"
|
||||
}
|
||||
]
|
||||
}),
|
||||
)?;
|
||||
append_inference_start(&writer, "inference-2", "turn-2", next_request)?;
|
||||
|
||||
let rollout = replay_bundle(temp.path())?;
|
||||
let first = &rollout.inference_calls["inference-1"];
|
||||
let second = &rollout.inference_calls["inference-2"];
|
||||
|
||||
assert_eq!(
|
||||
second.request_item_ids,
|
||||
vec![
|
||||
first.request_item_ids[0].clone(),
|
||||
first.response_item_ids[0].clone(),
|
||||
],
|
||||
);
|
||||
assert_eq!(rollout.conversation_items.len(), 2);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// An incremental request (previous_response_id + delta input) must extend the
// prior request/response item sequence rather than mint fresh items for it.
#[test]
fn incremental_request_carries_prior_request_and_response_items_forward() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    // Turn 1: one user message in, one function call out.
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "run tests")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;

    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "token_usage": {
                "input_tokens": 10,
                "cached_input_tokens": 1,
                "output_tokens": 5,
                "reasoning_output_tokens": 2,
                "total_tokens": 15
            },
            "output_items": [
                {
                    "type": "function_call",
                    "name": "shell",
                    "arguments": "{\"cmd\":\"cargo test\"}",
                    "call_id": "call-1"
                }
            ]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    start_turn(&writer, "turn-2")?;

    // Turn 2: incremental request that only carries the tool output delta.
    let incremental_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "type": "response.create",
            "previous_response_id": "resp-1",
            "input": [
                {
                    "type": "function_call_output",
                    "call_id": "call-1",
                    "output": "tests passed"
                }
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", incremental_request)?;

    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"];
    let second = &rollout.inference_calls["inference-2"];

    assert_eq!(first.response_item_ids.len(), 1);
    // Second request = first request item + first response item + the new delta item.
    assert_eq!(
        second.request_item_ids,
        vec![
            first.request_item_ids[0].clone(),
            first.response_item_ids[0].clone(),
            rollout.threads["thread-root"].conversation_item_ids[2].clone(),
        ],
    );
    // The thread transcript and the latest model-visible snapshot must agree.
    assert_eq!(
        rollout.threads["thread-root"].conversation_item_ids,
        second.request_item_ids,
    );
    // token_usage from the response payload is surfaced on the inference call.
    assert_eq!(
        first.usage.as_ref().map(|usage| usage.input_tokens),
        Some(10),
    );

    Ok(())
}
|
||||
|
||||
// A full request snapshot may reorder previously seen items and interleave a
// brand-new one; item ids are reused only when the same normalized item lands
// at a position consistent with reuse, and the new item gets a fresh id.
#[test]
fn full_request_snapshot_can_reorder_existing_items_and_insert_summary() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                message("developer", "follow the repo rules"),
                message("user", "count files")
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    start_turn(&writer, "turn-2")?;

    // Second snapshot: same two messages in a different order, plus a summary
    // message inserted between them.
    let compacted_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                message("user", "count files"),
                message("user", "summary from a compacted prior attempt"),
                message("developer", "follow the repo rules")
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", compacted_request)?;

    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"].request_item_ids;
    let second = &rollout.inference_calls["inference-2"].request_item_ids;

    // Reordered items keep their original ids...
    assert_eq!(second[0], first[1]);
    assert_eq!(second[2], first[0]);
    // ...while the inserted summary is a distinct new item.
    assert_ne!(second[1], first[0]);
    assert_ne!(second[1], first[1]);
    assert_eq!(rollout.conversation_items.len(), 3);

    Ok(())
}
|
||||
|
||||
// A reasoning output item must be reduced into ordered parts that preserve all
// three payload facets: readable text, summary text, and the encrypted blob.
#[test]
fn reasoning_body_preserves_text_summary_and_encoded_content() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "think visibly")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;

    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "reasoning",
                "content": [{"type": "reasoning_text", "text": "raw reasoning"}],
                "summary": [{"type": "summary_text", "text": "brief summary"}],
                "encrypted_content": "encoded-reasoning"
            }]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;

    let rollout = replay_bundle(temp.path())?;
    let reasoning_item_id = &rollout.inference_calls["inference-1"].response_item_ids[0];

    // Parts are ordered: readable content, then summary, then the encoded blob.
    assert_eq!(
        rollout.conversation_items[reasoning_item_id].body.parts,
        vec![
            ConversationPart::Text {
                text: "raw reasoning".to_string(),
            },
            ConversationPart::Summary {
                text: "brief summary".to_string(),
            },
            ConversationPart::Encoded {
                label: "encrypted_content".to_string(),
                value: "encoded-reasoning".to_string(),
            },
        ],
    );

    Ok(())
}
|
||||
|
||||
// Replaying reasoning back to the model usually strips the readable content and
// keeps only the encrypted blob. The reducer must recognize the stripped form
// as the same item (matched via encrypted_content) instead of minting a new one.
#[test]
fn encrypted_reasoning_reuses_response_item_in_later_request() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let user = message("user", "count files");
    let function_call = json!({
        "type": "function_call",
        "name": "shell",
        "arguments": "{\"cmd\":\"find . -maxdepth 1 -type f | wc -l\"}",
        "call_id": "call-1"
    });
    // Stripped form: no readable content, same encrypted blob.
    let encrypted_reasoning = json!({
        "type": "reasoning",
        "summary": [],
        "encrypted_content": "encoded-reasoning"
    });
    // Full form as it appeared in the model response.
    let readable_reasoning = json!({
        "type": "reasoning",
        "content": [{"type": "text", "text": "need count"}],
        "summary": [],
        "encrypted_content": "encoded-reasoning"
    });

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [user]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;

    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [
                readable_reasoning,
                function_call
            ]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    start_turn(&writer, "turn-2")?;

    // Follow-up full snapshot replays history with the stripped reasoning form.
    let followup = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                user,
                encrypted_reasoning,
                function_call,
                {
                    "type": "function_call_output",
                    "call_id": "call-1",
                    "output": "31\n"
                }
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", followup)?;

    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"];
    let second = &rollout.inference_calls["inference-2"];
    let output_item_id = rollout.threads["thread-root"].conversation_item_ids[3].clone();

    // Everything but the tool output reuses the first call's item ids.
    assert_eq!(
        second.request_item_ids,
        vec![
            first.request_item_ids[0].clone(),
            first.response_item_ids[0].clone(),
            first.response_item_ids[1].clone(),
            output_item_id,
        ],
    );
    // The reused item keeps the readable text captured from the response.
    assert_eq!(
        rollout.conversation_items[&first.response_item_ids[0]]
            .body
            .parts,
        vec![
            ConversationPart::Text {
                text: "need count".to_string(),
            },
            ConversationPart::Encoded {
                label: "encrypted_content".to_string(),
                value: "encoded-reasoning".to_string(),
            },
        ],
    );
    assert_eq!(rollout.conversation_items.len(), 4);
    assert_eq!(
        rollout.threads["thread-root"].conversation_item_ids,
        second.request_item_ids,
    );

    Ok(())
}
|
||||
|
||||
// Reusing the same encrypted_content with *different* readable text would make
// the "match by encrypted blob" reuse rule ambiguous, so replay must fail
// loudly instead of silently picking one version.
#[test]
fn same_encrypted_reasoning_with_different_text_is_reducer_error() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let user = message("user", "count files");
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [user]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;

    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "reasoning",
                "content": [{"type": "text", "text": "first text"}],
                "summary": [],
                "encrypted_content": "encoded-reasoning"
            }]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    start_turn(&writer, "turn-2")?;

    // Same encrypted blob, conflicting readable content.
    let conflicting_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                user,
                {
                    "type": "reasoning",
                    "content": [{"type": "text", "text": "different text"}],
                    "summary": [],
                    "encrypted_content": "encoded-reasoning"
                }
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", conflicting_request)?;

    expect_replay_error(
        &temp,
        "reasoning encrypted_content was reused with different readable content",
    )
}
|
||||
|
||||
// call_id is the durable identity of a model-visible function call; replay must
// reject a trace that reuses a call_id with different arguments.
#[test]
fn model_visible_call_id_reuse_with_different_content_is_reducer_error() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [{
                "type": "function_call",
                "name": "shell",
                "arguments": "{\"cmd\":\"cargo test\"}",
                "call_id": "call-1"
            }]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;
    start_turn(&writer, "turn-2")?;

    // Same call_id, different arguments — must be rejected.
    let conflicting_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [{
                "type": "function_call",
                "name": "shell",
                "arguments": "{\"cmd\":\"cargo check\"}",
                "call_id": "call-1"
            }]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", conflicting_request)?;

    expect_replay_error(
        &temp,
        "model-visible call id call-1 was reused with different content",
    )
}
|
||||
|
||||
// Unknown item types must fail replay rather than be silently dropped — a
// skipped item would corrupt the reduced transcript without any signal.
#[test]
fn unsupported_model_item_is_reducer_error() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [
                {
                    "type": "new_unhandled_model_item",
                    "payload": "must not be silently skipped"
                }
            ]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;

    expect_replay_error(
        &temp,
        "unsupported model item type new_unhandled_model_item",
    )
}
|
||||
|
||||
// A request payload without an "input" array cannot be normalized into
// conversation items, so replay must surface it as an error.
#[test]
fn missing_request_input_is_reducer_error() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "model": "gpt-test"
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;

    expect_replay_error(&temp, "did not contain input")
}
|
||||
|
||||
// An incremental request can only extend a response the reducer has seen;
// a dangling previous_response_id must fail replay.
#[test]
fn unknown_previous_response_id_is_reducer_error() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "previous_response_id": "resp-missing",
            "input": [message("user", "still here")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;

    expect_replay_error(&temp, "unknown previous_response_id resp-missing")
}
|
||||
|
||||
// After a compaction checkpoint is installed, the next full request compares
// against the installed replacement history: repeated prefix items become
// fresh post-compaction items, while replacement/summary items are reused and
// attributed to the compaction.
#[test]
fn compaction_boundary_repeats_prefix_and_reuses_replacement_items() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let developer = message("developer", "follow repo rules");
    let user = message("user", "count files");
    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [developer, user]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;

    // Install a compaction that replaces [developer, user] with
    // [user, summary, encrypted compaction marker].
    let summary = message("user", "summary from compacted history");
    let compaction_summary = json!({
        "type": "compaction",
        "encrypted_content": "encrypted-summary",
    });
    let checkpoint = writer.write_json_payload(
        RawPayloadKind::CompactionCheckpoint,
        &json!({
            "input_history": [developer, user],
            "replacement_history": [user, summary, compaction_summary]
        }),
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::CompactionInstalled {
            compaction_id: "compaction-1".to_string(),
            checkpoint_payload: checkpoint,
        },
    )?;

    start_turn(&writer, "turn-2")?;
    let post_compaction_request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [developer, user, summary, compaction_summary]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", post_compaction_request)?;

    let rollout = replay_bundle(temp.path())?;
    let first = &rollout.inference_calls["inference-1"];
    let second = &rollout.inference_calls["inference-2"];
    let compaction = &rollout.compactions["compaction-1"];

    // The compaction's recorded input is exactly the pre-compaction request.
    assert_eq!(compaction.input_item_ids, first.request_item_ids);
    assert_eq!(second.request_item_ids.len(), 4);
    // Everything after the repeated prefix reuses the replacement items.
    assert_eq!(
        &second.request_item_ids[1..],
        compaction.replacement_item_ids.as_slice()
    );
    // The boundary marker is an empty-bodied item owned by the compaction.
    let marker = &rollout.conversation_items[&compaction.marker_item_id];
    assert_eq!(marker.kind, ConversationItemKind::CompactionMarker);
    assert_eq!(marker.body.parts, Vec::<ConversationPart>::new());
    assert_eq!(
        marker.produced_by,
        vec![ProducerRef::Compaction {
            compaction_id: "compaction-1".to_string()
        }],
    );
    // Repeated prefix/context messages get fresh post-compaction ids.
    assert_ne!(second.request_item_ids[0], first.request_item_ids[0]);
    assert_ne!(
        compaction.replacement_item_ids[0],
        first.request_item_ids[1]
    );
    // Replacement items are attributed to the compaction producer.
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[0]].produced_by,
        vec![ProducerRef::Compaction {
            compaction_id: "compaction-1".to_string()
        }],
    );
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[1]].produced_by,
        vec![ProducerRef::Compaction {
            compaction_id: "compaction-1".to_string()
        }],
    );
    // The encrypted compaction summary is a Summary-channel message whose only
    // part is the encoded blob.
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[2]].channel,
        Some(ConversationChannel::Summary),
    );
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[2]].kind,
        ConversationItemKind::Message,
    );
    assert_eq!(
        rollout.conversation_items[&compaction.replacement_item_ids[2]]
            .body
            .parts,
        vec![ConversationPart::Encoded {
            label: "encrypted_content".to_string(),
            value: "encrypted-summary".to_string(),
        }],
    );

    Ok(())
}
|
||||
|
||||
// A tool call started by a model response must be linked in both directions:
// to the response item that requested it and to the follow-up request item
// that carried its output back to the model.
#[test]
fn tool_call_links_model_call_and_followup_output_items() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "run tests")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-1", request)?;

    let response = writer.write_json_payload(
        RawPayloadKind::InferenceResponse,
        &json!({
            "response_id": "resp-1",
            "output_items": [{
                "type": "function_call",
                "name": "exec_command",
                "arguments": "{\"cmd\":\"cargo test\"}",
                "call_id": "call-1"
            }]
        }),
    )?;
    append_inference_completion(&writer, "inference-1", "resp-1", response)?;
    // Runtime tool lifecycle for the call the model just requested.
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-1".to_string(),
            model_visible_call_id: Some("call-1".to_string()),
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::ExecCommand,
            summary: ToolCallSummary::Generic {
                label: "exec_command".to_string(),
                input_preview: Some("cargo test".to_string()),
                output_preview: None,
            },
            invocation_payload: None,
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "tool-1".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: None,
        },
    )?;

    start_turn(&writer, "turn-2")?;
    // Incremental follow-up delivering the tool output to the model.
    let followup = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "previous_response_id": "resp-1",
            "input": [{
                "type": "function_call_output",
                "call_id": "call-1",
                "output": "tests passed"
            }]
        }),
    )?;
    append_inference_start(&writer, "inference-2", "turn-2", followup)?;

    let rollout = replay_bundle(temp.path())?;
    let first_inference = &rollout.inference_calls["inference-1"];
    let second_inference = &rollout.inference_calls["inference-2"];
    let tool_call = &rollout.tool_calls["tool-1"];
    let output_item_id = second_inference
        .request_item_ids
        .last()
        .expect("follow-up output item");

    // Response → tool linkage.
    assert_eq!(
        first_inference.tool_call_ids_started_by_response,
        vec!["tool-1".to_string()],
    );
    // Tool → model-visible call item linkage.
    assert_eq!(
        tool_call.model_visible_call_item_ids,
        first_inference.response_item_ids,
    );
    // Tool → model-visible output item linkage.
    assert_eq!(
        tool_call.model_visible_output_item_ids,
        vec![output_item_id.clone()],
    );
    // The output item is attributed to the tool that produced it.
    assert_eq!(
        rollout.conversation_items[output_item_id].produced_by,
        vec![ProducerRef::Tool {
            tool_call_id: "tool-1".to_string(),
        }],
    );

    Ok(())
}
|
||||
|
||||
// An inference start must reference a turn the reducer has already seen;
// here no turn is ever started, so replay must fail.
#[test]
fn inference_start_rejects_unknown_codex_turn() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;

    let request = writer.write_json_payload(
        RawPayloadKind::InferenceRequest,
        &json!({
            "input": [message("user", "hello")]
        }),
    )?;
    append_inference_start(&writer, "inference-1", "turn-missing", request)?;

    expect_replay_error(&temp, "referenced unknown codex turn turn-missing")
}
|
||||
143
codex-rs/rollout-trace/src/reducer/inference.rs
Normal file
143
codex-rs/rollout-trace/src/reducer/inference.rs
Normal file
@@ -0,0 +1,143 @@
|
||||
//! Inference call lifecycle reduction.
|
||||
//!
|
||||
//! Conversation request/response normalization lives in the conversation module;
|
||||
//! this module owns the runtime envelope around those model-facing payloads.
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
|
||||
use super::TraceReducer;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::ExecutionWindow;
|
||||
use crate::model::InferenceCall;
|
||||
use crate::model::InferenceCallId;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RawEventSeq;
|
||||
|
||||
/// Raw inference-start fields after dispatch has stripped the common event envelope.
///
/// Keeping this as one argument prevents callsites from passing a long list of
/// adjacent strings whose ordering is easy to mix up.
pub(super) struct StartedInferenceCall {
    // Unique id for this inference call within the trace.
    pub(super) inference_call_id: InferenceCallId,
    // Thread the call claims to run in; validated against the turn's thread.
    pub(super) thread_id: String,
    // Codex turn that issued the call; must already exist in the rollout.
    pub(super) codex_turn_id: String,
    // Model and provider as reported by the raw event.
    pub(super) model: String,
    pub(super) provider_name: String,
    // Reference to the serialized request body on disk.
    pub(super) request_payload: RawPayloadRef,
}
|
||||
|
||||
impl TraceReducer {
|
||||
/// Starts an inference call and reduces its request payload into conversation items.
|
||||
///
|
||||
/// Requests are model-visible transcript evidence, so the inference object is only
|
||||
/// inserted after the request snapshot has been normalized and linked to the turn.
|
||||
pub(super) fn start_inference_call(
|
||||
&mut self,
|
||||
seq: RawEventSeq,
|
||||
wall_time_unix_ms: i64,
|
||||
started: StartedInferenceCall,
|
||||
) -> Result<()> {
|
||||
if self
|
||||
.rollout
|
||||
.inference_calls
|
||||
.contains_key(&started.inference_call_id)
|
||||
{
|
||||
bail!(
|
||||
"duplicate inference start for {}",
|
||||
started.inference_call_id
|
||||
);
|
||||
}
|
||||
|
||||
let inference_call_id = started.inference_call_id.clone();
|
||||
let thread_id = started.thread_id.clone();
|
||||
let codex_turn_id = started.codex_turn_id.clone();
|
||||
let request_payload = started.request_payload.clone();
|
||||
let Some(turn) = self.rollout.codex_turns.get(&codex_turn_id) else {
|
||||
bail!(
|
||||
"inference start {inference_call_id} referenced unknown codex turn {codex_turn_id}"
|
||||
);
|
||||
};
|
||||
if turn.thread_id != thread_id {
|
||||
bail!(
|
||||
"inference start {inference_call_id} used thread {thread_id}, \
|
||||
but codex turn {codex_turn_id} belongs to {}",
|
||||
turn.thread_id
|
||||
);
|
||||
}
|
||||
|
||||
let request_item_ids = self.reduce_inference_request(
|
||||
wall_time_unix_ms,
|
||||
&inference_call_id,
|
||||
&thread_id,
|
||||
&codex_turn_id,
|
||||
&request_payload,
|
||||
)?;
|
||||
|
||||
self.thread_mut(&thread_id)?;
|
||||
|
||||
self.rollout.inference_calls.insert(
|
||||
inference_call_id.clone(),
|
||||
InferenceCall {
|
||||
inference_call_id,
|
||||
thread_id,
|
||||
codex_turn_id,
|
||||
execution: ExecutionWindow {
|
||||
started_at_unix_ms: wall_time_unix_ms,
|
||||
started_seq: seq,
|
||||
ended_at_unix_ms: None,
|
||||
ended_seq: None,
|
||||
status: ExecutionStatus::Running,
|
||||
},
|
||||
model: started.model,
|
||||
provider_name: started.provider_name,
|
||||
upstream_request_id: None,
|
||||
request_item_ids,
|
||||
response_item_ids: Vec::new(),
|
||||
tool_call_ids_started_by_response: Vec::new(),
|
||||
usage: None,
|
||||
raw_request_payload_id: started.request_payload.raw_payload_id,
|
||||
raw_response_payload_id: None,
|
||||
},
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Completes an inference call and, when present, reduces response output items.
|
||||
pub(super) fn complete_inference_call(
|
||||
&mut self,
|
||||
seq: RawEventSeq,
|
||||
wall_time_unix_ms: i64,
|
||||
inference_call_id: InferenceCallId,
|
||||
status: ExecutionStatus,
|
||||
response_id: Option<String>,
|
||||
response_payload: Option<RawPayloadRef>,
|
||||
) -> Result<()> {
|
||||
if !self
|
||||
.rollout
|
||||
.inference_calls
|
||||
.contains_key(&inference_call_id)
|
||||
{
|
||||
bail!("inference completion referenced unknown call {inference_call_id}");
|
||||
}
|
||||
|
||||
let response_item_ids = response_payload
|
||||
.as_ref()
|
||||
.map(|payload| {
|
||||
self.reduce_inference_response(wall_time_unix_ms, &inference_call_id, payload)
|
||||
})
|
||||
.transpose()?;
|
||||
let Some(inference) = self.rollout.inference_calls.get_mut(&inference_call_id) else {
|
||||
bail!("inference call {inference_call_id} disappeared during response reduction");
|
||||
};
|
||||
inference.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
|
||||
inference.execution.ended_seq = Some(seq);
|
||||
inference.execution.status = status;
|
||||
inference.upstream_request_id = response_id;
|
||||
inference.raw_response_payload_id = response_payload.map(|payload| payload.raw_payload_id);
|
||||
if let Some(response_item_ids) = response_item_ids {
|
||||
inference.response_item_ids = response_item_ids;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
504
codex-rs/rollout-trace/src/reducer/mod.rs
Normal file
504
codex-rs/rollout-trace/src/reducer/mod.rs
Normal file
@@ -0,0 +1,504 @@
|
||||
//! Deterministic replay from raw trace events to `RolloutTrace`.
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufRead;
|
||||
use std::io::BufReader;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::bundle::MANIFEST_FILE_NAME;
|
||||
use crate::bundle::RAW_EVENT_LOG_FILE_NAME;
|
||||
use crate::bundle::REDUCED_TRACE_SCHEMA_VERSION;
|
||||
use crate::bundle::TraceBundleManifest;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::RolloutTrace;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RawTraceEvent;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
|
||||
mod code_cell;
|
||||
mod compaction;
|
||||
mod conversation;
|
||||
mod inference;
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test_support;
|
||||
mod thread;
|
||||
mod tool;
|
||||
|
||||
use self::code_cell::PendingCodeCellLifecycleEvent;
|
||||
use self::code_cell::PendingCodeCellStart;
|
||||
use self::code_cell::StartedCodeCell;
|
||||
use self::compaction::StartedCompactionRequest;
|
||||
use self::inference::StartedInferenceCall;
|
||||
use self::tool::ObservedAgentResultEdge;
|
||||
use self::tool::PendingAgentInteractionEdge;
|
||||
use self::tool::ToolCallStarted;
|
||||
|
||||
/// Replays a local trace bundle into a reduced rollout graph.
|
||||
pub fn replay_bundle(bundle_dir: impl AsRef<Path>) -> Result<RolloutTrace> {
|
||||
let bundle_dir = bundle_dir.as_ref();
|
||||
let manifest: TraceBundleManifest =
|
||||
serde_json::from_reader(File::open(bundle_dir.join(MANIFEST_FILE_NAME))?)
|
||||
.with_context(|| format!("read {}", bundle_dir.join(MANIFEST_FILE_NAME).display()))?;
|
||||
let mut reducer = TraceReducer {
|
||||
rollout: RolloutTrace::new(
|
||||
REDUCED_TRACE_SCHEMA_VERSION,
|
||||
manifest.trace_id,
|
||||
manifest.rollout_id,
|
||||
manifest.root_thread_id,
|
||||
manifest.started_at_unix_ms,
|
||||
),
|
||||
bundle_dir: bundle_dir.to_path_buf(),
|
||||
next_conversation_item_ordinal: 1,
|
||||
next_terminal_operation_ordinal: 1,
|
||||
thread_conversation_snapshots: BTreeMap::new(),
|
||||
pending_compaction_replacement_item_ids: BTreeMap::new(),
|
||||
code_cell_ids_by_runtime: BTreeMap::new(),
|
||||
pending_code_cell_starts: BTreeMap::new(),
|
||||
pending_code_cell_lifecycle_events: BTreeMap::new(),
|
||||
pending_agent_interaction_edges: Vec::new(),
|
||||
};
|
||||
|
||||
let event_log_path = bundle_dir.join(RAW_EVENT_LOG_FILE_NAME);
|
||||
let event_log = File::open(&event_log_path)
|
||||
.with_context(|| format!("open trace event log {}", event_log_path.display()))?;
|
||||
for (line_index, line) in BufReader::new(event_log).lines().enumerate() {
|
||||
let line = line.with_context(|| format!("read trace event line {}", line_index + 1))?;
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let event: RawTraceEvent = serde_json::from_str(&line)
|
||||
.with_context(|| format!("parse trace event line {}", line_index + 1))?;
|
||||
reducer.apply_event(event)?;
|
||||
}
|
||||
// Spawn edges prefer the child task message as their target, but a child can
|
||||
// fail before that message is ever reduced. Only after replaying the whole
|
||||
// bundle do we know which spawn deliveries need the child-thread fallback.
|
||||
reducer.resolve_pending_spawn_edge_fallbacks()?;
|
||||
|
||||
Ok(reducer.rollout)
|
||||
}
|
||||
|
||||
struct TraceReducer {
    /// The reduced rollout graph being built; returned to the caller once the
    /// whole event log has been replayed.
    rollout: RolloutTrace,
    /// Bundle root used to resolve the relative paths inside `RawPayloadRef`s.
    bundle_dir: PathBuf,
    /// Next ordinal handed out when minting a conversation item; starts at 1.
    /// (Presumably folded into stable item ids — confirm in the conversation module.)
    next_conversation_item_ordinal: u64,
    /// Next ordinal handed out for terminal operations; starts at 1.
    next_terminal_operation_ordinal: u64,
    /// Last model-visible conversation snapshot per thread.
    ///
    /// Requests and responses both advance this sequence because both are
    /// model-facing payloads. Repeated request snapshots reuse item IDs only
    /// when the same normalized item appears at the same position; identical
    /// content at a new position must remain a distinct conversation item.
    thread_conversation_snapshots: BTreeMap<String, Vec<String>>,
    /// Replacement snapshot installed by compaction but not yet seen in a sampling request.
    ///
    /// The first full request after compaction should compare against the installed replacement
    /// history, not against the pre-compaction request. That keeps repeated prefix/context messages
    /// as fresh post-compaction conversation items while still reusing the summary/replacement
    /// items that actually became live history.
    pending_compaction_replacement_item_ids: BTreeMap<String, Vec<String>>,
    /// Runtime cell ids indexed by thread-local code-mode handle.
    ///
    /// Reduced `CodeCellId`s are based on the model-visible `exec` call id
    /// because that is the durable source identity. Runtime lifecycle, nested
    /// tools, and `wait` calls arrive with the runtime-local `cell_id`, so this
    /// index is the one intentional bridge between those namespaces.
    code_cell_ids_by_runtime: BTreeMap<(String, String), String>,
    /// Code-cell starts whose model-visible `custom_tool_call` item has not
    /// been reduced yet.
    ///
    /// Core begins executing tools before the stream-completion hook records
    /// the response payload that requested them. Queueing keeps replay strict
    /// about eventual source-item ownership without requiring trace producers
    /// to reorder runtime events behind inference completion.
    pending_code_cell_starts: BTreeMap<String, PendingCodeCellStart>,
    /// Initial/end events that arrived while the matching start was queued.
    ///
    /// Fast cells can return before the inference response payload that proves
    /// the model-visible `exec` source item has been reduced. The start remains
    /// queued for ownership validation; these lifecycle events wait with it and
    /// are replayed in raw sequence order once the cell materializes.
    pending_code_cell_lifecycle_events: BTreeMap<String, Vec<PendingCodeCellLifecycleEvent>>,
    /// Multi-agent deliveries whose recipient-side transcript item has not been observed yet.
    ///
    /// V2 agent tools enqueue mailbox messages in the target thread. The trace event for the
    /// sending tool arrives before the recipient inference request materializes that mailbox item
    /// as a `ConversationItem`, so the reducer keeps the delivery edge pending until it can point
    /// at the exact model-visible item instead of a coarse thread.
    pending_agent_interaction_edges: Vec<PendingAgentInteractionEdge>,
}
|
||||
|
||||
impl TraceReducer {
    /// Reads and parses the JSON document referenced by `payload` from the bundle directory.
    ///
    /// # Errors
    /// Fails when the payload file cannot be opened or its contents are not valid JSON.
    fn read_payload_json(&self, payload: &RawPayloadRef) -> Result<Value> {
        // Reducers keep raw bodies out of the graph, but typed replay sometimes
        // needs a small subset of fields to build semantic objects.
        let payload_path = self.bundle_dir.join(&payload.path);
        let file = File::open(&payload_path)
            .with_context(|| format!("open payload {}", payload.raw_payload_id))?;
        serde_json::from_reader(file)
            .with_context(|| format!("parse payload {}", payload.raw_payload_id))
    }

    /// Applies one raw trace event to the reduced rollout graph.
    ///
    /// Indexes every raw-payload ref carried by the event first, then dispatches
    /// on the payload variant to the reducer helper that owns that part of the
    /// graph.
    ///
    /// # Errors
    /// Propagates helper failures, and fails outright on `Other` payloads so an
    /// unhandled event kind is never silently dropped.
    fn apply_event(&mut self, event: RawTraceEvent) -> Result<()> {
        // Raw payload refs are reducer-wide evidence, not owned by a single
        // semantic arm. Keep this bookkeeping separate so typed reduction can
        // stay strict without duplicating payload insertion in every case.
        for payload in event.payload.raw_payload_refs() {
            self.insert_raw_payload(payload);
        }

        match event.payload {
            RawTraceEventPayload::RolloutStarted {
                trace_id,
                root_thread_id,
            } => {
                self.rollout.trace_id = trace_id;
                self.rollout.root_thread_id = root_thread_id;
            }
            RawTraceEventPayload::RolloutEnded { status } => {
                self.rollout.status = status;
                self.rollout.ended_at_unix_ms = Some(event.wall_time_unix_ms);
            }
            RawTraceEventPayload::ThreadStarted {
                thread_id,
                agent_path,
                metadata_payload,
            } => {
                self.start_thread(
                    event.seq,
                    event.wall_time_unix_ms,
                    thread_id,
                    agent_path,
                    metadata_payload,
                )?;
            }
            RawTraceEventPayload::ThreadEnded { thread_id, status } => {
                self.end_thread(event.seq, event.wall_time_unix_ms, thread_id, status)?;
            }
            RawTraceEventPayload::CodexTurnStarted {
                codex_turn_id,
                thread_id,
            } => {
                self.start_codex_turn(
                    event.seq,
                    event.wall_time_unix_ms,
                    codex_turn_id,
                    thread_id,
                )?;
            }
            RawTraceEventPayload::CodexTurnEnded {
                codex_turn_id,
                status,
            } => {
                self.end_codex_turn(
                    event.seq,
                    event.wall_time_unix_ms,
                    event.thread_id,
                    codex_turn_id,
                    status,
                )?;
            }
            RawTraceEventPayload::InferenceStarted {
                inference_call_id,
                thread_id,
                codex_turn_id,
                model,
                provider_name,
                request_payload,
            } => {
                self.start_inference_call(
                    event.seq,
                    event.wall_time_unix_ms,
                    StartedInferenceCall {
                        inference_call_id,
                        thread_id,
                        codex_turn_id,
                        model,
                        provider_name,
                        request_payload,
                    },
                )?;
            }
            RawTraceEventPayload::InferenceCompleted {
                inference_call_id,
                response_id,
                response_payload,
            } => {
                self.complete_inference_call(
                    event.seq,
                    event.wall_time_unix_ms,
                    inference_call_id,
                    ExecutionStatus::Completed,
                    response_id,
                    Some(response_payload),
                )?;
            }
            RawTraceEventPayload::InferenceFailed {
                inference_call_id,
                partial_response_payload,
                ..
            } => {
                // Failures reuse the completion path; a partial payload, when
                // present, stands in for the response body.
                self.complete_inference_call(
                    event.seq,
                    event.wall_time_unix_ms,
                    inference_call_id,
                    ExecutionStatus::Failed,
                    /*response_id*/ None,
                    partial_response_payload,
                )?;
            }
            RawTraceEventPayload::ProtocolEventObserved { .. } => {
                // Protocol wrappers are raw debug breadcrumbs. Typed hooks own
                // the reduced graph, so these payload refs are retained without
                // creating semantic objects.
            }
            RawTraceEventPayload::ToolCallStarted {
                tool_call_id,
                model_visible_call_id,
                code_mode_runtime_tool_id,
                requester,
                kind,
                summary,
                invocation_payload,
            } => {
                self.start_tool_call(
                    event.seq,
                    event.wall_time_unix_ms,
                    event.thread_id,
                    event.codex_turn_id,
                    ToolCallStarted {
                        tool_call_id,
                        model_visible_call_id,
                        code_mode_runtime_tool_id,
                        requester,
                        kind,
                        summary,
                        invocation_payload,
                    },
                )?;
            }
            RawTraceEventPayload::ToolCallRuntimeStarted {
                tool_call_id,
                runtime_payload,
            } => {
                self.start_tool_runtime_observation(
                    event.seq,
                    event.wall_time_unix_ms,
                    tool_call_id,
                    runtime_payload,
                )?;
            }
            RawTraceEventPayload::ToolCallRuntimeEnded {
                tool_call_id,
                status,
                runtime_payload,
            } => {
                self.end_tool_runtime_observation(
                    event.seq,
                    event.wall_time_unix_ms,
                    tool_call_id,
                    status,
                    runtime_payload,
                )?;
            }
            RawTraceEventPayload::ToolCallEnded {
                tool_call_id,
                status,
                result_payload,
            } => {
                self.end_tool_call(
                    event.seq,
                    event.wall_time_unix_ms,
                    tool_call_id,
                    status,
                    result_payload,
                )?;
            }
            RawTraceEventPayload::CodeCellStarted {
                runtime_cell_id,
                model_visible_call_id,
                source_js,
            } => {
                // Resolve the owning thread, derive the reduced cell id, and
                // record the runtime-id -> reduced-id mapping before queuing so
                // later lifecycle events can find the cell by runtime id.
                let thread_id = self.code_cell_event_thread_id(
                    event.thread_id,
                    event.codex_turn_id.as_deref(),
                    &runtime_cell_id,
                    "code cell start",
                )?;
                let reduced_code_cell_id =
                    self.reduced_code_cell_id_for_model_visible_call(&model_visible_call_id);
                self.record_runtime_code_cell_id(
                    &thread_id,
                    &runtime_cell_id,
                    &reduced_code_cell_id,
                )?;
                self.start_or_queue_code_cell(PendingCodeCellStart {
                    seq: event.seq,
                    wall_time_unix_ms: event.wall_time_unix_ms,
                    thread_id,
                    codex_turn_id: event.codex_turn_id,
                    started: StartedCodeCell {
                        code_cell_id: reduced_code_cell_id,
                        runtime_cell_id,
                        model_visible_call_id,
                        source_js,
                    },
                })?;
            }
            RawTraceEventPayload::CodeCellInitialResponse {
                runtime_cell_id,
                status,
                ..
            } => {
                let thread_id = self.code_cell_event_thread_id(
                    event.thread_id,
                    event.codex_turn_id.as_deref(),
                    &runtime_cell_id,
                    "code cell initial response",
                )?;
                let code_cell_id = self.code_cell_id_for_runtime_cell_id(
                    &thread_id,
                    &runtime_cell_id,
                    "code cell initial response",
                )?;
                self.record_or_queue_code_cell_initial_response(
                    event.seq,
                    event.wall_time_unix_ms,
                    code_cell_id,
                    runtime_cell_id,
                    status,
                )?;
            }
            RawTraceEventPayload::CodeCellEnded {
                runtime_cell_id,
                status,
                ..
            } => {
                let thread_id = self.code_cell_event_thread_id(
                    event.thread_id,
                    event.codex_turn_id.as_deref(),
                    &runtime_cell_id,
                    "code cell end",
                )?;
                let code_cell_id = self.code_cell_id_for_runtime_cell_id(
                    &thread_id,
                    &runtime_cell_id,
                    "code cell end",
                )?;
                self.end_or_queue_code_cell(
                    event.seq,
                    event.wall_time_unix_ms,
                    code_cell_id,
                    status,
                )?;
            }
            RawTraceEventPayload::CompactionRequestStarted {
                compaction_id,
                compaction_request_id,
                thread_id,
                codex_turn_id,
                model,
                provider_name,
                request_payload,
            } => {
                self.start_compaction_request(
                    event.seq,
                    event.wall_time_unix_ms,
                    StartedCompactionRequest {
                        compaction_id,
                        compaction_request_id,
                        thread_id,
                        codex_turn_id,
                        model,
                        provider_name,
                        request_payload,
                    },
                )?;
            }
            RawTraceEventPayload::CompactionRequestCompleted {
                compaction_id,
                compaction_request_id,
                response_payload,
            } => {
                self.complete_compaction_request(
                    event.seq,
                    event.wall_time_unix_ms,
                    compaction_id,
                    compaction_request_id,
                    ExecutionStatus::Completed,
                    Some(response_payload),
                )?;
            }
            RawTraceEventPayload::CompactionRequestFailed {
                compaction_id,
                compaction_request_id,
                ..
            } => {
                self.complete_compaction_request(
                    event.seq,
                    event.wall_time_unix_ms,
                    compaction_id,
                    compaction_request_id,
                    ExecutionStatus::Failed,
                    /*response_payload*/ None,
                )?;
            }
            RawTraceEventPayload::CompactionInstalled {
                compaction_id,
                checkpoint_payload,
            } => {
                // Installation is attributed to a specific thread and turn, so
                // both context ids are mandatory here.
                let Some(thread_id) = event.thread_id else {
                    bail!("compaction installed event {compaction_id} did not include a thread id");
                };
                let Some(codex_turn_id) = event.codex_turn_id else {
                    bail!(
                        "compaction installed event {compaction_id} did not include a codex turn id"
                    );
                };
                self.reduce_compaction_installed_event(
                    event.wall_time_unix_ms,
                    thread_id,
                    codex_turn_id,
                    compaction_id,
                    checkpoint_payload,
                )?;
            }
            RawTraceEventPayload::AgentResultObserved {
                edge_id,
                child_thread_id,
                child_codex_turn_id,
                parent_thread_id,
                message,
                carried_payload,
            } => {
                self.queue_agent_result_interaction_edge(ObservedAgentResultEdge {
                    wall_time_unix_ms: event.wall_time_unix_ms,
                    edge_id,
                    child_thread_id,
                    child_codex_turn_id,
                    parent_thread_id,
                    message,
                    carried_payload,
                })?;
            }
            RawTraceEventPayload::Other { .. } => {
                bail!("raw trace event has no reducer implementation");
            }
        }

        Ok(())
    }

    /// Indexes a payload ref by id; a later ref with the same id overwrites the earlier one.
    fn insert_raw_payload(&mut self, payload: &RawPayloadRef) {
        self.rollout
            .raw_payloads
            .insert(payload.raw_payload_id.clone(), payload.clone());
    }
}
|
||||
200
codex-rs/rollout-trace/src/reducer/test_support.rs
Normal file
200
codex-rs/rollout-trace/src/reducer/test_support.rs
Normal file
@@ -0,0 +1,200 @@
|
||||
//! Shared reducer test fixtures.
|
||||
//!
|
||||
//! These helpers only write common trace scaffolding. Scenario-specific event
|
||||
//! sequences stay in each test so the behavior under test remains visible.
|
||||
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::model::ToolCallSummary;
|
||||
use crate::payload::RawPayloadKind;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RawTraceEventContext;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
use crate::replay_bundle;
|
||||
use crate::writer::TraceWriter;
|
||||
|
||||
/// Thread id most fixtures use as the rollout root.
pub(crate) const ROOT_THREAD_ID: &str = "thread-root";
/// UUID-shaped root thread id used by the multi-agent fixtures.
pub(crate) const AGENT_ROOT_THREAD_ID: &str = "019d0000-0000-7000-8000-000000000001";
|
||||
|
||||
pub(crate) fn message(role: &str, text: &str) -> serde_json::Value {
|
||||
json!({
|
||||
"type": "message",
|
||||
"role": role,
|
||||
"content": [{"type": "input_text", "text": text}]
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn generic_summary(label: &str) -> ToolCallSummary {
|
||||
ToolCallSummary::Generic {
|
||||
label: label.to_string(),
|
||||
input_preview: None,
|
||||
output_preview: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn create_started_writer(temp: &TempDir) -> anyhow::Result<TraceWriter> {
|
||||
create_started_writer_for_thread(temp, ROOT_THREAD_ID, "/root")
|
||||
}
|
||||
|
||||
pub(crate) fn create_started_agent_writer(temp: &TempDir) -> anyhow::Result<TraceWriter> {
|
||||
create_started_writer_for_thread(temp, AGENT_ROOT_THREAD_ID, "/root")
|
||||
}
|
||||
|
||||
pub(crate) fn create_started_writer_for_thread(
|
||||
temp: &TempDir,
|
||||
thread_id: &str,
|
||||
agent_path: &str,
|
||||
) -> anyhow::Result<TraceWriter> {
|
||||
let writer = TraceWriter::create(
|
||||
temp.path(),
|
||||
"trace-1".to_string(),
|
||||
"rollout-1".to_string(),
|
||||
thread_id.to_string(),
|
||||
)?;
|
||||
start_thread(&writer, thread_id, agent_path)?;
|
||||
Ok(writer)
|
||||
}
|
||||
|
||||
pub(crate) fn start_thread(
|
||||
writer: &TraceWriter,
|
||||
thread_id: &str,
|
||||
agent_path: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
writer.append(RawTraceEventPayload::ThreadStarted {
|
||||
thread_id: thread_id.to_string(),
|
||||
agent_path: agent_path.to_string(),
|
||||
metadata_payload: None,
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn start_turn(writer: &TraceWriter, turn_id: &str) -> anyhow::Result<()> {
|
||||
start_turn_for_thread(writer, ROOT_THREAD_ID, turn_id)
|
||||
}
|
||||
|
||||
pub(crate) fn start_agent_turn(writer: &TraceWriter, turn_id: &str) -> anyhow::Result<()> {
|
||||
start_turn_for_thread(writer, AGENT_ROOT_THREAD_ID, turn_id)
|
||||
}
|
||||
|
||||
pub(crate) fn start_turn_for_thread(
|
||||
writer: &TraceWriter,
|
||||
thread_id: &str,
|
||||
turn_id: &str,
|
||||
) -> anyhow::Result<()> {
|
||||
writer.append(RawTraceEventPayload::CodexTurnStarted {
|
||||
codex_turn_id: turn_id.to_string(),
|
||||
thread_id: thread_id.to_string(),
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn trace_context(turn_id: &str) -> RawTraceEventContext {
|
||||
trace_context_for_thread(ROOT_THREAD_ID, turn_id)
|
||||
}
|
||||
|
||||
pub(crate) fn trace_context_for_agent(turn_id: &str) -> RawTraceEventContext {
|
||||
trace_context_for_thread(AGENT_ROOT_THREAD_ID, turn_id)
|
||||
}
|
||||
|
||||
pub(crate) fn trace_context_for_thread(thread_id: &str, turn_id: &str) -> RawTraceEventContext {
|
||||
RawTraceEventContext {
|
||||
thread_id: Some(thread_id.to_string()),
|
||||
codex_turn_id: Some(turn_id.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn append_inference_start(
|
||||
writer: &TraceWriter,
|
||||
inference_call_id: &str,
|
||||
codex_turn_id: &str,
|
||||
request_payload: RawPayloadRef,
|
||||
) -> anyhow::Result<()> {
|
||||
append_inference_start_for_thread(
|
||||
writer,
|
||||
ROOT_THREAD_ID,
|
||||
codex_turn_id,
|
||||
inference_call_id,
|
||||
request_payload,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn append_inference_start_for_thread(
|
||||
writer: &TraceWriter,
|
||||
thread_id: &str,
|
||||
codex_turn_id: &str,
|
||||
inference_call_id: &str,
|
||||
request_payload: RawPayloadRef,
|
||||
) -> anyhow::Result<()> {
|
||||
writer.append(RawTraceEventPayload::InferenceStarted {
|
||||
inference_call_id: inference_call_id.to_string(),
|
||||
thread_id: thread_id.to_string(),
|
||||
codex_turn_id: codex_turn_id.to_string(),
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
request_payload,
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn append_inference_completion(
|
||||
writer: &TraceWriter,
|
||||
inference_call_id: &str,
|
||||
response_id: &str,
|
||||
response_payload: RawPayloadRef,
|
||||
) -> anyhow::Result<()> {
|
||||
writer.append(RawTraceEventPayload::InferenceCompleted {
|
||||
inference_call_id: inference_call_id.to_string(),
|
||||
response_id: Some(response_id.to_string()),
|
||||
response_payload,
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn append_inference_request(
|
||||
writer: &TraceWriter,
|
||||
thread_id: &str,
|
||||
turn_id: &str,
|
||||
inference_id: &str,
|
||||
input: Vec<serde_json::Value>,
|
||||
) -> anyhow::Result<()> {
|
||||
let request =
|
||||
writer.write_json_payload(RawPayloadKind::InferenceRequest, &json!({ "input": input }))?;
|
||||
append_inference_start_for_thread(writer, thread_id, turn_id, inference_id, request)
|
||||
}
|
||||
|
||||
pub(crate) fn append_completed_inference(
|
||||
writer: &TraceWriter,
|
||||
thread_id: &str,
|
||||
turn_id: &str,
|
||||
inference_id: &str,
|
||||
input: Vec<serde_json::Value>,
|
||||
output_items: Vec<serde_json::Value>,
|
||||
) -> anyhow::Result<()> {
|
||||
append_inference_request(writer, thread_id, turn_id, inference_id, input)?;
|
||||
let response = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceResponse,
|
||||
&json!({
|
||||
"response_id": format!("resp-{inference_id}"),
|
||||
"output_items": output_items,
|
||||
}),
|
||||
)?;
|
||||
writer.append_with_context(
|
||||
trace_context_for_thread(thread_id, turn_id),
|
||||
RawTraceEventPayload::InferenceCompleted {
|
||||
inference_call_id: inference_id.to_string(),
|
||||
response_id: Some(format!("resp-{inference_id}")),
|
||||
response_payload: response,
|
||||
},
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn expect_replay_error(temp: &TempDir, expected: &str) -> anyhow::Result<()> {
|
||||
let Err(err) = replay_bundle(temp.path()) else {
|
||||
panic!("expected replay error containing {expected}");
|
||||
};
|
||||
let message = err.to_string();
|
||||
assert!(message.contains(expected), "unexpected error: {message}");
|
||||
Ok(())
|
||||
}
|
||||
264
codex-rs/rollout-trace/src/reducer/thread.rs
Normal file
264
codex-rs/rollout-trace/src/reducer/thread.rs
Normal file
@@ -0,0 +1,264 @@
|
||||
//! Thread and turn reduction.
|
||||
//!
|
||||
//! Threads are the container that every other reducer module links into. This
|
||||
//! module owns the identity metadata parsing as well, so the central dispatcher
|
||||
//! does not need to know the shape of multi-agent session-source payloads.
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value;
|
||||
|
||||
use super::TraceReducer;
|
||||
use super::tool::spawn_edge_id;
|
||||
use crate::model::AgentOrigin;
|
||||
use crate::model::AgentThread;
|
||||
use crate::model::CodexTurn;
|
||||
use crate::model::CodexTurnId;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::ExecutionWindow;
|
||||
use crate::model::RolloutStatus;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RawEventSeq;
|
||||
|
||||
impl TraceReducer {
    /// Inserts a thread and derives its multi-agent identity from optional metadata.
    ///
    /// The raw event carries a denormalized agent path; when v2 subagent metadata is
    /// present, that metadata is authoritative because it also drives spawn edges and task names.
    ///
    /// # Errors
    /// Fails on a duplicate thread id or an unreadable/unparseable metadata payload.
    pub(super) fn start_thread(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: String,
        agent_path: String,
        metadata_payload: Option<RawPayloadRef>,
    ) -> Result<()> {
        if self.rollout.threads.contains_key(&thread_id) {
            bail!("duplicate thread start for {thread_id}");
        }

        let metadata = metadata_payload
            .as_ref()
            .map(|payload| self.thread_started_metadata(payload))
            .transpose()?;
        let spawn = metadata
            .as_ref()
            .and_then(ThreadStartedMetadata::thread_spawn);
        // The v2 SessionSource is the authoritative child identity record.
        // Prefer its nested agent_path over the denormalized event field so
        // task derivation and the spawn edge are based on the same metadata.
        let agent_path = spawn
            .as_ref()
            .and_then(|spawn| spawn.agent_path.clone())
            .or_else(|| {
                metadata
                    .as_ref()
                    .and_then(|metadata| metadata.agent_path.clone())
            })
            .unwrap_or(agent_path);
        let nickname = metadata
            .as_ref()
            .and_then(|metadata| metadata.nickname.clone());
        let default_model = metadata
            .as_ref()
            .and_then(|metadata| metadata.model.clone());
        // A spawn record makes this a child thread; otherwise it is a root.
        let origin = if let Some(spawn) = spawn {
            let edge_id = spawn_edge_id(&spawn.parent_thread_id, &thread_id);
            let task_name = spawn
                .task_name
                .clone()
                .unwrap_or_else(|| task_name_from_agent_path(&agent_path));
            let agent_role = spawn.agent_role.clone().unwrap_or_default();

            AgentOrigin::Spawned {
                parent_thread_id: spawn.parent_thread_id,
                spawn_edge_id: edge_id,
                task_name,
                agent_role,
            }
        } else {
            AgentOrigin::Root
        };

        self.rollout.threads.insert(
            thread_id.clone(),
            AgentThread {
                thread_id,
                agent_path,
                nickname,
                origin,
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                default_model,
                conversation_item_ids: Vec::new(),
            },
        );
        Ok(())
    }

    /// Marks a thread terminal without treating child shutdown as rollout completion.
    ///
    /// # Errors
    /// Fails when `thread_id` is unknown.
    pub(super) fn end_thread(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: String,
        status: RolloutStatus,
    ) -> Result<()> {
        let thread = self.thread_mut(&thread_id)?;
        thread.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
        thread.execution.ended_seq = Some(seq);
        // Map the rollout-level status onto the per-thread execution status.
        thread.execution.status = match status {
            RolloutStatus::Running => ExecutionStatus::Running,
            RolloutStatus::Completed => ExecutionStatus::Completed,
            RolloutStatus::Failed => ExecutionStatus::Failed,
            RolloutStatus::Aborted => ExecutionStatus::Aborted,
        };
        Ok(())
    }

    /// Starts a Codex turn inside an existing thread.
    ///
    /// # Errors
    /// Fails on a duplicate turn id or an unknown thread.
    pub(super) fn start_codex_turn(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        codex_turn_id: CodexTurnId,
        thread_id: String,
    ) -> Result<()> {
        if self.rollout.codex_turns.contains_key(&codex_turn_id) {
            bail!("duplicate codex turn start for {codex_turn_id}");
        }

        // Validates the thread exists before inserting the turn.
        self.thread_mut(&thread_id)?;

        self.rollout.codex_turns.insert(
            codex_turn_id.clone(),
            CodexTurn {
                codex_turn_id,
                thread_id,
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                input_item_ids: Vec::new(),
            },
        );
        Ok(())
    }

    /// Marks a Codex turn terminal and validates any thread id carried by the raw event.
    ///
    /// Running code cells owned by the turn are terminated with the same status.
    ///
    /// # Errors
    /// Fails when the turn is unknown or the event's thread id disagrees with
    /// the thread the turn was started on.
    pub(super) fn end_codex_turn(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: Option<String>,
        codex_turn_id: CodexTurnId,
        status: ExecutionStatus,
    ) -> Result<()> {
        if let Some(event_thread_id) = thread_id.as_deref()
            && let Some(turn) = self.rollout.codex_turns.get(&codex_turn_id)
            && turn.thread_id != event_thread_id
        {
            bail!(
                "codex turn end for {codex_turn_id} used thread {event_thread_id}, \
                 but the turn belongs to {}",
                turn.thread_id
            );
        }

        let Some(turn) = self.rollout.codex_turns.get_mut(&codex_turn_id) else {
            bail!("codex turn end referenced unknown turn {codex_turn_id}");
        };
        turn.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
        turn.execution.ended_seq = Some(seq);
        turn.execution.status = status.clone();
        self.terminate_running_code_cells_for_turn_end(
            seq,
            wall_time_unix_ms,
            &codex_turn_id,
            &status,
        )?;
        Ok(())
    }

    /// Returns a mutable thread or reports a reducer error tied to the unknown id.
    pub(super) fn thread_mut(&mut self, thread_id: &str) -> Result<&mut AgentThread> {
        self.rollout
            .threads
            .get_mut(thread_id)
            .with_context(|| format!("trace event referenced unknown thread {thread_id}"))
    }

    /// Loads and deserializes the `ThreadStarted` metadata payload.
    fn thread_started_metadata(
        &self,
        metadata_payload: &RawPayloadRef,
    ) -> Result<ThreadStartedMetadata> {
        let value = self.read_payload_json(metadata_payload)?;
        serde_json::from_value(value)
            .with_context(|| format!("parse thread metadata {}", metadata_payload.raw_payload_id))
    }
}
|
||||
|
||||
/// Subset of the `ThreadStarted` metadata payload the reducer reads.
///
/// Every field is optional, so partial metadata deserializes cleanly.
#[derive(Deserialize)]
struct ThreadStartedMetadata {
    // Denormalized agent path; superseded by the nested spawn record when present.
    agent_path: Option<String>,
    // Fallbacks used when the spawn record omits the corresponding field.
    task_name: Option<String>,
    nickname: Option<String>,
    agent_role: Option<String>,
    model: Option<String>,
    // Kept as raw JSON; only `subagent.thread_spawn` is inspected (see `thread_spawn`).
    session_source: Option<Value>,
}
|
||||
|
||||
impl ThreadStartedMetadata {
    /// Extracts the v2 spawn record from `session_source.subagent.thread_spawn`, if present.
    ///
    /// Returns `None` when the nested record is missing or lacks a
    /// `parent_thread_id`. Each optional field falls back from the nested spawn
    /// record to the top-level metadata field, and `task_name` finally falls
    /// back to the last segment of the agent path.
    fn thread_spawn(&self) -> Option<ThreadSpawnMetadata> {
        let spawn = self
            .session_source
            .as_ref()?
            .get("subagent")?
            .get("thread_spawn")?;
        // Nested agent_path wins; top-level metadata is only a fallback.
        let agent_path = spawn
            .get("agent_path")
            .and_then(Value::as_str)
            .map(str::to_string)
            .or_else(|| self.agent_path.clone());
        Some(ThreadSpawnMetadata {
            // parent_thread_id is mandatory: no parent means no spawn record.
            parent_thread_id: spawn.get("parent_thread_id")?.as_str()?.to_string(),
            agent_path: agent_path.clone(),
            task_name: spawn
                .get("task_name")
                .and_then(Value::as_str)
                .map(str::to_string)
                .or_else(|| self.task_name.clone())
                .or_else(|| agent_path.as_deref().map(task_name_from_agent_path)),
            agent_role: spawn
                .get("agent_role")
                .and_then(Value::as_str)
                .map(str::to_string)
                .or_else(|| self.agent_role.clone()),
        })
    }
}
|
||||
|
||||
/// Resolved spawn identity for a child thread, after fallback merging.
struct ThreadSpawnMetadata {
    // The thread that spawned this one; always present for a spawn record.
    parent_thread_id: String,
    agent_path: Option<String>,
    task_name: Option<String>,
    agent_role: Option<String>,
}
|
||||
|
||||
/// Derives a task name from the last non-empty path segment.
///
/// Falls back to the whole path when no non-empty segment exists
/// (e.g. `""` or `"///"`).
fn task_name_from_agent_path(agent_path: &str) -> String {
    let last_segment = agent_path
        .split('/')
        .filter(|segment| !segment.is_empty())
        .next_back();
    match last_segment {
        Some(segment) => segment.to_string(),
        None => agent_path.to_string(),
    }
}
|
||||
500
codex-rs/rollout-trace/src/reducer/tool.rs
Normal file
500
codex-rs/rollout-trace/src/reducer/tool.rs
Normal file
@@ -0,0 +1,500 @@
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
|
||||
use super::TraceReducer;
|
||||
use crate::model::CodeModeRuntimeToolId;
|
||||
use crate::model::ConversationItemKind;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::ExecutionWindow;
|
||||
use crate::model::ModelVisibleCallId;
|
||||
use crate::model::ProducerRef;
|
||||
use crate::model::ToolCall;
|
||||
use crate::model::ToolCallId;
|
||||
use crate::model::ToolCallKind;
|
||||
use crate::model::ToolCallSummary;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RawEventSeq;
|
||||
use crate::raw_event::RawToolCallRequester;
|
||||
|
||||
mod agents;
|
||||
mod terminal;
|
||||
|
||||
pub(super) use agents::ObservedAgentResultEdge;
|
||||
pub(super) use agents::PendingAgentInteractionEdge;
|
||||
pub(super) use agents::spawn_edge_id;
|
||||
|
||||
/// Raw tool-start fields after dispatch has stripped the common event envelope.
///
/// Tool starts carry several optional identity namespaces: model-visible calls,
/// code-mode runtime tools, and canonical invocation payloads. Grouping them keeps
/// the reducer callsite readable and avoids positional argument mistakes.
pub(super) struct ToolCallStarted {
    // Canonical reducer-side id for the call.
    pub(super) tool_call_id: ToolCallId,
    // Id the model sees in function/custom-tool call items, when the tool is model-visible.
    pub(super) model_visible_call_id: Option<ModelVisibleCallId>,
    // Identity in the code-mode runtime tool namespace, when applicable.
    pub(super) code_mode_runtime_tool_id: Option<CodeModeRuntimeToolId>,
    // Raw requester; reduced to a `ProducerRef` by the reducer.
    pub(super) requester: RawToolCallRequester,
    pub(super) kind: ToolCallKind,
    // Producer-captured summary; may be replaced by a terminal summary (see `start_tool_call`).
    pub(super) summary: ToolCallSummary,
    // Canonical invocation body, when one was recorded.
    pub(super) invocation_payload: Option<RawPayloadRef>,
}
|
||||
|
||||
impl TraceReducer {
|
||||
    /// Starts a tool call and links it to model-visible items or runtime parents when available.
    ///
    /// Some tools also create richer domain objects, such as terminal operations, from
    /// the same invocation payload. The generic ToolCall remains the common index.
    ///
    /// # Errors
    /// Fails on a duplicate call id, a non-unique model-visible call id, or an
    /// unresolvable thread/turn context.
    pub(super) fn start_tool_call(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: Option<String>,
        codex_turn_id: Option<String>,
        started: ToolCallStarted,
    ) -> Result<()> {
        let tool_call_id = started.tool_call_id.clone();
        if self.rollout.tool_calls.contains_key(&tool_call_id) {
            bail!("duplicate tool call start for {tool_call_id}");
        }
        self.ensure_unique_model_visible_tool_call(
            started.model_visible_call_id.as_deref(),
            &tool_call_id,
        )?;

        // Resolve and validate the owning thread/turn before creating anything.
        let thread_id = self.tool_thread_id(thread_id, codex_turn_id.as_deref())?;
        self.validate_tool_turn(&thread_id, codex_turn_id.as_deref())?;

        let model_visible_call_id = started.model_visible_call_id.clone();
        let requester = self.reduce_tool_call_requester(&thread_id, started.requester.clone())?;
        // Call-side items (the model's function/custom tool call) already observed.
        let model_visible_call_item_ids = model_visible_call_id
            .as_deref()
            .map(|call_id| {
                self.model_visible_tool_item_ids(
                    &thread_id,
                    call_id,
                    &[
                        ConversationItemKind::FunctionCall,
                        ConversationItemKind::CustomToolCall,
                    ],
                )
            })
            .unwrap_or_default();
        // Output-side items already observed; attached after insertion below.
        let model_visible_output_item_ids = model_visible_call_id
            .as_deref()
            .map(|call_id| {
                self.model_visible_tool_item_ids(
                    &thread_id,
                    call_id,
                    &[
                        ConversationItemKind::FunctionCallOutput,
                        ConversationItemKind::CustomToolCallOutput,
                    ],
                )
            })
            .unwrap_or_default();

        // Existence check only; the returned borrow is unused.
        self.thread_mut(&thread_id)?;

        // Some terminal-like tools, notably write_stdin, do not emit a richer
        // runtime begin event. For those tools the canonical invocation is the
        // only place to recover the terminal/session join key.
        let terminal_operation_id = self.start_terminal_operation_from_invocation(
            seq,
            wall_time_unix_ms,
            &thread_id,
            &tool_call_id,
            &started.kind,
            started.invocation_payload.as_ref(),
        )?;
        // Terminal-backed tools should render through the richer terminal
        // operation instead of the generic tool summary captured by producers.
        let summary = terminal_operation_id
            .as_ref()
            .map(|operation_id| ToolCallSummary::Terminal {
                operation_id: operation_id.clone(),
            })
            .unwrap_or(started.summary);
        let raw_invocation_payload_id = started
            .invocation_payload
            .as_ref()
            .map(|payload| payload.raw_payload_id.clone());
        self.link_wait_tool_call_from_request_payload(
            &thread_id,
            &tool_call_id,
            started.invocation_payload.as_ref(),
        )?;

        self.rollout.tool_calls.insert(
            tool_call_id.clone(),
            ToolCall {
                tool_call_id: tool_call_id.clone(),
                model_visible_call_id,
                code_mode_runtime_tool_id: started.code_mode_runtime_tool_id,
                thread_id,
                started_by_codex_turn_id: codex_turn_id,
                execution: ExecutionWindow {
                    started_at_unix_ms: wall_time_unix_ms,
                    started_seq: seq,
                    ended_at_unix_ms: None,
                    ended_seq: None,
                    status: ExecutionStatus::Running,
                },
                requester: requester.clone(),
                kind: started.kind,
                model_visible_call_item_ids,
                // Intentionally empty here: output ids are attached below via
                // add_tool_output_item so the reverse edge is created too.
                model_visible_output_item_ids: Vec::new(),
                terminal_operation_id,
                summary,
                raw_invocation_payload_id,
                raw_result_payload_id: None,
                raw_runtime_payload_ids: Vec::new(),
            },
        );

        self.link_tool_call_to_code_cell(&tool_call_id, &requester)?;
        self.link_tool_to_inference_response(&tool_call_id);
        // Output items need the reverse ProducerRef edge as well, so attach
        // them after insertion through the same helper used by the transcript
        // reducer when the output is observed after the tool start.
        for item_id in model_visible_output_item_ids {
            self.add_tool_output_item(&tool_call_id, &item_id)?;
        }
        // The call/output items may have been observed before this tool start.
        // Re-sync after insertion so terminal observations get both directions
        // of the model-visible link.
        self.sync_terminal_model_observation(&tool_call_id)?;
        Ok(())
    }
|
||||
|
||||
    /// Completes the canonical tool call and any terminal operation driven by dispatch output.
    ///
    /// Protocol-backed terminal tools end from runtime events; direct tools
    /// may only have the canonical result payload, so this method handles both paths.
    ///
    /// # Errors
    /// Fails when `tool_call_id` is unknown.
    pub(super) fn end_tool_call(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        tool_call_id: ToolCallId,
        status: ExecutionStatus,
        result_payload: Option<RawPayloadRef>,
    ) -> Result<()> {
        // Scope the mutable borrow of the tool call so the follow-up calls
        // below can borrow `self` again.
        let (terminal_operation_id, thread_id, end_terminal_from_result) = {
            let Some(tool_call) = self.rollout.tool_calls.get_mut(&tool_call_id) else {
                bail!("tool call end referenced unknown call {tool_call_id}");
            };
            tool_call.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
            tool_call.execution.ended_seq = Some(seq);
            tool_call.execution.status = status.clone();
            tool_call.raw_result_payload_id = result_payload
                .as_ref()
                .map(|payload| payload.raw_payload_id.clone());
            (
                tool_call.terminal_operation_id.clone(),
                tool_call.thread_id.clone(),
                // Protocol-backed tools end terminal operations from
                // runtime observations. Dispatch result payloads are still kept
                // on ToolCall, but they are caller-facing and may be transformed
                // relative to the raw terminal output.
                tool_call.raw_runtime_payload_ids.is_empty(),
            )
        };
        if end_terminal_from_result && let Some(operation_id) = terminal_operation_id {
            self.end_terminal_operation(
                seq,
                wall_time_unix_ms,
                &thread_id,
                &operation_id,
                status,
                result_payload.as_ref(),
            )?;
        }
        self.attach_agent_interaction_tool_result(&tool_call_id, result_payload.as_ref())?;
        Ok(())
    }
|
||||
|
||||
    /// Records a runtime-begin observation for an already started tool call.
    ///
    /// Runtime observations enrich the generic tool with protocol facts and may
    /// create domain-specific children such as terminal operations or agent edges.
    ///
    /// Fails when `tool_call_id` is unknown or when a second terminal
    /// operation would be created for the same exec/stdin tool call.
    pub(super) fn start_tool_runtime_observation(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        tool_call_id: ToolCallId,
        runtime_payload: RawPayloadRef,
    ) -> Result<()> {
        // Record the runtime payload and copy out the context needed after the
        // mutable borrow ends.
        let (thread_id, _requester, kind, existing_terminal_operation_id) = {
            let Some(tool_call) = self.rollout.tool_calls.get_mut(&tool_call_id) else {
                bail!("tool runtime start referenced unknown call {tool_call_id}");
            };
            push_unique(
                &mut tool_call.raw_runtime_payload_ids,
                &runtime_payload.raw_payload_id,
            );
            (
                tool_call.thread_id.clone(),
                tool_call.requester.clone(),
                tool_call.kind.clone(),
                tool_call.terminal_operation_id.clone(),
            )
        };
        // Exec/stdin tools own at most one terminal operation.
        if existing_terminal_operation_id.is_some()
            && matches!(kind, ToolCallKind::ExecCommand | ToolCallKind::WriteStdin)
        {
            bail!("tool runtime start would create a second terminal operation for {tool_call_id}");
        }

        // Protocol begin events carry runtime facts such as process ids and
        // cwd. These facts should create terminal rows, but they must not
        // replace the canonical invocation payload captured at dispatch.
        let terminal_operation_id = self.start_terminal_operation_from_runtime(
            seq,
            wall_time_unix_ms,
            &thread_id,
            &tool_call_id,
            &kind,
            &runtime_payload,
        )?;

        // Attach the new terminal operation to the tool call, unless one was
        // already linked.
        if let Some(operation_id) = &terminal_operation_id {
            let Some(tool_call) = self.rollout.tool_calls.get_mut(&tool_call_id) else {
                bail!("tool call {tool_call_id} disappeared during runtime start reduction");
            };
            if tool_call.terminal_operation_id.is_none() {
                tool_call.terminal_operation_id = Some(operation_id.clone());
                tool_call.summary = ToolCallSummary::Terminal {
                    operation_id: operation_id.clone(),
                };
            }
        }

        // Re-sync model-visible links now that a terminal operation exists.
        if terminal_operation_id.is_some() {
            self.sync_terminal_model_observation(&tool_call_id)?;
        }
        self.start_agent_interaction_from_runtime(&tool_call_id, &runtime_payload)?;
        Ok(())
    }
|
||||
|
||||
/// Records a runtime-end observation for an already started tool call.
|
||||
pub(super) fn end_tool_runtime_observation(
|
||||
&mut self,
|
||||
seq: RawEventSeq,
|
||||
wall_time_unix_ms: i64,
|
||||
tool_call_id: ToolCallId,
|
||||
status: ExecutionStatus,
|
||||
runtime_payload: RawPayloadRef,
|
||||
) -> Result<()> {
|
||||
let (thread_id, terminal_operation_id) = {
|
||||
let Some(tool_call) = self.rollout.tool_calls.get_mut(&tool_call_id) else {
|
||||
bail!("tool runtime end referenced unknown call {tool_call_id}");
|
||||
};
|
||||
push_unique(
|
||||
&mut tool_call.raw_runtime_payload_ids,
|
||||
&runtime_payload.raw_payload_id,
|
||||
);
|
||||
(
|
||||
tool_call.thread_id.clone(),
|
||||
tool_call.terminal_operation_id.clone(),
|
||||
)
|
||||
};
|
||||
|
||||
if let Some(operation_id) = terminal_operation_id {
|
||||
self.end_terminal_operation(
|
||||
seq,
|
||||
wall_time_unix_ms,
|
||||
&thread_id,
|
||||
&operation_id,
|
||||
status,
|
||||
Some(&runtime_payload),
|
||||
)?;
|
||||
}
|
||||
self.end_agent_interaction_from_runtime(
|
||||
wall_time_unix_ms,
|
||||
&tool_call_id,
|
||||
&runtime_payload,
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Attaches a conversation item observed after the tool call was reduced.
|
||||
///
|
||||
/// Inference request/response ordering can expose call/output items after the
|
||||
/// runtime tool object exists, so transcript reduction calls back here to add
|
||||
/// reverse links without duplicating matching logic.
|
||||
pub(super) fn attach_model_visible_tool_item(
|
||||
&mut self,
|
||||
item_id: &str,
|
||||
call_id: Option<&str>,
|
||||
kind: &ConversationItemKind,
|
||||
) -> Result<()> {
|
||||
let Some(call_id) = call_id else {
|
||||
return Ok(());
|
||||
};
|
||||
match kind {
|
||||
ConversationItemKind::FunctionCall | ConversationItemKind::CustomToolCall => {
|
||||
if let Some(tool_call_id) = self.single_tool_for_model_visible_call(call_id)? {
|
||||
self.add_tool_call_item(&tool_call_id, item_id)?;
|
||||
self.link_tool_to_inference_response(&tool_call_id);
|
||||
self.sync_terminal_model_observation(&tool_call_id)?;
|
||||
}
|
||||
}
|
||||
ConversationItemKind::FunctionCallOutput
|
||||
| ConversationItemKind::CustomToolCallOutput => {
|
||||
if let Some(tool_call_id) = self.single_tool_for_model_visible_call(call_id)? {
|
||||
self.add_tool_output_item(&tool_call_id, item_id)?;
|
||||
self.sync_terminal_model_observation(&tool_call_id)?;
|
||||
}
|
||||
}
|
||||
ConversationItemKind::Message
|
||||
| ConversationItemKind::Reasoning
|
||||
| ConversationItemKind::CompactionMarker => {}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn tool_thread_id(
|
||||
&self,
|
||||
thread_id: Option<String>,
|
||||
codex_turn_id: Option<&str>,
|
||||
) -> Result<String> {
|
||||
if let Some(thread_id) = thread_id {
|
||||
return Ok(thread_id);
|
||||
}
|
||||
let Some(codex_turn_id) = codex_turn_id else {
|
||||
bail!("tool call start did not include thread or Codex turn context");
|
||||
};
|
||||
self.rollout
|
||||
.codex_turns
|
||||
.get(codex_turn_id)
|
||||
.map(|turn| turn.thread_id.clone())
|
||||
.with_context(|| {
|
||||
format!("tool call start referenced unknown Codex turn {codex_turn_id}")
|
||||
})
|
||||
}
|
||||
|
||||
fn validate_tool_turn(&self, thread_id: &str, codex_turn_id: Option<&str>) -> Result<()> {
|
||||
if !self.rollout.threads.contains_key(thread_id) {
|
||||
bail!("tool call start referenced unknown thread {thread_id}");
|
||||
}
|
||||
if let Some(codex_turn_id) = codex_turn_id {
|
||||
let Some(turn) = self.rollout.codex_turns.get(codex_turn_id) else {
|
||||
bail!("tool call start referenced unknown Codex turn {codex_turn_id}");
|
||||
};
|
||||
if turn.thread_id != thread_id {
|
||||
bail!(
|
||||
"tool call start used thread {thread_id}, but Codex turn {codex_turn_id} \
|
||||
belongs to {}",
|
||||
turn.thread_id
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn ensure_unique_model_visible_tool_call(
|
||||
&self,
|
||||
model_visible_call_id: Option<&str>,
|
||||
tool_call_id: &str,
|
||||
) -> Result<()> {
|
||||
let Some(model_visible_call_id) = model_visible_call_id else {
|
||||
return Ok(());
|
||||
};
|
||||
if let Some(existing) = self.single_tool_for_model_visible_call(model_visible_call_id)?
|
||||
&& existing != tool_call_id
|
||||
{
|
||||
bail!("duplicate tool call for model-visible call id {model_visible_call_id}");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn single_tool_for_model_visible_call(
|
||||
&self,
|
||||
model_visible_call_id: &str,
|
||||
) -> Result<Option<ToolCallId>> {
|
||||
let mut matching = self
|
||||
.rollout
|
||||
.tool_calls
|
||||
.values()
|
||||
.filter(|tool| tool.model_visible_call_id.as_deref() == Some(model_visible_call_id))
|
||||
.map(|tool| tool.tool_call_id.clone());
|
||||
let first = matching.next();
|
||||
if matching.next().is_some() {
|
||||
bail!("multiple tool calls matched model-visible call id {model_visible_call_id}");
|
||||
}
|
||||
Ok(first)
|
||||
}
|
||||
|
||||
fn model_visible_tool_item_ids(
|
||||
&self,
|
||||
thread_id: &str,
|
||||
call_id: &str,
|
||||
kinds: &[ConversationItemKind],
|
||||
) -> Vec<String> {
|
||||
self.rollout
|
||||
.conversation_items
|
||||
.values()
|
||||
.filter(|item| {
|
||||
item.thread_id == thread_id
|
||||
&& item.call_id.as_deref() == Some(call_id)
|
||||
&& kinds.contains(&item.kind)
|
||||
})
|
||||
.map(|item| item.item_id.clone())
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn add_tool_call_item(&mut self, tool_call_id: &str, item_id: &str) -> Result<()> {
|
||||
let Some(tool_call) = self.rollout.tool_calls.get_mut(tool_call_id) else {
|
||||
bail!("tool call {tool_call_id} disappeared during conversation linking");
|
||||
};
|
||||
push_unique(&mut tool_call.model_visible_call_item_ids, item_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn add_tool_output_item(&mut self, tool_call_id: &str, item_id: &str) -> Result<()> {
|
||||
let Some(tool_call) = self.rollout.tool_calls.get_mut(tool_call_id) else {
|
||||
bail!("tool call {tool_call_id} disappeared during output linking");
|
||||
};
|
||||
push_unique(&mut tool_call.model_visible_output_item_ids, item_id);
|
||||
|
||||
let Some(item) = self.rollout.conversation_items.get_mut(item_id) else {
|
||||
bail!("conversation item {item_id} disappeared during output linking");
|
||||
};
|
||||
let producer = ProducerRef::Tool {
|
||||
tool_call_id: tool_call_id.to_string(),
|
||||
};
|
||||
if !item.produced_by.contains(&producer) {
|
||||
item.produced_by.push(producer);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn link_tool_to_inference_response(&mut self, tool_call_id: &str) {
|
||||
let Some(tool_call) = self.rollout.tool_calls.get(tool_call_id) else {
|
||||
return;
|
||||
};
|
||||
let call_item_ids = tool_call.model_visible_call_item_ids.clone();
|
||||
if call_item_ids.is_empty() {
|
||||
return;
|
||||
}
|
||||
for inference in self.rollout.inference_calls.values_mut() {
|
||||
if inference
|
||||
.response_item_ids
|
||||
.iter()
|
||||
.any(|item_id| call_item_ids.contains(item_id))
|
||||
&& !inference
|
||||
.tool_call_ids_started_by_response
|
||||
.contains(&tool_call_id.to_string())
|
||||
{
|
||||
inference
|
||||
.tool_call_ids_started_by_response
|
||||
.push(tool_call_id.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends `item_id` unless an equal string is already present.
fn push_unique(items: &mut Vec<String>, item_id: &str) {
    let already_present = items.iter().any(|existing| existing == item_id);
    if !already_present {
        items.push(item_id.to_string());
    }
}
|
||||
621
codex-rs/rollout-trace/src/reducer/tool/agents.rs
Normal file
621
codex-rs/rollout-trace/src/reducer/tool/agents.rs
Normal file
@@ -0,0 +1,621 @@
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
use codex_protocol::protocol::CollabAgentInteractionBeginEvent;
|
||||
use codex_protocol::protocol::CollabAgentInteractionEndEvent;
|
||||
use codex_protocol::protocol::CollabAgentSpawnEndEvent;
|
||||
use codex_protocol::protocol::CollabCloseBeginEvent;
|
||||
use codex_protocol::protocol::CollabCloseEndEvent;
|
||||
use codex_protocol::protocol::InterAgentCommunication;
|
||||
|
||||
use super::super::TraceReducer;
|
||||
use crate::model::ConversationItem;
|
||||
use crate::model::ConversationItemKind;
|
||||
use crate::model::ConversationPart;
|
||||
use crate::model::ConversationRole;
|
||||
use crate::model::InteractionEdge;
|
||||
use crate::model::InteractionEdgeKind;
|
||||
use crate::model::ToolCallKind;
|
||||
use crate::model::TraceAnchor;
|
||||
use crate::payload::RawPayloadRef;
|
||||
|
||||
/// Agent delivery edge waiting for the recipient-side conversation item.
///
/// Multi-agent v2 records the sender tool before the target thread necessarily
/// includes the delivered mailbox message in a model-visible request. The edge
/// stays pending so it can target that exact conversation item when possible.
pub(in crate::reducer) struct PendingAgentInteractionEdge {
    /// Stable id the materialized `InteractionEdge` will reuse.
    pub(in crate::reducer) edge_id: String,
    /// Edge kind the materialized edge will carry.
    pub(in crate::reducer) kind: InteractionEdgeKind,
    /// Sender-side anchor for the edge.
    pub(in crate::reducer) source: TraceAnchor,
    /// Thread expected to receive the delivered message.
    pub(in crate::reducer) target_thread_id: String,
    /// Message text matched against recipient-side conversation items.
    pub(in crate::reducer) message_content: String,
    /// Spawn-only fallback for children that fail before their task message is model-visible.
    pub(in crate::reducer) unresolved_spawn_thread_id: Option<String>,
    /// Earliest observation time for the edge.
    pub(in crate::reducer) started_at_unix_ms: i64,
    /// Latest observation time, when an end has been seen.
    pub(in crate::reducer) ended_at_unix_ms: Option<i64>,
    /// Raw payload ids carried onto the edge when it materializes.
    pub(in crate::reducer) carried_raw_payload_ids: Vec<String>,
}
|
||||
|
||||
/// Typed reducer input for a multi-agent v2 child completion notification.
///
/// Child results are observed outside the normal tool lifecycle, but they still
/// carry a parent-thread notification. This wrapper keeps the dispatcher from
/// passing a positional bundle of thread and turn ids.
pub(in crate::reducer) struct ObservedAgentResultEdge {
    /// Observation time used for both edge start and end.
    pub(in crate::reducer) wall_time_unix_ms: i64,
    /// Stable id for the resulting `AgentResult` edge.
    pub(in crate::reducer) edge_id: String,
    /// Thread of the child that completed.
    pub(in crate::reducer) child_thread_id: String,
    /// Codex turn in the child whose assistant message anchors the edge source.
    pub(in crate::reducer) child_codex_turn_id: String,
    /// Thread that receives the completion notification.
    pub(in crate::reducer) parent_thread_id: String,
    /// Notification text matched against the parent-side conversation item.
    pub(in crate::reducer) message: String,
    /// Optional raw payload carried as evidence on the edge.
    pub(in crate::reducer) carried_payload: Option<RawPayloadRef>,
}
|
||||
|
||||
/// Builds the stable edge id for the spawn relationship between two threads.
|
||||
pub(in crate::reducer) fn spawn_edge_id(parent_thread_id: &str, child_thread_id: &str) -> String {
|
||||
format!("edge:spawn:{parent_thread_id}:{child_thread_id}")
|
||||
}
|
||||
|
||||
impl TraceReducer {
|
||||
    /// Starts a multi-agent edge from a runtime begin payload, when the tool kind supports one.
    ///
    /// Only assign/send/close collab tools create edges at begin time; the
    /// remaining kinds are explicit no-ops so the match stays exhaustive.
    pub(super) fn start_agent_interaction_from_runtime(
        &mut self,
        tool_call_id: &str,
        runtime_payload: &RawPayloadRef,
    ) -> Result<()> {
        let kind = self
            .rollout
            .tool_calls
            .get(tool_call_id)
            .with_context(|| format!("agent edge referenced unknown tool call {tool_call_id}"))?
            .kind
            .clone();
        match kind {
            ToolCallKind::AssignAgentTask => {
                // The begin payload names the receiver thread and the prompt
                // that will become model-visible on the recipient side.
                let payload: CollabAgentInteractionBeginEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.queue_message_agent_interaction(
                    tool_call_id,
                    InteractionEdgeKind::AssignAgentTask,
                    payload.receiver_thread_id.to_string(),
                    payload.prompt,
                    /*ended_at_unix_ms*/ None,
                )
            }
            ToolCallKind::SendMessage => {
                let payload: CollabAgentInteractionBeginEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.queue_message_agent_interaction(
                    tool_call_id,
                    InteractionEdgeKind::SendMessage,
                    payload.receiver_thread_id.to_string(),
                    payload.prompt,
                    /*ended_at_unix_ms*/ None,
                )
            }
            ToolCallKind::CloseAgent => {
                let payload: CollabCloseBeginEvent =
                    serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
                self.upsert_close_agent_interaction(
                    tool_call_id,
                    payload.receiver_thread_id.to_string(),
                    /*ended_at_unix_ms*/ None,
                )
            }
            // SpawnAgent creates its edge at runtime end, once the child
            // thread id is known; the rest have no agent edge at all.
            ToolCallKind::ExecCommand
            | ToolCallKind::WriteStdin
            | ToolCallKind::ApplyPatch
            | ToolCallKind::Mcp { .. }
            | ToolCallKind::Web
            | ToolCallKind::ImageGeneration
            | ToolCallKind::SpawnAgent
            | ToolCallKind::WaitAgent
            | ToolCallKind::Other { .. } => Ok(()),
        }
    }
||||
|
||||
/// Ends or enriches a multi-agent edge from a runtime end payload.
|
||||
pub(super) fn end_agent_interaction_from_runtime(
|
||||
&mut self,
|
||||
wall_time_unix_ms: i64,
|
||||
tool_call_id: &str,
|
||||
runtime_payload: &RawPayloadRef,
|
||||
) -> Result<()> {
|
||||
let kind = self.rollout.tool_calls[tool_call_id].kind.clone();
|
||||
match kind {
|
||||
ToolCallKind::SpawnAgent => {
|
||||
let payload: CollabAgentSpawnEndEvent =
|
||||
serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
|
||||
self.end_spawn_agent_interaction(wall_time_unix_ms, tool_call_id, &payload)
|
||||
}
|
||||
ToolCallKind::AssignAgentTask => {
|
||||
let payload: CollabAgentInteractionEndEvent =
|
||||
serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
|
||||
self.end_message_agent_interaction(
|
||||
wall_time_unix_ms,
|
||||
tool_call_id,
|
||||
InteractionEdgeKind::AssignAgentTask,
|
||||
&payload,
|
||||
)
|
||||
}
|
||||
ToolCallKind::SendMessage => {
|
||||
let payload: CollabAgentInteractionEndEvent =
|
||||
serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
|
||||
self.end_message_agent_interaction(
|
||||
wall_time_unix_ms,
|
||||
tool_call_id,
|
||||
InteractionEdgeKind::SendMessage,
|
||||
&payload,
|
||||
)
|
||||
}
|
||||
ToolCallKind::CloseAgent => {
|
||||
let payload: CollabCloseEndEvent =
|
||||
serde_json::from_value(self.read_payload_json(runtime_payload)?)?;
|
||||
self.upsert_close_agent_interaction(
|
||||
tool_call_id,
|
||||
payload.receiver_thread_id.to_string(),
|
||||
Some(wall_time_unix_ms),
|
||||
)
|
||||
}
|
||||
ToolCallKind::ExecCommand
|
||||
| ToolCallKind::WriteStdin
|
||||
| ToolCallKind::ApplyPatch
|
||||
| ToolCallKind::Mcp { .. }
|
||||
| ToolCallKind::Web
|
||||
| ToolCallKind::ImageGeneration
|
||||
| ToolCallKind::WaitAgent
|
||||
| ToolCallKind::Other { .. } => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds the canonical tool result payload to an already reduced multi-agent edge.
|
||||
pub(super) fn attach_agent_interaction_tool_result(
|
||||
&mut self,
|
||||
tool_call_id: &str,
|
||||
result_payload: Option<&RawPayloadRef>,
|
||||
) -> Result<()> {
|
||||
let Some(result_payload) = result_payload else {
|
||||
return Ok(());
|
||||
};
|
||||
if let Some(edge) = self
|
||||
.rollout
|
||||
.interaction_edges
|
||||
.values_mut()
|
||||
.find(|edge| tool_call_source_matches(&edge.source, tool_call_id))
|
||||
{
|
||||
push_unique(
|
||||
&mut edge.carried_raw_payload_ids,
|
||||
&result_payload.raw_payload_id,
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Agent delivery edges intentionally wait for the recipient-side
|
||||
// conversation item. Tool end can arrive before that item is
|
||||
// reduced, so preserve the response payload on the pending edge rather
|
||||
// than dropping evidence until the delivery materializes.
|
||||
if let Some(pending) = self
|
||||
.pending_agent_interaction_edges
|
||||
.iter_mut()
|
||||
.find(|pending| tool_call_source_matches(&pending.source, tool_call_id))
|
||||
{
|
||||
push_unique(
|
||||
&mut pending.carried_raw_payload_ids,
|
||||
&result_payload.raw_payload_id,
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn end_spawn_agent_interaction(
|
||||
&mut self,
|
||||
wall_time_unix_ms: i64,
|
||||
tool_call_id: &str,
|
||||
payload: &CollabAgentSpawnEndEvent,
|
||||
) -> Result<()> {
|
||||
let Some(child_thread_id) = payload.new_thread_id else {
|
||||
return Ok(());
|
||||
};
|
||||
let tool_call = &self.rollout.tool_calls[tool_call_id];
|
||||
let child_thread_id = child_thread_id.to_string();
|
||||
let edge_id = spawn_edge_id(&payload.sender_thread_id.to_string(), &child_thread_id);
|
||||
|
||||
self.queue_or_resolve_agent_interaction_edge(PendingAgentInteractionEdge {
|
||||
edge_id,
|
||||
kind: InteractionEdgeKind::SpawnAgent,
|
||||
source: TraceAnchor::ToolCall {
|
||||
tool_call_id: tool_call_id.to_string(),
|
||||
},
|
||||
target_thread_id: child_thread_id.clone(),
|
||||
message_content: payload.prompt.clone(),
|
||||
unresolved_spawn_thread_id: Some(child_thread_id),
|
||||
started_at_unix_ms: tool_call.execution.started_at_unix_ms,
|
||||
ended_at_unix_ms: Some(wall_time_unix_ms),
|
||||
carried_raw_payload_ids: self.agent_tool_payload_ids(tool_call_id)?,
|
||||
})
|
||||
}
|
||||
|
||||
fn end_message_agent_interaction(
|
||||
&mut self,
|
||||
wall_time_unix_ms: i64,
|
||||
tool_call_id: &str,
|
||||
edge_kind: InteractionEdgeKind,
|
||||
payload: &CollabAgentInteractionEndEvent,
|
||||
) -> Result<()> {
|
||||
self.queue_message_agent_interaction(
|
||||
tool_call_id,
|
||||
edge_kind,
|
||||
payload.receiver_thread_id.to_string(),
|
||||
payload.prompt.clone(),
|
||||
Some(wall_time_unix_ms),
|
||||
)
|
||||
}
|
||||
|
||||
fn queue_message_agent_interaction(
|
||||
&mut self,
|
||||
tool_call_id: &str,
|
||||
kind: InteractionEdgeKind,
|
||||
target_thread_id: String,
|
||||
message_content: String,
|
||||
ended_at_unix_ms: Option<i64>,
|
||||
) -> Result<()> {
|
||||
let tool_call = &self.rollout.tool_calls[tool_call_id];
|
||||
self.queue_or_resolve_agent_interaction_edge(PendingAgentInteractionEdge {
|
||||
edge_id: tool_edge_id(tool_call_id),
|
||||
kind,
|
||||
source: TraceAnchor::ToolCall {
|
||||
tool_call_id: tool_call_id.to_string(),
|
||||
},
|
||||
target_thread_id,
|
||||
message_content,
|
||||
unresolved_spawn_thread_id: None,
|
||||
started_at_unix_ms: tool_call.execution.started_at_unix_ms,
|
||||
ended_at_unix_ms,
|
||||
carried_raw_payload_ids: self.agent_tool_payload_ids(tool_call_id)?,
|
||||
})
|
||||
}
|
||||
|
||||
fn agent_tool_payload_ids(&self, tool_call_id: &str) -> Result<Vec<String>> {
|
||||
let tool_call =
|
||||
self.rollout.tool_calls.get(tool_call_id).with_context(|| {
|
||||
format!("agent edge referenced unknown tool call {tool_call_id}")
|
||||
})?;
|
||||
let mut payload_ids = Vec::new();
|
||||
if let Some(payload_id) = &tool_call.raw_invocation_payload_id {
|
||||
push_unique(&mut payload_ids, payload_id);
|
||||
}
|
||||
for payload_id in &tool_call.raw_runtime_payload_ids {
|
||||
push_unique(&mut payload_ids, payload_id);
|
||||
}
|
||||
if let Some(payload_id) = &tool_call.raw_result_payload_id {
|
||||
push_unique(&mut payload_ids, payload_id);
|
||||
}
|
||||
Ok(payload_ids)
|
||||
}
|
||||
|
||||
fn upsert_close_agent_interaction(
|
||||
&mut self,
|
||||
tool_call_id: &str,
|
||||
target_thread_id: String,
|
||||
ended_at_unix_ms: Option<i64>,
|
||||
) -> Result<()> {
|
||||
if !self.rollout.threads.contains_key(&target_thread_id) {
|
||||
// A failed close can name a thread that never participated in this
|
||||
// trace. Keep that evidence on the ToolCall raw payloads rather
|
||||
// than creating an anchor to a non-existent reduced object.
|
||||
return Ok(());
|
||||
}
|
||||
let started_at_unix_ms = self
|
||||
.rollout
|
||||
.tool_calls
|
||||
.get(tool_call_id)
|
||||
.with_context(|| format!("close edge referenced unknown tool call {tool_call_id}"))?
|
||||
.execution
|
||||
.started_at_unix_ms;
|
||||
let carried_raw_payload_ids = self.agent_tool_payload_ids(tool_call_id)?;
|
||||
self.upsert_interaction_edge(InteractionEdge {
|
||||
edge_id: tool_edge_id(tool_call_id),
|
||||
kind: InteractionEdgeKind::CloseAgent,
|
||||
source: TraceAnchor::ToolCall {
|
||||
tool_call_id: tool_call_id.to_string(),
|
||||
},
|
||||
target: TraceAnchor::Thread {
|
||||
thread_id: target_thread_id,
|
||||
},
|
||||
started_at_unix_ms,
|
||||
ended_at_unix_ms,
|
||||
carried_item_ids: Vec::new(),
|
||||
carried_raw_payload_ids,
|
||||
})
|
||||
}
|
||||
|
||||
    /// Queues or resolves the edge from a child completion to its parent notification.
    ///
    /// The source anchor is the child's latest assistant message when one
    /// exists; otherwise the child thread itself (see below).
    pub(in crate::reducer) fn queue_agent_result_interaction_edge(
        &mut self,
        observed: ObservedAgentResultEdge,
    ) -> Result<()> {
        let source = if let Some(source_item_id) = self.latest_assistant_message_item_for_turn(
            &observed.child_thread_id,
            &observed.child_codex_turn_id,
        ) {
            TraceAnchor::ConversationItem {
                item_id: source_item_id,
            }
        } else {
            // Child completion is delivered from AgentStatus, not from transcript
            // content. Failed or cancelled children can therefore notify the parent
            // without producing a final assistant message. Anchor those edges to
            // the child thread so the trace keeps the valid delivery instead of
            // inventing a missing conversation item.
            TraceAnchor::Thread {
                thread_id: observed.child_thread_id,
            }
        };

        // The notification is instantaneous from the trace's point of view,
        // so start and end share the observation timestamp.
        self.queue_or_resolve_agent_interaction_edge(PendingAgentInteractionEdge {
            edge_id: observed.edge_id,
            kind: InteractionEdgeKind::AgentResult,
            source,
            target_thread_id: observed.parent_thread_id,
            message_content: observed.message,
            unresolved_spawn_thread_id: None,
            started_at_unix_ms: observed.wall_time_unix_ms,
            ended_at_unix_ms: Some(observed.wall_time_unix_ms),
            carried_raw_payload_ids: observed
                .carried_payload
                .map(|payload| vec![payload.raw_payload_id])
                .unwrap_or_default(),
        })
    }
|
||||
|
||||
/// Resolves pending agent edges whose target is the newly reduced conversation item.
|
||||
pub(in crate::reducer) fn resolve_pending_agent_edges_for_item(
|
||||
&mut self,
|
||||
item_id: &str,
|
||||
) -> Result<()> {
|
||||
let Some((thread_id, message_content)) = self.inter_agent_message_item(item_id) else {
|
||||
return Ok(());
|
||||
};
|
||||
let Some(pending_index) = self
|
||||
.pending_agent_interaction_edges
|
||||
.iter()
|
||||
.position(|pending| {
|
||||
pending.target_thread_id == thread_id && pending.message_content == message_content
|
||||
})
|
||||
else {
|
||||
return Ok(());
|
||||
};
|
||||
let pending = self.pending_agent_interaction_edges.remove(pending_index);
|
||||
self.upsert_agent_interaction_edge_for_item(pending, item_id.to_string())
|
||||
}
|
||||
|
||||
    /// Resolves a pending edge immediately when its delivery item already
    /// exists; otherwise queues it, merging with an identically identified
    /// pending edge.
    ///
    /// Fails when the same edge id is re-observed with conflicting delivery data.
    fn queue_or_resolve_agent_interaction_edge(
        &mut self,
        pending: PendingAgentInteractionEdge,
    ) -> Result<()> {
        // Fast path: the recipient-side item is already reduced.
        if let Some(item_id) = self.find_unlinked_inter_agent_message_item(
            &pending.target_thread_id,
            &pending.message_content,
        ) {
            return self.upsert_agent_interaction_edge_for_item(pending, item_id);
        }

        // Merge with a previously queued observation of the same edge.
        if let Some(existing) = self
            .pending_agent_interaction_edges
            .iter_mut()
            .find(|existing| existing.edge_id == pending.edge_id)
        {
            if existing.kind != pending.kind
                || existing.source != pending.source
                || existing.target_thread_id != pending.target_thread_id
                || existing.message_content != pending.message_content
                || existing.unresolved_spawn_thread_id != pending.unresolved_spawn_thread_id
            {
                bail!(
                    "pending interaction edge {} was observed with conflicting delivery data",
                    pending.edge_id
                );
            }
            // Widen the observation window to cover both observations.
            existing.started_at_unix_ms =
                existing.started_at_unix_ms.min(pending.started_at_unix_ms);
            existing.ended_at_unix_ms = match (existing.ended_at_unix_ms, pending.ended_at_unix_ms)
            {
                (Some(existing_ended), Some(pending_ended)) => {
                    Some(existing_ended.max(pending_ended))
                }
                (None, ended) | (ended, None) => ended,
            };
            extend_unique(
                &mut existing.carried_raw_payload_ids,
                pending.carried_raw_payload_ids,
            );
            return Ok(());
        }

        self.pending_agent_interaction_edges.push(pending);
        Ok(())
    }
|
||||
|
||||
/// Materializes unresolved spawn edges that have a valid child-thread fallback target.
|
||||
pub(in crate::reducer) fn resolve_pending_spawn_edge_fallbacks(&mut self) -> Result<()> {
|
||||
let pending_edges = std::mem::take(&mut self.pending_agent_interaction_edges);
|
||||
for pending in pending_edges {
|
||||
let Some(child_thread_id) = pending.unresolved_spawn_thread_id else {
|
||||
continue;
|
||||
};
|
||||
if pending.kind != InteractionEdgeKind::SpawnAgent {
|
||||
bail!(
|
||||
"non-spawn interaction edge {} carried a spawn fallback target",
|
||||
pending.edge_id
|
||||
);
|
||||
}
|
||||
if !self.rollout.threads.contains_key(&child_thread_id) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Spawn normally resolves to the child task message because that is
|
||||
// where the delegated work first becomes model-visible. A child can
|
||||
// fail before that transcript item exists, but the spawned thread is
|
||||
// still real and the spawning tool still created it. Preserve that
|
||||
// relationship with the thread fallback instead of dropping the edge.
|
||||
self.upsert_interaction_edge(InteractionEdge {
|
||||
edge_id: pending.edge_id,
|
||||
kind: pending.kind,
|
||||
source: pending.source,
|
||||
target: TraceAnchor::Thread {
|
||||
thread_id: child_thread_id,
|
||||
},
|
||||
started_at_unix_ms: pending.started_at_unix_ms,
|
||||
ended_at_unix_ms: pending.ended_at_unix_ms,
|
||||
carried_item_ids: Vec::new(),
|
||||
carried_raw_payload_ids: pending.carried_raw_payload_ids,
|
||||
})?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn upsert_agent_interaction_edge_for_item(
|
||||
&mut self,
|
||||
pending: PendingAgentInteractionEdge,
|
||||
target_item_id: String,
|
||||
) -> Result<()> {
|
||||
self.upsert_interaction_edge(InteractionEdge {
|
||||
edge_id: pending.edge_id,
|
||||
kind: pending.kind,
|
||||
source: pending.source,
|
||||
target: TraceAnchor::ConversationItem {
|
||||
item_id: target_item_id.clone(),
|
||||
},
|
||||
started_at_unix_ms: pending.started_at_unix_ms,
|
||||
ended_at_unix_ms: pending.ended_at_unix_ms,
|
||||
carried_item_ids: vec![target_item_id],
|
||||
carried_raw_payload_ids: pending.carried_raw_payload_ids,
|
||||
})
|
||||
}
|
||||
|
||||
    /// Inserts a new interaction edge or merges a re-observation of an
    /// existing one.
    ///
    /// Fails when the same edge id arrives with different kind/source/target.
    fn upsert_interaction_edge(&mut self, edge: InteractionEdge) -> Result<()> {
        if let Some(existing) = self.rollout.interaction_edges.get_mut(&edge.edge_id) {
            if existing.kind != edge.kind
                || existing.source != edge.source
                || existing.target != edge.target
            {
                bail!(
                    "interaction edge {} was observed with conflicting endpoints",
                    edge.edge_id
                );
            }
            // Widen the observation window and union the carried evidence.
            existing.started_at_unix_ms = existing.started_at_unix_ms.min(edge.started_at_unix_ms);
            existing.ended_at_unix_ms = match (existing.ended_at_unix_ms, edge.ended_at_unix_ms) {
                (Some(existing_ended), Some(edge_ended)) => Some(existing_ended.max(edge_ended)),
                (None, ended) | (ended, None) => ended,
            };
            extend_unique(&mut existing.carried_item_ids, edge.carried_item_ids);
            extend_unique(
                &mut existing.carried_raw_payload_ids,
                edge.carried_raw_payload_ids,
            );
            return Ok(());
        }

        self.rollout
            .interaction_edges
            .insert(edge.edge_id.clone(), edge);
        Ok(())
    }
|
||||
|
||||
fn find_unlinked_inter_agent_message_item(
|
||||
&self,
|
||||
thread_id: &str,
|
||||
message_content: &str,
|
||||
) -> Option<String> {
|
||||
self.rollout
|
||||
.threads
|
||||
.get(thread_id)?
|
||||
.conversation_item_ids
|
||||
.iter()
|
||||
.find(|item_id| {
|
||||
!self.is_interaction_edge_target_item(item_id)
|
||||
&& self
|
||||
.inter_agent_message_item(item_id)
|
||||
.is_some_and(|(_, content)| content == message_content)
|
||||
})
|
||||
.cloned()
|
||||
}
|
||||
|
||||
fn inter_agent_message_item(&self, item_id: &str) -> Option<(String, String)> {
|
||||
let item = self.rollout.conversation_items.get(item_id)?;
|
||||
let (recipient_agent_path, message_content) = inter_agent_message_fields(item)?;
|
||||
let thread = self.rollout.threads.get(&item.thread_id)?;
|
||||
if recipient_agent_path != thread.agent_path {
|
||||
return None;
|
||||
}
|
||||
Some((item.thread_id.clone(), message_content))
|
||||
}
|
||||
|
||||
fn is_interaction_edge_target_item(&self, item_id: &str) -> bool {
|
||||
self.rollout
|
||||
.interaction_edges
|
||||
.values()
|
||||
.any(|edge| matches!(&edge.target, TraceAnchor::ConversationItem { item_id: target } if target == item_id))
|
||||
}
|
||||
|
||||
fn latest_assistant_message_item_for_turn(
|
||||
&self,
|
||||
thread_id: &str,
|
||||
codex_turn_id: &str,
|
||||
) -> Option<String> {
|
||||
self.rollout
|
||||
.conversation_items
|
||||
.values()
|
||||
.filter(|item| {
|
||||
item.thread_id == thread_id
|
||||
&& item.codex_turn_id.as_deref() == Some(codex_turn_id)
|
||||
&& item.role == ConversationRole::Assistant
|
||||
&& item.kind == ConversationItemKind::Message
|
||||
})
|
||||
.max_by_key(|item| item.first_seen_at_unix_ms)
|
||||
.map(|item| item.item_id.clone())
|
||||
}
|
||||
}
|
||||
|
||||
/// Appends each of `new_items` to `items`, skipping values already present.
///
/// Preserves the order in which values were first seen. Membership checks are
/// linear (`Vec::contains`), which is fine for the short id lists carried on
/// interaction edges.
fn extend_unique(items: &mut Vec<String>, new_items: Vec<String>) {
    for item in new_items {
        // `contains` replaces the hand-rolled `iter().any(..)` scan.
        if !items.contains(&item) {
            items.push(item);
        }
    }
}
|
||||
|
||||
/// Derives the stable interaction-edge id for a tool call.
fn tool_edge_id(tool_call_id: &str) -> String {
    const PREFIX: &str = "edge:tool:";
    let mut edge_id = String::with_capacity(PREFIX.len() + tool_call_id.len());
    edge_id.push_str(PREFIX);
    edge_id.push_str(tool_call_id);
    edge_id
}
|
||||
|
||||
fn tool_call_source_matches(anchor: &TraceAnchor, tool_call_id: &str) -> bool {
|
||||
matches!(anchor, TraceAnchor::ToolCall { tool_call_id: source } if source == tool_call_id)
|
||||
}
|
||||
|
||||
/// Pushes `item` onto `items` unless an equal value is already present.
fn push_unique(items: &mut Vec<String>, item: &str) {
    if items.iter().all(|existing| existing != item) {
        items.push(item.to_string());
    }
}
|
||||
|
||||
/// Extracts `(recipient_agent_path, message_content)` from a conversation
/// item when — and only when — the item is a mailbox delivery serialized as
/// `InterAgentCommunication`. Returns `None` for anything else.
fn inter_agent_message_fields(item: &ConversationItem) -> Option<(String, String)> {
    // Multi-agent v2 injects mailbox deliveries as assistant messages whose
    // text is serialized `InterAgentCommunication`. Treat only that exact
    // transport shape as an edge target; ordinary assistant JSON must not be
    // mistaken for cross-thread delivery.
    if item.role != ConversationRole::Assistant || item.kind != ConversationItemKind::Message {
        return None;
    }
    // The transport payload is always a single text part.
    let [ConversationPart::Text { text }] = item.body.parts.as_slice() else {
        return None;
    };
    // A parse failure means this was ordinary assistant text, not a delivery.
    let communication = serde_json::from_str::<InterAgentCommunication>(text).ok()?;
    Some((communication.recipient.to_string(), communication.content))
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "agents_tests.rs"]
|
||||
mod tests;
|
||||
717
codex-rs/rollout-trace/src/reducer/tool/agents_tests.rs
Normal file
717
codex-rs/rollout-trace/src/reducer/tool/agents_tests.rs
Normal file
@@ -0,0 +1,717 @@
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::model::AgentOrigin;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::InteractionEdgeKind;
|
||||
use crate::model::RolloutStatus;
|
||||
use crate::model::ToolCallKind;
|
||||
use crate::model::ToolCallSummary;
|
||||
use crate::model::TraceAnchor;
|
||||
use crate::payload::RawPayloadKind;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RawToolCallRequester;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
use crate::reducer::test_support::append_completed_inference;
|
||||
use crate::reducer::test_support::append_inference_request;
|
||||
use crate::reducer::test_support::create_started_agent_writer;
|
||||
use crate::reducer::test_support::message;
|
||||
use crate::reducer::test_support::start_agent_turn;
|
||||
use crate::reducer::test_support::start_thread;
|
||||
use crate::reducer::test_support::start_turn_for_thread;
|
||||
use crate::reducer::test_support::trace_context_for_agent;
|
||||
use crate::reducer::test_support::trace_context_for_thread;
|
||||
use crate::replay_bundle;
|
||||
use crate::writer::TraceWriter;
|
||||
|
||||
// A child thread announced only through session metadata gets a `Spawned`
// origin immediately, but the spawn delivery edge must be deferred until a
// recipient-side conversation item is observed.
#[test]
fn child_thread_metadata_creates_spawn_origin_without_delivery_edge() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = TraceWriter::create(
        temp.path(),
        "trace-1".to_string(),
        "rollout-1".to_string(),
        "019d0000-0000-7000-8000-000000000002".to_string(),
    )?;
    // Session metadata is the only evidence of the spawn in this scenario.
    let metadata = writer.write_json_payload(
        RawPayloadKind::SessionMetadata,
        &json!({
            "nickname": "James",
            "agent_role": "explorer",
            "task_name": "repo_file_counter",
            "model": "gpt-test",
            "session_source": {
                "subagent": {
                    "thread_spawn": {
                        "parent_thread_id": "019d0000-0000-7000-8000-000000000001",
                        "agent_path": "/root/repo_file_counter",
                        "agent_nickname": "James",
                        "agent_role": "explorer"
                    }
                }
            }
        }),
    )?;
    writer.append(RawTraceEventPayload::ThreadStarted {
        thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
        agent_path: "/root/repo_file_counter".to_string(),
        metadata_payload: Some(metadata),
    })?;

    let replayed = replay_bundle(temp.path())?;
    let thread = &replayed.threads["019d0000-0000-7000-8000-000000000002"];
    assert_eq!(thread.nickname, Some("James".to_string()));
    assert_eq!(thread.default_model, Some("gpt-test".to_string()));
    assert_eq!(
        thread.origin,
        AgentOrigin::Spawned {
            parent_thread_id: "019d0000-0000-7000-8000-000000000001".to_string(),
            spawn_edge_id: "edge:spawn:019d0000-0000-7000-8000-000000000001:019d0000-0000-7000-8000-000000000002".to_string(),
            task_name: "repo_file_counter".to_string(),
            agent_role: "explorer".to_string(),
        }
    );
    // The edge key is derived from parent/child thread ids; it must not be
    // materialized before the recipient item arrives.
    assert!(
        !replayed.interaction_edges.contains_key(
            "edge:spawn:019d0000-0000-7000-8000-000000000001:019d0000-0000-7000-8000-000000000002"
        ),
        "spawn metadata identifies the child, but the delivery edge waits for the recipient \
        conversation item"
    );

    Ok(())
}
|
||||
|
||||
// Happy path: the spawn edge should point at the exact child-side
// conversation item that delivered the task, and carry all four tool-side
// payloads as evidence.
#[test]
fn spawn_runtime_payload_targets_delivered_child_message() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_agent_turn(&writer, "turn-1")?;

    let spawn_payloads = append_spawn_agent_tool_lifecycle(&writer, "turn-1")?;

    // Then record the child-side model-visible task message. This is the
    // preferred target because it pinpoints where the delegated work entered
    // the child timeline.
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/repo_file_counter",
    )?;
    start_turn_for_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
    )?;
    let delivered = inter_agent_message(
        "/root",
        "/root/repo_file_counter",
        "count",
        /*trigger_turn*/ true,
    );
    append_inference_request(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
        "inference-child-1",
        vec![message("assistant", &delivered)],
    )?;

    let replayed = replay_bundle(temp.path())?;
    let edge = &replayed.interaction_edges["edge:spawn:019d0000-0000-7000-8000-000000000001:019d0000-0000-7000-8000-000000000002"];
    assert_eq!(edge.kind, InteractionEdgeKind::SpawnAgent);
    assert_eq!(
        edge.source,
        TraceAnchor::ToolCall {
            tool_call_id: "call-spawn".to_string()
        }
    );
    // The target item lives on the child thread and is the only carried item.
    let target_item_id = target_conversation_item_id(&edge.target);
    assert_eq!(edge.carried_item_ids, vec![target_item_id.clone()]);
    assert_eq!(
        replayed.conversation_items[target_item_id].thread_id,
        "019d0000-0000-7000-8000-000000000002"
    );
    // Payloads appear in the order the lifecycle recorded them.
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![
            spawn_payloads.invocation.raw_payload_id,
            spawn_payloads.begin.raw_payload_id,
            spawn_payloads.end.raw_payload_id,
            spawn_payloads.result.raw_payload_id,
        ]
    );

    Ok(())
}
|
||||
|
||||
// When the child never records the delivered task message, the spawn edge
// should degrade gracefully to targeting the child thread as a whole.
#[test]
fn spawn_runtime_payload_falls_back_to_child_thread_without_delivery_item() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_agent_turn(&writer, "turn-1")?;
    let spawn_payloads = append_spawn_agent_tool_lifecycle(&writer, "turn-1")?;

    // Deliberately start the child thread without appending an inference
    // request containing the inter-agent task message. This reproduces the
    // failure path where the child aborts before the reducer can target the
    // precise child-side ConversationItem.
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/repo_file_counter",
    )?;

    let replayed = replay_bundle(temp.path())?;
    let edge = &replayed.interaction_edges["edge:spawn:019d0000-0000-7000-8000-000000000001:019d0000-0000-7000-8000-000000000002"];
    assert_eq!(edge.kind, InteractionEdgeKind::SpawnAgent);
    assert_eq!(
        edge.source,
        TraceAnchor::ToolCall {
            tool_call_id: "call-spawn".to_string()
        }
    );
    assert_eq!(
        edge.target,
        TraceAnchor::Thread {
            thread_id: "019d0000-0000-7000-8000-000000000002".to_string()
        }
    );
    // No transcript item carried the task, so the fallback edge should not
    // claim one. The raw payloads still preserve the tool evidence.
    assert!(edge.carried_item_ids.is_empty());
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![
            spawn_payloads.invocation.raw_payload_id,
            spawn_payloads.begin.raw_payload_id,
            spawn_payloads.end.raw_payload_id,
            spawn_payloads.result.raw_payload_id,
        ]
    );

    Ok(())
}
|
||||
|
||||
// A send_message tool call should produce a SendMessage edge whose target is
// the precise receiver-side conversation item, with a closed time window.
#[test]
fn send_message_runtime_payload_targets_delivered_child_message() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_agent_turn(&writer, "turn-1")?;
    // Canonical dispatch invocation for the send_message tool call.
    let invocation_payload = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "send_message",
            "payload": {
                "type": "function",
                "arguments": "{\"target\":\"/root/child\",\"message\":\"hello\"}"
            }
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "call-send".to_string(),
            model_visible_call_id: Some("call-send".to_string()),
            code_mode_runtime_tool_id: None,
            requester: RawToolCallRequester::Model,
            kind: ToolCallKind::SendMessage,
            summary: ToolCallSummary::Generic {
                label: "send_message".to_string(),
                input_preview: None,
                output_preview: None,
            },
            invocation_payload: Some(invocation_payload),
        },
    )?;
    // Runtime begin/end events carry the sender/receiver thread join keys.
    let begin_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-send",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "receiver_thread_id": "019d0000-0000-7000-8000-000000000002",
            "prompt": "hello",
            "status": "running"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "call-send".to_string(),
            runtime_payload: begin_payload,
        },
    )?;
    let end_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-send",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "receiver_thread_id": "019d0000-0000-7000-8000-000000000002",
            "prompt": "hello",
            "status": "running"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeEnded {
            tool_call_id: "call-send".to_string(),
            status: ExecutionStatus::Completed,
            runtime_payload: end_payload,
        },
    )?;
    // Receiver side: the child records the delivered mailbox item.
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/child",
    )?;
    start_turn_for_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
    )?;
    let delivered =
        inter_agent_message("/root", "/root/child", "hello", /*trigger_turn*/ false);
    append_inference_request(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
        "inference-child-1",
        vec![message("assistant", &delivered)],
    )?;

    let replayed = replay_bundle(temp.path())?;
    let edge = &replayed.interaction_edges["edge:tool:call-send"];
    assert_eq!(edge.kind, InteractionEdgeKind::SendMessage);
    assert_eq!(
        edge.source,
        TraceAnchor::ToolCall {
            tool_call_id: "call-send".to_string()
        }
    );
    // Target must be the receiver-side item, carried on the edge.
    let target_item_id = target_conversation_item_id(&edge.target);
    assert_eq!(edge.carried_item_ids, vec![target_item_id.clone()]);
    assert_eq!(
        replayed.conversation_items[target_item_id].thread_id,
        "019d0000-0000-7000-8000-000000000002"
    );
    // The runtime-ended event closes the edge's time window.
    assert!(edge.ended_at_unix_ms.is_some());

    Ok(())
}
|
||||
|
||||
// close_agent has no delivered message, so its edge targets the closed child
// thread; closing a child also completes that thread's execution while the
// rollout as a whole keeps running.
#[test]
fn close_agent_runtime_payload_targets_thread() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/child",
    )?;
    start_agent_turn(&writer, "turn-1")?;
    let invocation_payload = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "close_agent",
            "payload": {
                "type": "function",
                "arguments": r#"{"target":"/root/child"}"#
            }
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "call-close".to_string(),
            model_visible_call_id: Some("call-close".to_string()),
            code_mode_runtime_tool_id: None,
            requester: RawToolCallRequester::Model,
            kind: ToolCallKind::CloseAgent,
            summary: ToolCallSummary::Generic {
                label: "close_agent".to_string(),
                input_preview: None,
                output_preview: None,
            },
            invocation_payload: Some(invocation_payload.clone()),
        },
    )?;
    let begin_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-close",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "receiver_thread_id": "019d0000-0000-7000-8000-000000000002"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "call-close".to_string(),
            runtime_payload: begin_payload.clone(),
        },
    )?;
    let end_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-close",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "receiver_thread_id": "019d0000-0000-7000-8000-000000000002",
            "receiver_agent_nickname": "Scout",
            "receiver_agent_role": "explorer",
            "status": "running"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeEnded {
            tool_call_id: "call-close".to_string(),
            status: ExecutionStatus::Completed,
            runtime_payload: end_payload.clone(),
        },
    )?;
    let result_payload = writer.write_json_payload(
        RawPayloadKind::ToolResult,
        &json!({"previous_status": "running"}),
    )?;
    writer.append_with_context(
        trace_context_for_agent("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "call-close".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: Some(result_payload.clone()),
        },
    )?;
    writer.append(RawTraceEventPayload::ThreadEnded {
        thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
        status: RolloutStatus::Completed,
    })?;

    let replayed = replay_bundle(temp.path())?;
    let edge = &replayed.interaction_edges["edge:tool:call-close"];
    assert_eq!(edge.kind, InteractionEdgeKind::CloseAgent);
    assert_eq!(
        edge.source,
        TraceAnchor::ToolCall {
            tool_call_id: "call-close".to_string()
        }
    );
    assert_eq!(
        edge.target,
        TraceAnchor::Thread {
            thread_id: "019d0000-0000-7000-8000-000000000002".to_string()
        }
    );
    // close_agent carries no conversation items, only the raw tool evidence
    // in lifecycle order.
    assert!(edge.carried_item_ids.is_empty());
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![
            invocation_payload.raw_payload_id,
            begin_payload.raw_payload_id,
            end_payload.raw_payload_id,
            result_payload.raw_payload_id,
        ]
    );
    assert_eq!(
        replayed.threads["019d0000-0000-7000-8000-000000000002"]
            .execution
            .status,
        ExecutionStatus::Completed
    );
    // Closing a child must not complete the overall rollout.
    assert_eq!(replayed.status, RolloutStatus::Running);

    Ok(())
}
|
||||
|
||||
// An AgentResult edge should link the child's final assistant message
// (source) to the parent-side notification item (target).
#[test]
fn agent_result_edge_links_child_result_to_parent_notification() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/child",
    )?;
    start_turn_for_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
    )?;
    // The child completes its turn with the final assistant message "done".
    append_completed_inference(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
        "inference-child-1",
        vec![message("assistant", "task")],
        vec![message("assistant", "done")],
    )?;

    let notification = "<subagent_notification>{\"agent_path\":\"/root/child\",\"status\":{\"completed\":\"done\"}}</subagent_notification>";
    let carried_payload = writer.write_json_payload(
        RawPayloadKind::AgentResult,
        &json!({
            "child_agent_path": "/root/child",
            "message": notification,
            "status": {"completed": "done"}
        }),
    )?;
    writer.append_with_context(
        trace_context_for_thread("019d0000-0000-7000-8000-000000000002", "turn-child-1"),
        RawTraceEventPayload::AgentResultObserved {
            edge_id: "edge:agent_result:thread-child:turn-child-1:thread-root".to_string(),
            child_thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
            child_codex_turn_id: "turn-child-1".to_string(),
            parent_thread_id: "019d0000-0000-7000-8000-000000000001".to_string(),
            message: notification.to_string(),
            carried_payload: Some(carried_payload.clone()),
        },
    )?;

    // Parent side: the notification is delivered as a mailbox item.
    start_agent_turn(&writer, "turn-root-1")?;
    let delivered = inter_agent_message(
        "/root/child",
        "/root",
        notification,
        /*trigger_turn*/ false,
    );
    append_inference_request(
        &writer,
        "019d0000-0000-7000-8000-000000000001",
        "turn-root-1",
        "inference-root-1",
        vec![message("assistant", &delivered)],
    )?;

    let replayed = replay_bundle(temp.path())?;
    let edge =
        &replayed.interaction_edges["edge:agent_result:thread-child:turn-child-1:thread-root"];
    assert_eq!(edge.kind, InteractionEdgeKind::AgentResult);
    // Source is the child's final assistant message item.
    let TraceAnchor::ConversationItem {
        item_id: source_item_id,
    } = &edge.source
    else {
        panic!("expected child result conversation item source");
    };
    assert_eq!(
        text_body(&replayed.conversation_items[source_item_id]),
        "done"
    );
    // Target is the parent-side notification item; only it is carried.
    let target_item_id = target_conversation_item_id(&edge.target);
    assert_eq!(
        replayed.conversation_items[target_item_id].thread_id,
        "019d0000-0000-7000-8000-000000000001"
    );
    assert_eq!(edge.carried_item_ids, vec![target_item_id.clone()]);
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![carried_payload.raw_payload_id]
    );

    Ok(())
}
|
||||
|
||||
// When the child turn has no final assistant message (e.g. a failed task),
// the AgentResult edge's source falls back to the child thread while the
// target stays the precise parent-side notification item.
#[test]
fn agent_result_edge_falls_back_to_child_thread_without_result_message() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_agent_writer(&temp)?;

    // The child thread and turn exist, but there is intentionally no completed
    // assistant message for this turn. Failed child tasks can still notify the
    // parent through AgentStatus, so the result edge must not require a final
    // transcript item from the child.
    start_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "/root/child",
    )?;
    start_turn_for_thread(
        &writer,
        "019d0000-0000-7000-8000-000000000002",
        "turn-child-1",
    )?;

    let notification = r#"<subagent_notification>{"agent_path":"/root/child","status":{"failed":"boom"}}</subagent_notification>"#;
    let carried_payload = writer.write_json_payload(
        RawPayloadKind::AgentResult,
        &json!({
            "child_agent_path": "/root/child",
            "message": notification,
            "status": {"failed": "boom"}
        }),
    )?;
    writer.append_with_context(
        trace_context_for_thread("019d0000-0000-7000-8000-000000000002", "turn-child-1"),
        RawTraceEventPayload::AgentResultObserved {
            edge_id: "edge:agent_result:thread-child:turn-child-1:thread-root".to_string(),
            child_thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
            child_codex_turn_id: "turn-child-1".to_string(),
            parent_thread_id: "019d0000-0000-7000-8000-000000000001".to_string(),
            message: notification.to_string(),
            carried_payload: Some(carried_payload.clone()),
        },
    )?;

    // The parent does receive the failure notification as a model-visible
    // mailbox item. The target should remain that precise parent-side
    // ConversationItem even though the source falls back to the child thread.
    start_agent_turn(&writer, "turn-root-1")?;
    let delivered = inter_agent_message(
        "/root/child",
        "/root",
        notification,
        /*trigger_turn*/ false,
    );
    append_inference_request(
        &writer,
        "019d0000-0000-7000-8000-000000000001",
        "turn-root-1",
        "inference-root-1",
        vec![message("assistant", &delivered)],
    )?;

    let replayed = replay_bundle(temp.path())?;
    let edge =
        &replayed.interaction_edges["edge:agent_result:thread-child:turn-child-1:thread-root"];
    assert_eq!(edge.kind, InteractionEdgeKind::AgentResult);
    assert_eq!(
        edge.source,
        TraceAnchor::Thread {
            thread_id: "019d0000-0000-7000-8000-000000000002".to_string(),
        }
    );
    let target_item_id = target_conversation_item_id(&edge.target);
    assert_eq!(
        replayed.conversation_items[target_item_id].thread_id,
        "019d0000-0000-7000-8000-000000000001"
    );
    assert_eq!(edge.carried_item_ids, vec![target_item_id.clone()]);
    assert_eq!(
        edge.carried_raw_payload_ids,
        vec![carried_payload.raw_payload_id]
    );

    Ok(())
}
|
||||
|
||||
/// Raw payload refs produced by the spawn_agent tool lifecycle, in the order
/// the recorder observed them.
struct SpawnAgentToolPayloads {
    // Canonical dispatch invocation (tool name + arguments).
    invocation: RawPayloadRef,
    // Runtime-begin event payload.
    begin: RawPayloadRef,
    // Runtime-end event payload (carries the new child thread id).
    end: RawPayloadRef,
    // Final tool result payload.
    result: RawPayloadRef,
}
|
||||
|
||||
/// Appends the full parent-side spawn_agent tool lifecycle — invocation,
/// started, runtime begin/end, and final result — under `turn_id`, returning
/// the four payload refs so tests can assert edge evidence.
fn append_spawn_agent_tool_lifecycle(
    writer: &TraceWriter,
    turn_id: &str,
) -> anyhow::Result<SpawnAgentToolPayloads> {
    // Keep the parent-side tool lifecycle in one place so the spawn tests can
    // focus on the child-side event that decides the edge target.
    let invocation = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "spawn_agent",
            "payload": {
                "type": "function",
                "arguments": r#"{"task_name":"repo_file_counter","message":"count"}"#
            }
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent(turn_id),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "call-spawn".to_string(),
            model_visible_call_id: Some("call-spawn".to_string()),
            code_mode_runtime_tool_id: None,
            requester: RawToolCallRequester::Model,
            kind: ToolCallKind::SpawnAgent,
            summary: ToolCallSummary::Generic {
                label: "spawn_agent".to_string(),
                input_preview: None,
                output_preview: None,
            },
            invocation_payload: Some(invocation.clone()),
        },
    )?;

    let begin = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-spawn",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "prompt": "count"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent(turn_id),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "call-spawn".to_string(),
            runtime_payload: begin.clone(),
        },
    )?;

    // The runtime-end payload is what announces the new child thread id.
    let end = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "call-spawn",
            "sender_thread_id": "019d0000-0000-7000-8000-000000000001",
            "new_thread_id": "019d0000-0000-7000-8000-000000000002",
            "prompt": "count",
            "model": "gpt-test",
            "reasoning_effort": "medium",
            "status": "running"
        }),
    )?;
    writer.append_with_context(
        trace_context_for_agent(turn_id),
        RawTraceEventPayload::ToolCallRuntimeEnded {
            tool_call_id: "call-spawn".to_string(),
            status: ExecutionStatus::Completed,
            runtime_payload: end.clone(),
        },
    )?;

    let result = writer.write_json_payload(
        RawPayloadKind::ToolResult,
        &json!({"task_name": "/root/repo_file_counter"}),
    )?;
    writer.append_with_context(
        trace_context_for_agent(turn_id),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "call-spawn".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: Some(result.clone()),
        },
    )?;

    Ok(SpawnAgentToolPayloads {
        invocation,
        begin,
        end,
        result,
    })
}
|
||||
|
||||
/// Builds the serialized `InterAgentCommunication` JSON string that
/// multi-agent v2 uses as a model-visible mailbox delivery between agents.
fn inter_agent_message(author: &str, recipient: &str, content: &str, trigger_turn: bool) -> String {
    json!({
        "author": author,
        "recipient": recipient,
        "other_recipients": [],
        "content": content,
        "trigger_turn": trigger_turn,
    })
    .to_string()
}
|
||||
|
||||
fn target_conversation_item_id(anchor: &TraceAnchor) -> &String {
|
||||
let TraceAnchor::ConversationItem { item_id } = anchor else {
|
||||
panic!("expected conversation item target");
|
||||
};
|
||||
item_id
|
||||
}
|
||||
|
||||
/// Returns the single text part of a conversation item; panics when the item
/// body has any other shape.
fn text_body(item: &crate::model::ConversationItem) -> &str {
    match item.body.parts.as_slice() {
        [crate::model::ConversationPart::Text { text }] => text,
        _ => panic!("expected single text part"),
    }
}
|
||||
606
codex-rs/rollout-trace/src/reducer/tool/terminal.rs
Normal file
606
codex-rs/rollout-trace/src/reducer/tool/terminal.rs
Normal file
@@ -0,0 +1,606 @@
|
||||
//! Terminal reduction for exec-like tool calls.
|
||||
//!
|
||||
//! The raw trace records terminal activity as normal tool lifecycle events.
|
||||
//! Protocol-backed exec events carry `ExecCommand*` payloads with the richest
|
||||
//! runtime details. Direct tools without protocol observations, such as
|
||||
//! `write_stdin`, can still form a terminal row from the canonical dispatch
|
||||
//! invocation/result payloads when those payloads carry the session join key.
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use anyhow::bail;
|
||||
use serde::Deserialize;
|
||||
use serde_json::Value as JsonValue;
|
||||
|
||||
use super::push_unique;
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::ExecutionWindow;
|
||||
use crate::model::TerminalModelObservation;
|
||||
use crate::model::TerminalObservationSource;
|
||||
use crate::model::TerminalOperation;
|
||||
use crate::model::TerminalOperationId;
|
||||
use crate::model::TerminalOperationKind;
|
||||
use crate::model::TerminalRequest;
|
||||
use crate::model::TerminalResult;
|
||||
use crate::model::TerminalSession;
|
||||
use crate::model::ToolCallKind;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RawEventSeq;
|
||||
use crate::reducer::TraceReducer;
|
||||
|
||||
impl TraceReducer {
|
||||
    /// Starts a terminal operation from a canonical dispatch invocation payload.
    ///
    /// This is currently needed for direct tools such as write-stdin that do not
    /// emit a richer protocol runtime-begin event with the terminal join key.
    ///
    /// Returns `Ok(None)` for non-write-stdin tool kinds, and also when the
    /// invocation payload was never recorded.
    ///
    /// # Errors
    /// Fails when the recorded invocation payload cannot be parsed as a
    /// dispatch terminal request.
    pub(in crate::reducer) fn start_terminal_operation_from_invocation(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: &str,
        tool_call_id: &str,
        kind: &ToolCallKind,
        invocation_payload: Option<&RawPayloadRef>,
    ) -> Result<Option<TerminalOperationId>> {
        if !matches!(kind, ToolCallKind::WriteStdin) {
            return Ok(None);
        }
        let operation_kind = TerminalOperationKind::WriteStdin;
        let Some(invocation_payload) = invocation_payload else {
            // Payload writes are best-effort in the live recorder. If the
            // canonical invocation is missing, keep the ToolCall but avoid
            // fabricating a lossy terminal row.
            return Ok(None);
        };

        let payload = self.read_payload_json(invocation_payload)?;
        let request = parse_dispatch_terminal_request(payload).with_context(|| {
            format!(
                "parse terminal invocation payload {} as dispatch payload",
                invocation_payload.raw_payload_id
            )
        })?;
        self.insert_terminal_operation(TerminalOperationStart {
            seq,
            wall_time_unix_ms,
            thread_id,
            tool_call_id,
            operation_kind,
            raw_payload: invocation_payload,
            request,
        })
    }
|
||||
|
||||
    /// Starts a terminal operation from a protocol runtime-begin payload.
    ///
    /// Returns `Ok(None)` when `kind` has no terminal operation mapping.
    ///
    /// # Errors
    /// Fails when the runtime payload cannot be parsed as an
    /// `ExecCommandBeginPayload`.
    pub(in crate::reducer) fn start_terminal_operation_from_runtime(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: &str,
        tool_call_id: &str,
        kind: &ToolCallKind,
        runtime_payload: &RawPayloadRef,
    ) -> Result<Option<TerminalOperationId>> {
        let Some(operation_kind) = terminal_operation_kind(kind) else {
            return Ok(None);
        };

        let payload = self.read_payload_json(runtime_payload)?;
        let payload: ExecCommandBeginPayload =
            serde_json::from_value(payload).with_context(|| {
                format!(
                    "parse terminal runtime start payload {}",
                    runtime_payload.raw_payload_id
                )
            })?;
        let request = parse_protocol_terminal_request(payload, &operation_kind);
        self.insert_terminal_operation(TerminalOperationStart {
            seq,
            wall_time_unix_ms,
            thread_id,
            tool_call_id,
            operation_kind,
            raw_payload: runtime_payload,
            request,
        })
    }
|
||||
|
||||
fn insert_terminal_operation(
|
||||
&mut self,
|
||||
start: TerminalOperationStart<'_>,
|
||||
) -> Result<Option<TerminalOperationId>> {
|
||||
let operation_id = self.next_terminal_operation_id();
|
||||
let ParsedTerminalRequest {
|
||||
terminal_id,
|
||||
request,
|
||||
} = start.request;
|
||||
|
||||
self.rollout.terminal_operations.insert(
|
||||
operation_id.clone(),
|
||||
TerminalOperation {
|
||||
operation_id: operation_id.clone(),
|
||||
terminal_id: terminal_id.clone(),
|
||||
tool_call_id: start.tool_call_id.to_string(),
|
||||
kind: start.operation_kind,
|
||||
execution: ExecutionWindow {
|
||||
started_at_unix_ms: start.wall_time_unix_ms,
|
||||
started_seq: start.seq,
|
||||
ended_at_unix_ms: None,
|
||||
ended_seq: None,
|
||||
status: ExecutionStatus::Running,
|
||||
},
|
||||
request,
|
||||
result: None,
|
||||
model_observations: Vec::new(),
|
||||
raw_payload_ids: vec![start.raw_payload.raw_payload_id.clone()],
|
||||
},
|
||||
);
|
||||
|
||||
if let Some(terminal_id) = terminal_id {
|
||||
self.ensure_terminal_session(
|
||||
start.thread_id,
|
||||
&terminal_id,
|
||||
&operation_id,
|
||||
start.wall_time_unix_ms,
|
||||
start.seq,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(Some(operation_id))
|
||||
}
|
||||
|
||||
    /// Completes the terminal operation associated with a tool call, if one exists.
    ///
    /// Non-terminal tools flow through the same generic tool lifecycle, so callers
    /// may invoke this unconditionally and receive Ok for unrelated tool kinds.
    ///
    /// # Errors
    /// Fails when `operation_id` is unknown, when the response payload cannot be
    /// parsed, or when begin and end events disagree on the process id.
    pub(in crate::reducer) fn end_terminal_operation(
        &mut self,
        seq: RawEventSeq,
        wall_time_unix_ms: i64,
        thread_id: &str,
        operation_id: &str,
        status: ExecutionStatus,
        response_payload: Option<&RawPayloadRef>,
    ) -> Result<()> {
        // Phase 1: clone the operation kind up front so no borrow of the
        // operation is held while payloads are read below (which needs `self`).
        let Some(operation_kind) = self
            .rollout
            .terminal_operations
            .get(operation_id)
            .map(|operation| operation.kind.clone())
        else {
            bail!("terminal end referenced unknown operation {operation_id}");
        };
        // Parse the optional response payload before mutating any state;
        // `transpose` short-circuits on the first parse error.
        let response = response_payload
            .map(|payload| {
                let value = self.read_payload_json(payload)?;
                let response = parse_terminal_response_payload(
                    value,
                    &operation_kind,
                    &payload.raw_payload_id,
                )?;
                Ok::<_, anyhow::Error>((payload.raw_payload_id.clone(), response))
            })
            .transpose()?;

        // Phase 2: mutate the operation row, capturing the values needed for
        // session linking so the mutable borrow ends before phase 3.
        let (terminal_id, started_at_unix_ms, started_seq) = {
            let Some(operation) = self.rollout.terminal_operations.get_mut(operation_id) else {
                bail!("terminal end referenced unknown operation {operation_id}");
            };
            operation.execution.ended_at_unix_ms = Some(wall_time_unix_ms);
            operation.execution.ended_seq = Some(seq);
            operation.execution.status = status;

            if let Some((raw_payload_id, response)) = response {
                push_unique(&mut operation.raw_payload_ids, &raw_payload_id);
                // If begin and end both report a process id they must name the
                // same terminal. If begin omitted it, the end event completes
                // the session join key for this operation.
                match (&operation.terminal_id, response.terminal_id.as_deref()) {
                    (Some(existing), Some(process_id)) if existing != process_id => {
                        bail!(
                            "terminal operation {operation_id} changed process id from \
                             {existing} to {process_id}"
                        );
                    }
                    (None, Some(process_id)) => {
                        operation.terminal_id = Some(process_id.to_string());
                    }
                    (Some(_), Some(_)) | (Some(_), None) | (None, None) => {}
                }
                operation.result = Some(response.result);
            }

            (
                operation.terminal_id.clone(),
                operation.execution.started_at_unix_ms,
                operation.execution.started_seq,
            )
        };

        // Phase 3: the end event may have supplied the process id for the first
        // time, so the session row might be created only now.
        if let Some(terminal_id) = terminal_id {
            self.ensure_terminal_session(
                thread_id,
                &terminal_id,
                operation_id,
                started_at_unix_ms,
                started_seq,
            )?;
        }

        Ok(())
    }
|
||||
|
||||
fn ensure_terminal_session(
|
||||
&mut self,
|
||||
thread_id: &str,
|
||||
terminal_id: &str,
|
||||
operation_id: &str,
|
||||
started_at_unix_ms: i64,
|
||||
started_seq: RawEventSeq,
|
||||
) -> Result<()> {
|
||||
if !self.rollout.terminal_sessions.contains_key(terminal_id) {
|
||||
self.rollout.terminal_sessions.insert(
|
||||
terminal_id.to_string(),
|
||||
TerminalSession {
|
||||
terminal_id: terminal_id.to_string(),
|
||||
thread_id: thread_id.to_string(),
|
||||
created_by_operation_id: operation_id.to_string(),
|
||||
operation_ids: Vec::new(),
|
||||
execution: ExecutionWindow {
|
||||
started_at_unix_ms,
|
||||
started_seq,
|
||||
// Current raw events do not report a terminal/session
|
||||
// shutdown boundary, so the session remains open even
|
||||
// after individual operations complete.
|
||||
ended_at_unix_ms: None,
|
||||
ended_seq: None,
|
||||
status: ExecutionStatus::Running,
|
||||
},
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
let Some(session) = self.rollout.terminal_sessions.get_mut(terminal_id) else {
|
||||
bail!("terminal session {terminal_id} disappeared during reduction");
|
||||
};
|
||||
if session.thread_id != thread_id {
|
||||
bail!(
|
||||
"terminal session {terminal_id} belongs to thread {}, not {thread_id}",
|
||||
session.thread_id
|
||||
);
|
||||
}
|
||||
push_unique(&mut session.operation_ids, operation_id);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Mirrors model-visible tool items onto the terminal observation view.
|
||||
///
|
||||
/// Runtime terminal rows are useful on their own, but the model-visible call
|
||||
/// and output item ids let viewers jump between transcript and terminal timelines.
|
||||
pub(in crate::reducer) fn sync_terminal_model_observation(
|
||||
&mut self,
|
||||
tool_call_id: &str,
|
||||
) -> Result<()> {
|
||||
let Some(tool_call) = self.rollout.tool_calls.get(tool_call_id) else {
|
||||
bail!("tool call {tool_call_id} disappeared during terminal observation linking");
|
||||
};
|
||||
let Some(operation_id) = tool_call.terminal_operation_id.clone() else {
|
||||
return Ok(());
|
||||
};
|
||||
let call_item_ids = tool_call.model_visible_call_item_ids.clone();
|
||||
let output_item_ids = tool_call.model_visible_output_item_ids.clone();
|
||||
if call_item_ids.is_empty() && output_item_ids.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let Some(operation) = self.rollout.terminal_operations.get_mut(&operation_id) else {
|
||||
bail!("terminal operation {operation_id} disappeared during observation linking");
|
||||
};
|
||||
// A terminal result and a model-visible tool output are intentionally
|
||||
// separate: the former is what the runtime saw, the latter is what later
|
||||
// inference payloads prove was shown back to the model.
|
||||
if let Some(observation) = operation
|
||||
.model_observations
|
||||
.iter_mut()
|
||||
.find(|observation| observation.source == TerminalObservationSource::DirectToolCall)
|
||||
{
|
||||
observation.call_item_ids = call_item_ids;
|
||||
observation.output_item_ids = output_item_ids;
|
||||
} else {
|
||||
operation.model_observations.push(TerminalModelObservation {
|
||||
call_item_ids,
|
||||
output_item_ids,
|
||||
source: TerminalObservationSource::DirectToolCall,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn next_terminal_operation_id(&mut self) -> TerminalOperationId {
|
||||
let ordinal = self.next_terminal_operation_ordinal;
|
||||
self.next_terminal_operation_ordinal += 1;
|
||||
format!("terminal_operation:{ordinal}")
|
||||
}
|
||||
}
|
||||
|
||||
/// Maps a tool-call kind onto its terminal projection, if any.
///
/// Only exec_command and write_stdin produce terminal operations. The remaining
/// kinds are listed explicitly (no `_` catch-all) so adding a new `ToolCallKind`
/// variant forces a decision here at compile time.
fn terminal_operation_kind(kind: &ToolCallKind) -> Option<TerminalOperationKind> {
    match kind {
        ToolCallKind::ExecCommand => Some(TerminalOperationKind::ExecCommand),
        ToolCallKind::WriteStdin => Some(TerminalOperationKind::WriteStdin),
        ToolCallKind::ApplyPatch
        | ToolCallKind::Mcp { .. }
        | ToolCallKind::Web
        | ToolCallKind::ImageGeneration
        | ToolCallKind::SpawnAgent
        | ToolCallKind::AssignAgentTask
        | ToolCallKind::SendMessage
        | ToolCallKind::WaitAgent
        | ToolCallKind::CloseAgent
        | ToolCallKind::Other { .. } => None,
    }
}
|
||||
|
||||
/// Borrowed inputs shared by the dispatch- and runtime-driven start paths,
/// consumed by `insert_terminal_operation`.
struct TerminalOperationStart<'a> {
    seq: RawEventSeq,
    wall_time_unix_ms: i64,
    thread_id: &'a str,
    tool_call_id: &'a str,
    operation_kind: TerminalOperationKind,
    // Payload whose id seeds the operation's raw_payload_ids list.
    raw_payload: &'a RawPayloadRef,
    request: ParsedTerminalRequest,
}
|
||||
|
||||
/// A parsed terminal request plus the process id (session join key), when known.
struct ParsedTerminalRequest {
    // None when the begin event did not report a process id; the matching end
    // event may supply it later.
    terminal_id: Option<String>,
    request: TerminalRequest,
}
|
||||
|
||||
/// A parsed terminal response plus the process id reported at end time, if any.
struct ParsedTerminalResponse {
    // Dispatch-shaped responses never carry one; protocol responses may.
    terminal_id: Option<String>,
    result: TerminalResult,
}
|
||||
|
||||
fn parse_protocol_terminal_request(
|
||||
payload: ExecCommandBeginPayload,
|
||||
operation_kind: &TerminalOperationKind,
|
||||
) -> ParsedTerminalRequest {
|
||||
// Startup/poll paths usually include a process id at begin time, but plain
|
||||
// exec starts may only learn it in the matching end event.
|
||||
let terminal_id = payload.process_id.clone();
|
||||
let request = match operation_kind {
|
||||
TerminalOperationKind::ExecCommand => TerminalRequest::ExecCommand {
|
||||
display_command: payload.command.join(" "),
|
||||
command: payload.command,
|
||||
cwd: payload.cwd,
|
||||
yield_time_ms: None,
|
||||
max_output_tokens: None,
|
||||
},
|
||||
TerminalOperationKind::WriteStdin => TerminalRequest::WriteStdin {
|
||||
stdin: payload.interaction_input.unwrap_or_default(),
|
||||
yield_time_ms: None,
|
||||
max_output_tokens: None,
|
||||
},
|
||||
};
|
||||
ParsedTerminalRequest {
|
||||
terminal_id,
|
||||
request,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_dispatch_terminal_request(value: JsonValue) -> Result<ParsedTerminalRequest> {
|
||||
let payload: DispatchedToolTraceRequestPayload = serde_json::from_value(value)?;
|
||||
if payload.tool_name != "write_stdin" {
|
||||
bail!(
|
||||
"dispatch terminal request is for {}, not write_stdin",
|
||||
payload.tool_name
|
||||
);
|
||||
}
|
||||
if payload.payload.kind != "function" {
|
||||
bail!(
|
||||
"write_stdin dispatch payload used unsupported {} payload",
|
||||
payload.payload.kind
|
||||
);
|
||||
}
|
||||
let arguments = payload
|
||||
.payload
|
||||
.arguments
|
||||
.context("write_stdin dispatch payload omitted function arguments")?;
|
||||
let args: DispatchedWriteStdinArgs = serde_json::from_str(&arguments)
|
||||
.context("parse write_stdin dispatch function arguments")?;
|
||||
let terminal_id = terminal_id_from_json(&args.session_id)
|
||||
.context("write_stdin dispatch payload omitted session_id")?;
|
||||
|
||||
Ok(ParsedTerminalRequest {
|
||||
terminal_id: Some(terminal_id),
|
||||
request: TerminalRequest::WriteStdin {
|
||||
stdin: args.chars,
|
||||
yield_time_ms: args.yield_time_ms,
|
||||
max_output_tokens: args.max_output_tokens,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_terminal_response_payload(
|
||||
value: JsonValue,
|
||||
operation_kind: &TerminalOperationKind,
|
||||
raw_payload_id: &str,
|
||||
) -> Result<ParsedTerminalResponse> {
|
||||
match operation_kind {
|
||||
TerminalOperationKind::ExecCommand => {
|
||||
let payload = serde_json::from_value::<ExecCommandEndPayload>(value)
|
||||
.with_context(|| format!("parse exec terminal response {raw_payload_id}"))?;
|
||||
Ok(parse_protocol_terminal_response(payload))
|
||||
}
|
||||
TerminalOperationKind::WriteStdin => {
|
||||
match serde_json::from_value::<ExecCommandEndPayload>(value.clone()) {
|
||||
Ok(payload) => Ok(parse_protocol_terminal_response(payload)),
|
||||
Err(protocol_err) => parse_dispatch_terminal_response(value).with_context(|| {
|
||||
format!(
|
||||
"parse write_stdin terminal response {raw_payload_id} as protocol payload \
|
||||
({protocol_err}) or dispatch payload"
|
||||
)
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_protocol_terminal_response(payload: ExecCommandEndPayload) -> ParsedTerminalResponse {
|
||||
ParsedTerminalResponse {
|
||||
terminal_id: payload.process_id,
|
||||
result: TerminalResult {
|
||||
exit_code: Some(payload.exit_code),
|
||||
stdout: payload.stdout,
|
||||
stderr: payload.stderr,
|
||||
formatted_output: Some(payload.formatted_output),
|
||||
original_token_count: None,
|
||||
chunk_id: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Projects a dispatch-layer tool response into the terminal result shape.
///
/// Dispatch payloads never carry a process id, so `terminal_id` is always None
/// here; the session join key must come from the invocation side.
fn parse_dispatch_terminal_response(value: JsonValue) -> Result<ParsedTerminalResponse> {
    let payload: DispatchedToolTraceResponsePayload = serde_json::from_value(value)?;
    let result = match payload {
        DispatchedToolTraceResponsePayload::DirectResponse { response_item } => {
            // Prefer the item's textual "output"; fall back to rendering the
            // whole response item as JSON so nothing is silently dropped.
            let output = response_item
                .get("output")
                .and_then(json_text_content)
                .unwrap_or_else(|| response_item.to_string());
            TerminalResult {
                exit_code: None,
                stdout: output.clone(),
                stderr: String::new(),
                formatted_output: Some(output),
                original_token_count: None,
                chunk_id: None,
            }
        }
        DispatchedToolTraceResponsePayload::CodeModeResponse { value } => {
            // Code-mode returns the JavaScript-facing tool value, not the text
            // shown to the model. For write_stdin that value is the structured
            // unified-exec result, so keep ToolCall.raw_result_payload_id as the
            // raw boundary while projecting terminal-specific fields here.
            parse_code_mode_exec_result(value)
        }
        // Errors surface through stderr/formatted_output with no exit code.
        DispatchedToolTraceResponsePayload::Error { error } => TerminalResult {
            exit_code: None,
            stdout: String::new(),
            stderr: error.clone(),
            formatted_output: Some(error),
            original_token_count: None,
            chunk_id: None,
        },
    };
    Ok(ParsedTerminalResponse {
        terminal_id: None,
        result,
    })
}
|
||||
|
||||
fn parse_code_mode_exec_result(value: JsonValue) -> TerminalResult {
|
||||
match serde_json::from_value::<CodeModeExecResult>(value.clone()) {
|
||||
Ok(result) => TerminalResult {
|
||||
exit_code: result.exit_code,
|
||||
stdout: result.output.clone(),
|
||||
stderr: String::new(),
|
||||
formatted_output: Some(result.output),
|
||||
original_token_count: result.original_token_count,
|
||||
chunk_id: result.chunk_id,
|
||||
},
|
||||
Err(_) => {
|
||||
let output = json_text_content(&value).unwrap_or_else(|| value.to_string());
|
||||
TerminalResult {
|
||||
exit_code: None,
|
||||
stdout: output.clone(),
|
||||
stderr: String::new(),
|
||||
formatted_output: Some(output),
|
||||
original_token_count: None,
|
||||
chunk_id: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn json_text_content(value: &JsonValue) -> Option<String> {
|
||||
match value {
|
||||
JsonValue::String(text) => Some(text.clone()),
|
||||
JsonValue::Array(items) => {
|
||||
let text = items
|
||||
.iter()
|
||||
.filter_map(|item| item.get("text").and_then(JsonValue::as_str))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
(!text.is_empty()).then_some(text)
|
||||
}
|
||||
JsonValue::Null => None,
|
||||
other => Some(other.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn terminal_id_from_json(value: &JsonValue) -> Option<String> {
|
||||
match value {
|
||||
JsonValue::String(value) if !value.is_empty() => Some(value.clone()),
|
||||
JsonValue::Number(value) => Some(value.to_string()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Protocol payload recorded when an exec/write-stdin runtime event begins.
#[derive(Deserialize)]
struct ExecCommandBeginPayload {
    // Startup/poll paths usually include it; plain exec starts may omit it.
    process_id: Option<String>,
    command: Vec<String>,
    cwd: String,
    // Stdin text for write_stdin begin events.
    interaction_input: Option<String>,
}
|
||||
|
||||
/// Protocol payload recorded when an exec/write-stdin runtime event ends.
#[derive(Deserialize)]
struct ExecCommandEndPayload {
    // May complete the session join key if the begin event omitted it.
    process_id: Option<String>,
    stdout: String,
    stderr: String,
    exit_code: i32,
    formatted_output: String,
}
|
||||
|
||||
/// Dispatch-layer trace of a tool invocation: tool name plus raw call payload.
#[derive(Deserialize)]
struct DispatchedToolTraceRequestPayload {
    tool_name: String,
    payload: DispatchedToolPayload,
}
|
||||
|
||||
/// Inner call payload of a dispatched tool trace.
#[derive(Deserialize)]
struct DispatchedToolPayload {
    // Payload flavor; only "function" payloads are accepted downstream.
    #[serde(rename = "type")]
    kind: String,
    // JSON-encoded function arguments, when present.
    arguments: Option<String>,
}
|
||||
|
||||
/// Function arguments of a dispatched write_stdin call.
#[derive(Deserialize)]
struct DispatchedWriteStdinArgs {
    // Kept as raw JSON: callers send either a string or a numeric session id
    // (normalized by `terminal_id_from_json`).
    session_id: JsonValue,
    #[serde(default)]
    chars: String,
    yield_time_ms: Option<u64>,
    max_output_tokens: Option<usize>,
}
|
||||
|
||||
/// Dispatch-layer trace of a tool response, discriminated by the "type" tag.
#[derive(Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
enum DispatchedToolTraceResponsePayload {
    // The model-visible response item, verbatim.
    DirectResponse { response_item: JsonValue },
    // The JavaScript-facing value returned to code-mode callers.
    CodeModeResponse { value: JsonValue },
    Error { error: String },
}
|
||||
|
||||
/// Structured unified-exec result carried inside a code-mode response value.
#[derive(Deserialize)]
struct CodeModeExecResult {
    chunk_id: Option<String>,
    exit_code: Option<i32>,
    original_token_count: Option<usize>,
    output: String,
}
|
||||
|
||||
// Unit tests live in a sibling file to keep this module focused.
#[cfg(test)]
#[path = "terminal_tests.rs"]
mod tests;
|
||||
580
codex-rs/rollout-trace/src/reducer/tool/terminal_tests.rs
Normal file
580
codex-rs/rollout-trace/src/reducer/tool/terminal_tests.rs
Normal file
@@ -0,0 +1,580 @@
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::ExecutionWindow;
|
||||
use crate::model::TerminalModelObservation;
|
||||
use crate::model::TerminalObservationSource;
|
||||
use crate::model::TerminalOperation;
|
||||
use crate::model::TerminalOperationKind;
|
||||
use crate::model::TerminalRequest;
|
||||
use crate::model::TerminalResult;
|
||||
use crate::model::TerminalSession;
|
||||
use crate::model::ToolCallKind;
|
||||
use crate::model::ToolCallSummary;
|
||||
use crate::payload::RawPayloadKind;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
use crate::reducer::test_support::create_started_writer;
|
||||
use crate::reducer::test_support::generic_summary;
|
||||
use crate::reducer::test_support::message;
|
||||
use crate::reducer::test_support::start_turn;
|
||||
use crate::reducer::test_support::trace_context;
|
||||
use crate::replay_bundle;
|
||||
use crate::writer::TraceWriter;
|
||||
|
||||
/// End-to-end reduction: an exec_command tool call with runtime begin/end
/// events and a model-visible result must produce a terminal operation row,
/// a running terminal session, and a DirectToolCall model observation.
#[test]
fn exec_tool_reduces_to_terminal_operation_and_session() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;
    append_inference_with_tool_call(&writer)?;

    // Tool call start with its canonical invocation payload.
    let invocation_payload = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "exec_command",
            "tool_namespace": null,
            "payload": {
                "type": "function",
                "arguments": "{\"cmd\":\"cargo test\"}"
            }
        }),
    )?;
    let invocation_payload_id = invocation_payload.raw_payload_id.clone();
    let _tool_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-1".to_string(),
            model_visible_call_id: Some("call-1".to_string()),
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::ExecCommand,
            summary: generic_summary("exec_command"),
            invocation_payload: Some(invocation_payload),
        },
    )?;

    // Runtime begin: note the payload deliberately omits process_id so the
    // session join key is only learned from the end event below.
    let runtime_start_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "tool-1",
            "turn_id": "turn-1",
            "command": ["cargo", "test"],
            "cwd": "/repo"
        }),
    )?;
    let runtime_start_payload_id = runtime_start_payload.raw_payload_id.clone();
    let runtime_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "tool-1".to_string(),
            runtime_payload: runtime_start_payload,
        },
    )?;

    // Runtime end supplies process_id "pty-1" plus the exec result fields.
    let runtime_end_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "tool-1",
            "process_id": "pty-1",
            "turn_id": "turn-1",
            "command": ["cargo", "test"],
            "cwd": "/repo",
            "stdout": "ok\n",
            "stderr": "",
            "exit_code": 0,
            "formatted_output": "ok\n",
            "status": "completed"
        }),
    )?;
    let runtime_end_payload_id = runtime_end_payload.raw_payload_id.clone();
    let runtime_end = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeEnded {
            tool_call_id: "tool-1".to_string(),
            status: ExecutionStatus::Completed,
            runtime_payload: runtime_end_payload,
        },
    )?;

    // Generic tool end with the model-visible direct response.
    let result_payload = writer.write_json_payload(
        RawPayloadKind::ToolResult,
        &json!({
            "type": "direct_response",
            "response_item": {
                "type": "function_call_output",
                "call_id": "call-1",
                "output": "ok\n"
            }
        }),
    )?;
    let result_payload_id = result_payload.raw_payload_id.clone();
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "tool-1".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: Some(result_payload),
        },
    )?;

    // A follow-up turn proves the tool output was shown back to the model.
    start_turn(&writer, "turn-2")?;
    append_followup_with_tool_output(&writer)?;

    let rollout = replay_bundle(temp.path())?;
    let operation_id = "terminal_operation:1".to_string();
    let output_item_id = rollout.inference_calls["inference-2"]
        .request_item_ids
        .last()
        .expect("tool output item")
        .clone();

    assert_eq!(
        rollout.tool_calls["tool-1"].terminal_operation_id,
        Some(operation_id.clone()),
    );
    assert_eq!(
        rollout.tool_calls["tool-1"].raw_invocation_payload_id,
        Some(invocation_payload_id),
    );
    assert_eq!(
        rollout.tool_calls["tool-1"].raw_result_payload_id,
        Some(result_payload_id),
    );
    assert_eq!(
        rollout.tool_calls["tool-1"].raw_runtime_payload_ids,
        vec![
            runtime_start_payload_id.clone(),
            runtime_end_payload_id.clone()
        ],
    );
    assert_eq!(
        rollout.tool_calls["tool-1"].summary,
        ToolCallSummary::Terminal {
            operation_id: operation_id.clone(),
        },
    );
    // The operation carries the runtime execution window, the exec result,
    // and the model observation linking transcript items to the terminal row.
    assert_eq!(
        rollout.terminal_operations[&operation_id],
        TerminalOperation {
            operation_id: operation_id.clone(),
            terminal_id: Some("pty-1".to_string()),
            tool_call_id: "tool-1".to_string(),
            kind: TerminalOperationKind::ExecCommand,
            execution: ExecutionWindow {
                started_at_unix_ms: runtime_start.wall_time_unix_ms,
                started_seq: runtime_start.seq,
                ended_at_unix_ms: Some(runtime_end.wall_time_unix_ms),
                ended_seq: Some(runtime_end.seq),
                status: ExecutionStatus::Completed,
            },
            request: TerminalRequest::ExecCommand {
                command: vec!["cargo".to_string(), "test".to_string()],
                display_command: "cargo test".to_string(),
                cwd: "/repo".to_string(),
                yield_time_ms: None,
                max_output_tokens: None,
            },
            result: Some(TerminalResult {
                exit_code: Some(0),
                stdout: "ok\n".to_string(),
                stderr: String::new(),
                formatted_output: Some("ok\n".to_string()),
                original_token_count: None,
                chunk_id: None,
            }),
            model_observations: vec![TerminalModelObservation {
                call_item_ids: rollout.inference_calls["inference-1"]
                    .response_item_ids
                    .clone(),
                output_item_ids: vec![output_item_id],
                source: TerminalObservationSource::DirectToolCall,
            }],
            raw_payload_ids: vec![runtime_start_payload_id, runtime_end_payload_id],
        },
    );
    // The session stays Running: raw events never report session shutdown.
    assert_eq!(
        rollout.terminal_sessions["pty-1"],
        TerminalSession {
            terminal_id: "pty-1".to_string(),
            thread_id: "thread-root".to_string(),
            created_by_operation_id: operation_id.clone(),
            operation_ids: vec![operation_id],
            execution: ExecutionWindow {
                started_at_unix_ms: runtime_start.wall_time_unix_ms,
                started_seq: runtime_start.seq,
                ended_at_unix_ms: None,
                ended_seq: None,
                status: ExecutionStatus::Running,
            },
        },
    );

    Ok(())
}
|
||||
|
||||
/// A write_stdin runtime event that names an already-known process id must be
/// appended to the existing terminal session instead of creating a new one.
#[test]
fn write_stdin_operation_reuses_existing_terminal_session() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    // First operation: exec_command start that creates session "pty-1".
    let startup_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "tool-start",
            "process_id": "pty-1",
            "turn_id": "turn-1",
            "command": ["bash"],
            "cwd": "/repo"
        }),
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-start".to_string(),
            model_visible_call_id: None,
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::ExecCommand,
            summary: generic_summary("exec_command"),
            invocation_payload: None,
        },
    )?;
    writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "tool-start".to_string(),
            runtime_payload: startup_payload,
        },
    )?;

    // Second operation: write_stdin against the same process id.
    let stdin_payload = writer.write_json_payload(
        RawPayloadKind::ToolRuntimeEvent,
        &json!({
            "call_id": "tool-stdin",
            "process_id": "pty-1",
            "turn_id": "turn-1",
            "command": ["bash"],
            "cwd": "/repo",
            "interaction_input": "echo hi\n"
        }),
    )?;
    let _stdin_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-stdin".to_string(),
            model_visible_call_id: None,
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::WriteStdin,
            summary: generic_summary("write_stdin"),
            invocation_payload: None,
        },
    )?;
    let stdin_runtime_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallRuntimeStarted {
            tool_call_id: "tool-stdin".to_string(),
            runtime_payload: stdin_payload,
        },
    )?;

    let rollout = replay_bundle(temp.path())?;
    let startup_operation_id = "terminal_operation:1".to_string();
    let stdin_operation_id = "terminal_operation:2".to_string();

    // Both operations are linked to the single shared session, in order.
    assert_eq!(
        rollout.terminal_sessions["pty-1"].operation_ids,
        vec![startup_operation_id, stdin_operation_id.clone()],
    );
    // The stdin operation is still open (no runtime end was recorded).
    assert_eq!(
        rollout.terminal_operations[&stdin_operation_id],
        TerminalOperation {
            operation_id: stdin_operation_id.clone(),
            terminal_id: Some("pty-1".to_string()),
            tool_call_id: "tool-stdin".to_string(),
            kind: TerminalOperationKind::WriteStdin,
            execution: ExecutionWindow {
                started_at_unix_ms: stdin_runtime_start.wall_time_unix_ms,
                started_seq: stdin_runtime_start.seq,
                ended_at_unix_ms: None,
                ended_seq: None,
                status: ExecutionStatus::Running,
            },
            request: TerminalRequest::WriteStdin {
                stdin: "echo hi\n".to_string(),
                yield_time_ms: None,
                max_output_tokens: None,
            },
            result: None,
            model_observations: Vec::new(),
            raw_payload_ids: vec!["raw_payload:2".to_string()],
        },
    );

    Ok(())
}
|
||||
|
||||
/// A write_stdin call that only has dispatch-layer payloads (no runtime
/// events) must still reduce into a terminal operation; the numeric
/// session_id from the function arguments becomes the terminal id "123".
#[test]
fn dispatch_write_stdin_payload_reduces_to_terminal_operation() -> anyhow::Result<()> {
    let temp = TempDir::new()?;
    let writer = create_started_writer(&temp)?;
    start_turn(&writer, "turn-1")?;

    // Invocation: write_stdin function payload with JSON-encoded arguments.
    let request_payload = writer.write_json_payload(
        RawPayloadKind::ToolInvocation,
        &json!({
            "tool_name": "write_stdin",
            "tool_namespace": null,
            "payload": {
                "type": "function",
                "arguments": json!({
                    "session_id": 123,
                    "chars": "echo hi\n",
                    "yield_time_ms": 250,
                    "max_output_tokens": 2000
                }).to_string()
            }
        }),
    )?;
    let request_payload_id = request_payload.raw_payload_id.clone();
    let tool_start = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallStarted {
            tool_call_id: "tool-stdin".to_string(),
            model_visible_call_id: Some("call-stdin".to_string()),
            code_mode_runtime_tool_id: None,
            requester: crate::raw_event::RawToolCallRequester::Model,
            kind: ToolCallKind::WriteStdin,
            summary: generic_summary("write_stdin"),
            invocation_payload: Some(request_payload),
        },
    )?;

    // Direct response carries the text output only (no exit code).
    let response_payload = writer.write_json_payload(
        RawPayloadKind::ToolResult,
        &json!({
            "type": "direct_response",
            "response_item": {
                "type": "function_call_output",
                "call_id": "call-stdin",
                "output": "hi\n"
            }
        }),
    )?;
    let response_payload_id = response_payload.raw_payload_id.clone();
    let tool_end = writer.append_with_context(
        trace_context("turn-1"),
        RawTraceEventPayload::ToolCallEnded {
            tool_call_id: "tool-stdin".to_string(),
            status: ExecutionStatus::Completed,
            result_payload: Some(response_payload),
        },
    )?;

    let rollout = replay_bundle(temp.path())?;
    let operation_id = "terminal_operation:1".to_string();

    assert_eq!(
        rollout.tool_calls["tool-stdin"].terminal_operation_id,
        Some(operation_id.clone()),
    );
    assert_eq!(
        rollout.tool_calls["tool-stdin"].summary,
        ToolCallSummary::Terminal {
            operation_id: operation_id.clone(),
        },
    );
    // Without runtime events, the execution window spans the generic tool
    // start/end events; the request preserves the dispatch argument fields.
    assert_eq!(
        rollout.terminal_operations[&operation_id],
        TerminalOperation {
            operation_id: operation_id.clone(),
            terminal_id: Some("123".to_string()),
            tool_call_id: "tool-stdin".to_string(),
            kind: TerminalOperationKind::WriteStdin,
            execution: ExecutionWindow {
                started_at_unix_ms: tool_start.wall_time_unix_ms,
                started_seq: tool_start.seq,
                ended_at_unix_ms: Some(tool_end.wall_time_unix_ms),
                ended_seq: Some(tool_end.seq),
                status: ExecutionStatus::Completed,
            },
            request: TerminalRequest::WriteStdin {
                stdin: "echo hi\n".to_string(),
                yield_time_ms: Some(250),
                max_output_tokens: Some(2000),
            },
            result: Some(TerminalResult {
                exit_code: None,
                stdout: "hi\n".to_string(),
                stderr: String::new(),
                formatted_output: Some("hi\n".to_string()),
                original_token_count: None,
                chunk_id: None,
            }),
            model_observations: Vec::new(),
            raw_payload_ids: vec![request_payload_id, response_payload_id],
        },
    );
    // The numeric session id keys the (still running) session row.
    assert_eq!(
        rollout.terminal_sessions["123"],
        TerminalSession {
            terminal_id: "123".to_string(),
            thread_id: "thread-root".to_string(),
            created_by_operation_id: operation_id.clone(),
            operation_ids: vec![operation_id],
            execution: ExecutionWindow {
                started_at_unix_ms: tool_start.wall_time_unix_ms,
                started_seq: tool_start.seq,
                ended_at_unix_ms: None,
                ended_seq: None,
                status: ExecutionStatus::Running,
            },
        },
    );

    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_write_stdin_result_projects_structured_exec_fields() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let writer = create_started_writer(&temp)?;
|
||||
start_turn(&writer, "turn-1")?;
|
||||
|
||||
let request_payload = writer.write_json_payload(
|
||||
RawPayloadKind::ToolInvocation,
|
||||
&json!({
|
||||
"tool_name": "write_stdin",
|
||||
"tool_namespace": null,
|
||||
"payload": {
|
||||
"type": "function",
|
||||
"arguments": json!({
|
||||
"session_id": 456,
|
||||
"chars": "",
|
||||
"yield_time_ms": 1000,
|
||||
"max_output_tokens": 4000
|
||||
}).to_string()
|
||||
}
|
||||
}),
|
||||
)?;
|
||||
let response_payload = writer.write_json_payload(
|
||||
RawPayloadKind::ToolResult,
|
||||
&json!({
|
||||
"type": "code_mode_response",
|
||||
"value": {
|
||||
"chunk_id": "abc123",
|
||||
"wall_time_seconds": 1.25,
|
||||
"exit_code": 0,
|
||||
"original_token_count": 3,
|
||||
"output": "done\n"
|
||||
}
|
||||
}),
|
||||
)?;
|
||||
writer.append_with_context(
|
||||
trace_context("turn-1"),
|
||||
RawTraceEventPayload::CodeCellStarted {
|
||||
runtime_cell_id: "cell-1".to_string(),
|
||||
model_visible_call_id: "call-code".to_string(),
|
||||
source_js: "await tools.write_stdin({ chars: '' })".to_string(),
|
||||
},
|
||||
)?;
|
||||
writer.append_with_context(
|
||||
trace_context("turn-1"),
|
||||
RawTraceEventPayload::ToolCallStarted {
|
||||
tool_call_id: "tool-stdin".to_string(),
|
||||
model_visible_call_id: None,
|
||||
code_mode_runtime_tool_id: Some("runtime-tool-1".to_string()),
|
||||
requester: crate::raw_event::RawToolCallRequester::CodeCell {
|
||||
runtime_cell_id: "cell-1".to_string(),
|
||||
},
|
||||
kind: ToolCallKind::WriteStdin,
|
||||
summary: generic_summary("write_stdin"),
|
||||
invocation_payload: Some(request_payload),
|
||||
},
|
||||
)?;
|
||||
writer.append_with_context(
|
||||
trace_context("turn-1"),
|
||||
RawTraceEventPayload::ToolCallEnded {
|
||||
tool_call_id: "tool-stdin".to_string(),
|
||||
status: ExecutionStatus::Completed,
|
||||
result_payload: Some(response_payload),
|
||||
},
|
||||
)?;
|
||||
|
||||
let rollout = replay_bundle(temp.path())?;
|
||||
assert_eq!(
|
||||
rollout.terminal_operations["terminal_operation:1"].result,
|
||||
Some(TerminalResult {
|
||||
exit_code: Some(0),
|
||||
stdout: "done\n".to_string(),
|
||||
stderr: String::new(),
|
||||
formatted_output: Some("done\n".to_string()),
|
||||
original_token_count: Some(3),
|
||||
chunk_id: Some("abc123".to_string()),
|
||||
}),
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn append_inference_with_tool_call(writer: &TraceWriter) -> anyhow::Result<()> {
|
||||
let request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"input": [message("user", "run tests")]
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceStarted {
|
||||
inference_call_id: "inference-1".to_string(),
|
||||
thread_id: "thread-root".to_string(),
|
||||
codex_turn_id: "turn-1".to_string(),
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
request_payload: request,
|
||||
})?;
|
||||
|
||||
let response = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceResponse,
|
||||
&json!({
|
||||
"response_id": "resp-1",
|
||||
"output_items": [{
|
||||
"type": "function_call",
|
||||
"name": "exec_command",
|
||||
"arguments": "{\"cmd\":\"cargo test\"}",
|
||||
"call_id": "call-1"
|
||||
}]
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceCompleted {
|
||||
inference_call_id: "inference-1".to_string(),
|
||||
response_id: Some("resp-1".to_string()),
|
||||
response_payload: response,
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn append_followup_with_tool_output(writer: &TraceWriter) -> anyhow::Result<()> {
|
||||
let request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"previous_response_id": "resp-1",
|
||||
"input": [{
|
||||
"type": "function_call_output",
|
||||
"call_id": "call-1",
|
||||
"output": "ok\n"
|
||||
}]
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceStarted {
|
||||
inference_call_id: "inference-2".to_string(),
|
||||
thread_id: "thread-root".to_string(),
|
||||
codex_turn_id: "turn-2".to_string(),
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
request_payload: request,
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
264
codex-rs/rollout-trace/src/writer.rs
Normal file
264
codex-rs/rollout-trace/src/writer.rs
Normal file
@@ -0,0 +1,264 @@
|
||||
//! Hot-path trace bundle writer.
|
||||
|
||||
use std::fs::File;
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::BufWriter;
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::MutexGuard;
|
||||
use std::sync::PoisonError;
|
||||
use std::time::SystemTime;
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::bundle::MANIFEST_FILE_NAME;
|
||||
use crate::bundle::PAYLOADS_DIR_NAME;
|
||||
use crate::bundle::RAW_EVENT_LOG_FILE_NAME;
|
||||
use crate::bundle::TraceBundleManifest;
|
||||
use crate::model::AgentThreadId;
|
||||
use crate::payload::RawPayloadKind;
|
||||
use crate::payload::RawPayloadRef;
|
||||
use crate::raw_event::RAW_TRACE_EVENT_SCHEMA_VERSION;
|
||||
use crate::raw_event::RawTraceEvent;
|
||||
use crate::raw_event::RawTraceEventContext;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
|
||||
/// Local trace bundle writer.
|
||||
///
|
||||
/// The writer appends raw events and writes payload files. It does not keep a
|
||||
/// reduced `RolloutTrace` in memory; replay is owned by the reducer.
|
||||
#[derive(Debug)]
|
||||
pub struct TraceWriter {
|
||||
inner: Mutex<TraceWriterInner>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct TraceWriterInner {
|
||||
manifest: TraceBundleManifest,
|
||||
payloads_dir: PathBuf,
|
||||
event_log: BufWriter<File>,
|
||||
next_seq: u64,
|
||||
next_payload_ordinal: u64,
|
||||
}
|
||||
|
||||
impl TraceWriter {
|
||||
/// Creates a trace bundle directory and writes its manifest.
|
||||
pub fn create(
|
||||
bundle_dir: impl AsRef<Path>,
|
||||
trace_id: String,
|
||||
rollout_id: String,
|
||||
root_thread_id: AgentThreadId,
|
||||
) -> Result<Self> {
|
||||
let bundle_dir = bundle_dir.as_ref().to_path_buf();
|
||||
let payloads_dir = bundle_dir.join(PAYLOADS_DIR_NAME);
|
||||
std::fs::create_dir_all(&payloads_dir)
|
||||
.with_context(|| format!("create trace payload dir {}", payloads_dir.display()))?;
|
||||
|
||||
let started_at_unix_ms = unix_time_ms();
|
||||
let manifest =
|
||||
TraceBundleManifest::new(trace_id, rollout_id, root_thread_id, started_at_unix_ms);
|
||||
write_json_file(&bundle_dir.join(MANIFEST_FILE_NAME), &manifest)?;
|
||||
|
||||
let event_log_path = bundle_dir.join(RAW_EVENT_LOG_FILE_NAME);
|
||||
let event_log = OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&event_log_path)
|
||||
.with_context(|| format!("open trace event log {}", event_log_path.display()))?;
|
||||
|
||||
Ok(Self {
|
||||
inner: Mutex::new(TraceWriterInner {
|
||||
manifest,
|
||||
payloads_dir,
|
||||
event_log: BufWriter::new(event_log),
|
||||
next_seq: 1,
|
||||
next_payload_ordinal: 1,
|
||||
}),
|
||||
})
|
||||
}
|
||||
|
||||
/// Writes a JSON payload file and returns its reduced-state reference.
|
||||
pub fn write_json_payload(
|
||||
&self,
|
||||
kind: RawPayloadKind,
|
||||
value: &impl Serialize,
|
||||
) -> Result<RawPayloadRef> {
|
||||
let mut inner = self.lock_inner();
|
||||
let ordinal = inner.next_payload_ordinal;
|
||||
inner.next_payload_ordinal += 1;
|
||||
let raw_payload_id = format!("raw_payload:{ordinal}");
|
||||
let relative_path = format!("{PAYLOADS_DIR_NAME}/{ordinal}.json");
|
||||
let absolute_path = inner.payloads_dir.join(format!("{ordinal}.json"));
|
||||
// Payload files are created before the event that references them. A
|
||||
// replay interrupted after an event is appended should never point at a
|
||||
// payload file that the writer planned but had not written yet.
|
||||
write_json_file(&absolute_path, value)?;
|
||||
Ok(RawPayloadRef {
|
||||
raw_payload_id,
|
||||
kind,
|
||||
path: relative_path,
|
||||
})
|
||||
}
|
||||
|
||||
/// Appends one raw event with no extra envelope context.
|
||||
pub fn append(&self, payload: RawTraceEventPayload) -> Result<RawTraceEvent> {
|
||||
self.append_with_context(RawTraceEventContext::default(), payload)
|
||||
}
|
||||
|
||||
/// Appends one raw event with explicit thread/turn context.
|
||||
pub fn append_with_context(
|
||||
&self,
|
||||
context: RawTraceEventContext,
|
||||
payload: RawTraceEventPayload,
|
||||
) -> Result<RawTraceEvent> {
|
||||
let mut inner = self.lock_inner();
|
||||
let event = RawTraceEvent {
|
||||
schema_version: RAW_TRACE_EVENT_SCHEMA_VERSION,
|
||||
seq: inner.next_seq,
|
||||
wall_time_unix_ms: unix_time_ms(),
|
||||
rollout_id: inner.manifest.rollout_id.clone(),
|
||||
thread_id: context.thread_id,
|
||||
codex_turn_id: context.codex_turn_id,
|
||||
payload,
|
||||
};
|
||||
inner.next_seq += 1;
|
||||
serde_json::to_writer(&mut inner.event_log, &event)?;
|
||||
inner.event_log.write_all(b"\n")?;
|
||||
inner.event_log.flush()?;
|
||||
Ok(event)
|
||||
}
|
||||
|
||||
fn lock_inner(&self) -> MutexGuard<'_, TraceWriterInner> {
|
||||
// Preserve the event log after a panic in tracing code. Dropping the
|
||||
// writer would lose subsequent diagnostic events in exactly the session
|
||||
// we are trying to debug.
|
||||
self.inner.lock().unwrap_or_else(PoisonError::into_inner)
|
||||
}
|
||||
}
|
||||
|
||||
fn write_json_file(path: &Path, value: &impl Serialize) -> Result<()> {
|
||||
let file = File::create(path).with_context(|| format!("create {}", path.display()))?;
|
||||
serde_json::to_writer_pretty(file, value)
|
||||
.with_context(|| format!("write JSON {}", path.display()))
|
||||
}
|
||||
|
||||
pub(crate) fn unix_time_ms() -> i64 {
|
||||
let duration = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap_or_default();
|
||||
i64::try_from(duration.as_millis()).unwrap_or(i64::MAX)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::model::ExecutionStatus;
|
||||
use crate::model::RolloutStatus;
|
||||
use crate::payload::RawPayloadKind;
|
||||
use crate::raw_event::RawTraceEventPayload;
|
||||
use crate::replay_bundle;
|
||||
use crate::writer::TraceWriter;
|
||||
|
||||
#[test]
|
||||
fn writer_records_payload_refs_and_replays_rollout_status() -> anyhow::Result<()> {
|
||||
let temp = TempDir::new()?;
|
||||
let writer = TraceWriter::create(
|
||||
temp.path(),
|
||||
"trace-1".to_string(),
|
||||
"rollout-1".to_string(),
|
||||
"thread-root".to_string(),
|
||||
)?;
|
||||
|
||||
writer.append(RawTraceEventPayload::RolloutStarted {
|
||||
trace_id: "trace-1".to_string(),
|
||||
root_thread_id: "thread-root".to_string(),
|
||||
})?;
|
||||
let metadata_payload = writer.write_json_payload(
|
||||
RawPayloadKind::ProtocolEvent,
|
||||
&json!({
|
||||
"source": "test",
|
||||
"model": "gpt-test",
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::ThreadStarted {
|
||||
thread_id: "thread-root".to_string(),
|
||||
agent_path: "/root".to_string(),
|
||||
metadata_payload: Some(metadata_payload.clone()),
|
||||
})?;
|
||||
writer.append(RawTraceEventPayload::CodexTurnStarted {
|
||||
codex_turn_id: "turn-1".to_string(),
|
||||
thread_id: "thread-root".to_string(),
|
||||
})?;
|
||||
let inference_request = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceRequest,
|
||||
&json!({
|
||||
"model": "gpt-test",
|
||||
"input": [{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [{"type": "input_text", "text": "hello"}]
|
||||
}],
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceStarted {
|
||||
inference_call_id: "inference-1".to_string(),
|
||||
thread_id: "thread-root".to_string(),
|
||||
codex_turn_id: "turn-1".to_string(),
|
||||
model: "gpt-test".to_string(),
|
||||
provider_name: "test-provider".to_string(),
|
||||
request_payload: inference_request.clone(),
|
||||
})?;
|
||||
let inference_response = writer.write_json_payload(
|
||||
RawPayloadKind::InferenceResponse,
|
||||
&json!({
|
||||
"response_id": "resp-1",
|
||||
"output_items": [],
|
||||
}),
|
||||
)?;
|
||||
writer.append(RawTraceEventPayload::InferenceCompleted {
|
||||
inference_call_id: "inference-1".to_string(),
|
||||
response_id: Some("resp-1".to_string()),
|
||||
response_payload: inference_response.clone(),
|
||||
})?;
|
||||
writer.append(RawTraceEventPayload::CodexTurnEnded {
|
||||
codex_turn_id: "turn-1".to_string(),
|
||||
status: ExecutionStatus::Completed,
|
||||
})?;
|
||||
writer.append(RawTraceEventPayload::RolloutEnded {
|
||||
status: RolloutStatus::Completed,
|
||||
})?;
|
||||
|
||||
let rollout = replay_bundle(temp.path())?;
|
||||
|
||||
assert_eq!(rollout.status, RolloutStatus::Completed);
|
||||
assert_eq!(rollout.root_thread_id, "thread-root");
|
||||
assert_eq!(rollout.threads["thread-root"].agent_path, "/root");
|
||||
assert_eq!(rollout.codex_turns["turn-1"].thread_id, "thread-root");
|
||||
assert_eq!(
|
||||
rollout.codex_turns["turn-1"].execution.status,
|
||||
ExecutionStatus::Completed,
|
||||
);
|
||||
assert_eq!(
|
||||
rollout.inference_calls["inference-1"].raw_request_payload_id,
|
||||
inference_request.raw_payload_id,
|
||||
);
|
||||
assert_eq!(
|
||||
rollout.inference_calls["inference-1"].raw_response_payload_id,
|
||||
Some(inference_response.raw_payload_id),
|
||||
);
|
||||
assert_eq!(
|
||||
rollout.raw_payloads[&metadata_payload.raw_payload_id].path,
|
||||
"payloads/1.json"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user