Compare commits

...

13 Commits

Author SHA1 Message Date
Ahmed Ibrahim
926af1b68f feedback 2025-09-17 23:15:04 -07:00
Ahmed Ibrahim
fb77080bca feedback 2025-09-17 23:05:36 -07:00
Ahmed Ibrahim
0ee8f304e9 feedback 2025-09-17 22:56:00 -07:00
Ahmed Ibrahim
698553832f feedback 2025-09-17 22:51:18 -07:00
Ahmed Ibrahim
08c9866873 feedback 2025-09-17 22:46:26 -07:00
Ahmed Ibrahim
036a47c2f3 feedback 2025-09-17 22:37:17 -07:00
Ahmed Ibrahim
d2d059488e fix 2025-09-17 16:09:17 -07:00
Ahmed Ibrahim
0329ab2af7 fix 2025-09-17 16:07:59 -07:00
Ahmed Ibrahim
72d03c8b63 fix 2025-09-17 15:55:02 -07:00
Ahmed Ibrahim
523508593c fixture 2025-09-17 15:54:39 -07:00
Ahmed Ibrahim
6451c078b9 move 2025-09-17 15:36:48 -07:00
Ahmed Ibrahim
760d4d5704 codex/add-tests-for-backward-compatibility-64u61y 2025-09-17 15:17:57 -07:00
Ahmed Ibrahim
c61c736b4f Add rollout JSONL compatibility tests 2025-09-17 14:02:44 -07:00
21 changed files with 444 additions and 10 deletions

View File

@@ -518,7 +518,7 @@ pub enum EventMsg {
ExitedReviewMode(ExitedReviewModeEvent),
}
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
pub struct ExitedReviewModeEvent {
pub review_output: Option<ReviewOutputEvent>,
}
@@ -540,7 +540,7 @@ pub struct TaskStartedEvent {
pub model_context_window: Option<u64>,
}
#[derive(Debug, Clone, Deserialize, Serialize, Default, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, Default, PartialEq, TS)]
pub struct TokenUsage {
pub input_tokens: u64,
pub cached_input_tokens: u64,
@@ -549,7 +549,7 @@ pub struct TokenUsage {
pub total_tokens: u64,
}
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
pub struct TokenUsageInfo {
pub total_token_usage: TokenUsage,
pub last_token_usage: TokenUsage,
@@ -586,7 +586,7 @@ impl TokenUsageInfo {
}
}
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
pub struct TokenCountEvent {
pub info: Option<TokenUsageInfo>,
}
@@ -695,12 +695,12 @@ impl fmt::Display for FinalOutput {
}
}
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
pub struct AgentMessageEvent {
pub message: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")]
pub enum InputMessageKind {
/// Plain user text (default)
@@ -711,7 +711,7 @@ pub enum InputMessageKind {
EnvironmentContext,
}
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
pub struct UserMessageEvent {
pub message: String,
#[serde(skip_serializing_if = "Option::is_none")]
@@ -768,12 +768,12 @@ pub struct AgentMessageDeltaEvent {
pub delta: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
pub struct AgentReasoningEvent {
pub text: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
pub struct AgentReasoningRawContentEvent {
pub text: String,
}
@@ -1230,7 +1230,7 @@ pub struct Chunk {
pub inserted_lines: Vec<String>,
}
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
pub struct TurnAbortedEvent {
pub reason: TurnAbortReason,
}
@@ -1298,4 +1298,169 @@ mod tests {
let deserialized: ExecCommandOutputDeltaEvent = serde_json::from_str(&serialized).unwrap();
assert_eq!(deserialized, event);
}
fn normalize_rollout_value(value: serde_json::Value) -> serde_json::Value {
match value {
serde_json::Value::Object(mut map) => {
let is_response_item = matches!(
map.get("type"),
Some(serde_json::Value::String(item_type)) if item_type == "response_item"
);
if let Some(serde_json::Value::Object(payload)) = map.get_mut("payload") {
if let Some(serde_json::Value::String(item_type)) = payload.get("type") {
let known_response_types = [
"message",
"reasoning",
"local_shell_call",
"function_call",
"function_call_output",
"custom_tool_call",
"custom_tool_call_output",
"web_search_call",
];
if known_response_types.contains(&item_type.as_str()) {
payload.remove("id");
} else if is_response_item {
payload.clear();
payload.insert(
"type".to_string(),
serde_json::Value::String("other".to_string()),
);
}
}
}
for value in map.values_mut() {
let normalized = normalize_rollout_value(std::mem::take(value));
*value = normalized;
}
serde_json::Value::Object(map)
}
serde_json::Value::Array(items) => {
serde_json::Value::Array(items.into_iter().map(normalize_rollout_value).collect())
}
serde_json::Value::Number(num) => {
if num.as_i64().is_some() || num.as_u64().is_some() {
serde_json::Value::Number(num)
} else if let Some(value) = num.as_f64() {
let rounded = (value * 1_000_000.0).round() / 1_000_000.0;
serde_json::Number::from_f64(rounded)
.map(serde_json::Value::Number)
.unwrap_or_else(|| serde_json::Value::Number(num))
} else {
serde_json::Value::Number(num)
}
}
other => other,
}
}
/// Asserts that `raw` survives a round trip through `RolloutLine`.
///
/// `raw` is parsed twice: once as untyped JSON (the expected value) and once
/// as a `RolloutLine`, which is then serialized back to JSON (the actual
/// value). Both sides are passed through `normalize_rollout_value` before
/// being compared.
///
/// `case` is only used to label failures.
///
/// # Panics
///
/// Panics if `raw` fails to parse as JSON or as a `RolloutLine`, if the parsed
/// line fails to serialize, or if the normalized values differ.
fn assert_rollout_round_trip(case: &str, raw: &str) {
    let expected = match serde_json::from_str::<serde_json::Value>(raw) {
        Ok(json) => json,
        Err(err) => panic!("failed to parse {case}: {err}"),
    };
    let line = match serde_json::from_str::<RolloutLine>(raw) {
        Ok(line) => line,
        Err(err) => panic!("failed to parse {case}: {err}"),
    };
    let actual = match serde_json::to_value(&line) {
        Ok(json) => json,
        Err(err) => panic!("failed to serialize {case}: {err}"),
    };

    assert_eq!(
        normalize_rollout_value(actual),
        normalize_rollout_value(expected),
        "case {case} failed round trip",
    );
}
/// Runs the round-trip check over every rollout fixture embedded from
/// `../tests/fixtures/rollouts/`.
#[test]
fn deserialize_rollout_fixtures() {
    // Expands a case label into `(label, fixture contents)`, where the fixture
    // is embedded at compile time from
    // `../tests/fixtures/rollouts/<label>.json`.
    macro_rules! fixture {
        ($case:literal) => {
            (
                $case,
                include_str!(concat!("../tests/fixtures/rollouts/", $case, ".json")),
            )
        };
    }

    let cases = [
        fixture!("session_meta/with_git"),
        fixture!("response_item/message"),
        fixture!("response_item/reasoning"),
        fixture!("response_item/local_shell_call"),
        fixture!("response_item/function_call"),
        fixture!("response_item/function_call_output"),
        fixture!("response_item/custom_tool_call"),
        fixture!("response_item/custom_tool_call_output"),
        fixture!("response_item/web_search_call"),
        fixture!("response_item/other"),
        fixture!("event_msg/user_message"),
        fixture!("event_msg/agent_message"),
        fixture!("event_msg/agent_reasoning"),
        fixture!("event_msg/agent_reasoning_raw_content"),
        fixture!("event_msg/token_count_info"),
        fixture!("event_msg/entered_review_mode"),
        fixture!("event_msg/exited_review_mode"),
        fixture!("event_msg/turn_aborted"),
        fixture!("misc/compacted"),
        fixture!("misc/turn_context_workspace"),
    ];

    // Cases run in declaration order; the first failing case panics with its label.
    cases
        .into_iter()
        .for_each(|(case, raw)| assert_rollout_round_trip(case, raw));
}
}

View File

@@ -0,0 +1,8 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "event_msg",
"payload": {
"type": "agent_message",
"message": "Sure thing"
}
}

View File

@@ -0,0 +1,8 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "event_msg",
"payload": {
"type": "agent_reasoning",
"text": "Thinking..."
}
}

View File

@@ -0,0 +1,8 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "event_msg",
"payload": {
"type": "agent_reasoning_raw_content",
"text": "raw reasoning"
}
}

View File

@@ -0,0 +1,9 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "event_msg",
"payload": {
"type": "entered_review_mode",
"prompt": "Need review",
"user_facing_hint": "double-check work"
}
}

View File

@@ -0,0 +1,27 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "event_msg",
"payload": {
"type": "exited_review_mode",
"review_output": {
"findings": [
{
"title": "Bug",
"body": "Found an issue",
"confidence_score": 0.4,
"priority": 1,
"code_location": {
"absolute_file_path": "/workspace/src/lib.rs",
"line_range": {
"start": 1,
"end": 3
}
}
}
],
"overall_correctness": "needs_changes",
"overall_explanation": "Please fix",
"overall_confidence_score": 0.9
}
}
}

View File

@@ -0,0 +1,24 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "event_msg",
"payload": {
"type": "token_count",
"info": {
"total_token_usage": {
"input_tokens": 120,
"cached_input_tokens": 10,
"output_tokens": 30,
"reasoning_output_tokens": 5,
"total_tokens": 165
},
"last_token_usage": {
"input_tokens": 20,
"cached_input_tokens": 0,
"output_tokens": 15,
"reasoning_output_tokens": 5,
"total_tokens": 40
},
"model_context_window": 16000
}
}
}

View File

@@ -0,0 +1,8 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "event_msg",
"payload": {
"type": "turn_aborted",
"reason": "interrupted"
}
}

View File

@@ -0,0 +1,12 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "event_msg",
"payload": {
"type": "user_message",
"message": "Please help",
"kind": "plain",
"images": [
"data:image/png;base64,AAA"
]
}
}

View File

@@ -0,0 +1,7 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "compacted",
"payload": {
"message": "Turn summary"
}
}

View File

@@ -0,0 +1,20 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "turn_context",
"payload": {
"cwd": "/workspace",
"approval_policy": "on-request",
"sandbox_policy": {
"mode": "workspace-write",
"writable_roots": [
"/workspace/tmp"
],
"network_access": true,
"exclude_tmpdir_env_var": false,
"exclude_slash_tmp": true
},
"model": "gpt-5",
"effort": "high",
"summary": "detailed"
}
}

View File

@@ -0,0 +1,12 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "response_item",
"payload": {
"type": "custom_tool_call",
"id": "legacy-tool",
"status": "completed",
"call_id": "tool-456",
"name": "my_tool",
"input": "{\"foo\":1}"
}
}

View File

@@ -0,0 +1,9 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "response_item",
"payload": {
"type": "custom_tool_call_output",
"call_id": "tool-456",
"output": "tool finished"
}
}

View File

@@ -0,0 +1,11 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "response_item",
"payload": {
"type": "function_call",
"id": "legacy-function",
"name": "shell",
"arguments": "{\"command\":[\"echo\",\"hi\"]}",
"call_id": "call-123"
}
}

View File

@@ -0,0 +1,9 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "response_item",
"payload": {
"type": "function_call_output",
"call_id": "call-123",
"output": "{\"stdout\":\"done\"}"
}
}

View File

@@ -0,0 +1,23 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "response_item",
"payload": {
"type": "local_shell_call",
"id": "legacy-shell-call",
"call_id": "shell-call-1",
"status": "completed",
"action": {
"type": "exec",
"command": [
"ls",
"-la"
],
"timeout_ms": 1200,
"working_directory": "/workspace",
"env": {
"PATH": "/usr/bin"
},
"user": "codex"
}
}
}

View File

@@ -0,0 +1,15 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "response_item",
"payload": {
"type": "message",
"id": "legacy-message",
"role": "assistant",
"content": [
{
"type": "output_text",
"text": "Hello from assistant"
}
]
}
}

View File

@@ -0,0 +1,8 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "response_item",
"payload": {
"type": "new_future_item",
"foo": "bar"
}
}

View File

@@ -0,0 +1,21 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "response_item",
"payload": {
"type": "reasoning",
"id": "reasoning-1",
"summary": [
{
"type": "summary_text",
"text": "Summarized thoughts"
}
],
"content": [
{
"type": "reasoning_text",
"text": "Detailed reasoning"
}
],
"encrypted_content": "encrypted"
}
}

View File

@@ -0,0 +1,13 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "response_item",
"payload": {
"type": "web_search_call",
"id": "legacy-search",
"status": "completed",
"action": {
"type": "search",
"query": "weather in SF"
}
}
}

View File

@@ -0,0 +1,17 @@
{
"timestamp": "2025-01-02T03:04:05.678Z",
"type": "session_meta",
"payload": {
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
"timestamp": "2025-01-02T03:04:05.678Z",
"cwd": "/workspace",
"originator": "codex-cli",
"cli_version": "1.0.0",
"instructions": "Remember the tests",
"git": {
"commit_hash": "abc123",
"branch": "main",
"repository_url": "https://example.com/repo.git"
}
}
}