mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
feedback
This commit is contained in:
@@ -1384,9 +1384,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn deserialize_rollout_fixtures() {
|
||||
use std::collections::BTreeSet;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
const TIMESTAMP: &str = "2025-01-02T03:04:05.678Z";
|
||||
|
||||
let cases = [
|
||||
@@ -1395,107 +1392,16 @@ mod tests {
|
||||
raw: include_str!("../tests/fixtures/rollouts/session_meta/with_git.json"),
|
||||
expected_kind: ExpectedItemKind::SessionMeta,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "session_meta/without_git",
|
||||
raw: include_str!("../tests/fixtures/rollouts/session_meta/without_git.json"),
|
||||
expected_kind: ExpectedItemKind::SessionMeta,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "response_item/message",
|
||||
raw: include_str!("../tests/fixtures/rollouts/response_item/message.json"),
|
||||
expected_kind: ExpectedItemKind::Response,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "response_item/reasoning",
|
||||
raw: include_str!("../tests/fixtures/rollouts/response_item/reasoning.json"),
|
||||
expected_kind: ExpectedItemKind::Response,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "response_item/local_shell_call",
|
||||
raw: include_str!("../tests/fixtures/rollouts/response_item/local_shell_call.json"),
|
||||
expected_kind: ExpectedItemKind::Response,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "response_item/function_call",
|
||||
raw: include_str!("../tests/fixtures/rollouts/response_item/function_call.json"),
|
||||
expected_kind: ExpectedItemKind::Response,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "response_item/function_call_output",
|
||||
raw: include_str!(
|
||||
"../tests/fixtures/rollouts/response_item/function_call_output.json"
|
||||
),
|
||||
expected_kind: ExpectedItemKind::Response,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "response_item/custom_tool_call",
|
||||
raw: include_str!("../tests/fixtures/rollouts/response_item/custom_tool_call.json"),
|
||||
expected_kind: ExpectedItemKind::Response,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "response_item/custom_tool_call_output",
|
||||
raw: include_str!(
|
||||
"../tests/fixtures/rollouts/response_item/custom_tool_call_output.json"
|
||||
),
|
||||
expected_kind: ExpectedItemKind::Response,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "response_item/web_search_call",
|
||||
raw: include_str!("../tests/fixtures/rollouts/response_item/web_search_call.json"),
|
||||
expected_kind: ExpectedItemKind::Response,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "response_item/other",
|
||||
raw: include_str!("../tests/fixtures/rollouts/response_item/other.json"),
|
||||
expected_kind: ExpectedItemKind::Response,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "event_msg/user_message",
|
||||
raw: include_str!("../tests/fixtures/rollouts/event_msg/user_message.json"),
|
||||
expected_kind: ExpectedItemKind::Event,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "event_msg/agent_message",
|
||||
raw: include_str!("../tests/fixtures/rollouts/event_msg/agent_message.json"),
|
||||
expected_kind: ExpectedItemKind::Event,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "event_msg/agent_reasoning",
|
||||
raw: include_str!("../tests/fixtures/rollouts/event_msg/agent_reasoning.json"),
|
||||
expected_kind: ExpectedItemKind::Event,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "event_msg/agent_reasoning_raw_content",
|
||||
raw: include_str!(
|
||||
"../tests/fixtures/rollouts/event_msg/agent_reasoning_raw_content.json"
|
||||
),
|
||||
expected_kind: ExpectedItemKind::Event,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "event_msg/token_count_info",
|
||||
raw: include_str!("../tests/fixtures/rollouts/event_msg/token_count_info.json"),
|
||||
expected_kind: ExpectedItemKind::Event,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "event_msg/token_count_none",
|
||||
raw: include_str!("../tests/fixtures/rollouts/event_msg/token_count_none.json"),
|
||||
expected_kind: ExpectedItemKind::Event,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "event_msg/entered_review_mode",
|
||||
raw: include_str!("../tests/fixtures/rollouts/event_msg/entered_review_mode.json"),
|
||||
expected_kind: ExpectedItemKind::Event,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "event_msg/exited_review_mode",
|
||||
raw: include_str!("../tests/fixtures/rollouts/event_msg/exited_review_mode.json"),
|
||||
expected_kind: ExpectedItemKind::Event,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "event_msg/turn_aborted",
|
||||
raw: include_str!("../tests/fixtures/rollouts/event_msg/turn_aborted.json"),
|
||||
expected_kind: ExpectedItemKind::Event,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "misc/compacted",
|
||||
raw: include_str!("../tests/fixtures/rollouts/misc/compacted.json"),
|
||||
@@ -1506,21 +1412,8 @@ mod tests {
|
||||
raw: include_str!("../tests/fixtures/rollouts/misc/turn_context_workspace.json"),
|
||||
expected_kind: ExpectedItemKind::TurnContext,
|
||||
},
|
||||
RolloutFixtureCase {
|
||||
name: "misc/turn_context_read_only",
|
||||
raw: include_str!("../tests/fixtures/rollouts/misc/turn_context_read_only.json"),
|
||||
expected_kind: ExpectedItemKind::TurnContext,
|
||||
},
|
||||
];
|
||||
|
||||
let mut session_meta_git = BTreeSet::new();
|
||||
let mut session_meta_instructions = BTreeSet::new();
|
||||
let mut response_variants = BTreeSet::new();
|
||||
let mut event_variants = BTreeSet::new();
|
||||
let mut turn_context_policies = BTreeSet::new();
|
||||
let mut turn_context_modes = BTreeSet::new();
|
||||
let mut saw_compacted = false;
|
||||
|
||||
for case in cases {
|
||||
let parsed = parse_rollout_line(case.raw, case.name);
|
||||
assert_eq!(
|
||||
@@ -1530,53 +1423,11 @@ mod tests {
|
||||
);
|
||||
|
||||
match (case.expected_kind, parsed.item) {
|
||||
(ExpectedItemKind::SessionMeta, RolloutItem::SessionMeta(line)) => {
|
||||
session_meta_git.insert(line.git.is_some());
|
||||
session_meta_instructions.insert(line.meta.instructions.is_some());
|
||||
}
|
||||
(ExpectedItemKind::Response, RolloutItem::ResponseItem(item)) => {
|
||||
let variant = match item {
|
||||
ResponseItem::Message { .. } => "message",
|
||||
ResponseItem::Reasoning { .. } => "reasoning",
|
||||
ResponseItem::LocalShellCall { .. } => "local_shell_call",
|
||||
ResponseItem::FunctionCall { .. } => "function_call",
|
||||
ResponseItem::FunctionCallOutput { .. } => "function_call_output",
|
||||
ResponseItem::CustomToolCall { .. } => "custom_tool_call",
|
||||
ResponseItem::CustomToolCallOutput { .. } => "custom_tool_call_output",
|
||||
ResponseItem::WebSearchCall { .. } => "web_search_call",
|
||||
ResponseItem::Other => "other",
|
||||
};
|
||||
response_variants.insert(variant);
|
||||
}
|
||||
(ExpectedItemKind::Event, RolloutItem::EventMsg(event)) => {
|
||||
let variant = match event {
|
||||
EventMsg::UserMessage(_) => "user_message",
|
||||
EventMsg::AgentMessage(_) => "agent_message",
|
||||
EventMsg::AgentReasoning(_) => "agent_reasoning",
|
||||
EventMsg::AgentReasoningRawContent(_) => "agent_reasoning_raw_content",
|
||||
EventMsg::TokenCount(_) => "token_count",
|
||||
EventMsg::EnteredReviewMode(_) => "entered_review_mode",
|
||||
EventMsg::ExitedReviewMode(_) => "exited_review_mode",
|
||||
EventMsg::TurnAborted(_) => "turn_aborted",
|
||||
other => panic!(
|
||||
"case {} contained unexpected event variant {:?}",
|
||||
case.name, other
|
||||
),
|
||||
};
|
||||
event_variants.insert(variant);
|
||||
}
|
||||
(ExpectedItemKind::Compacted, RolloutItem::Compacted(_)) => {
|
||||
saw_compacted = true;
|
||||
}
|
||||
(ExpectedItemKind::TurnContext, RolloutItem::TurnContext(item)) => {
|
||||
turn_context_policies.insert(item.approval_policy.to_string());
|
||||
let mode = match &item.sandbox_policy {
|
||||
SandboxPolicy::DangerFullAccess => "danger_full_access",
|
||||
SandboxPolicy::ReadOnly => "read_only",
|
||||
SandboxPolicy::WorkspaceWrite { .. } => "workspace_write",
|
||||
};
|
||||
turn_context_modes.insert(mode);
|
||||
}
|
||||
(ExpectedItemKind::SessionMeta, RolloutItem::SessionMeta(_)) => {}
|
||||
(ExpectedItemKind::Response, RolloutItem::ResponseItem(_)) => {}
|
||||
(ExpectedItemKind::Event, RolloutItem::EventMsg(_)) => {}
|
||||
(ExpectedItemKind::Compacted, RolloutItem::Compacted(_)) => {}
|
||||
(ExpectedItemKind::TurnContext, RolloutItem::TurnContext(_)) => {}
|
||||
(expected, actual) => {
|
||||
panic!(
|
||||
"case {} expected {:?} but parsed {:?}",
|
||||
@@ -1585,59 +1436,5 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(
|
||||
session_meta_git,
|
||||
BTreeSet::from_iter([false, true]),
|
||||
"expected both presence and absence of git metadata"
|
||||
);
|
||||
assert_eq!(
|
||||
session_meta_instructions,
|
||||
BTreeSet::from_iter([false, true]),
|
||||
"expected both presence and absence of instructions"
|
||||
);
|
||||
assert_eq!(
|
||||
response_variants,
|
||||
BTreeSet::from_iter([
|
||||
"custom_tool_call",
|
||||
"custom_tool_call_output",
|
||||
"function_call",
|
||||
"function_call_output",
|
||||
"local_shell_call",
|
||||
"message",
|
||||
"other",
|
||||
"reasoning",
|
||||
"web_search_call",
|
||||
]),
|
||||
"response fixture coverage mismatch"
|
||||
);
|
||||
assert_eq!(
|
||||
event_variants,
|
||||
BTreeSet::from_iter([
|
||||
"agent_message",
|
||||
"agent_reasoning",
|
||||
"agent_reasoning_raw_content",
|
||||
"entered_review_mode",
|
||||
"exited_review_mode",
|
||||
"token_count",
|
||||
"turn_aborted",
|
||||
"user_message",
|
||||
]),
|
||||
"event fixture coverage mismatch"
|
||||
);
|
||||
assert!(
|
||||
saw_compacted,
|
||||
"expected compacted rollout case to be covered"
|
||||
);
|
||||
assert_eq!(
|
||||
turn_context_policies,
|
||||
BTreeSet::from_iter(["never".to_string(), "on-request".to_string()]),
|
||||
"turn context approval policies mismatch"
|
||||
);
|
||||
assert_eq!(
|
||||
turn_context_modes,
|
||||
BTreeSet::from_iter(["read_only", "workspace_write"]),
|
||||
"turn context sandbox modes mismatch"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "event_msg",
|
||||
"payload": {
|
||||
"type": "agent_message",
|
||||
"message": "Sure thing"
|
||||
}
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "event_msg",
|
||||
"payload": {
|
||||
"type": "agent_reasoning",
|
||||
"text": "Thinking..."
|
||||
}
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "event_msg",
|
||||
"payload": {
|
||||
"type": "agent_reasoning_raw_content",
|
||||
"text": "raw reasoning"
|
||||
}
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "event_msg",
|
||||
"payload": {
|
||||
"type": "entered_review_mode",
|
||||
"prompt": "Need review",
|
||||
"user_facing_hint": "double-check work"
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "event_msg",
|
||||
"payload": {
|
||||
"type": "exited_review_mode",
|
||||
"review_output": {
|
||||
"findings": [
|
||||
{
|
||||
"title": "Bug",
|
||||
"body": "Found an issue",
|
||||
"confidence_score": 0.4,
|
||||
"priority": 1,
|
||||
"code_location": {
|
||||
"absolute_file_path": "/workspace/src/lib.rs",
|
||||
"line_range": {
|
||||
"start": 1,
|
||||
"end": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"overall_correctness": "needs_changes",
|
||||
"overall_explanation": "Please fix",
|
||||
"overall_confidence_score": 0.9
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,24 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "event_msg",
|
||||
"payload": {
|
||||
"type": "token_count",
|
||||
"info": {
|
||||
"total_token_usage": {
|
||||
"input_tokens": 120,
|
||||
"cached_input_tokens": 10,
|
||||
"output_tokens": 30,
|
||||
"reasoning_output_tokens": 5,
|
||||
"total_tokens": 165
|
||||
},
|
||||
"last_token_usage": {
|
||||
"input_tokens": 20,
|
||||
"cached_input_tokens": 0,
|
||||
"output_tokens": 15,
|
||||
"reasoning_output_tokens": 5,
|
||||
"total_tokens": 40
|
||||
},
|
||||
"model_context_window": 16000
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "event_msg",
|
||||
"payload": {
|
||||
"type": "token_count",
|
||||
"info": null
|
||||
}
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "event_msg",
|
||||
"payload": {
|
||||
"type": "turn_aborted",
|
||||
"reason": "interrupted"
|
||||
}
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "turn_context",
|
||||
"payload": {
|
||||
"cwd": "/workspace",
|
||||
"approval_policy": "never",
|
||||
"sandbox_policy": {
|
||||
"mode": "read-only"
|
||||
},
|
||||
"model": "gpt-5",
|
||||
"summary": "auto"
|
||||
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "custom_tool_call",
|
||||
"id": "legacy-tool",
|
||||
"status": "completed",
|
||||
"call_id": "tool-456",
|
||||
"name": "my_tool",
|
||||
"input": "{\"foo\":1}"
|
||||
}
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "custom_tool_call_output",
|
||||
"call_id": "tool-456",
|
||||
"output": "tool finished"
|
||||
}
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "function_call",
|
||||
"id": "legacy-function",
|
||||
"name": "shell",
|
||||
"arguments": "{\"command\":[\"echo\",\"hi\"]}",
|
||||
"call_id": "call-123"
|
||||
}
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "function_call_output",
|
||||
"call_id": "call-123",
|
||||
"output": "{\"stdout\":\"done\"}"
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "local_shell_call",
|
||||
"id": "legacy-shell-call",
|
||||
"call_id": "shell-call-1",
|
||||
"status": "completed",
|
||||
"action": {
|
||||
"type": "exec",
|
||||
"command": [
|
||||
"ls",
|
||||
"-la"
|
||||
],
|
||||
"timeout_ms": 1200,
|
||||
"working_directory": "/workspace",
|
||||
"env": {
|
||||
"PATH": "/usr/bin"
|
||||
},
|
||||
"user": "codex"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "new_future_item",
|
||||
"foo": "bar"
|
||||
}
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "reasoning",
|
||||
"id": "reasoning-1",
|
||||
"summary": [
|
||||
{
|
||||
"type": "summary_text",
|
||||
"text": "Summarized thoughts"
|
||||
}
|
||||
],
|
||||
"content": [
|
||||
{
|
||||
"type": "reasoning_text",
|
||||
"text": "Detailed reasoning"
|
||||
}
|
||||
],
|
||||
"encrypted_content": "encrypted"
|
||||
}
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "web_search_call",
|
||||
"id": "legacy-search",
|
||||
"status": "completed",
|
||||
"action": {
|
||||
"type": "search",
|
||||
"query": "weather in SF"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"type": "session_meta",
|
||||
"payload": {
|
||||
"id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
|
||||
"timestamp": "2025-01-02T03:04:05.678Z",
|
||||
"cwd": "/workspace",
|
||||
"originator": "codex-cli",
|
||||
"cli_version": "1.0.0",
|
||||
"instructions": null
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user