mirror of
https://github.com/openai/codex.git
synced 2026-05-16 17:23:57 +00:00
Compare commits
9 Commits
rust-v0.37
...
codex/add-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
06dc8960be | ||
|
|
208089e58e | ||
|
|
e5fdb5b0fd | ||
|
|
5332f6e215 | ||
|
|
5d87f5d24a | ||
|
|
791d7b125f | ||
|
|
72733e34c4 | ||
|
|
b8d2b1a576 | ||
|
|
7fe4021f95 |
48
.github/workflows/rust-release.yml
vendored
48
.github/workflows/rust-release.yml
vendored
@@ -167,6 +167,12 @@ jobs:
|
||||
needs: build
|
||||
name: release
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
actions: read
|
||||
outputs:
|
||||
version: ${{ steps.release_name.outputs.name }}
|
||||
tag: ${{ github.ref_name }}
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@@ -220,6 +226,48 @@ jobs:
|
||||
tag: ${{ github.ref_name }}
|
||||
config: .github/dotslash-config.json
|
||||
|
||||
# Publish to npm using OIDC authentication.
|
||||
# July 31, 2025: https://github.blog/changelog/2025-07-31-npm-trusted-publishing-with-oidc-is-generally-available/
|
||||
# npm docs: https://docs.npmjs.com/trusted-publishers
|
||||
publish-npm:
|
||||
# Skip this job for pre-releases (alpha/beta).
|
||||
if: ${{ !contains(needs.release.outputs.version, '-') }}
|
||||
name: publish-npm
|
||||
needs: release
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
id-token: write # Required for OIDC
|
||||
contents: read
|
||||
|
||||
steps:
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
node-version: 22
|
||||
registry-url: "https://registry.npmjs.org"
|
||||
scope: "@openai"
|
||||
|
||||
# Trusted publishing requires npm CLI version 11.5.1 or later.
|
||||
- name: Update npm
|
||||
run: npm install -g npm@latest
|
||||
|
||||
- name: Download npm tarball from release
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
version="${{ needs.release.outputs.version }}"
|
||||
tag="${{ needs.release.outputs.tag }}"
|
||||
mkdir -p dist/npm
|
||||
gh release download "$tag" \
|
||||
--repo "${GITHUB_REPOSITORY}" \
|
||||
--pattern "codex-npm-${version}.tgz" \
|
||||
--dir dist/npm
|
||||
|
||||
# No NODE_AUTH_TOKEN needed because we use OIDC.
|
||||
- name: Publish to npm
|
||||
run: npm publish "${GITHUB_WORKSPACE}/dist/npm/codex-npm-${{ needs.release.outputs.version }}.tgz"
|
||||
|
||||
update-branch:
|
||||
name: Update latest-alpha-cli branch
|
||||
permissions:
|
||||
|
||||
@@ -4,6 +4,7 @@ In the codex-rs folder where the rust code lives:
|
||||
|
||||
- Crate names are prefixed with `codex-`. For example, the `core` folder's crate is named `codex-core`
|
||||
- When using format! and you can inline variables into {}, always do that.
|
||||
- Install any commands the repo relies on (for example `just`, `rg`, or `cargo-insta`) if they aren't already available before running instructions here.
|
||||
- Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` or `CODEX_SANDBOX_ENV_VAR`.
|
||||
- You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.
|
||||
- Similarly, when you spawn a process using Seatbelt (`/usr/bin/sandbox-exec`), `CODEX_SANDBOX=seatbelt` will be set on the child process. Integration tests that want to run Seatbelt themselves cannot be run under Seatbelt, so checks for `CODEX_SANDBOX=seatbelt` are also often used to early exit out of tests, as appropriate.
|
||||
|
||||
@@ -15,7 +15,8 @@
|
||||
],
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/openai/codex.git"
|
||||
"url": "git+https://github.com/openai/codex.git",
|
||||
"directory": "codex-cli"
|
||||
},
|
||||
"dependencies": {
|
||||
"@vscode/ripgrep": "^1.15.14"
|
||||
|
||||
@@ -11,6 +11,7 @@ use std::time::Duration;
|
||||
use crate::AuthManager;
|
||||
use crate::client_common::REVIEW_PROMPT;
|
||||
use crate::event_mapping::map_response_item_to_event_messages;
|
||||
use crate::review_format::format_review_findings_block;
|
||||
use async_channel::Receiver;
|
||||
use async_channel::Sender;
|
||||
use codex_apply_patch::ApplyPatchAction;
|
||||
@@ -1154,20 +1155,16 @@ impl AgentTask {
|
||||
fn abort(self, reason: TurnAbortReason) {
|
||||
// TOCTOU?
|
||||
if !self.handle.is_finished() {
|
||||
if self.kind == AgentTaskKind::Review {
|
||||
let sess = self.sess.clone();
|
||||
let sub_id = self.sub_id.clone();
|
||||
tokio::spawn(async move {
|
||||
exit_review_mode(sess, sub_id, None).await;
|
||||
});
|
||||
}
|
||||
self.handle.abort();
|
||||
let event = Event {
|
||||
id: self.sub_id,
|
||||
id: self.sub_id.clone(),
|
||||
msg: EventMsg::TurnAborted(TurnAbortedEvent { reason }),
|
||||
};
|
||||
let sess = self.sess;
|
||||
tokio::spawn(async move {
|
||||
if self.kind == AgentTaskKind::Review {
|
||||
exit_review_mode(sess.clone(), self.sub_id, None).await;
|
||||
}
|
||||
sess.send_event(event).await;
|
||||
});
|
||||
}
|
||||
@@ -1560,7 +1557,8 @@ async fn spawn_review_thread(
|
||||
experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
|
||||
});
|
||||
|
||||
let base_instructions = Some(REVIEW_PROMPT.to_string());
|
||||
let base_instructions = REVIEW_PROMPT.to_string();
|
||||
let review_prompt = review_request.prompt.clone();
|
||||
let provider = parent_turn_context.client.get_provider();
|
||||
let auth_manager = parent_turn_context.client.get_auth_manager();
|
||||
let model_family = review_model_family.clone();
|
||||
@@ -1569,16 +1567,19 @@ async fn spawn_review_thread(
|
||||
let mut per_turn_config = (*config).clone();
|
||||
per_turn_config.model = model.clone();
|
||||
per_turn_config.model_family = model_family.clone();
|
||||
per_turn_config.model_reasoning_effort = Some(ReasoningEffortConfig::Low);
|
||||
per_turn_config.model_reasoning_summary = ReasoningSummaryConfig::Detailed;
|
||||
if let Some(model_info) = get_model_info(&model_family) {
|
||||
per_turn_config.model_context_window = Some(model_info.context_window);
|
||||
}
|
||||
|
||||
let per_turn_config = Arc::new(per_turn_config);
|
||||
let client = ModelClient::new(
|
||||
Arc::new(per_turn_config),
|
||||
per_turn_config.clone(),
|
||||
auth_manager,
|
||||
provider,
|
||||
parent_turn_context.client.get_reasoning_effort(),
|
||||
parent_turn_context.client.get_reasoning_summary(),
|
||||
per_turn_config.model_reasoning_effort,
|
||||
per_turn_config.model_reasoning_summary,
|
||||
sess.conversation_id,
|
||||
);
|
||||
|
||||
@@ -1586,7 +1587,7 @@ async fn spawn_review_thread(
|
||||
client,
|
||||
tools_config,
|
||||
user_instructions: None,
|
||||
base_instructions,
|
||||
base_instructions: Some(base_instructions.clone()),
|
||||
approval_policy: parent_turn_context.approval_policy,
|
||||
sandbox_policy: parent_turn_context.sandbox_policy.clone(),
|
||||
shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
|
||||
@@ -1596,7 +1597,7 @@ async fn spawn_review_thread(
|
||||
|
||||
// Seed the child task with the review prompt as the initial user message.
|
||||
let input: Vec<InputItem> = vec![InputItem::Text {
|
||||
text: review_request.prompt.clone(),
|
||||
text: format!("{base_instructions}\n\n---\n\nNow, here's your task: {review_prompt}"),
|
||||
}];
|
||||
let tc = Arc::new(review_turn_context);
|
||||
|
||||
@@ -1654,6 +1655,8 @@ async fn run_task(
|
||||
let is_review_mode = turn_context.is_review_mode;
|
||||
let mut review_thread_history: Vec<ResponseItem> = Vec::new();
|
||||
if is_review_mode {
|
||||
// Seed review threads with environment context so the model knows the working directory.
|
||||
review_thread_history.extend(sess.build_initial_context(turn_context.as_ref()));
|
||||
review_thread_history.push(initial_input_for_turn.into());
|
||||
} else {
|
||||
sess.record_input_and_rollout_usermsg(&initial_input_for_turn)
|
||||
@@ -3257,7 +3260,8 @@ fn convert_call_tool_result_to_function_call_output_payload(
|
||||
}
|
||||
}
|
||||
|
||||
/// Emits an ExitedReviewMode Event with optional ReviewOutput.
|
||||
/// Emits an ExitedReviewMode Event with optional ReviewOutput,
|
||||
/// and records a user message with the review output.
|
||||
async fn exit_review_mode(
|
||||
session: Arc<Session>,
|
||||
task_sub_id: String,
|
||||
@@ -3265,9 +3269,50 @@ async fn exit_review_mode(
|
||||
) {
|
||||
let event = Event {
|
||||
id: task_sub_id,
|
||||
msg: EventMsg::ExitedReviewMode(ExitedReviewModeEvent { review_output }),
|
||||
msg: EventMsg::ExitedReviewMode(ExitedReviewModeEvent {
|
||||
review_output: review_output.clone(),
|
||||
}),
|
||||
};
|
||||
session.send_event(event).await;
|
||||
|
||||
let mut user_message = String::new();
|
||||
if let Some(out) = review_output {
|
||||
let mut findings_str = String::new();
|
||||
let text = out.overall_explanation.trim();
|
||||
if !text.is_empty() {
|
||||
findings_str.push_str(text);
|
||||
}
|
||||
if !out.findings.is_empty() {
|
||||
let block = format_review_findings_block(&out.findings, None);
|
||||
findings_str.push_str(&format!("\n{block}"));
|
||||
}
|
||||
user_message.push_str(&format!(
|
||||
r#"<user_action>
|
||||
<context>User initiated a review task. Here's the full review output from reviewer model. User may select one or more comments to resolve.</context>
|
||||
<action>review</action>
|
||||
<results>
|
||||
{findings_str}
|
||||
</results>
|
||||
</user_action>
|
||||
"#));
|
||||
} else {
|
||||
user_message.push_str(r#"<user_action>
|
||||
<context>User initiated a review task, but was interrupted. If user asks about this, tell them to re-initiate a review with `/review` and wait for it to complete.</context>
|
||||
<action>review</action>
|
||||
<results>
|
||||
None.
|
||||
</results>
|
||||
</user_action>
|
||||
"#);
|
||||
}
|
||||
|
||||
session
|
||||
.record_conversation_items(&[ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: user_message }],
|
||||
}])
|
||||
.await;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -38,7 +38,7 @@ use toml_edit::Item as TomlItem;
|
||||
use toml_edit::Table as TomlTable;
|
||||
|
||||
const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
|
||||
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5";
|
||||
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
|
||||
pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";
|
||||
|
||||
/// Maximum number of bytes of the documentation that will be embedded. Larger
|
||||
@@ -1581,7 +1581,7 @@ model_verbosity = "high"
|
||||
assert_eq!(
|
||||
Config {
|
||||
model: "o3".to_string(),
|
||||
review_model: "gpt-5".to_string(),
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
@@ -1639,7 +1639,7 @@ model_verbosity = "high"
|
||||
)?;
|
||||
let expected_gpt3_profile_config = Config {
|
||||
model: "gpt-3.5-turbo".to_string(),
|
||||
review_model: "gpt-5".to_string(),
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
|
||||
model_context_window: Some(16_385),
|
||||
model_max_output_tokens: Some(4_096),
|
||||
@@ -1712,7 +1712,7 @@ model_verbosity = "high"
|
||||
)?;
|
||||
let expected_zdr_profile_config = Config {
|
||||
model: "o3".to_string(),
|
||||
review_model: "gpt-5".to_string(),
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("o3").expect("known model slug"),
|
||||
model_context_window: Some(200_000),
|
||||
model_max_output_tokens: Some(100_000),
|
||||
@@ -1771,7 +1771,7 @@ model_verbosity = "high"
|
||||
)?;
|
||||
let expected_gpt5_profile_config = Config {
|
||||
model: "gpt-5".to_string(),
|
||||
review_model: "gpt-5".to_string(),
|
||||
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
|
||||
model_family: find_family_for_model("gpt-5").expect("known model slug"),
|
||||
model_context_window: Some(272_000),
|
||||
model_max_output_tokens: Some(128_000),
|
||||
|
||||
@@ -46,6 +46,7 @@ pub use model_provider_info::built_in_model_providers;
|
||||
pub use model_provider_info::create_oss_provider_with_base_url;
|
||||
mod conversation_manager;
|
||||
mod event_mapping;
|
||||
pub mod review_format;
|
||||
pub use codex_protocol::protocol::InitialHistory;
|
||||
pub use conversation_manager::ConversationManager;
|
||||
pub use conversation_manager::NewConversation;
|
||||
@@ -88,6 +89,7 @@ pub use codex_protocol::config_types as protocol_config_types;
|
||||
|
||||
pub use client::ModelClient;
|
||||
pub use client_common::Prompt;
|
||||
pub use client_common::REVIEW_PROMPT;
|
||||
pub use client_common::ResponseEvent;
|
||||
pub use client_common::ResponseStream;
|
||||
pub use codex_protocol::models::ContentItem;
|
||||
|
||||
55
codex-rs/core/src/review_format.rs
Normal file
55
codex-rs/core/src/review_format.rs
Normal file
@@ -0,0 +1,55 @@
|
||||
use crate::protocol::ReviewFinding;
|
||||
|
||||
// Note: We keep this module UI-agnostic. It returns plain strings that
|
||||
// higher layers (e.g., TUI) may style as needed.
|
||||
|
||||
fn format_location(item: &ReviewFinding) -> String {
|
||||
let path = item.code_location.absolute_file_path.display();
|
||||
let start = item.code_location.line_range.start;
|
||||
let end = item.code_location.line_range.end;
|
||||
format!("{path}:{start}-{end}")
|
||||
}
|
||||
|
||||
/// Format a full review findings block as plain text lines.
|
||||
///
|
||||
/// - When `selection` is `Some`, each item line includes a checkbox marker:
|
||||
/// "[x]" for selected items and "[ ]" for unselected. Missing indices
|
||||
/// default to selected.
|
||||
/// - When `selection` is `None`, the marker is omitted and a simple bullet is
|
||||
/// rendered ("- Title — path:start-end").
|
||||
pub fn format_review_findings_block(
|
||||
findings: &[ReviewFinding],
|
||||
selection: Option<&[bool]>,
|
||||
) -> String {
|
||||
let mut lines: Vec<String> = Vec::new();
|
||||
|
||||
// Header
|
||||
let header = if findings.len() > 1 {
|
||||
"Full review comments:"
|
||||
} else {
|
||||
"Review comment:"
|
||||
};
|
||||
lines.push(header.to_string());
|
||||
|
||||
for (idx, item) in findings.iter().enumerate() {
|
||||
lines.push(String::new());
|
||||
|
||||
let title = &item.title;
|
||||
let location = format_location(item);
|
||||
|
||||
if let Some(flags) = selection {
|
||||
// Default to selected if index is out of bounds.
|
||||
let checked = flags.get(idx).copied().unwrap_or(true);
|
||||
let marker = if checked { "[x]" } else { "[ ]" };
|
||||
lines.push(format!("- {marker} {title} — {location}"));
|
||||
} else {
|
||||
lines.push(format!("- {title} — {location}"));
|
||||
}
|
||||
|
||||
for body_line in item.body.lines() {
|
||||
lines.push(format!(" {body_line}"));
|
||||
}
|
||||
}
|
||||
|
||||
lines.join("\n")
|
||||
}
|
||||
@@ -1,9 +1,14 @@
|
||||
use codex_core::CodexAuth;
|
||||
use codex_core::CodexConversation;
|
||||
use codex_core::ContentItem;
|
||||
use codex_core::ConversationManager;
|
||||
use codex_core::ModelProviderInfo;
|
||||
use codex_core::REVIEW_PROMPT;
|
||||
use codex_core::ResponseItem;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::protocol::ConversationPathResponseEvent;
|
||||
use codex_core::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExitedReviewModeEvent;
|
||||
use codex_core::protocol::InputItem;
|
||||
@@ -13,6 +18,8 @@ use codex_core::protocol::ReviewFinding;
|
||||
use codex_core::protocol::ReviewLineRange;
|
||||
use codex_core::protocol::ReviewOutputEvent;
|
||||
use codex_core::protocol::ReviewRequest;
|
||||
use codex_core::protocol::RolloutItem;
|
||||
use codex_core::protocol::RolloutLine;
|
||||
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use core_test_support::load_sse_fixture_with_id_from_str;
|
||||
@@ -115,6 +122,46 @@ async fn review_op_emits_lifecycle_and_review_output() {
|
||||
assert_eq!(expected, review);
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Also verify that a user message with the header and a formatted finding
|
||||
// was recorded back in the parent session's rollout.
|
||||
codex.submit(Op::GetPath).await.unwrap();
|
||||
let history_event =
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::ConversationPath(_))).await;
|
||||
let path = match history_event {
|
||||
EventMsg::ConversationPath(ConversationPathResponseEvent { path, .. }) => path,
|
||||
other => panic!("expected ConversationPath event, got {other:?}"),
|
||||
};
|
||||
let text = std::fs::read_to_string(&path).expect("read rollout file");
|
||||
|
||||
let mut saw_header = false;
|
||||
let mut saw_finding_line = false;
|
||||
for line in text.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let v: serde_json::Value = serde_json::from_str(line).expect("jsonl line");
|
||||
let rl: RolloutLine = serde_json::from_value(v).expect("rollout line");
|
||||
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = rl.item
|
||||
&& role == "user"
|
||||
{
|
||||
for c in content {
|
||||
if let ContentItem::InputText { text } = c {
|
||||
if text.contains("full review output from reviewer model") {
|
||||
saw_header = true;
|
||||
}
|
||||
if text.contains("- Prefer Stylize helpers — /tmp/file.rs:10-20") {
|
||||
saw_finding_line = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
assert!(saw_header, "user header missing from rollout");
|
||||
assert!(
|
||||
saw_finding_line,
|
||||
"formatted finding line missing from rollout"
|
||||
);
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
@@ -419,17 +466,73 @@ async fn review_input_isolated_from_parent_history() {
|
||||
.await;
|
||||
let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Assert the request `input` contains only the single review user message.
|
||||
// Assert the request `input` contains the environment context followed by the review prompt.
|
||||
let request = &server.received_requests().await.unwrap()[0];
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
let expected_input = serde_json::json!([
|
||||
{
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [{"type": "input_text", "text": review_prompt}]
|
||||
let input = body["input"].as_array().expect("input array");
|
||||
assert_eq!(
|
||||
input.len(),
|
||||
2,
|
||||
"expected environment context and review prompt"
|
||||
);
|
||||
|
||||
let env_msg = &input[0];
|
||||
assert_eq!(env_msg["type"].as_str().unwrap(), "message");
|
||||
assert_eq!(env_msg["role"].as_str().unwrap(), "user");
|
||||
let env_text = env_msg["content"][0]["text"].as_str().expect("env text");
|
||||
assert!(
|
||||
env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
|
||||
"environment context must be the first item"
|
||||
);
|
||||
assert!(
|
||||
env_text.contains("<cwd>"),
|
||||
"environment context should include cwd"
|
||||
);
|
||||
|
||||
let review_msg = &input[1];
|
||||
assert_eq!(review_msg["type"].as_str().unwrap(), "message");
|
||||
assert_eq!(review_msg["role"].as_str().unwrap(), "user");
|
||||
assert_eq!(
|
||||
review_msg["content"][0]["text"].as_str().unwrap(),
|
||||
format!("{REVIEW_PROMPT}\n\n---\n\nNow, here's your task: Please review only this",)
|
||||
);
|
||||
|
||||
// Also verify that a user interruption note was recorded in the rollout.
|
||||
codex.submit(Op::GetPath).await.unwrap();
|
||||
let history_event =
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::ConversationPath(_))).await;
|
||||
let path = match history_event {
|
||||
EventMsg::ConversationPath(ConversationPathResponseEvent { path, .. }) => path,
|
||||
other => panic!("expected ConversationPath event, got {other:?}"),
|
||||
};
|
||||
let text = std::fs::read_to_string(&path).expect("read rollout file");
|
||||
let mut saw_interruption_message = false;
|
||||
for line in text.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
]);
|
||||
assert_eq!(body["input"], expected_input);
|
||||
let v: serde_json::Value = serde_json::from_str(line).expect("jsonl line");
|
||||
let rl: RolloutLine = serde_json::from_value(v).expect("rollout line");
|
||||
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = rl.item
|
||||
&& role == "user"
|
||||
{
|
||||
for c in content {
|
||||
if let ContentItem::InputText { text } = c
|
||||
&& text.contains("User initiated a review task, but was interrupted.")
|
||||
{
|
||||
saw_interruption_message = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if saw_interruption_message {
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
saw_interruption_message,
|
||||
"expected user interruption message in rollout"
|
||||
);
|
||||
|
||||
server.verify().await;
|
||||
}
|
||||
|
||||
@@ -1245,7 +1245,14 @@ pub enum TurnAbortReason {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::models::LocalShellAction;
|
||||
use crate::models::LocalShellStatus;
|
||||
use crate::models::ReasoningItemContent;
|
||||
use crate::models::ReasoningItemReasoningSummary;
|
||||
use crate::models::WebSearchAction;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use std::path::PathBuf;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
/// Serialize Event to verify that its JSON representation has the expected
|
||||
@@ -1298,4 +1305,665 @@ mod tests {
|
||||
let deserialized: ExecCommandOutputDeltaEvent = serde_json::from_str(&serialized).unwrap();
|
||||
assert_eq!(deserialized, event);
|
||||
}
|
||||
|
||||
fn parse_rollout_line(value: Value, case: &str) -> RolloutLine {
|
||||
let serialized = serde_json::to_string(&value)
|
||||
.unwrap_or_else(|err| panic!("failed to serialize {case}: {err}"));
|
||||
serde_json::from_str(&serialized)
|
||||
.unwrap_or_else(|err| panic!("failed to parse {case}: {err}"))
|
||||
}
|
||||
|
||||
/// Checks that `session_meta` rollout lines deserialize into
/// `RolloutItem::SessionMeta`, both with and without a `git` section.
#[test]
fn deserialize_rollout_session_meta_lines() {
    let timestamp = "2025-01-02T03:04:05.678Z";
    let conversation_id = uuid::uuid!("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa");

    // Both fixtures share the same outer line; only the payload differs.
    let session_meta_line = |payload: Value| {
        json!({
            "timestamp": timestamp,
            "type": "session_meta",
            "payload": payload
        })
    };

    let cases: Vec<(&str, Value)> = vec![
        (
            "with_git",
            session_meta_line(json!({
                "id": conversation_id,
                "timestamp": timestamp,
                "cwd": "/workspace",
                "originator": "codex-cli",
                "cli_version": "1.0.0",
                "instructions": "Remember the tests",
                "git": {
                    "commit_hash": "abc123",
                    "branch": "main",
                    "repository_url": "https://example.com/repo.git"
                }
            })),
        ),
        (
            "without_git",
            session_meta_line(json!({
                "id": conversation_id,
                "timestamp": timestamp,
                "cwd": "/workspace",
                "originator": "codex-cli",
                "cli_version": "1.0.0",
                "instructions": null
            })),
        ),
    ];

    for (case, value) in cases {
        let parsed = parse_rollout_line(value, case);
        assert_eq!(parsed.timestamp, timestamp);

        let session_meta = match parsed.item {
            RolloutItem::SessionMeta(session_meta) => session_meta,
            other => panic!("case {case} parsed as unexpected item {other:?}"),
        };

        // Fields common to both fixtures.
        assert_eq!(session_meta.meta.id, ConversationId(conversation_id));
        assert_eq!(session_meta.meta.cli_version, "1.0.0");
        assert_eq!(session_meta.meta.originator, "codex-cli");
        assert_eq!(session_meta.meta.cwd, PathBuf::from("/workspace"));
        assert_eq!(session_meta.meta.timestamp, timestamp);

        // Fixture-specific expectations.
        if case == "with_git" {
            assert_eq!(
                session_meta.meta.instructions.as_deref(),
                Some("Remember the tests")
            );
            let git = session_meta.git.expect("expected git info");
            assert_eq!(git.commit_hash.as_deref(), Some("abc123"));
            assert_eq!(git.branch.as_deref(), Some("main"));
            assert_eq!(
                git.repository_url.as_deref(),
                Some("https://example.com/repo.git")
            );
        } else if case == "without_git" {
            assert!(session_meta.meta.instructions.is_none());
            assert!(session_meta.git.is_none());
        } else {
            unreachable!();
        }
    }
}
|
||||
|
||||
/// Checks that each `response_item` rollout payload shape deserializes into
/// the matching `ResponseItem` variant, and that an unrecognized payload type
/// deserializes as `ResponseItem::Other`.
#[test]
#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]
fn deserialize_rollout_response_item_lines() {
    let timestamp = "2025-01-02T03:04:05.678Z";

    // Every fixture shares the same outer line; only the payload differs.
    let response_item_line = |payload: Value| {
        json!({
            "timestamp": timestamp,
            "type": "response_item",
            "payload": payload
        })
    };

    let cases: Vec<(&str, Value)> = vec![
        (
            "message",
            response_item_line(json!({
                "type": "message",
                "id": "legacy-message",
                "role": "assistant",
                "content": [
                    { "type": "output_text", "text": "Hello from assistant" }
                ]
            })),
        ),
        (
            "reasoning",
            response_item_line(json!({
                "type": "reasoning",
                "id": "reasoning-1",
                "summary": [
                    { "type": "summary_text", "text": "Summarized thoughts" }
                ],
                "content": [
                    { "type": "reasoning_text", "text": "Detailed reasoning" }
                ],
                "encrypted_content": "encrypted"
            })),
        ),
        (
            "local_shell_call",
            response_item_line(json!({
                "type": "local_shell_call",
                "id": "legacy-shell-call",
                "call_id": "shell-call-1",
                "status": "completed",
                "action": {
                    "type": "exec",
                    "command": ["ls", "-la"],
                    "timeout_ms": 1200,
                    "working_directory": "/workspace",
                    "env": { "PATH": "/usr/bin" },
                    "user": "codex"
                }
            })),
        ),
        (
            "function_call",
            response_item_line(json!({
                "type": "function_call",
                "id": "legacy-function",
                "name": "shell",
                "arguments": "{\"command\":[\"echo\",\"hi\"]}",
                "call_id": "call-123"
            })),
        ),
        (
            "function_call_output",
            response_item_line(json!({
                "type": "function_call_output",
                "call_id": "call-123",
                "output": "{\"stdout\":\"done\"}"
            })),
        ),
        (
            "custom_tool_call",
            response_item_line(json!({
                "type": "custom_tool_call",
                "id": "legacy-tool",
                "status": "completed",
                "call_id": "tool-456",
                "name": "my_tool",
                "input": "{\"foo\":1}"
            })),
        ),
        (
            "custom_tool_call_output",
            response_item_line(json!({
                "type": "custom_tool_call_output",
                "call_id": "tool-456",
                "output": "tool finished"
            })),
        ),
        (
            "web_search_call",
            response_item_line(json!({
                "type": "web_search_call",
                "id": "legacy-search",
                "status": "completed",
                "action": {
                    "type": "search",
                    "query": "weather in SF"
                }
            })),
        ),
        (
            "other",
            response_item_line(json!({
                "type": "new_future_item",
                "foo": "bar"
            })),
        ),
    ];

    for (case, value) in cases {
        let parsed = parse_rollout_line(value, case);
        assert_eq!(parsed.timestamp, timestamp);

        // Pair the case label with the parsed item so a payload that lands in
        // the wrong variant falls through to the final panic arm.
        match (case, parsed.item) {
            (
                "message",
                RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }),
            ) => {
                assert_eq!(role, "assistant");
                assert_eq!(content.len(), 1);
                match &content[0] {
                    ContentItem::OutputText { text } => {
                        assert_eq!(text, "Hello from assistant");
                    }
                    _ => panic!("unexpected content variant in message case"),
                }
            }
            (
                "reasoning",
                RolloutItem::ResponseItem(ResponseItem::Reasoning {
                    summary,
                    content,
                    encrypted_content,
                    ..
                }),
            ) => {
                assert_eq!(summary.len(), 1);
                match &summary[0] {
                    ReasoningItemReasoningSummary::SummaryText { text } => {
                        assert_eq!(text, "Summarized thoughts");
                    }
                    other => panic!("unexpected summary variant: {other:?}"),
                }
                let reasoning_content = content.expect("expected reasoning content");
                assert_eq!(reasoning_content.len(), 1);
                match &reasoning_content[0] {
                    ReasoningItemContent::ReasoningText { text } => {
                        assert_eq!(text, "Detailed reasoning");
                    }
                    _ => panic!("unexpected reasoning content variant"),
                }
                assert_eq!(encrypted_content.as_deref(), Some("encrypted"));
            }
            (
                "local_shell_call",
                RolloutItem::ResponseItem(ResponseItem::LocalShellCall {
                    call_id,
                    status,
                    action,
                    ..
                }),
            ) => {
                assert_eq!(call_id.as_deref(), Some("shell-call-1"));
                assert!(matches!(status, LocalShellStatus::Completed));
                match action {
                    LocalShellAction::Exec(exec) => {
                        assert_eq!(exec.command, vec!["ls", "-la"]);
                        assert_eq!(exec.timeout_ms, Some(1200));
                        assert_eq!(exec.working_directory.as_deref(), Some("/workspace"));
                        let env = exec.env.expect("expected env map");
                        assert_eq!(env.get("PATH"), Some(&"/usr/bin".to_string()));
                        assert_eq!(exec.user.as_deref(), Some("codex"));
                    }
                }
            }
            (
                "function_call",
                RolloutItem::ResponseItem(ResponseItem::FunctionCall {
                    name,
                    arguments,
                    call_id,
                    ..
                }),
            ) => {
                assert_eq!(name, "shell");
                assert_eq!(arguments, "{\"command\":[\"echo\",\"hi\"]}");
                assert_eq!(call_id, "call-123");
            }
            (
                "function_call_output",
                RolloutItem::ResponseItem(ResponseItem::FunctionCallOutput { output, .. }),
            ) => {
                assert_eq!(output.content, "{\"stdout\":\"done\"}");
                assert!(output.success.is_none());
            }
            (
                "custom_tool_call",
                RolloutItem::ResponseItem(ResponseItem::CustomToolCall {
                    status,
                    call_id,
                    name,
                    input,
                    ..
                }),
            ) => {
                assert_eq!(status.as_deref(), Some("completed"));
                assert_eq!(call_id, "tool-456");
                assert_eq!(name, "my_tool");
                assert_eq!(input, "{\"foo\":1}");
            }
            (
                "custom_tool_call_output",
                RolloutItem::ResponseItem(ResponseItem::CustomToolCallOutput { output, .. }),
            ) => {
                assert_eq!(output, "tool finished");
            }
            (
                "web_search_call",
                RolloutItem::ResponseItem(ResponseItem::WebSearchCall { status, action, .. }),
            ) => {
                assert_eq!(status.as_deref(), Some("completed"));
                match action {
                    WebSearchAction::Search { query } => {
                        assert_eq!(query, "weather in SF");
                    }
                    WebSearchAction::Other => panic!("unexpected web search action variant"),
                }
            }
            // The unknown payload type is expected to map to `Other`.
            ("other", RolloutItem::ResponseItem(ResponseItem::Other)) => {}
            (case, item) => panic!("case {case} returned unexpected item {item:?}"),
        }
    }
}
|
||||
|
||||
/// Checks that each `event_msg` rollout line fixture parses into the expected
/// `EventMsg` variant with the expected field values.
#[test]
#[allow(clippy::too_many_lines, clippy::cognitive_complexity)]
fn deserialize_rollout_event_msg_lines() {
    let timestamp = "2025-01-02T03:04:05.678Z";

    // Every fixture shares the same envelope; only the payload varies.
    let event_line = |payload: Value| {
        json!({
            "timestamp": timestamp,
            "type": "event_msg",
            "payload": payload
        })
    };

    let cases: Vec<(&str, Value)> = vec![
        (
            "user_message",
            event_line(json!({
                "type": "user_message",
                "message": "Please help",
                "kind": "plain",
                "images": ["data:image/png;base64,AAA"]
            })),
        ),
        (
            "agent_message",
            event_line(json!({
                "type": "agent_message",
                "message": "Sure thing"
            })),
        ),
        (
            "agent_reasoning",
            event_line(json!({
                "type": "agent_reasoning",
                "text": "Thinking..."
            })),
        ),
        (
            "agent_reasoning_raw_content",
            event_line(json!({
                "type": "agent_reasoning_raw_content",
                "text": "raw reasoning"
            })),
        ),
        (
            "token_count_info",
            event_line(json!({
                "type": "token_count",
                "info": {
                    "total_token_usage": {
                        "input_tokens": 120,
                        "cached_input_tokens": 10,
                        "output_tokens": 30,
                        "reasoning_output_tokens": 5,
                        "total_tokens": 165
                    },
                    "last_token_usage": {
                        "input_tokens": 20,
                        "cached_input_tokens": 0,
                        "output_tokens": 15,
                        "reasoning_output_tokens": 5,
                        "total_tokens": 40
                    },
                    "model_context_window": 16000
                }
            })),
        ),
        (
            "token_count_none",
            event_line(json!({
                "type": "token_count",
                "info": null
            })),
        ),
        (
            "entered_review_mode",
            event_line(json!({
                "type": "entered_review_mode",
                "prompt": "Need review",
                "user_facing_hint": "double-check work"
            })),
        ),
        (
            "exited_review_mode",
            event_line(json!({
                "type": "exited_review_mode",
                "review_output": {
                    "findings": [
                        {
                            "title": "Bug",
                            "body": "Found an issue",
                            "confidence_score": 0.4,
                            "priority": 1,
                            "code_location": {
                                "absolute_file_path": "/workspace/src/lib.rs",
                                "line_range": { "start": 1, "end": 3 }
                            }
                        }
                    ],
                    "overall_correctness": "needs_changes",
                    "overall_explanation": "Please fix",
                    "overall_confidence_score": 0.9
                }
            })),
        ),
        (
            "turn_aborted",
            event_line(json!({
                "type": "turn_aborted",
                "reason": "interrupted"
            })),
        ),
    ];

    for (case, value) in cases {
        let parsed = parse_rollout_line(value, case);
        assert_eq!(parsed.timestamp, timestamp);

        // Pair the case label with the parsed item so a fixture that parses
        // into the wrong variant falls through to the panic arm.
        match (case, parsed.item) {
            ("user_message", RolloutItem::EventMsg(EventMsg::UserMessage(user))) => {
                assert_eq!(user.message, "Please help");
                assert!(matches!(user.kind, Some(InputMessageKind::Plain)));
                let images = user.images.expect("expected images");
                assert_eq!(images, vec!["data:image/png;base64,AAA".to_string()]);
            }
            ("agent_message", RolloutItem::EventMsg(EventMsg::AgentMessage(agent))) => {
                assert_eq!(agent.message, "Sure thing");
            }
            ("agent_reasoning", RolloutItem::EventMsg(EventMsg::AgentReasoning(reasoning))) => {
                assert_eq!(reasoning.text, "Thinking...");
            }
            (
                "agent_reasoning_raw_content",
                RolloutItem::EventMsg(EventMsg::AgentReasoningRawContent(raw)),
            ) => {
                assert_eq!(raw.text, "raw reasoning");
            }
            ("token_count_info", RolloutItem::EventMsg(EventMsg::TokenCount(count))) => {
                let info = count.info.expect("expected token info");
                let total = &info.total_token_usage;
                assert_eq!(total.input_tokens, 120);
                assert_eq!(total.cached_input_tokens, 10);
                assert_eq!(total.output_tokens, 30);
                assert_eq!(total.reasoning_output_tokens, 5);
                assert_eq!(total.total_tokens, 165);
                assert_eq!(info.last_token_usage.output_tokens, 15);
                assert_eq!(info.model_context_window, Some(16000));
            }
            ("token_count_none", RolloutItem::EventMsg(EventMsg::TokenCount(count))) => {
                assert!(count.info.is_none());
            }
            (
                "entered_review_mode",
                RolloutItem::EventMsg(EventMsg::EnteredReviewMode(review)),
            ) => {
                assert_eq!(review.prompt, "Need review");
                assert_eq!(review.user_facing_hint, "double-check work");
            }
            (
                "exited_review_mode",
                RolloutItem::EventMsg(EventMsg::ExitedReviewMode(exited)),
            ) => {
                let output = exited.review_output.expect("expected review output");
                assert_eq!(output.findings.len(), 1);

                let finding = &output.findings[0];
                assert_eq!(finding.title, "Bug");
                assert_eq!(finding.body, "Found an issue");
                assert_eq!(finding.confidence_score, 0.4);
                assert_eq!(finding.priority, 1);

                let location = &finding.code_location;
                assert_eq!(
                    location.absolute_file_path,
                    PathBuf::from("/workspace/src/lib.rs")
                );
                assert_eq!(location.line_range.start, 1);
                assert_eq!(location.line_range.end, 3);

                assert_eq!(output.overall_correctness, "needs_changes");
                assert_eq!(output.overall_explanation, "Please fix");
                assert_eq!(output.overall_confidence_score, 0.9);
            }
            ("turn_aborted", RolloutItem::EventMsg(EventMsg::TurnAborted(aborted))) => {
                assert_eq!(aborted.reason, TurnAbortReason::Interrupted);
            }
            (case, item) => panic!("case {case} returned unexpected item {item:?}"),
        }
    }
}
|
||||
|
||||
/// Checks that `compacted` and `turn_context` rollout line fixtures parse into
/// the expected `RolloutItem` variants with the expected field values.
#[test]
#[allow(clippy::too_many_lines)]
fn deserialize_rollout_misc_lines() {
    let timestamp = "2025-01-02T03:04:05.678Z";

    // Wraps a payload in the common line envelope for the given line type.
    let rollout_line = |kind: &str, payload: Value| {
        json!({
            "timestamp": timestamp,
            "type": kind,
            "payload": payload
        })
    };

    let cases: Vec<(&str, Value)> = vec![
        (
            "compacted",
            rollout_line(
                "compacted",
                json!({
                    "message": "Turn summary"
                }),
            ),
        ),
        (
            "turn_context_workspace",
            rollout_line(
                "turn_context",
                json!({
                    "cwd": "/workspace",
                    "approval_policy": "on-request",
                    "sandbox_policy": {
                        "mode": "workspace-write",
                        "writable_roots": ["/workspace/tmp"],
                        "network_access": true,
                        "exclude_tmpdir_env_var": false,
                        "exclude_slash_tmp": true
                    },
                    "model": "gpt-5",
                    "effort": "high",
                    "summary": "detailed"
                }),
            ),
        ),
        (
            "turn_context_read_only",
            rollout_line(
                "turn_context",
                json!({
                    "cwd": "/workspace",
                    "approval_policy": "never",
                    "sandbox_policy": {
                        "mode": "read-only"
                    },
                    "model": "gpt-5",
                    "summary": "auto"
                }),
            ),
        ),
    ];

    for (case, value) in cases {
        let parsed = parse_rollout_line(value, case);
        assert_eq!(parsed.timestamp, timestamp);

        match (case, parsed.item) {
            ("compacted", RolloutItem::Compacted(CompactedItem { message })) => {
                assert_eq!(message, "Turn summary");
            }
            ("turn_context_workspace", RolloutItem::TurnContext(ctx)) => {
                assert_eq!(ctx.cwd, PathBuf::from("/workspace"));
                assert_eq!(ctx.approval_policy, AskForApproval::OnRequest);
                match ctx.sandbox_policy {
                    SandboxPolicy::WorkspaceWrite {
                        writable_roots,
                        network_access,
                        exclude_tmpdir_env_var,
                        exclude_slash_tmp,
                    } => {
                        assert_eq!(writable_roots, vec![PathBuf::from("/workspace/tmp")]);
                        assert!(network_access);
                        assert!(!exclude_tmpdir_env_var);
                        assert!(exclude_slash_tmp);
                    }
                    other => panic!("expected workspace-write sandbox policy, got {other:?}"),
                }
                assert_eq!(ctx.model, "gpt-5");
                assert_eq!(ctx.effort, Some(ReasoningEffortConfig::High));
                assert_eq!(ctx.summary, ReasoningSummaryConfig::Detailed);
            }
            ("turn_context_read_only", RolloutItem::TurnContext(ctx)) => {
                assert_eq!(ctx.cwd, PathBuf::from("/workspace"));
                assert_eq!(ctx.approval_policy, AskForApproval::Never);
                // The fixture omits "effort", so it must come back as `None`.
                assert!(ctx.effort.is_none());
                assert_eq!(ctx.summary, ReasoningSummaryConfig::Auto);
                match ctx.sandbox_policy {
                    SandboxPolicy::ReadOnly => {}
                    other => panic!("expected read-only sandbox policy, got {other:?}"),
                }
            }
            (case, item) => panic!("case {case} returned unexpected item {item:?}"),
        }
    }
}
|
||||
}
|
||||
|
||||
@@ -226,12 +226,11 @@ impl ChatWidget {
|
||||
// At the end of a reasoning block, record transcript-only content.
|
||||
self.full_reasoning_buffer.push_str(&self.reasoning_buffer);
|
||||
if !self.full_reasoning_buffer.is_empty() {
|
||||
for cell in history_cell::new_reasoning_summary_block(
|
||||
let cell = history_cell::new_reasoning_summary_block(
|
||||
self.full_reasoning_buffer.clone(),
|
||||
&self.config,
|
||||
) {
|
||||
self.add_boxed_history(cell);
|
||||
}
|
||||
);
|
||||
self.add_boxed_history(cell);
|
||||
}
|
||||
self.reasoning_buffer.clear();
|
||||
self.full_reasoning_buffer.clear();
|
||||
|
||||
@@ -121,6 +121,45 @@ impl HistoryCell for UserHistoryCell {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ReasoningSummaryCell {
|
||||
_header: Vec<Line<'static>>,
|
||||
content: Vec<Line<'static>>,
|
||||
}
|
||||
|
||||
impl ReasoningSummaryCell {
|
||||
pub(crate) fn new(header: Vec<Line<'static>>, content: Vec<Line<'static>>) -> Self {
|
||||
Self {
|
||||
_header: header,
|
||||
content,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HistoryCell for ReasoningSummaryCell {
|
||||
fn display_lines(&self, width: u16) -> Vec<Line<'static>> {
|
||||
let summary_lines = self
|
||||
.content
|
||||
.iter()
|
||||
.map(|l| l.clone().dim().italic())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
word_wrap_lines(
|
||||
&summary_lines,
|
||||
RtOptions::new(width as usize)
|
||||
.initial_indent("• ".into())
|
||||
.subsequent_indent(" ".into()),
|
||||
)
|
||||
}
|
||||
|
||||
fn transcript_lines(&self) -> Vec<Line<'static>> {
|
||||
let mut out: Vec<Line<'static>> = Vec::new();
|
||||
out.push("thinking".magenta().bold().into());
|
||||
out.extend(self.content.clone());
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct AgentMessageCell {
|
||||
lines: Vec<Line<'static>>,
|
||||
@@ -1417,7 +1456,7 @@ pub(crate) fn new_reasoning_block(
|
||||
pub(crate) fn new_reasoning_summary_block(
|
||||
full_reasoning_buffer: String,
|
||||
config: &Config,
|
||||
) -> Vec<Box<dyn HistoryCell>> {
|
||||
) -> Box<dyn HistoryCell> {
|
||||
if config.model_family.reasoning_summary_format == ReasoningSummaryFormat::Experimental {
|
||||
// Experimental format is following:
|
||||
// ** header **
|
||||
@@ -1434,27 +1473,19 @@ pub(crate) fn new_reasoning_summary_block(
|
||||
// then we don't have a summary to inject into history
|
||||
if after_close_idx < full_reasoning_buffer.len() {
|
||||
let header_buffer = full_reasoning_buffer[..after_close_idx].to_string();
|
||||
let summary_buffer = full_reasoning_buffer[after_close_idx..].to_string();
|
||||
|
||||
let mut header_lines: Vec<Line<'static>> = Vec::new();
|
||||
header_lines.push(Line::from("Thinking".magenta().italic()));
|
||||
let mut header_lines = Vec::new();
|
||||
append_markdown(&header_buffer, &mut header_lines, config);
|
||||
|
||||
let mut summary_lines: Vec<Line<'static>> = Vec::new();
|
||||
summary_lines.push(Line::from("Thinking".magenta().bold()));
|
||||
let summary_buffer = full_reasoning_buffer[after_close_idx..].to_string();
|
||||
let mut summary_lines = Vec::new();
|
||||
append_markdown(&summary_buffer, &mut summary_lines, config);
|
||||
|
||||
return vec![
|
||||
Box::new(TranscriptOnlyHistoryCell {
|
||||
lines: header_lines,
|
||||
}),
|
||||
Box::new(AgentMessageCell::new(summary_lines, true)),
|
||||
];
|
||||
return Box::new(ReasoningSummaryCell::new(header_lines, summary_lines));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
vec![Box::new(new_reasoning_block(full_reasoning_buffer, config))]
|
||||
Box::new(new_reasoning_block(full_reasoning_buffer, config))
|
||||
}
|
||||
|
||||
struct OutputLinesParams {
|
||||
@@ -1558,6 +1589,7 @@ mod tests {
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::config::ConfigToml;
|
||||
use dirs::home_dir;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn test_config() -> Config {
|
||||
Config::load_from_base_config_with_overrides(
|
||||
@@ -2076,17 +2108,35 @@ mod tests {
|
||||
let rendered = render_lines(&lines).join("\n");
|
||||
insta::assert_snapshot!(rendered);
|
||||
}
|
||||
/// With the experimental summary format, a `**header**` followed by body text
/// yields a cell whose display and transcript contain only the body text.
#[test]
fn reasoning_summary_block() {
    let config = {
        let mut cfg = test_config();
        cfg.model_family.reasoning_summary_format = ReasoningSummaryFormat::Experimental;
        cfg
    };

    let buffer = "**High level reasoning**\n\nDetailed reasoning goes here.".to_string();
    let cell = new_reasoning_summary_block(buffer, &config);

    // On-screen rendering: bullet-prefixed summary, header omitted.
    let display = render_lines(&cell.display_lines(80));
    assert_eq!(display, vec!["• Detailed reasoning goes here."]);

    // Transcript rendering: a "thinking" label line, then the summary.
    let transcript = render_transcript(cell.as_ref());
    assert_eq!(
        transcript,
        vec!["thinking", "Detailed reasoning goes here."]
    );
}
|
||||
|
||||
#[test]
|
||||
fn reasoning_summary_block_returns_reasoning_cell_when_feature_disabled() {
|
||||
let mut config = test_config();
|
||||
config.model_family.reasoning_summary_format = ReasoningSummaryFormat::Experimental;
|
||||
|
||||
let cells =
|
||||
let cell =
|
||||
new_reasoning_summary_block("Detailed reasoning goes here.".to_string(), &config);
|
||||
|
||||
assert_eq!(cells.len(), 1);
|
||||
let rendered = render_transcript(cells[0].as_ref());
|
||||
let rendered = render_transcript(cell.as_ref());
|
||||
assert_eq!(rendered, vec!["thinking", "Detailed reasoning goes here."]);
|
||||
}
|
||||
|
||||
@@ -2095,13 +2145,12 @@ mod tests {
|
||||
let mut config = test_config();
|
||||
config.model_family.reasoning_summary_format = ReasoningSummaryFormat::Experimental;
|
||||
|
||||
let cells = new_reasoning_summary_block(
|
||||
let cell = new_reasoning_summary_block(
|
||||
"**High level reasoning without closing".to_string(),
|
||||
&config,
|
||||
);
|
||||
|
||||
assert_eq!(cells.len(), 1);
|
||||
let rendered = render_transcript(cells[0].as_ref());
|
||||
let rendered = render_transcript(cell.as_ref());
|
||||
assert_eq!(
|
||||
rendered,
|
||||
vec!["thinking", "**High level reasoning without closing"]
|
||||
@@ -2113,25 +2162,23 @@ mod tests {
|
||||
let mut config = test_config();
|
||||
config.model_family.reasoning_summary_format = ReasoningSummaryFormat::Experimental;
|
||||
|
||||
let cells = new_reasoning_summary_block(
|
||||
let cell = new_reasoning_summary_block(
|
||||
"**High level reasoning without closing**".to_string(),
|
||||
&config,
|
||||
);
|
||||
|
||||
assert_eq!(cells.len(), 1);
|
||||
let rendered = render_transcript(cells[0].as_ref());
|
||||
let rendered = render_transcript(cell.as_ref());
|
||||
assert_eq!(
|
||||
rendered,
|
||||
vec!["thinking", "High level reasoning without closing"]
|
||||
);
|
||||
|
||||
let cells = new_reasoning_summary_block(
|
||||
let cell = new_reasoning_summary_block(
|
||||
"**High level reasoning without closing**\n\n ".to_string(),
|
||||
&config,
|
||||
);
|
||||
|
||||
assert_eq!(cells.len(), 1);
|
||||
let rendered = render_transcript(cells[0].as_ref());
|
||||
let rendered = render_transcript(cell.as_ref());
|
||||
assert_eq!(
|
||||
rendered,
|
||||
vec!["thinking", "High level reasoning without closing"]
|
||||
@@ -2143,21 +2190,18 @@ mod tests {
|
||||
let mut config = test_config();
|
||||
config.model_family.reasoning_summary_format = ReasoningSummaryFormat::Experimental;
|
||||
|
||||
let cells = new_reasoning_summary_block(
|
||||
let cell = new_reasoning_summary_block(
|
||||
"**High level plan**\n\nWe should fix the bug next.".to_string(),
|
||||
&config,
|
||||
);
|
||||
|
||||
assert_eq!(cells.len(), 2);
|
||||
|
||||
let header_lines = render_transcript(cells[0].as_ref());
|
||||
assert_eq!(header_lines, vec!["Thinking", "High level plan"]);
|
||||
|
||||
let summary_lines = render_transcript(cells[1].as_ref());
|
||||
let rendered_display = render_lines(&cell.display_lines(80));
|
||||
assert_eq!(rendered_display, vec!["• We should fix the bug next."]);
|
||||
|
||||
let rendered_transcript = render_transcript(cell.as_ref());
|
||||
assert_eq!(
|
||||
summary_lines,
|
||||
vec!["codex", "Thinking", "We should fix the bug next."]
|
||||
)
|
||||
rendered_transcript,
|
||||
vec!["thinking", "We should fix the bug next."]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -333,11 +333,11 @@ mod tests {
|
||||
);
|
||||
for (i, l) in non_blank.iter().enumerate() {
|
||||
assert_eq!(
|
||||
l.style.fg,
|
||||
l.spans[0].style.fg,
|
||||
Some(Color::Green),
|
||||
"wrapped line {} should preserve green style, got {:?}",
|
||||
i,
|
||||
l.style.fg
|
||||
l.spans[0].style.fg
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -187,7 +187,6 @@ where
|
||||
|
||||
// Build first wrapped line with initial indent.
|
||||
let mut first_line = rt_opts.initial_indent.clone();
|
||||
first_line.style = first_line.style.patch(line.style);
|
||||
{
|
||||
let sliced = slice_line_spans(line, &span_bounds, first_line_range);
|
||||
let mut spans = first_line.spans;
|
||||
@@ -216,7 +215,6 @@ where
|
||||
continue;
|
||||
}
|
||||
let mut subsequent_line = rt_opts.subsequent_indent.clone();
|
||||
subsequent_line.style = subsequent_line.style.patch(line.style);
|
||||
let offset_range = (r.start + base)..(r.end + base);
|
||||
let sliced = slice_line_spans(line, &span_bounds, &offset_range);
|
||||
let mut spans = subsequent_line.spans;
|
||||
|
||||
@@ -30,14 +30,7 @@ When the workflow finishes, the GitHub Release is "done," but you still have to
|
||||
|
||||
## Publishing to npm
|
||||
|
||||
After the GitHub Release is done, you can publish to npm. Note the GitHub Release includes the appropriate artifact for npm (which is the output of `npm pack`), which should be named `codex-npm-VERSION.tgz`. To publish to npm, run:
|
||||
|
||||
```
|
||||
VERSION=0.21.0
|
||||
./scripts/publish_to_npm.py "$VERSION"
|
||||
```
|
||||
|
||||
Note that you must have permissions to publish to https://www.npmjs.com/package/@openai/codex for this to succeed.
|
||||
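The `publish-npm` job in the `rust-release` GitHub Actions workflow (`.github/workflows/rust-release.yml`) publishes to npm automatically once the GitHub Release is done; pre-release versions (those containing a `-`, such as alpha/beta) are skipped.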
|
||||
|
||||
## Publishing to Homebrew
|
||||
|
||||
|
||||
@@ -1,118 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Download a release artifact for the npm package and publish it.
|
||||
|
||||
Given a release version like `0.20.0`, this script:
|
||||
- Downloads the `codex-npm-<version>.tgz` asset from the GitHub release
|
||||
tagged `rust-v<version>` in the `openai/codex` repository using `gh`.
|
||||
- Runs `npm publish` on the downloaded tarball to publish `@openai/codex`.
|
||||
|
||||
Flags:
|
||||
- `--dry-run` delegates to `npm publish --dry-run`. The artifact is still
|
||||
downloaded so npm can inspect the archive contents without publishing.
|
||||
|
||||
Requirements:
|
||||
- GitHub CLI (`gh`) must be installed and authenticated to access the repo.
|
||||
- npm must be logged in with an account authorized to publish
|
||||
`@openai/codex`. This may trigger a browser for 2FA.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def run_checked(cmd: list[str], cwd: Path | None = None) -> None:
|
||||
"""Run a subprocess command and raise if it fails."""
|
||||
proc = subprocess.run(cmd, cwd=str(cwd) if cwd else None)
|
||||
proc.check_returncode()
|
||||
|
||||
|
||||
def main() -> int:
    """Download the npm tarball for a release and run `npm publish` on it.

    Command-line arguments:
        version: release version such as ``0.20.0``; one leading ``v`` is
            tolerated and removed.
        --dir: directory to download into; a temporary directory is created
            (and removed before returning) when omitted.
        -n/--dry-run: pass ``--dry-run`` to ``npm publish``. The artifact is
            still downloaded.

    Returns:
        0 on success, 1 if the expected artifact is missing after download.

    Raises:
        subprocess.CalledProcessError: if ``gh`` or ``npm`` exits non-zero.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Download the npm release artifact for a given version and publish it."
        )
    )
    parser.add_argument(
        "version",
        help="Release version to publish, e.g. 0.20.0 (without the 'v' prefix)",
    )
    parser.add_argument(
        "--dir",
        type=Path,
        help=(
            "Optional directory to download the artifact into. Defaults to a temporary directory."
        ),
    )
    parser.add_argument(
        "-n",
        "--dry-run",
        action="store_true",
        help="Delegate to `npm publish --dry-run` (still downloads the artifact).",
    )
    args = parser.parse_args()

    # Remove exactly one leading "v"; lstrip("v") would strip every leading
    # "v" character instead of a single prefix.
    version: str = args.version.removeprefix("v")
    tag = f"rust-v{version}"
    asset_name = f"codex-npm-{version}.tgz"

    # Only create a temporary directory when the caller did not supply one,
    # and clean it up deterministically in the `finally` below.
    temp_dir = tempfile.TemporaryDirectory() if args.dir is None else None
    try:
        download_dir: Path = Path(temp_dir.name) if temp_dir is not None else args.dir
        download_dir.mkdir(parents=True, exist_ok=True)

        # 1) Download the artifact using gh
        repo = "openai/codex"
        gh_cmd = [
            "gh",
            "release",
            "download",
            tag,
            "--repo",
            repo,
            "--pattern",
            asset_name,
            "--dir",
            str(download_dir),
        ]
        print(f"Downloading {asset_name} from {repo}@{tag} into {download_dir}...")
        # Even in --dry-run we download so npm can inspect the tarball.
        run_checked(gh_cmd)

        # npm needs the tarball in dry-run mode too, so verify it either way
        # rather than letting npm fail later with a less specific error.
        artifact_path = download_dir / asset_name
        if not artifact_path.is_file():
            print(
                f"Error: expected artifact not found after download: {artifact_path}",
                file=sys.stderr,
            )
            return 1

        # 2) Publish to npm
        npm_cmd = ["npm", "publish"]
        if args.dry_run:
            npm_cmd.append("--dry-run")
        npm_cmd.append(str(artifact_path))

        # Ensure CI is unset so npm can open a browser for 2FA if needed.
        # pop() with a default also removes an empty-but-present CI variable.
        env = os.environ.copy()
        env.pop("CI", None)

        print("Running:", " ".join(npm_cmd))
        subprocess.run(npm_cmd, env=env, check=True)

        print("Publish complete.")
        return 0
    finally:
        if temp_dir is not None:
            temp_dir.cleanup()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user