mirror of
https://github.com/openai/codex.git
synced 2026-03-02 20:53:19 +00:00
Compare commits
6 Commits
fix/notify
...
nm/eval-ca
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ed7110c437 | ||
|
|
e639300f79 | ||
|
|
8f25853583 | ||
|
|
8e551617d4 | ||
|
|
049c395736 | ||
|
|
602e5fca05 |
15
codex-rs/Cargo.lock
generated
15
codex-rs/Cargo.lock
generated
@@ -1000,6 +1000,7 @@ dependencies = [
|
||||
"codex-backend-client",
|
||||
"codex-common",
|
||||
"codex-core",
|
||||
"codex-eval-case",
|
||||
"codex-feedback",
|
||||
"codex-file-search",
|
||||
"codex-login",
|
||||
@@ -1350,6 +1351,19 @@ dependencies = [
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-eval-case"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"pretty_assertions",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"time",
|
||||
"uuid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "codex-exec"
|
||||
version = "0.0.0"
|
||||
@@ -1724,6 +1738,7 @@ dependencies = [
|
||||
"codex-backend-client",
|
||||
"codex-common",
|
||||
"codex-core",
|
||||
"codex-eval-case",
|
||||
"codex-feedback",
|
||||
"codex-file-search",
|
||||
"codex-login",
|
||||
|
||||
@@ -19,6 +19,7 @@ members = [
|
||||
"exec-server",
|
||||
"execpolicy",
|
||||
"execpolicy-legacy",
|
||||
"eval-case",
|
||||
"keyring-store",
|
||||
"file-search",
|
||||
"linux-sandbox",
|
||||
@@ -75,6 +76,7 @@ codex-common = { path = "common" }
|
||||
codex-core = { path = "core" }
|
||||
codex-exec = { path = "exec" }
|
||||
codex-execpolicy = { path = "execpolicy" }
|
||||
codex-eval-case = { path = "eval-case" }
|
||||
codex-feedback = { path = "feedback" }
|
||||
codex-file-search = { path = "file-search" }
|
||||
codex-git = { path = "utils/git" }
|
||||
|
||||
@@ -174,6 +174,11 @@ client_request_definitions! {
|
||||
response: v2::FeedbackUploadResponse,
|
||||
},
|
||||
|
||||
EvalCaseCreate => "evalCase/create" {
|
||||
params: v2::EvalCaseCreateParams,
|
||||
response: v2::EvalCaseCreateResponse,
|
||||
},
|
||||
|
||||
/// Execute a command (argv vector) under the server's sandbox.
|
||||
OneOffCommandExec => "command/exec" {
|
||||
params: v2::CommandExecParams,
|
||||
|
||||
@@ -930,6 +930,50 @@ pub struct FeedbackUploadResponse {
|
||||
pub thread_id: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub enum EvalCaseStartMarkerKind {
|
||||
RolloutLineTimestamp,
|
||||
RolloutLineIndex,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(untagged)]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub enum EvalCaseStartMarkerValue {
|
||||
Timestamp(String),
|
||||
LineIndex(u64),
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct EvalCaseStartMarker {
|
||||
pub kind: EvalCaseStartMarkerKind,
|
||||
pub value: EvalCaseStartMarkerValue,
|
||||
pub display: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct EvalCaseCreateParams {
|
||||
pub thread_id: String,
|
||||
pub start: EvalCaseStartMarker,
|
||||
pub what_went_wrong: String,
|
||||
pub what_good_looks_like: String,
|
||||
pub include_logs: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct EvalCaseCreateResponse {
|
||||
pub case_id: String,
|
||||
pub path: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
|
||||
@@ -21,6 +21,7 @@ codex-arg0 = { workspace = true }
|
||||
codex-common = { workspace = true, features = ["cli"] }
|
||||
codex-core = { workspace = true }
|
||||
codex-backend-client = { workspace = true }
|
||||
codex-eval-case = { workspace = true }
|
||||
codex-file-search = { workspace = true }
|
||||
codex-login = { workspace = true }
|
||||
codex-protocol = { workspace = true }
|
||||
|
||||
@@ -25,6 +25,8 @@ use codex_app_server_protocol::ClientRequest;
|
||||
use codex_app_server_protocol::CommandExecParams;
|
||||
use codex_app_server_protocol::ConversationGitInfo;
|
||||
use codex_app_server_protocol::ConversationSummary;
|
||||
use codex_app_server_protocol::EvalCaseCreateParams;
|
||||
use codex_app_server_protocol::EvalCaseCreateResponse;
|
||||
use codex_app_server_protocol::ExecOneOffCommandResponse;
|
||||
use codex_app_server_protocol::FeedbackUploadParams;
|
||||
use codex_app_server_protocol::FeedbackUploadResponse;
|
||||
@@ -512,6 +514,9 @@ impl CodexMessageProcessor {
|
||||
ClientRequest::FeedbackUpload { request_id, params } => {
|
||||
self.upload_feedback(request_id, params).await;
|
||||
}
|
||||
ClientRequest::EvalCaseCreate { request_id, params } => {
|
||||
self.eval_case_create(request_id, params).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3301,6 +3306,170 @@ impl CodexMessageProcessor {
|
||||
}
|
||||
}
|
||||
|
||||
async fn eval_case_create(&self, request_id: RequestId, params: EvalCaseCreateParams) {
|
||||
let EvalCaseCreateParams {
|
||||
thread_id,
|
||||
start,
|
||||
what_went_wrong,
|
||||
what_good_looks_like,
|
||||
include_logs,
|
||||
} = params;
|
||||
|
||||
if !include_logs {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: "eval case bundles always include codex-logs.log; set include_logs=true"
|
||||
.to_string(),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
|
||||
let conversation_id = match ConversationId::from_string(&thread_id) {
|
||||
Ok(id) => id,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: format!("invalid thread id: {err}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let Some(rollout_path) = self.resolve_rollout_path(conversation_id).await else {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: "could not resolve rollout path for thread".to_string(),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
};
|
||||
|
||||
let rollout_text = match std::fs::read_to_string(&rollout_path) {
|
||||
Ok(text) => text,
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INTERNAL_ERROR_CODE,
|
||||
message: format!(
|
||||
"failed to read rollout file {}: {err}",
|
||||
rollout_path.display()
|
||||
),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let start_marker = match (&start.kind, &start.value) {
|
||||
(
|
||||
codex_app_server_protocol::EvalCaseStartMarkerKind::RolloutLineTimestamp,
|
||||
codex_app_server_protocol::EvalCaseStartMarkerValue::Timestamp(value),
|
||||
) => codex_eval_case::StartMarker {
|
||||
kind: codex_eval_case::StartMarkerKind::RolloutLineTimestamp,
|
||||
value: codex_eval_case::StartMarkerValue::Timestamp(value.clone()),
|
||||
display: start.display.clone(),
|
||||
},
|
||||
(
|
||||
codex_app_server_protocol::EvalCaseStartMarkerKind::RolloutLineIndex,
|
||||
codex_app_server_protocol::EvalCaseStartMarkerValue::LineIndex(value),
|
||||
) => codex_eval_case::StartMarker {
|
||||
kind: codex_eval_case::StartMarkerKind::RolloutLineIndex,
|
||||
value: codex_eval_case::StartMarkerValue::LineIndex(*value),
|
||||
display: start.display.clone(),
|
||||
},
|
||||
_ => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: "start marker kind/value mismatch".to_string(),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let last_line_index = rollout_text.lines().count().saturating_sub(1);
|
||||
let requested_start_is_now = start.display == "Start now"
|
||||
|| matches!((&start.kind, &start.value),
|
||||
(codex_app_server_protocol::EvalCaseStartMarkerKind::RolloutLineIndex,
|
||||
codex_app_server_protocol::EvalCaseStartMarkerValue::LineIndex(index))
|
||||
if usize::try_from(*index).ok() == Some(last_line_index)
|
||||
);
|
||||
|
||||
let repo_snapshot = repo_snapshot_from_rollout(&rollout_text, &start_marker);
|
||||
if repo_snapshot.is_none() && !requested_start_is_now {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INVALID_REQUEST_ERROR_CODE,
|
||||
message: "no repo snapshot available for requested start marker".to_string(),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
|
||||
let logs_bytes = include_logs.then(|| {
|
||||
self.feedback
|
||||
.snapshot(Some(conversation_id))
|
||||
.as_bytes()
|
||||
.to_vec()
|
||||
});
|
||||
|
||||
let args = codex_eval_case::CreateEvalCaseArgs {
|
||||
codex_home: self.config.codex_home.clone(),
|
||||
conversation_id: thread_id.clone(),
|
||||
rollout_path,
|
||||
start: start_marker,
|
||||
repo_cwd: self.config.cwd.clone(),
|
||||
repo_snapshot,
|
||||
notes: codex_eval_case::Notes {
|
||||
what_went_wrong,
|
||||
what_good_looks_like,
|
||||
},
|
||||
include_logs,
|
||||
logs_bytes,
|
||||
};
|
||||
|
||||
let result =
|
||||
tokio::task::spawn_blocking(move || codex_eval_case::create_eval_case_bundle(&args))
|
||||
.await;
|
||||
|
||||
let result = match result {
|
||||
Ok(outcome) => outcome,
|
||||
Err(join_err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INTERNAL_ERROR_CODE,
|
||||
message: format!("failed to create eval case bundle: {join_err}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(outcome) => {
|
||||
let response = EvalCaseCreateResponse {
|
||||
case_id: outcome.case_id,
|
||||
path: outcome.path.display().to_string(),
|
||||
};
|
||||
self.outgoing.send_response(request_id, response).await;
|
||||
}
|
||||
Err(err) => {
|
||||
let error = JSONRPCErrorError {
|
||||
code: INTERNAL_ERROR_CODE,
|
||||
message: format!("failed to create eval case bundle: {err}"),
|
||||
data: None,
|
||||
};
|
||||
self.outgoing.send_error(request_id, error).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn resolve_rollout_path(&self, conversation_id: ConversationId) -> Option<PathBuf> {
|
||||
match self
|
||||
.conversation_manager
|
||||
@@ -3313,6 +3482,56 @@ impl CodexMessageProcessor {
|
||||
}
|
||||
}
|
||||
|
||||
fn repo_snapshot_from_rollout(
|
||||
rollout_text: &str,
|
||||
start_marker: &codex_eval_case::StartMarker,
|
||||
) -> Option<codex_eval_case::RepoSnapshot> {
|
||||
match &start_marker.value {
|
||||
codex_eval_case::StartMarkerValue::LineIndex(index) => {
|
||||
let index = usize::try_from(*index).ok()?;
|
||||
repo_snapshot_from_rollout_line_index(rollout_text, index)
|
||||
}
|
||||
codex_eval_case::StartMarkerValue::Timestamp(timestamp) => {
|
||||
repo_snapshot_from_rollout_timestamp(rollout_text, timestamp)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn repo_snapshot_from_rollout_timestamp(
|
||||
rollout_text: &str,
|
||||
timestamp: &str,
|
||||
) -> Option<codex_eval_case::RepoSnapshot> {
|
||||
let start_index = rollout_text.lines().enumerate().find_map(|(idx, line)| {
|
||||
let rollout_line =
|
||||
serde_json::from_str::<codex_protocol::protocol::RolloutLine>(line).ok()?;
|
||||
(rollout_line.timestamp == timestamp).then_some(idx)
|
||||
})?;
|
||||
repo_snapshot_from_rollout_line_index(rollout_text, start_index)
|
||||
}
|
||||
|
||||
fn repo_snapshot_from_rollout_line_index(
|
||||
rollout_text: &str,
|
||||
start_index: usize,
|
||||
) -> Option<codex_eval_case::RepoSnapshot> {
|
||||
rollout_text
|
||||
.lines()
|
||||
.enumerate()
|
||||
.skip(start_index.saturating_add(1))
|
||||
.find_map(|(_idx, line)| {
|
||||
let rollout_line =
|
||||
serde_json::from_str::<codex_protocol::protocol::RolloutLine>(line).ok()?;
|
||||
match rollout_line.item {
|
||||
codex_protocol::protocol::RolloutItem::ResponseItem(
|
||||
codex_protocol::models::ResponseItem::GhostSnapshot { ghost_commit },
|
||||
) => Some(codex_eval_case::RepoSnapshot {
|
||||
base_sha: ghost_commit.parent()?.to_string(),
|
||||
commit_sha: ghost_commit.id().to_string(),
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn skills_to_info(
|
||||
skills: &[codex_core::skills::SkillMetadata],
|
||||
) -> Vec<codex_app_server_protocol::SkillMetadata> {
|
||||
@@ -3619,4 +3838,58 @@ mod tests {
|
||||
assert_eq!(summary, expected);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn repo_snapshot_from_rollout_line_index_finds_next_ghost_snapshot() -> Result<()> {
|
||||
use serde_json::json;
|
||||
|
||||
let user_line = json!({
|
||||
"timestamp": "2025-09-05T16:53:11.850Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [{
|
||||
"type": "input_text",
|
||||
"text": "hi",
|
||||
}],
|
||||
},
|
||||
});
|
||||
|
||||
let ghost_line = json!({
|
||||
"timestamp": "2025-09-05T16:53:12.000Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "ghost_snapshot",
|
||||
"ghost_commit": {
|
||||
"id": "ghost-sha",
|
||||
"parent": "base-sha",
|
||||
"preexisting_untracked_files": [],
|
||||
"preexisting_untracked_dirs": [],
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
let rollout_text = format!(
|
||||
"{}\n{}\n",
|
||||
serde_json::to_string(&user_line)?,
|
||||
serde_json::to_string(&ghost_line)?
|
||||
);
|
||||
|
||||
let start = codex_eval_case::StartMarker {
|
||||
kind: codex_eval_case::StartMarkerKind::RolloutLineIndex,
|
||||
value: codex_eval_case::StartMarkerValue::LineIndex(0),
|
||||
display: "From: hi".to_string(),
|
||||
};
|
||||
|
||||
let snapshot = repo_snapshot_from_rollout(&rollout_text, &start).expect("snapshot");
|
||||
assert_eq!(
|
||||
snapshot,
|
||||
codex_eval_case::RepoSnapshot {
|
||||
base_sha: "base-sha".to_string(),
|
||||
commit_sha: "ghost-sha".to_string(),
|
||||
}
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,34 +209,19 @@ pub fn apply_hunks(
|
||||
stdout: &mut impl std::io::Write,
|
||||
stderr: &mut impl std::io::Write,
|
||||
) -> Result<(), ApplyPatchError> {
|
||||
let _existing_paths: Vec<&Path> = hunks
|
||||
.iter()
|
||||
.filter_map(|hunk| match hunk {
|
||||
Hunk::AddFile { .. } => {
|
||||
// The file is being added, so it doesn't exist yet.
|
||||
None
|
||||
}
|
||||
Hunk::DeleteFile { path } => Some(path.as_path()),
|
||||
Hunk::UpdateFile {
|
||||
path, move_path, ..
|
||||
} => match move_path {
|
||||
Some(move_path) => {
|
||||
if std::fs::metadata(move_path)
|
||||
.map(|m| m.is_file())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
Some(move_path.as_path())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
None => Some(path.as_path()),
|
||||
},
|
||||
})
|
||||
.collect::<Vec<&Path>>();
|
||||
apply_hunks_in_dir(hunks, Path::new("."), stdout, stderr)
|
||||
}
|
||||
|
||||
/// Applies hunks relative to `cwd` (without mutating the process working directory) and writes the
|
||||
/// same stdout/stderr output as `apply_hunks`.
|
||||
pub fn apply_hunks_in_dir(
|
||||
hunks: &[Hunk],
|
||||
cwd: &Path,
|
||||
stdout: &mut impl std::io::Write,
|
||||
stderr: &mut impl std::io::Write,
|
||||
) -> Result<(), ApplyPatchError> {
|
||||
// Delegate to a helper that applies each hunk to the filesystem.
|
||||
match apply_hunks_to_files(hunks) {
|
||||
match apply_hunks_to_files_in_dir(hunks, cwd) {
|
||||
Ok(affected) => {
|
||||
print_summary(&affected, stdout).map_err(ApplyPatchError::from)?;
|
||||
Ok(())
|
||||
@@ -267,7 +252,7 @@ pub struct AffectedPaths {
|
||||
|
||||
/// Apply the hunks to the filesystem, returning which files were added, modified, or deleted.
|
||||
/// Returns an error if the patch could not be applied.
|
||||
fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result<AffectedPaths> {
|
||||
fn apply_hunks_to_files_in_dir(hunks: &[Hunk], cwd: &Path) -> anyhow::Result<AffectedPaths> {
|
||||
if hunks.is_empty() {
|
||||
anyhow::bail!("No files were modified.");
|
||||
}
|
||||
@@ -278,19 +263,29 @@ fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result<AffectedPaths> {
|
||||
for hunk in hunks {
|
||||
match hunk {
|
||||
Hunk::AddFile { path, contents } => {
|
||||
if let Some(parent) = path.parent()
|
||||
let target_path = if path.is_absolute() {
|
||||
path.clone()
|
||||
} else {
|
||||
cwd.join(path)
|
||||
};
|
||||
if let Some(parent) = target_path.parent()
|
||||
&& !parent.as_os_str().is_empty()
|
||||
{
|
||||
std::fs::create_dir_all(parent).with_context(|| {
|
||||
format!("Failed to create parent directories for {}", path.display())
|
||||
})?;
|
||||
}
|
||||
std::fs::write(path, contents)
|
||||
std::fs::write(&target_path, contents)
|
||||
.with_context(|| format!("Failed to write file {}", path.display()))?;
|
||||
added.push(path.clone());
|
||||
}
|
||||
Hunk::DeleteFile { path } => {
|
||||
std::fs::remove_file(path)
|
||||
let target_path = if path.is_absolute() {
|
||||
path.clone()
|
||||
} else {
|
||||
cwd.join(path)
|
||||
};
|
||||
std::fs::remove_file(&target_path)
|
||||
.with_context(|| format!("Failed to delete file {}", path.display()))?;
|
||||
deleted.push(path.clone());
|
||||
}
|
||||
@@ -300,22 +295,38 @@ fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result<AffectedPaths> {
|
||||
chunks,
|
||||
} => {
|
||||
let AppliedPatch { new_contents, .. } =
|
||||
derive_new_contents_from_chunks(path, chunks)?;
|
||||
derive_new_contents_from_chunks_in_dir(path, chunks, cwd)?;
|
||||
if let Some(dest) = move_path {
|
||||
if let Some(parent) = dest.parent()
|
||||
let dest_path = if dest.is_absolute() {
|
||||
dest.clone()
|
||||
} else {
|
||||
cwd.join(dest)
|
||||
};
|
||||
if let Some(parent) = dest_path.parent()
|
||||
&& !parent.as_os_str().is_empty()
|
||||
{
|
||||
std::fs::create_dir_all(parent).with_context(|| {
|
||||
format!("Failed to create parent directories for {}", dest.display())
|
||||
})?;
|
||||
}
|
||||
std::fs::write(dest, new_contents)
|
||||
std::fs::write(&dest_path, new_contents)
|
||||
.with_context(|| format!("Failed to write file {}", dest.display()))?;
|
||||
std::fs::remove_file(path)
|
||||
|
||||
let src_path = if path.is_absolute() {
|
||||
path.clone()
|
||||
} else {
|
||||
cwd.join(path)
|
||||
};
|
||||
std::fs::remove_file(&src_path)
|
||||
.with_context(|| format!("Failed to remove original {}", path.display()))?;
|
||||
modified.push(dest.clone());
|
||||
} else {
|
||||
std::fs::write(path, new_contents)
|
||||
let target_path = if path.is_absolute() {
|
||||
path.clone()
|
||||
} else {
|
||||
cwd.join(path)
|
||||
};
|
||||
std::fs::write(&target_path, new_contents)
|
||||
.with_context(|| format!("Failed to write file {}", path.display()))?;
|
||||
modified.push(path.clone());
|
||||
}
|
||||
@@ -340,7 +351,21 @@ fn derive_new_contents_from_chunks(
|
||||
path: &Path,
|
||||
chunks: &[UpdateFileChunk],
|
||||
) -> std::result::Result<AppliedPatch, ApplyPatchError> {
|
||||
let original_contents = match std::fs::read_to_string(path) {
|
||||
derive_new_contents_from_chunks_in_dir(path, chunks, Path::new("."))
|
||||
}
|
||||
|
||||
fn derive_new_contents_from_chunks_in_dir(
|
||||
path: &Path,
|
||||
chunks: &[UpdateFileChunk],
|
||||
cwd: &Path,
|
||||
) -> std::result::Result<AppliedPatch, ApplyPatchError> {
|
||||
let target_path = if path.is_absolute() {
|
||||
path.to_path_buf()
|
||||
} else {
|
||||
cwd.join(path)
|
||||
};
|
||||
|
||||
let original_contents = match std::fs::read_to_string(&target_path) {
|
||||
Ok(contents) => contents,
|
||||
Err(err) => {
|
||||
return Err(ApplyPatchError::IoError(IoError {
|
||||
|
||||
@@ -343,6 +343,7 @@ pub(crate) struct Session {
|
||||
/// The set of enabled features should be invariant for the lifetime of the
|
||||
/// session.
|
||||
features: Features,
|
||||
repo_snapshotting_enabled: AtomicBool,
|
||||
pub(crate) active_turn: Mutex<Option<ActiveTurn>>,
|
||||
pub(crate) services: SessionServices,
|
||||
next_internal_sub_id: AtomicU64,
|
||||
@@ -673,6 +674,7 @@ impl Session {
|
||||
tx_event: tx_event.clone(),
|
||||
state: Mutex::new(state),
|
||||
features: config.features.clone(),
|
||||
repo_snapshotting_enabled: AtomicBool::new(false),
|
||||
active_turn: Mutex::new(None),
|
||||
services,
|
||||
next_internal_sub_id: AtomicU64::new(0),
|
||||
@@ -1439,7 +1441,9 @@ impl Session {
|
||||
turn_context: Arc<TurnContext>,
|
||||
cancellation_token: CancellationToken,
|
||||
) {
|
||||
if !self.enabled(Feature::GhostCommit) {
|
||||
if !self.enabled(Feature::GhostCommit)
|
||||
&& !self.repo_snapshotting_enabled.load(Ordering::Relaxed)
|
||||
{
|
||||
return;
|
||||
}
|
||||
let token = match turn_context.tool_call_gate.subscribe().await {
|
||||
@@ -1609,6 +1613,9 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
|
||||
)
|
||||
.await;
|
||||
}
|
||||
Op::SetRepoSnapshotting { enabled } => {
|
||||
handlers::set_repo_snapshotting(&sess, enabled).await;
|
||||
}
|
||||
Op::UserInput { .. } | Op::UserTurn { .. } => {
|
||||
handlers::user_input_or_turn(&sess, sub.id.clone(), sub.op, &mut previous_context)
|
||||
.await;
|
||||
@@ -1707,6 +1714,7 @@ mod handlers {
|
||||
use mcp_types::RequestId;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::Ordering;
|
||||
use tracing::info;
|
||||
use tracing::warn;
|
||||
|
||||
@@ -1731,6 +1739,11 @@ mod handlers {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn set_repo_snapshotting(sess: &Session, enabled: bool) {
|
||||
sess.repo_snapshotting_enabled
|
||||
.store(enabled, Ordering::Relaxed);
|
||||
}
|
||||
|
||||
pub async fn user_input_or_turn(
|
||||
sess: &Arc<Session>,
|
||||
sub_id: String,
|
||||
@@ -3161,6 +3174,7 @@ mod tests {
|
||||
tx_event,
|
||||
state: Mutex::new(state),
|
||||
features: config.features.clone(),
|
||||
repo_snapshotting_enabled: AtomicBool::new(false),
|
||||
active_turn: Mutex::new(None),
|
||||
services,
|
||||
next_internal_sub_id: AtomicU64::new(0),
|
||||
@@ -3248,6 +3262,7 @@ mod tests {
|
||||
tx_event,
|
||||
state: Mutex::new(state),
|
||||
features: config.features.clone(),
|
||||
repo_snapshotting_enabled: AtomicBool::new(false),
|
||||
active_turn: Mutex::new(None),
|
||||
services,
|
||||
next_internal_sub_id: AtomicU64::new(0),
|
||||
|
||||
@@ -23,6 +23,7 @@ use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ApplyPatchRequest {
|
||||
@@ -143,6 +144,48 @@ impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx<'_>,
|
||||
) -> Result<ExecToolCallOutput, ToolError> {
|
||||
// When there is no sandbox in play (DangerFullAccess / bypassed sandbox), apply the patch
|
||||
// in-process. This avoids relying on the current executable implementing the arg0/argv1
|
||||
// dispatch behavior (which is not true for unit/integration test binaries).
|
||||
if attempt.sandbox == crate::exec::SandboxType::None {
|
||||
let started = Instant::now();
|
||||
let mut stdout = Vec::new();
|
||||
let mut stderr = Vec::new();
|
||||
|
||||
// Apply the patch relative to `req.cwd` without mutating the process CWD.
|
||||
let exit_code = match codex_apply_patch::parse_patch(&req.patch) {
|
||||
Ok(args) => {
|
||||
match codex_apply_patch::apply_hunks_in_dir(
|
||||
&args.hunks,
|
||||
&req.cwd,
|
||||
&mut stdout,
|
||||
&mut stderr,
|
||||
) {
|
||||
Ok(()) => 0,
|
||||
Err(_) => 1,
|
||||
}
|
||||
}
|
||||
// Reuse codex-apply-patch's error formatting for parse errors.
|
||||
Err(_) => {
|
||||
match codex_apply_patch::apply_patch(&req.patch, &mut stdout, &mut stderr) {
|
||||
Ok(()) => 0,
|
||||
Err(_) => 1,
|
||||
}
|
||||
}
|
||||
};
|
||||
let stdout = String::from_utf8_lossy(&stdout).to_string();
|
||||
let stderr = String::from_utf8_lossy(&stderr).to_string();
|
||||
let aggregated_output = format!("{stdout}{stderr}");
|
||||
return Ok(ExecToolCallOutput {
|
||||
exit_code,
|
||||
stdout: crate::exec::StreamOutput::new(stdout),
|
||||
stderr: crate::exec::StreamOutput::new(stderr),
|
||||
aggregated_output: crate::exec::StreamOutput::new(aggregated_output),
|
||||
duration: started.elapsed(),
|
||||
timed_out: false,
|
||||
});
|
||||
}
|
||||
|
||||
let spec = Self::build_command_spec(req)?;
|
||||
let env = attempt
|
||||
.env_for(spec)
|
||||
|
||||
@@ -8,6 +8,7 @@ use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ResponsesRequest;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
@@ -277,7 +278,7 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(harness.server(), responses).await;
|
||||
let requests = mount_sse_sequence(harness.server(), responses).await;
|
||||
|
||||
let model = test.session_configured.model.clone();
|
||||
codex
|
||||
@@ -297,7 +298,14 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
assert_eq!(fs::read_to_string(&target).await?, "hello from snapshot\n");
|
||||
let tool_output = extract_call_output_text(&requests.requests(), call_id);
|
||||
let target_contents = fs::read_to_string(&target).await.unwrap_or_else(|err| {
|
||||
panic!(
|
||||
"expected patch to create {}: {err}; tool output={tool_output:?}",
|
||||
target.display()
|
||||
)
|
||||
});
|
||||
assert_eq!(target_contents, "hello from snapshot\n");
|
||||
|
||||
let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
|
||||
let snapshot_path = entries
|
||||
@@ -311,6 +319,35 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn extract_call_output_text(requests: &[ResponsesRequest], call_id: &str) -> String {
|
||||
for req in requests {
|
||||
let input = req.input();
|
||||
let item = input.iter().find(|item| {
|
||||
item.get("type").and_then(serde_json::Value::as_str) == Some("function_call_output")
|
||||
&& item.get("call_id").and_then(serde_json::Value::as_str) == Some(call_id)
|
||||
});
|
||||
let Some(item) = item else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let output = item
|
||||
.get("output")
|
||||
.cloned()
|
||||
.unwrap_or(serde_json::Value::Null);
|
||||
match output {
|
||||
serde_json::Value::String(text) => return text,
|
||||
serde_json::Value::Object(obj) => {
|
||||
if let Some(text) = obj.get("content").and_then(serde_json::Value::as_str) {
|
||||
return text.to_string();
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
"<missing tool output>".to_string()
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "windows", ignore)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_snapshot_deleted_after_shutdown_with_skills() -> Result<()> {
|
||||
|
||||
58
codex-rs/docs/eval_capture.md
Normal file
58
codex-rs/docs/eval_capture.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# Eval Capture Bundles
|
||||
|
||||
Codex can capture "eval case" bundles from the `/feedback` flow (bad result -> capture eval sample).
|
||||
These bundles are meant to turn real failures into reproducible, local-first artifacts.
|
||||
|
||||
## Where Bundles Are Written
|
||||
|
||||
Bundles are stored under:
|
||||
|
||||
`$CODEX_HOME/eval-case/<case-id>/`
|
||||
|
||||
## Bundle Contents
|
||||
|
||||
Each bundle contains:
|
||||
|
||||
- `manifest.json` - metadata about the capture (schema version, rollout start selector, repo git pointer).
|
||||
- `rollout.jsonl` - the full session rollout (multi-turn trajectory).
|
||||
- `repo.patch` - a git patch representing the repository state at the chosen start marker.
|
||||
- `codex-logs.log` - tracing logs to help maintainers debug the session.
|
||||
|
||||
## Start Marker And Repo State
|
||||
|
||||
Bundles include the entire rollout, but also record a start marker to indicate where an eval
|
||||
harness (or a human) should begin replaying/interpreting the trajectory.
|
||||
|
||||
`manifest.json` also records a `rollout.start_selector`, which is the preferred way to find the
|
||||
intended starting turn when replaying/slicing (line numbers in `rollout.jsonl` are not stable).
|
||||
|
||||
The repository patch must match that chosen start marker:
|
||||
|
||||
- If the session has repo snapshots available, `repo.patch` is derived from the ghost snapshot
|
||||
commit associated with the selected user turn (diff from the snapshot's base commit to the
|
||||
snapshot commit).
|
||||
- If no snapshot is available for a given start marker, the TUI disables that option (and may
|
||||
fall back to the basic feedback flow instead).
|
||||
|
||||
For reproducibility outside your machine, the base commit recorded in `manifest.json` should be
|
||||
reachable by maintainers (for example, pushed and available on the default branch).
|
||||
|
||||
## Reproducibility Contract
|
||||
|
||||
Reproducing the repository state in a bundle must be possible using only:
|
||||
|
||||
- `repo.git.canonical_remote` (clone URL)
|
||||
- `repo.git.commit` (base commit to `git checkout`)
|
||||
- `repo.patch` (apply if non-empty)
|
||||
|
||||
The manifest may also include machine-local paths (like `repo.root` / `repo.cwd_rel`) as debugging
|
||||
hints, but repro should not depend on them.
|
||||
|
||||
## App-Server API (For Integrations)
|
||||
|
||||
Non-TUI clients can create bundles via the app-server JSON-RPC method:
|
||||
|
||||
- `evalCase/create`
|
||||
|
||||
The handler copies the rollout into the bundle and derives `repo.patch` based on the selected start
|
||||
marker when repo snapshots are available.
|
||||
19
codex-rs/eval-case/Cargo.toml
Normal file
19
codex-rs/eval-case/Cargo.toml
Normal file
@@ -0,0 +1,19 @@
|
||||
[package]
|
||||
name = "codex-eval-case"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
time = { workspace = true, features = ["formatting", "parsing"] }
|
||||
uuid = { workspace = true, features = ["v4"] }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
993
codex-rs/eval-case/src/lib.rs
Normal file
993
codex-rs/eval-case/src/lib.rs
Normal file
@@ -0,0 +1,993 @@
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
use anyhow::Context as _;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use time::OffsetDateTime;
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum StartMarkerKind {
|
||||
RolloutLineTimestamp,
|
||||
RolloutLineIndex,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(untagged)]
|
||||
pub enum StartMarkerValue {
|
||||
Timestamp(String),
|
||||
LineIndex(u64),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct StartMarker {
|
||||
pub kind: StartMarkerKind,
|
||||
pub value: StartMarkerValue,
|
||||
pub display: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RolloutStartSelectorKind {
|
||||
/// Find the first `event_msg` line where `payload.type == "user_message"` and
|
||||
/// `payload.message` contains `contains`.
|
||||
EventMsgUserMessageContains,
|
||||
/// Find the first `response_item` line where `payload.role == "user"` and the combined text
|
||||
/// from `payload.content[*].text` contains `contains`.
|
||||
ResponseItemUserMessageContains,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct RolloutInfo {
|
||||
pub filename: String,
|
||||
/// Preferred, deterministic selector for slicing the rollout when reproducing a case.
|
||||
pub start_selector: RolloutStartSelector,
|
||||
/// Debugging hint only; do not use this for correctness when slicing.
|
||||
pub start: StartMarker,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct RolloutStartSelector {
|
||||
pub kind: RolloutStartSelectorKind,
|
||||
pub contains: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub after_timestamp: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct GitRemote {
|
||||
pub name: String,
|
||||
pub fetch_url: String,
|
||||
pub push_url: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct RepoGitInfo {
|
||||
/// Base commit to `git checkout` before applying `repo.patch`.
|
||||
pub commit: String,
|
||||
pub remotes: Vec<GitRemote>,
|
||||
pub canonical_remote: Option<String>,
|
||||
pub reproducible: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reproducible_reason: Option<String>,
|
||||
pub is_dirty: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub describe: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct RepoInfo {
|
||||
/// Repository root as reported by `git rev-parse --show-toplevel` (optional).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub root: Option<String>,
|
||||
/// Relative path from `repo.root` to capture-time cwd (optional).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub cwd_rel: Option<String>,
|
||||
pub git: RepoGitInfo,
|
||||
pub patch_filename: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct Notes {
|
||||
pub what_went_wrong: String,
|
||||
pub what_good_looks_like: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct Artifacts {
|
||||
pub include_logs: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct EvalCaseManifestV1 {
|
||||
pub version: String,
|
||||
pub case_id: String,
|
||||
pub created_at: String,
|
||||
pub conversation_id: String,
|
||||
pub source: String,
|
||||
pub rollout: RolloutInfo,
|
||||
pub repo: RepoInfo,
|
||||
pub notes: Notes,
|
||||
pub artifacts: Artifacts,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CreateEvalCaseArgs {
|
||||
pub codex_home: PathBuf,
|
||||
pub conversation_id: String,
|
||||
pub rollout_path: PathBuf,
|
||||
pub start: StartMarker,
|
||||
pub repo_cwd: PathBuf,
|
||||
/// When present, derive `repo.patch` from the provided commit snapshot instead of the current
|
||||
/// working tree.
|
||||
pub repo_snapshot: Option<RepoSnapshot>,
|
||||
pub notes: Notes,
|
||||
pub include_logs: bool,
|
||||
pub logs_bytes: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct RepoSnapshot {
|
||||
pub base_sha: String,
|
||||
pub commit_sha: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CreateEvalCaseResult {
|
||||
pub case_id: String,
|
||||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
fn sanitize_repo_slug(input: &str) -> String {
|
||||
// Keep it short + filesystem-friendly.
|
||||
let mut out = String::with_capacity(input.len());
|
||||
let mut last_was_dash = false;
|
||||
for ch in input.chars() {
|
||||
let ch = ch.to_ascii_lowercase();
|
||||
if ch.is_ascii_alphanumeric() {
|
||||
out.push(ch);
|
||||
last_was_dash = false;
|
||||
continue;
|
||||
}
|
||||
if !last_was_dash {
|
||||
out.push('-');
|
||||
last_was_dash = true;
|
||||
}
|
||||
}
|
||||
let out = out.trim_matches('-').to_string();
|
||||
if out.is_empty() {
|
||||
"repo".to_string()
|
||||
} else {
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
fn repo_slug(repo_cwd: &Path) -> String {
|
||||
// Prefer the git repo root name, but fall back to the cwd basename.
|
||||
let top_level = git_stdout(repo_cwd, &["rev-parse", "--show-toplevel"])
|
||||
.ok()
|
||||
.map(PathBuf::from);
|
||||
let basename = top_level
|
||||
.as_ref()
|
||||
.and_then(|p| p.file_name())
|
||||
.or_else(|| repo_cwd.file_name());
|
||||
let basename = basename
|
||||
.map(|name| name.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| "repo".to_string());
|
||||
sanitize_repo_slug(&basename)
|
||||
}
|
||||
|
||||
pub fn create_eval_case_bundle(args: &CreateEvalCaseArgs) -> anyhow::Result<CreateEvalCaseResult> {
|
||||
let created_at = OffsetDateTime::now_utc();
|
||||
let created_at_rfc3339 = created_at.format(&Rfc3339).context("format created_at")?;
|
||||
|
||||
let ts_for_id = format!(
|
||||
"{:04}-{:02}-{:02}T{:02}-{:02}-{:02}",
|
||||
created_at.year(),
|
||||
u8::from(created_at.month()),
|
||||
created_at.day(),
|
||||
created_at.hour(),
|
||||
created_at.minute(),
|
||||
created_at.second()
|
||||
);
|
||||
|
||||
// Short, human-scannable id: datetime + repo + 6-digit suffix.
|
||||
// Collision risk is low and acceptable for local bundles.
|
||||
let repo = repo_slug(&args.repo_cwd);
|
||||
let id6 = (Uuid::new_v4().as_u128() % 1_000_000) as u32;
|
||||
let case_id = format!("{ts_for_id}-{repo}-{id6:06}");
|
||||
|
||||
let bundle_dir = args.codex_home.join("eval-case").join(&case_id);
|
||||
std::fs::create_dir_all(&bundle_dir)
|
||||
.with_context(|| format!("create eval bundle dir {}", bundle_dir.display()))?;
|
||||
|
||||
let rollout_dst = bundle_dir.join("rollout.jsonl");
|
||||
std::fs::copy(&args.rollout_path, &rollout_dst).with_context(|| {
|
||||
format!(
|
||||
"copy rollout {} -> {}",
|
||||
args.rollout_path.display(),
|
||||
rollout_dst.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let rollout_text = std::fs::read_to_string(&args.rollout_path).with_context(|| {
|
||||
format!(
|
||||
"read rollout for start selector {}",
|
||||
args.rollout_path.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
let (base_sha, patch) = match args.repo_snapshot.as_ref() {
|
||||
Some(snapshot) => {
|
||||
git_patch_between_commits(&args.repo_cwd, &snapshot.base_sha, &snapshot.commit_sha)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"generate patch for repo snapshot {}..{}",
|
||||
snapshot.base_sha, snapshot.commit_sha
|
||||
)
|
||||
})?
|
||||
}
|
||||
None => git_patch_against_head(&args.repo_cwd)?,
|
||||
};
|
||||
let patch_path = bundle_dir.join("repo.patch");
|
||||
std::fs::write(&patch_path, patch)
|
||||
.with_context(|| format!("write patch {}", patch_path.display()))?;
|
||||
|
||||
if args.include_logs {
|
||||
let logs_path = bundle_dir.join("codex-logs.log");
|
||||
let bytes = args
|
||||
.logs_bytes
|
||||
.clone()
|
||||
.unwrap_or_else(|| Vec::with_capacity(0));
|
||||
std::fs::write(&logs_path, bytes)
|
||||
.with_context(|| format!("write logs {}", logs_path.display()))?;
|
||||
}
|
||||
|
||||
let repo_root = git_stdout(&args.repo_cwd, &["rev-parse", "--show-toplevel"])
|
||||
.ok()
|
||||
.filter(|s| !s.is_empty());
|
||||
let cwd_rel = repo_root
|
||||
.as_ref()
|
||||
.and_then(|root| {
|
||||
let root = PathBuf::from(root);
|
||||
args.repo_cwd
|
||||
.strip_prefix(&root)
|
||||
.ok()
|
||||
.map(|p| p.display().to_string())
|
||||
})
|
||||
.filter(|s| !s.is_empty());
|
||||
|
||||
let (git_remotes, remotes_ok) = match git_remotes(&args.repo_cwd) {
|
||||
Ok(remotes) => (remotes, true),
|
||||
Err(_) => (Vec::new(), false),
|
||||
};
|
||||
let canonical_remote = select_canonical_remote(&git_remotes);
|
||||
let git_is_dirty = git_is_dirty(&args.repo_cwd).unwrap_or(false);
|
||||
let git_describe = git_stdout(
|
||||
&args.repo_cwd,
|
||||
&["describe", "--tags", "--always", "--dirty"],
|
||||
)
|
||||
.ok()
|
||||
.filter(|s| !s.is_empty());
|
||||
|
||||
let mut reproducible = true;
|
||||
let mut reproducible_reason: Option<String> = None;
|
||||
if !remotes_ok {
|
||||
reproducible = false;
|
||||
reproducible_reason = Some("not_a_git_repo".to_string());
|
||||
} else if canonical_remote.is_none() {
|
||||
reproducible = false;
|
||||
reproducible_reason = Some("no_git_remote".to_string());
|
||||
}
|
||||
if base_sha.len() != 40 || base_sha == "unknown" {
|
||||
reproducible = false;
|
||||
if reproducible_reason.is_none() {
|
||||
reproducible_reason = Some("unknown_commit".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let start_selector = derive_rollout_start_selector(&args.start, &rollout_text);
|
||||
|
||||
let manifest = EvalCaseManifestV1 {
|
||||
version: "v1".to_string(),
|
||||
case_id: case_id.clone(),
|
||||
created_at: created_at_rfc3339,
|
||||
conversation_id: args.conversation_id.clone(),
|
||||
source: "cli".to_string(),
|
||||
rollout: RolloutInfo {
|
||||
filename: "rollout.jsonl".to_string(),
|
||||
start_selector,
|
||||
start: args.start.clone(),
|
||||
},
|
||||
repo: RepoInfo {
|
||||
root: repo_root,
|
||||
cwd_rel,
|
||||
git: RepoGitInfo {
|
||||
commit: base_sha,
|
||||
remotes: git_remotes,
|
||||
canonical_remote,
|
||||
reproducible,
|
||||
reproducible_reason,
|
||||
is_dirty: git_is_dirty,
|
||||
describe: git_describe,
|
||||
},
|
||||
patch_filename: "repo.patch".to_string(),
|
||||
},
|
||||
notes: args.notes.clone(),
|
||||
artifacts: Artifacts {
|
||||
include_logs: args.include_logs,
|
||||
},
|
||||
};
|
||||
let manifest_path = bundle_dir.join("manifest.json");
|
||||
let manifest_json = serde_json::to_string_pretty(&manifest).context("serialize manifest")?;
|
||||
std::fs::write(&manifest_path, format!("{manifest_json}\n"))
|
||||
.with_context(|| format!("write manifest {}", manifest_path.display()))?;
|
||||
|
||||
Ok(CreateEvalCaseResult {
|
||||
case_id,
|
||||
path: bundle_dir,
|
||||
})
|
||||
}
|
||||
|
||||
fn normalize_whitespace(s: &str) -> String {
|
||||
let mut out = String::with_capacity(s.len());
|
||||
for part in s.split_whitespace() {
|
||||
if !out.is_empty() {
|
||||
out.push(' ');
|
||||
}
|
||||
out.push_str(part);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn truncate_chars(s: &str, max_chars: usize) -> String {
|
||||
if s.chars().count() <= max_chars {
|
||||
return s.to_string();
|
||||
}
|
||||
s.chars().take(max_chars).collect()
|
||||
}
|
||||
|
||||
fn should_show_start_message(message: &str) -> bool {
|
||||
// The rollout can include synthetic "user" messages (environment context, AGENTS
|
||||
// instructions) that should not be used as start selectors.
|
||||
let trimmed = message.trim_start();
|
||||
if trimmed.starts_with("<environment_context>") {
|
||||
return false;
|
||||
}
|
||||
if trimmed.starts_with("# AGENTS.md instructions") {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
fn parse_rfc3339(s: &str) -> Option<OffsetDateTime> {
|
||||
OffsetDateTime::parse(s, &Rfc3339).ok()
|
||||
}
|
||||
|
||||
fn is_after_timestamp(timestamp: &str, after_timestamp: &str) -> bool {
|
||||
match (parse_rfc3339(timestamp), parse_rfc3339(after_timestamp)) {
|
||||
(Some(ts), Some(after)) => ts >= after,
|
||||
// Fall back to lexicographic compare; RFC3339 timestamps with consistent formatting
|
||||
// should still compare correctly.
|
||||
_ => timestamp >= after_timestamp,
|
||||
}
|
||||
}
|
||||
|
||||
fn derive_rollout_start_selector(start: &StartMarker, rollout_text: &str) -> RolloutStartSelector {
|
||||
let after_timestamp = match &start.value {
|
||||
StartMarkerValue::Timestamp(ts) => Some(ts.clone()),
|
||||
StartMarkerValue::LineIndex(idx) => {
|
||||
let idx = usize::try_from(*idx).ok();
|
||||
idx.and_then(|i| rollout_text.lines().nth(i))
|
||||
.and_then(|line| {
|
||||
serde_json::from_str::<serde_json::Value>(line)
|
||||
.ok()
|
||||
.and_then(|v| {
|
||||
v.get("timestamp")
|
||||
.and_then(serde_json::Value::as_str)
|
||||
.map(str::to_string)
|
||||
})
|
||||
})
|
||||
}
|
||||
};
|
||||
|
||||
let (kind, message) = if let Some(message) =
|
||||
find_first_event_msg_user_message(rollout_text, after_timestamp.as_deref())
|
||||
{
|
||||
(
|
||||
RolloutStartSelectorKind::EventMsgUserMessageContains,
|
||||
Some(message),
|
||||
)
|
||||
} else if let Some(message) =
|
||||
find_first_response_item_user_message(rollout_text, after_timestamp.as_deref())
|
||||
{
|
||||
(
|
||||
RolloutStartSelectorKind::ResponseItemUserMessageContains,
|
||||
Some(message),
|
||||
)
|
||||
} else {
|
||||
(RolloutStartSelectorKind::EventMsgUserMessageContains, None)
|
||||
};
|
||||
|
||||
let contains = message
|
||||
.map(|m| truncate_chars(&normalize_whitespace(&m), 200))
|
||||
.unwrap_or_default();
|
||||
|
||||
RolloutStartSelector {
|
||||
kind,
|
||||
contains,
|
||||
after_timestamp,
|
||||
}
|
||||
}
|
||||
|
||||
fn find_first_event_msg_user_message(
|
||||
rollout_text: &str,
|
||||
after_timestamp: Option<&str>,
|
||||
) -> Option<String> {
|
||||
rollout_text.lines().find_map(|line| {
|
||||
let v = serde_json::from_str::<serde_json::Value>(line).ok()?;
|
||||
let timestamp = v.get("timestamp")?.as_str()?;
|
||||
if let Some(after) = after_timestamp
|
||||
&& !is_after_timestamp(timestamp, after)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let ty = v.get("type")?.as_str()?;
|
||||
if ty != "event_msg" {
|
||||
return None;
|
||||
}
|
||||
let payload = v.get("payload")?;
|
||||
let payload_ty = payload.get("type")?.as_str()?;
|
||||
if payload_ty != "user_message" {
|
||||
return None;
|
||||
}
|
||||
let message = payload.get("message")?.as_str()?;
|
||||
should_show_start_message(message).then(|| message.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
fn find_first_response_item_user_message(
|
||||
rollout_text: &str,
|
||||
after_timestamp: Option<&str>,
|
||||
) -> Option<String> {
|
||||
rollout_text.lines().find_map(|line| {
|
||||
let v = serde_json::from_str::<serde_json::Value>(line).ok()?;
|
||||
let timestamp = v.get("timestamp")?.as_str()?;
|
||||
if let Some(after) = after_timestamp
|
||||
&& !is_after_timestamp(timestamp, after)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let ty = v.get("type")?.as_str()?;
|
||||
if ty != "response_item" {
|
||||
return None;
|
||||
}
|
||||
let payload = v.get("payload")?;
|
||||
let role = payload.get("role")?.as_str()?;
|
||||
if role != "user" {
|
||||
return None;
|
||||
}
|
||||
let content = payload.get("content")?.as_array()?;
|
||||
let mut out = String::new();
|
||||
for item in content {
|
||||
let Some(text) = item.get("text").and_then(serde_json::Value::as_str) else {
|
||||
continue;
|
||||
};
|
||||
if !out.is_empty() {
|
||||
out.push(' ');
|
||||
}
|
||||
out.push_str(text);
|
||||
}
|
||||
let out = normalize_whitespace(out.as_str());
|
||||
(!out.is_empty() && should_show_start_message(&out)).then_some(out)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn find_rollout_start_index(
|
||||
rollout_text: &str,
|
||||
selector: &RolloutStartSelector,
|
||||
) -> Option<usize> {
|
||||
if selector.contains.trim().is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let lines = rollout_text.lines().collect::<Vec<_>>();
|
||||
let matched_idx = lines.iter().enumerate().find_map(|(idx, line)| {
|
||||
let v = serde_json::from_str::<serde_json::Value>(line).ok()?;
|
||||
let timestamp = v.get("timestamp")?.as_str()?;
|
||||
if let Some(after) = selector.after_timestamp.as_deref()
|
||||
&& !is_after_timestamp(timestamp, after)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
match selector.kind {
|
||||
RolloutStartSelectorKind::EventMsgUserMessageContains => {
|
||||
let ty = v.get("type")?.as_str()?;
|
||||
if ty != "event_msg" {
|
||||
return None;
|
||||
}
|
||||
let payload = v.get("payload")?;
|
||||
let payload_ty = payload.get("type")?.as_str()?;
|
||||
if payload_ty != "user_message" {
|
||||
return None;
|
||||
}
|
||||
let message = payload.get("message")?.as_str()?;
|
||||
message.contains(selector.contains.as_str()).then_some(idx)
|
||||
}
|
||||
RolloutStartSelectorKind::ResponseItemUserMessageContains => {
|
||||
let ty = v.get("type")?.as_str()?;
|
||||
if ty != "response_item" {
|
||||
return None;
|
||||
}
|
||||
let payload = v.get("payload")?;
|
||||
let role = payload.get("role")?.as_str()?;
|
||||
if role != "user" {
|
||||
return None;
|
||||
}
|
||||
let content = payload.get("content")?.as_array()?;
|
||||
let mut out = String::new();
|
||||
for item in content {
|
||||
let Some(text) = item.get("text").and_then(serde_json::Value::as_str) else {
|
||||
continue;
|
||||
};
|
||||
if !out.is_empty() {
|
||||
out.push(' ');
|
||||
}
|
||||
out.push_str(text);
|
||||
}
|
||||
let out = normalize_whitespace(out.as_str());
|
||||
out.contains(selector.contains.as_str()).then_some(idx)
|
||||
}
|
||||
}
|
||||
})?;
|
||||
|
||||
// Include immediately preceding turn context lines so replay has the right environment/config.
|
||||
let mut start_idx = matched_idx;
|
||||
while start_idx > 0 {
|
||||
let prev_line = lines.get(start_idx.saturating_sub(1))?;
|
||||
let Ok(v) = serde_json::from_str::<serde_json::Value>(prev_line) else {
|
||||
break;
|
||||
};
|
||||
let Some(ty) = v.get("type").and_then(serde_json::Value::as_str) else {
|
||||
break;
|
||||
};
|
||||
if ty != "turn_context" {
|
||||
break;
|
||||
}
|
||||
start_idx = start_idx.saturating_sub(1);
|
||||
}
|
||||
|
||||
Some(start_idx)
|
||||
}
|
||||
|
||||
pub fn slice_rollout_from_selector(
|
||||
rollout_text: &str,
|
||||
selector: &RolloutStartSelector,
|
||||
) -> Option<String> {
|
||||
let start = find_rollout_start_index(rollout_text, selector)?;
|
||||
let sliced = rollout_text
|
||||
.lines()
|
||||
.skip(start)
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
Some(format!("{sliced}\n"))
|
||||
}
|
||||
|
||||
fn git_patch_against_head(repo_cwd: &Path) -> anyhow::Result<(String, Vec<u8>)> {
|
||||
let base_sha =
|
||||
git_stdout(repo_cwd, &["rev-parse", "HEAD"]).unwrap_or_else(|_| "unknown".to_string());
|
||||
|
||||
let mut patch = Vec::new();
|
||||
if let Ok(mut bytes) = git_diff(
|
||||
repo_cwd,
|
||||
&["diff", "--no-textconv", "--no-ext-diff", "--binary", "HEAD"],
|
||||
) {
|
||||
patch.append(&mut bytes);
|
||||
}
|
||||
|
||||
let untracked =
|
||||
git_stdout(repo_cwd, &["ls-files", "--others", "--exclude-standard"]).unwrap_or_default();
|
||||
for file in untracked.lines().map(str::trim).filter(|s| !s.is_empty()) {
|
||||
let null_device = if cfg!(windows) { "NUL" } else { "/dev/null" };
|
||||
let args = [
|
||||
"diff",
|
||||
"--no-textconv",
|
||||
"--no-ext-diff",
|
||||
"--binary",
|
||||
"--no-index",
|
||||
"--",
|
||||
null_device,
|
||||
file,
|
||||
];
|
||||
if let Ok(mut bytes) = git_diff(repo_cwd, &args) {
|
||||
patch.append(&mut bytes);
|
||||
}
|
||||
}
|
||||
|
||||
Ok((base_sha, patch))
|
||||
}
|
||||
|
||||
fn git_remotes(repo_cwd: &Path) -> anyhow::Result<Vec<GitRemote>> {
|
||||
let output = git_stdout(repo_cwd, &["remote", "-v"])?;
|
||||
// Use a BTreeMap so iteration is deterministic.
|
||||
let mut by_name: std::collections::BTreeMap<String, (Vec<String>, Vec<String>)> =
|
||||
std::collections::BTreeMap::new();
|
||||
|
||||
for line in output.lines().map(str::trim).filter(|s| !s.is_empty()) {
|
||||
let mut parts = line.split_whitespace();
|
||||
let Some(name) = parts.next() else {
|
||||
continue;
|
||||
};
|
||||
let Some(url) = parts.next() else {
|
||||
continue;
|
||||
};
|
||||
let Some(kind) = parts.next() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let entry = by_name
|
||||
.entry(name.to_string())
|
||||
.or_insert_with(|| (Vec::new(), Vec::new()));
|
||||
match kind {
|
||||
"(fetch)" => entry.0.push(url.to_string()),
|
||||
"(push)" => entry.1.push(url.to_string()),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let remotes = by_name
|
||||
.into_iter()
|
||||
.filter_map(|(name, (mut fetch_urls, mut push_urls))| {
|
||||
fetch_urls.sort();
|
||||
push_urls.sort();
|
||||
let fetch_url = fetch_urls.into_iter().next()?;
|
||||
let push_url = push_urls
|
||||
.into_iter()
|
||||
.next()
|
||||
.unwrap_or_else(|| fetch_url.clone());
|
||||
Some(GitRemote {
|
||||
name,
|
||||
fetch_url,
|
||||
push_url,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
Ok(remotes)
|
||||
}
|
||||
|
||||
fn select_canonical_remote(remotes: &[GitRemote]) -> Option<String> {
|
||||
if let Some(origin) = remotes.iter().find(|r| r.name == "origin") {
|
||||
return Some(origin.fetch_url.clone());
|
||||
}
|
||||
remotes
|
||||
.iter()
|
||||
.min_by(|a, b| a.name.cmp(&b.name))
|
||||
.map(|r| r.fetch_url.clone())
|
||||
}
|
||||
|
||||
fn git_is_dirty(repo_cwd: &Path) -> anyhow::Result<bool> {
|
||||
let diff_unstaged = Command::new("git")
|
||||
.args(["diff", "--quiet"])
|
||||
.current_dir(repo_cwd)
|
||||
.status()
|
||||
.context("run git diff --quiet")?;
|
||||
if !diff_unstaged.success() {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
let diff_staged = Command::new("git")
|
||||
.args(["diff", "--cached", "--quiet"])
|
||||
.current_dir(repo_cwd)
|
||||
.status()
|
||||
.context("run git diff --cached --quiet")?;
|
||||
if !diff_staged.success() {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
let untracked =
|
||||
git_stdout(repo_cwd, &["ls-files", "--others", "--exclude-standard"]).unwrap_or_default();
|
||||
Ok(untracked.lines().any(|s| !s.trim().is_empty()))
|
||||
}
|
||||
|
||||
fn git_stdout(repo_cwd: &Path, args: &[&str]) -> anyhow::Result<String> {
|
||||
let output = Command::new("git")
|
||||
.args(args)
|
||||
.current_dir(repo_cwd)
|
||||
.output()
|
||||
.with_context(|| format!("run git {}", args.join(" ")))?;
|
||||
if !output.status.success() {
|
||||
anyhow::bail!("git {} failed with {}", args.join(" "), output.status);
|
||||
}
|
||||
let out = String::from_utf8(output.stdout).context("decode git stdout")?;
|
||||
Ok(out.trim().to_string())
|
||||
}
|
||||
|
||||
fn git_diff(repo_cwd: &Path, args: &[&str]) -> anyhow::Result<Vec<u8>> {
|
||||
let output = Command::new("git")
|
||||
.args(args)
|
||||
.current_dir(repo_cwd)
|
||||
.output()
|
||||
.with_context(|| format!("run git {}", args.join(" ")))?;
|
||||
|
||||
let exit_ok = output.status.code().is_some_and(|c| c == 0 || c == 1);
|
||||
if !exit_ok {
|
||||
anyhow::bail!("git {} failed with {}", args.join(" "), output.status);
|
||||
}
|
||||
Ok(output.stdout)
|
||||
}
|
||||
|
||||
fn git_patch_between_commits(
|
||||
repo_cwd: &Path,
|
||||
base_sha: &str,
|
||||
commit_sha: &str,
|
||||
) -> anyhow::Result<(String, Vec<u8>)> {
|
||||
let patch = git_diff(
|
||||
repo_cwd,
|
||||
&[
|
||||
"diff",
|
||||
"--no-textconv",
|
||||
"--no-ext-diff",
|
||||
"--binary",
|
||||
base_sha,
|
||||
commit_sha,
|
||||
],
|
||||
)?;
|
||||
Ok((base_sha.to_string(), patch))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::TempDir;
|
||||
|
||||
#[test]
|
||||
fn creates_bundle_with_manifest_and_rollout() {
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let repo_dir = TempDir::new().unwrap();
|
||||
let repo_root = repo_dir.path().join("my-repo");
|
||||
std::fs::create_dir_all(&repo_root).unwrap();
|
||||
std::fs::write(repo_root.join("README.md"), "hi\n").unwrap();
|
||||
let init_status = Command::new("git")
|
||||
.args(["init", "-q"])
|
||||
.current_dir(&repo_root)
|
||||
.status()
|
||||
.unwrap();
|
||||
assert!(init_status.success());
|
||||
let add_status = Command::new("git")
|
||||
.args(["add", "."])
|
||||
.current_dir(&repo_root)
|
||||
.status()
|
||||
.unwrap();
|
||||
assert!(add_status.success());
|
||||
let commit_status = Command::new("git")
|
||||
.args([
|
||||
"-c",
|
||||
"user.name=codex",
|
||||
"-c",
|
||||
"user.email=codex@example.com",
|
||||
"commit",
|
||||
"-m",
|
||||
"init",
|
||||
"-q",
|
||||
])
|
||||
.current_dir(&repo_root)
|
||||
.status()
|
||||
.unwrap();
|
||||
assert!(commit_status.success());
|
||||
|
||||
std::fs::write(repo_root.join("README.md"), "changed\n").unwrap();
|
||||
|
||||
let rollout_path = repo_root.join("rollout.jsonl");
|
||||
let rollout = [
|
||||
r#"{"timestamp":"2024-01-01T00:00:00Z","type":"token_count","payload":{"total":1}}"#,
|
||||
r#"{"timestamp":"2024-01-01T00:00:00Z","type":"event_msg","payload":{"type":"user_message","message":"do the thing"}}"#,
|
||||
r#"{"timestamp":"2024-01-01T00:00:01Z","type":"event_msg","payload":{"type":"agent_message","message":"ok"}}"#,
|
||||
]
|
||||
.join("\n");
|
||||
std::fs::write(&rollout_path, format!("{rollout}\n")).unwrap();
|
||||
|
||||
let args = CreateEvalCaseArgs {
|
||||
codex_home: codex_home.path().to_path_buf(),
|
||||
conversation_id: "conv-1".to_string(),
|
||||
rollout_path,
|
||||
start: StartMarker {
|
||||
kind: StartMarkerKind::RolloutLineIndex,
|
||||
value: StartMarkerValue::LineIndex(1),
|
||||
display: "Start now".to_string(),
|
||||
},
|
||||
repo_cwd: repo_root,
|
||||
repo_snapshot: None,
|
||||
notes: Notes {
|
||||
what_went_wrong: "bad".to_string(),
|
||||
what_good_looks_like: "good".to_string(),
|
||||
},
|
||||
include_logs: true,
|
||||
logs_bytes: Some(b"logs".to_vec()),
|
||||
};
|
||||
|
||||
let out = create_eval_case_bundle(&args).unwrap();
|
||||
assert!(!out.case_id.is_empty());
|
||||
assert!(out.path.exists());
|
||||
assert_eq!(out.path.file_name().unwrap(), out.case_id.as_str());
|
||||
assert!(out.path.starts_with(codex_home.path().join("eval-case")));
|
||||
assert!(out.case_id.contains("my-repo"));
|
||||
|
||||
let manifest_text = std::fs::read_to_string(out.path.join("manifest.json")).unwrap();
|
||||
let manifest: EvalCaseManifestV1 = serde_json::from_str(&manifest_text).unwrap();
|
||||
assert_eq!(manifest.version, "v1");
|
||||
assert_eq!(manifest.conversation_id, "conv-1");
|
||||
assert_eq!(manifest.notes, args.notes);
|
||||
assert!(manifest.repo.git.commit != "unknown");
|
||||
assert_eq!(manifest.repo.git.canonical_remote, None);
|
||||
assert_eq!(manifest.repo.git.reproducible, false);
|
||||
assert_eq!(
|
||||
manifest.repo.git.reproducible_reason,
|
||||
Some("no_git_remote".to_string())
|
||||
);
|
||||
assert_eq!(manifest.repo.git.is_dirty, true);
|
||||
assert_eq!(manifest.artifacts.include_logs, true);
|
||||
assert!(!manifest.rollout.start_selector.contains.trim().is_empty());
|
||||
|
||||
assert!(out.path.join("repo.patch").exists());
|
||||
assert!(out.path.join("rollout.jsonl").exists());
|
||||
assert_eq!(
|
||||
std::fs::read(out.path.join("codex-logs.log")).unwrap(),
|
||||
b"logs".to_vec()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn creates_bundle_from_repo_snapshot_commit() {
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let repo_dir = TempDir::new().unwrap();
|
||||
let repo_root = repo_dir.path().join("my-repo");
|
||||
std::fs::create_dir_all(&repo_root).unwrap();
|
||||
|
||||
std::fs::write(repo_root.join("README.md"), "base\n").unwrap();
|
||||
let init_status = Command::new("git")
|
||||
.args(["init", "-q"])
|
||||
.current_dir(&repo_root)
|
||||
.status()
|
||||
.unwrap();
|
||||
assert!(init_status.success());
|
||||
let add_status = Command::new("git")
|
||||
.args(["add", "."])
|
||||
.current_dir(&repo_root)
|
||||
.status()
|
||||
.unwrap();
|
||||
assert!(add_status.success());
|
||||
let commit_status = Command::new("git")
|
||||
.args([
|
||||
"-c",
|
||||
"user.name=codex",
|
||||
"-c",
|
||||
"user.email=codex@example.com",
|
||||
"commit",
|
||||
"-m",
|
||||
"base",
|
||||
"-q",
|
||||
])
|
||||
.current_dir(&repo_root)
|
||||
.status()
|
||||
.unwrap();
|
||||
assert!(commit_status.success());
|
||||
|
||||
let base_sha = git_stdout(&repo_root, &["rev-parse", "HEAD"]).unwrap();
|
||||
|
||||
// Create a snapshot commit.
|
||||
std::fs::write(repo_root.join("README.md"), "snapshot\n").unwrap();
|
||||
std::fs::write(repo_root.join("snap.txt"), "snap\n").unwrap();
|
||||
let add_status = Command::new("git")
|
||||
.args(["add", "."])
|
||||
.current_dir(&repo_root)
|
||||
.status()
|
||||
.unwrap();
|
||||
assert!(add_status.success());
|
||||
let commit_status = Command::new("git")
|
||||
.args([
|
||||
"-c",
|
||||
"user.name=codex",
|
||||
"-c",
|
||||
"user.email=codex@example.com",
|
||||
"commit",
|
||||
"-m",
|
||||
"snapshot",
|
||||
"-q",
|
||||
])
|
||||
.current_dir(&repo_root)
|
||||
.status()
|
||||
.unwrap();
|
||||
assert!(commit_status.success());
|
||||
let snapshot_sha = git_stdout(&repo_root, &["rev-parse", "HEAD"]).unwrap();
|
||||
|
||||
// Dirty the working tree after the snapshot commit; this should not affect repo.patch.
|
||||
std::fs::write(repo_root.join("README.md"), "worktree\n").unwrap();
|
||||
|
||||
let rollout_path = repo_root.join("rollout.jsonl");
|
||||
let rollout = [
|
||||
r#"{"timestamp":"2024-01-01T00:00:00Z","type":"event_msg","payload":{"type":"user_message","message":"snapshot run"}}"#,
|
||||
r#"{"timestamp":"2024-01-01T00:00:01Z","type":"event_msg","payload":{"type":"agent_message","message":"ok"}}"#,
|
||||
]
|
||||
.join("\n");
|
||||
std::fs::write(&rollout_path, format!("{rollout}\n")).unwrap();
|
||||
|
||||
let args = CreateEvalCaseArgs {
|
||||
codex_home: codex_home.path().to_path_buf(),
|
||||
conversation_id: "conv-2".to_string(),
|
||||
rollout_path,
|
||||
start: StartMarker {
|
||||
kind: StartMarkerKind::RolloutLineIndex,
|
||||
value: StartMarkerValue::LineIndex(0),
|
||||
display: "From: test".to_string(),
|
||||
},
|
||||
repo_cwd: repo_root,
|
||||
repo_snapshot: Some(RepoSnapshot {
|
||||
base_sha: base_sha.clone(),
|
||||
commit_sha: snapshot_sha,
|
||||
}),
|
||||
notes: Notes {
|
||||
what_went_wrong: "bad".to_string(),
|
||||
what_good_looks_like: "good".to_string(),
|
||||
},
|
||||
include_logs: false,
|
||||
logs_bytes: None,
|
||||
};
|
||||
|
||||
let out = create_eval_case_bundle(&args).unwrap();
|
||||
assert_eq!(out.path.file_name().unwrap(), out.case_id.as_str());
|
||||
assert!(out.path.starts_with(codex_home.path().join("eval-case")));
|
||||
assert!(out.case_id.contains("my-repo"));
|
||||
let manifest_text = std::fs::read_to_string(out.path.join("manifest.json")).unwrap();
|
||||
let manifest: EvalCaseManifestV1 = serde_json::from_str(&manifest_text).unwrap();
|
||||
assert_eq!(manifest.repo.git.commit, base_sha);
|
||||
|
||||
let patch_text =
|
||||
String::from_utf8(std::fs::read(out.path.join("repo.patch")).unwrap()).unwrap();
|
||||
assert!(
|
||||
patch_text.contains("snapshot"),
|
||||
"patch should include snapshot commit changes"
|
||||
);
|
||||
assert!(
|
||||
!patch_text.contains("worktree"),
|
||||
"patch should not include post-snapshot working tree changes"
|
||||
);
|
||||
assert!(patch_text.contains("snap.txt"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn slices_rollout_using_start_selector_even_when_line_numbers_shift() {
|
||||
let rollout = [
|
||||
r#"{"timestamp":"2024-01-01T00:00:00Z","type":"token_count","payload":{"total":1}}"#,
|
||||
r#"{"timestamp":"2024-01-01T00:00:00Z","type":"turn_context","payload":{"cwd":"/tmp","approval_policy":"never","sandbox_policy":"none","model":"x","summary":"none"}}"#,
|
||||
r#"{"timestamp":"2024-01-01T00:00:00Z","type":"event_msg","payload":{"type":"user_message","message":"run the tests please"}}"#,
|
||||
r#"{"timestamp":"2024-01-01T00:00:01Z","type":"event_msg","payload":{"type":"agent_message","message":"ok"}}"#,
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
let selector = RolloutStartSelector {
|
||||
kind: RolloutStartSelectorKind::EventMsgUserMessageContains,
|
||||
contains: "run the tests".to_string(),
|
||||
after_timestamp: Some("2024-01-01T00:00:00Z".to_string()),
|
||||
};
|
||||
|
||||
let sliced = slice_rollout_from_selector(&rollout, &selector).unwrap();
|
||||
// Should include the turn_context that immediately precedes the matched user_message.
|
||||
assert!(
|
||||
sliced
|
||||
.lines()
|
||||
.next()
|
||||
.unwrap()
|
||||
.contains("\"type\":\"turn_context\"")
|
||||
);
|
||||
assert!(sliced.contains("\"type\":\"user_message\""));
|
||||
assert!(!sliced.contains("\"type\":\"token_count\""));
|
||||
}
|
||||
}
|
||||
@@ -155,8 +155,28 @@ pub struct CodexLogSnapshot {
|
||||
pub thread_id: String,
|
||||
}
|
||||
|
||||
pub struct FeedbackAttachment {
|
||||
pub filename: String,
|
||||
pub content_type: Option<String>,
|
||||
pub bytes: Vec<u8>,
|
||||
}
|
||||
|
||||
impl FeedbackAttachment {
|
||||
pub fn new(filename: String, content_type: Option<String>, bytes: Vec<u8>) -> Self {
|
||||
Self {
|
||||
filename,
|
||||
content_type,
|
||||
bytes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl CodexLogSnapshot {
|
||||
pub(crate) fn as_bytes(&self) -> &[u8] {
|
||||
pub fn from_bytes(thread_id: String, bytes: Vec<u8>) -> Self {
|
||||
Self { bytes, thread_id }
|
||||
}
|
||||
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
&self.bytes
|
||||
}
|
||||
|
||||
@@ -176,6 +196,25 @@ impl CodexLogSnapshot {
|
||||
include_logs: bool,
|
||||
rollout_path: Option<&std::path::Path>,
|
||||
session_source: Option<SessionSource>,
|
||||
) -> Result<()> {
|
||||
self.upload_feedback_with_attachments(
|
||||
classification,
|
||||
reason,
|
||||
include_logs,
|
||||
rollout_path,
|
||||
session_source,
|
||||
Vec::new(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn upload_feedback_with_attachments(
|
||||
&self,
|
||||
classification: &str,
|
||||
reason: Option<&str>,
|
||||
include_logs: bool,
|
||||
rollout_path: Option<&std::path::Path>,
|
||||
session_source: Option<SessionSource>,
|
||||
extra_attachments: Vec<FeedbackAttachment>,
|
||||
) -> Result<()> {
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
@@ -265,6 +304,15 @@ impl CodexLogSnapshot {
|
||||
}));
|
||||
}
|
||||
|
||||
for attachment in extra_attachments {
|
||||
envelope.add_item(EnvelopeItem::Attachment(Attachment {
|
||||
buffer: attachment.bytes,
|
||||
filename: attachment.filename,
|
||||
content_type: attachment.content_type,
|
||||
ty: None,
|
||||
}));
|
||||
}
|
||||
|
||||
client.send_envelope(envelope);
|
||||
client.flush(Some(Duration::from_secs(UPLOAD_TIMEOUT_SECS)));
|
||||
Ok(())
|
||||
|
||||
@@ -141,6 +141,11 @@ pub enum Op {
|
||||
summary: Option<ReasoningSummaryConfig>,
|
||||
},
|
||||
|
||||
/// Enable or disable repository snapshots (ghost commits) for the remainder of the session.
|
||||
///
|
||||
/// When enabled, Codex will capture per-turn git snapshots and record them in the rollout.
|
||||
SetRepoSnapshotting { enabled: bool },
|
||||
|
||||
/// Approve a command execution
|
||||
ExecApproval {
|
||||
/// The id of the submission we are approving
|
||||
|
||||
@@ -36,6 +36,7 @@ codex-common = { workspace = true, features = [
|
||||
"sandbox_summary",
|
||||
] }
|
||||
codex-core = { workspace = true }
|
||||
codex-eval-case = { workspace = true }
|
||||
codex-feedback = { workspace = true }
|
||||
codex-file-search = { workspace = true }
|
||||
codex-login = { workspace = true }
|
||||
|
||||
@@ -806,6 +806,32 @@ impl App {
|
||||
AppEvent::OpenFeedbackConsent { category } => {
|
||||
self.chat_widget.open_feedback_consent(category);
|
||||
}
|
||||
AppEvent::OpenFeedbackBadResultFork => {
|
||||
self.chat_widget.open_feedback_bad_result_fork();
|
||||
}
|
||||
AppEvent::OpenEvalCaptureIntro => {
|
||||
self.chat_widget.open_eval_capture_intro();
|
||||
}
|
||||
AppEvent::EvalCaptureIntroContinue => {
|
||||
self.chat_widget.on_eval_capture_intro_continue();
|
||||
}
|
||||
AppEvent::EvalCaptureSendConsent { upload } => {
|
||||
self.chat_widget.on_eval_capture_send_consent(upload);
|
||||
}
|
||||
AppEvent::OpenEvalCaptureNotes { start_marker } => {
|
||||
self.chat_widget.open_eval_capture_notes(start_marker);
|
||||
}
|
||||
AppEvent::CreateEvalCaptureBundle {
|
||||
start_marker,
|
||||
what_went_wrong,
|
||||
what_good_looks_like,
|
||||
} => {
|
||||
self.chat_widget.create_eval_capture_bundle(
|
||||
start_marker,
|
||||
what_went_wrong,
|
||||
what_good_looks_like,
|
||||
);
|
||||
}
|
||||
AppEvent::LaunchExternalEditor => {
|
||||
if self.chat_widget.external_editor_state() == ExternalEditorState::Active {
|
||||
self.launch_external_editor(tui).await;
|
||||
|
||||
@@ -182,6 +182,33 @@ pub(crate) enum AppEvent {
|
||||
category: FeedbackCategory,
|
||||
},
|
||||
|
||||
/// Open the bad-result fork chooser ("quick feedback" vs "capture eval sample").
|
||||
OpenFeedbackBadResultFork,
|
||||
|
||||
/// Start the "capture eval sample" flow from /feedback.
|
||||
OpenEvalCaptureIntro,
|
||||
|
||||
/// Dismiss the eval capture intro screen and continue to the start picker.
|
||||
EvalCaptureIntroContinue,
|
||||
|
||||
/// Record whether the user consented to upload the eval sample bundle once captured.
|
||||
/// This happens before the start message picker so the full flow is consented up front.
|
||||
EvalCaptureSendConsent {
|
||||
upload: bool,
|
||||
},
|
||||
|
||||
/// Open the freeform notes entry for eval capture.
|
||||
OpenEvalCaptureNotes {
|
||||
start_marker: EvalCaptureStartMarker,
|
||||
},
|
||||
|
||||
/// Create the eval bundle and report the output path/id in history.
|
||||
CreateEvalCaptureBundle {
|
||||
start_marker: EvalCaptureStartMarker,
|
||||
what_went_wrong: String,
|
||||
what_good_looks_like: String,
|
||||
},
|
||||
|
||||
/// Launch the external editor after a normal draw has completed.
|
||||
LaunchExternalEditor,
|
||||
}
|
||||
@@ -193,3 +220,27 @@ pub(crate) enum FeedbackCategory {
|
||||
Bug,
|
||||
Other,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) struct EvalCaptureRepoSnapshot {
|
||||
pub(crate) ghost_commit: String,
|
||||
pub(crate) base_sha: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) enum EvalCaptureStartMarker {
|
||||
RolloutLineIndex {
|
||||
index: usize,
|
||||
timestamp: String,
|
||||
display: String,
|
||||
message: String,
|
||||
repo_snapshot: Option<EvalCaptureRepoSnapshot>,
|
||||
},
|
||||
#[allow(dead_code)]
|
||||
RolloutLineTimestamp {
|
||||
timestamp: String,
|
||||
display: String,
|
||||
message: String,
|
||||
repo_snapshot: Option<EvalCaptureRepoSnapshot>,
|
||||
},
|
||||
}
|
||||
|
||||
376
codex-rs/tui/src/bottom_pane/eval_capture_view.rs
Normal file
376
codex-rs/tui/src/bottom_pane/eval_capture_view.rs
Normal file
@@ -0,0 +1,376 @@
|
||||
use std::cell::RefCell;
|
||||
|
||||
use crossterm::event::KeyCode;
|
||||
use crossterm::event::KeyEvent;
|
||||
use crossterm::event::KeyModifiers;
|
||||
use ratatui::buffer::Buffer;
|
||||
use ratatui::layout::Rect;
|
||||
use ratatui::style::Stylize;
|
||||
use ratatui::text::Line;
|
||||
use ratatui::widgets::Clear;
|
||||
use ratatui::widgets::Paragraph;
|
||||
use ratatui::widgets::StatefulWidgetRef;
|
||||
use ratatui::widgets::Widget;
|
||||
|
||||
use crate::app_event::AppEvent;
|
||||
use crate::app_event::EvalCaptureStartMarker;
|
||||
use crate::app_event_sender::AppEventSender;
|
||||
use crate::render::renderable::Renderable;
|
||||
|
||||
use super::CancellationEvent;
|
||||
use super::bottom_pane_view::BottomPaneView;
|
||||
use super::popup_consts::standard_popup_hint_line;
|
||||
use super::textarea::TextArea;
|
||||
use super::textarea::TextAreaState;
|
||||
|
||||
pub(crate) fn eval_capture_intro_params(
|
||||
app_event_tx: AppEventSender,
|
||||
) -> super::SelectionViewParams {
|
||||
let continue_action: super::SelectionAction = Box::new({
|
||||
let tx = app_event_tx.clone();
|
||||
move |_sender: &AppEventSender| {
|
||||
tx.send(AppEvent::EvalCaptureIntroContinue);
|
||||
}
|
||||
});
|
||||
|
||||
let basic_feedback_action: super::SelectionAction = Box::new({
|
||||
let tx = app_event_tx;
|
||||
move |_sender: &AppEventSender| {
|
||||
tx.send(AppEvent::OpenFeedbackConsent {
|
||||
category: crate::app_event::FeedbackCategory::BadResult,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
let header_lines: Vec<Box<dyn crate::render::renderable::Renderable>> = vec![
|
||||
Line::from("Capture eval sample".bold()).into(),
|
||||
Line::from("").into(),
|
||||
Line::from("Here's everything we capture as an eval sample.".dim()).into(),
|
||||
Line::from("It's stored locally on your computer:".dim()).into(),
|
||||
Line::from(vec![" • ".into(), "manifest.json".into()]).into(),
|
||||
Line::from(vec![" • ".into(), "rollout.jsonl".into()]).into(),
|
||||
Line::from(vec![" • ".into(), "repo.patch".into()]).into(),
|
||||
Line::from(vec![" • ".into(), "codex-logs.log".into()]).into(),
|
||||
Line::from("").into(),
|
||||
Line::from("Next, you can optionally upload this bundle to the team.".dim()).into(),
|
||||
];
|
||||
|
||||
super::SelectionViewParams {
|
||||
footer_hint: Some(standard_popup_hint_line()),
|
||||
title: None,
|
||||
header: Box::new(crate::render::renderable::ColumnRenderable::with(
|
||||
header_lines,
|
||||
)),
|
||||
items: vec![
|
||||
super::SelectionItem {
|
||||
name: "Continue".to_string(),
|
||||
description: Some("Create an eval sample bundle locally.".to_string()),
|
||||
actions: vec![continue_action],
|
||||
dismiss_on_select: true,
|
||||
..Default::default()
|
||||
},
|
||||
super::SelectionItem {
|
||||
name: "Send basic feedback instead".to_string(),
|
||||
description: Some(
|
||||
"Skip eval capture and send basic feedback to the team.".to_string(),
|
||||
),
|
||||
actions: vec![basic_feedback_action],
|
||||
dismiss_on_select: true,
|
||||
..Default::default()
|
||||
},
|
||||
],
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn eval_capture_send_consent_params(
|
||||
app_event_tx: AppEventSender,
|
||||
) -> super::SelectionViewParams {
|
||||
let upload_action: super::SelectionAction = Box::new({
|
||||
let tx = app_event_tx.clone();
|
||||
move |_sender: &AppEventSender| {
|
||||
tx.send(AppEvent::EvalCaptureSendConsent { upload: true });
|
||||
}
|
||||
});
|
||||
|
||||
let skip_action: super::SelectionAction = Box::new({
|
||||
let tx = app_event_tx;
|
||||
move |_sender: &AppEventSender| {
|
||||
tx.send(AppEvent::EvalCaptureSendConsent { upload: false });
|
||||
}
|
||||
});
|
||||
|
||||
let header_lines: Vec<Box<dyn crate::render::renderable::Renderable>> = vec![
|
||||
Line::from("Send eval sample to the codex team?".bold()).into(),
|
||||
Line::from("").into(),
|
||||
Line::from("If you choose Yes, it will upload the full bundle".dim()).into(),
|
||||
Line::from("to the team:".dim()).into(),
|
||||
Line::from(vec![" • ".into(), "manifest.json".into()]).into(),
|
||||
Line::from(vec![" • ".into(), "rollout.jsonl".into()]).into(),
|
||||
Line::from(vec![" • ".into(), "repo.patch".into()]).into(),
|
||||
Line::from(vec![" • ".into(), "codex-logs.log".into()]).into(),
|
||||
Line::from("").into(),
|
||||
Line::from("This may include file paths, code snippets, and tool outputs.".dim()).into(),
|
||||
];
|
||||
|
||||
super::SelectionViewParams {
|
||||
footer_hint: Some(standard_popup_hint_line()),
|
||||
title: None,
|
||||
header: Box::new(crate::render::renderable::ColumnRenderable::with(
|
||||
header_lines,
|
||||
)),
|
||||
items: vec![
|
||||
super::SelectionItem {
|
||||
name: "Yes".to_string(),
|
||||
description: Some(
|
||||
"Upload the full bundle to the team for troubleshooting and model improvement."
|
||||
.to_string(),
|
||||
),
|
||||
actions: vec![upload_action],
|
||||
dismiss_on_select: true,
|
||||
..Default::default()
|
||||
},
|
||||
super::SelectionItem {
|
||||
name: "No".to_string(),
|
||||
description: Some("Keep it local only.".to_string()),
|
||||
actions: vec![skip_action],
|
||||
dismiss_on_select: true,
|
||||
..Default::default()
|
||||
},
|
||||
],
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn eval_capture_start_picker_params(
|
||||
app_event_tx: AppEventSender,
|
||||
options: Vec<EvalCaptureStartMarker>,
|
||||
initial_selected_idx: Option<usize>,
|
||||
) -> super::SelectionViewParams {
|
||||
let items = options
|
||||
.into_iter()
|
||||
.map(|marker| {
|
||||
let action: super::SelectionAction = Box::new({
|
||||
let tx = app_event_tx.clone();
|
||||
let marker = marker.clone();
|
||||
move |_sender: &AppEventSender| {
|
||||
tx.send(AppEvent::OpenEvalCaptureNotes {
|
||||
start_marker: marker.clone(),
|
||||
});
|
||||
}
|
||||
});
|
||||
let disabled_reason = (!marker_has_repo_snapshot(&marker))
|
||||
.then_some("No repo snapshot available for this message.".to_string());
|
||||
super::SelectionItem {
|
||||
name: marker_display_name(&marker),
|
||||
// For the eval-capture start picker, we render the full message directly as the
|
||||
// row name (including a relative timestamp) to avoid duplicating it in the
|
||||
// description column.
|
||||
actions: vec![action],
|
||||
dismiss_on_select: true,
|
||||
disabled_reason,
|
||||
..Default::default()
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
super::SelectionViewParams {
|
||||
footer_hint: Some(standard_popup_hint_line()),
|
||||
title: Some("Pick the start message".to_string()),
|
||||
subtitle: Some(
|
||||
"Pick the first instruction for the specific code change or interaction you wanted to give feedback on."
|
||||
.to_string(),
|
||||
),
|
||||
items,
|
||||
initial_selected_idx,
|
||||
show_numbers: true,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
fn marker_display_name(marker: &EvalCaptureStartMarker) -> String {
|
||||
match marker {
|
||||
EvalCaptureStartMarker::RolloutLineIndex { display, .. }
|
||||
| EvalCaptureStartMarker::RolloutLineTimestamp { display, .. } => display.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn marker_has_repo_snapshot(marker: &EvalCaptureStartMarker) -> bool {
|
||||
match marker {
|
||||
EvalCaptureStartMarker::RolloutLineIndex { repo_snapshot, .. }
|
||||
| EvalCaptureStartMarker::RolloutLineTimestamp { repo_snapshot, .. } => {
|
||||
repo_snapshot.is_some()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct EvalCaptureNotesView {
|
||||
start_marker: EvalCaptureStartMarker,
|
||||
app_event_tx: AppEventSender,
|
||||
|
||||
textarea: TextArea,
|
||||
textarea_state: RefCell<TextAreaState>,
|
||||
complete: bool,
|
||||
}
|
||||
|
||||
impl EvalCaptureNotesView {
|
||||
pub(crate) fn new(start_marker: EvalCaptureStartMarker, app_event_tx: AppEventSender) -> Self {
|
||||
Self {
|
||||
start_marker,
|
||||
app_event_tx,
|
||||
textarea: TextArea::new(),
|
||||
textarea_state: RefCell::new(TextAreaState::default()),
|
||||
complete: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn submit(&mut self) {
|
||||
let what_went_wrong = self.textarea.text().trim().to_string();
|
||||
self.app_event_tx.send(AppEvent::CreateEvalCaptureBundle {
|
||||
start_marker: self.start_marker.clone(),
|
||||
what_went_wrong,
|
||||
what_good_looks_like: String::new(),
|
||||
});
|
||||
self.complete = true;
|
||||
}
|
||||
}
|
||||
|
||||
impl BottomPaneView for EvalCaptureNotesView {
|
||||
fn handle_key_event(&mut self, key_event: KeyEvent) {
|
||||
match key_event {
|
||||
KeyEvent {
|
||||
code: KeyCode::Esc, ..
|
||||
} => {
|
||||
self.on_ctrl_c();
|
||||
}
|
||||
KeyEvent {
|
||||
code: KeyCode::Enter,
|
||||
modifiers: KeyModifiers::NONE,
|
||||
..
|
||||
} => {
|
||||
self.submit();
|
||||
}
|
||||
other => {
|
||||
self.textarea.input(other);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn on_ctrl_c(&mut self) -> CancellationEvent {
|
||||
self.complete = true;
|
||||
CancellationEvent::Handled
|
||||
}
|
||||
|
||||
fn is_complete(&self) -> bool {
|
||||
self.complete
|
||||
}
|
||||
|
||||
fn handle_paste(&mut self, pasted: String) -> bool {
|
||||
if pasted.is_empty() {
|
||||
return false;
|
||||
}
|
||||
self.textarea.insert_str(&pasted);
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Renderable for EvalCaptureNotesView {
|
||||
fn desired_height(&self, width: u16) -> u16 {
|
||||
self.header_lines(width).len() as u16 + self.input_height(width) + 2u16
|
||||
}
|
||||
|
||||
fn cursor_pos(&self, area: Rect) -> Option<(u16, u16)> {
|
||||
if area.height < 2 || area.width <= 2 {
|
||||
return None;
|
||||
}
|
||||
let text_area_height = self.input_height(area.width).saturating_sub(1);
|
||||
if text_area_height == 0 {
|
||||
return None;
|
||||
}
|
||||
let top_line_count = self.header_lines(area.width).len() as u16;
|
||||
let textarea_rect = Rect {
|
||||
x: area.x.saturating_add(2),
|
||||
y: area.y.saturating_add(top_line_count).saturating_add(1),
|
||||
width: area.width.saturating_sub(2),
|
||||
height: text_area_height,
|
||||
};
|
||||
let state = *self.textarea_state.borrow();
|
||||
self.textarea.cursor_pos_with_state(textarea_rect, state)
|
||||
}
|
||||
|
||||
fn render(&self, area: Rect, buf: &mut Buffer) {
|
||||
if area.height == 0 || area.width == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let placeholder = "Type here.";
|
||||
|
||||
let input_height = self.input_height(area.width);
|
||||
let header_lines = self.header_lines(area.width);
|
||||
let header_height = header_lines.len() as u16;
|
||||
|
||||
let title_area = Rect {
|
||||
x: area.x,
|
||||
y: area.y,
|
||||
width: area.width,
|
||||
height: header_height,
|
||||
};
|
||||
let input_area = Rect {
|
||||
x: area.x,
|
||||
y: area.y.saturating_add(header_height),
|
||||
width: area.width,
|
||||
height: input_height,
|
||||
};
|
||||
|
||||
Clear.render(area, buf);
|
||||
|
||||
Paragraph::new(header_lines).render(title_area, buf);
|
||||
|
||||
let textarea_rect = Rect {
|
||||
x: input_area.x.saturating_add(2),
|
||||
y: input_area.y.saturating_add(1),
|
||||
width: input_area.width.saturating_sub(2),
|
||||
height: input_area.height.saturating_sub(1),
|
||||
};
|
||||
let mut state = self.textarea_state.borrow_mut();
|
||||
StatefulWidgetRef::render_ref(&(&self.textarea), textarea_rect, buf, &mut state);
|
||||
if self.textarea.text().is_empty() {
|
||||
Paragraph::new(Line::from(placeholder.dim())).render(textarea_rect, buf);
|
||||
}
|
||||
|
||||
let hint_area = Rect {
|
||||
x: area.x,
|
||||
y: area
|
||||
.y
|
||||
.saturating_add(header_height)
|
||||
.saturating_add(input_height)
|
||||
.saturating_add(1),
|
||||
width: area.width,
|
||||
height: 1,
|
||||
};
|
||||
Paragraph::new(Line::from("Enter to continue • Esc to go back".dim()))
|
||||
.render(hint_area, buf);
|
||||
}
|
||||
}
|
||||
|
||||
impl EvalCaptureNotesView {
|
||||
fn header_lines(&self, width: u16) -> Vec<Line<'static>> {
|
||||
let title = "What went wrong with this interaction, and what should have happened for this to be a correct, delightful response?";
|
||||
let subtitle = "Please be specific, and can refer to any aspect of model behavior, including tool calls, code changes, or messages.";
|
||||
let usable_width = width.saturating_sub(2).max(1) as usize;
|
||||
let mut lines = Vec::new();
|
||||
for line in textwrap::wrap(title, usable_width) {
|
||||
lines.push(Line::from(line.into_owned().bold()));
|
||||
}
|
||||
for line in textwrap::wrap(subtitle, usable_width) {
|
||||
lines.push(Line::from(line.into_owned().dim()));
|
||||
}
|
||||
lines
|
||||
}
|
||||
|
||||
fn input_height(&self, width: u16) -> u16 {
|
||||
let usable_width = width.saturating_sub(2);
|
||||
let text_height = self.textarea.desired_height(usable_width).clamp(1, 8);
|
||||
text_height.saturating_add(1).min(9)
|
||||
}
|
||||
}
|
||||
@@ -387,7 +387,11 @@ fn make_feedback_item(
|
||||
category: FeedbackCategory,
|
||||
) -> super::SelectionItem {
|
||||
let action: super::SelectionAction = Box::new(move |_sender: &AppEventSender| {
|
||||
app_event_tx.send(AppEvent::OpenFeedbackConsent { category });
|
||||
if category == FeedbackCategory::BadResult {
|
||||
app_event_tx.send(AppEvent::OpenFeedbackBadResultFork);
|
||||
} else {
|
||||
app_event_tx.send(AppEvent::OpenFeedbackConsent { category });
|
||||
}
|
||||
});
|
||||
super::SelectionItem {
|
||||
name: name.to_string(),
|
||||
@@ -398,6 +402,52 @@ fn make_feedback_item(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn feedback_bad_result_fork_params(
|
||||
app_event_tx: AppEventSender,
|
||||
) -> super::SelectionViewParams {
|
||||
let quick_action: super::SelectionAction = Box::new({
|
||||
let tx = app_event_tx.clone();
|
||||
move |_sender: &AppEventSender| {
|
||||
tx.send(AppEvent::OpenFeedbackConsent {
|
||||
category: FeedbackCategory::BadResult,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
let capture_action: super::SelectionAction = Box::new({
|
||||
let tx = app_event_tx;
|
||||
move |_sender: &AppEventSender| {
|
||||
tx.send(AppEvent::OpenEvalCaptureIntro);
|
||||
}
|
||||
});
|
||||
|
||||
super::SelectionViewParams {
|
||||
title: Some("Bad result".to_string()),
|
||||
subtitle: Some("Choose the fastest way to help us improve.".to_string()),
|
||||
items: vec![
|
||||
super::SelectionItem {
|
||||
name: "Quick feedback".to_string(),
|
||||
description: Some(
|
||||
"Send a short note (and optionally logs) to maintainers.".to_string(),
|
||||
),
|
||||
actions: vec![quick_action],
|
||||
dismiss_on_select: true,
|
||||
..Default::default()
|
||||
},
|
||||
super::SelectionItem {
|
||||
name: "Capture eval sample".to_string(),
|
||||
description: Some(
|
||||
"Create a local bundle you can rerun as an eval case later.".to_string(),
|
||||
),
|
||||
actions: vec![capture_action],
|
||||
dismiss_on_select: true,
|
||||
..Default::default()
|
||||
},
|
||||
],
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the upload consent popup params for a given feedback category.
|
||||
pub(crate) fn feedback_upload_consent_params(
|
||||
app_event_tx: AppEventSender,
|
||||
|
||||
@@ -54,6 +54,7 @@ pub(crate) struct SelectionViewParams {
|
||||
pub items: Vec<SelectionItem>,
|
||||
pub is_searchable: bool,
|
||||
pub search_placeholder: Option<String>,
|
||||
pub show_numbers: bool,
|
||||
pub header: Box<dyn Renderable>,
|
||||
pub initial_selected_idx: Option<usize>,
|
||||
}
|
||||
@@ -67,12 +68,52 @@ impl Default for SelectionViewParams {
|
||||
items: Vec::new(),
|
||||
is_searchable: false,
|
||||
search_placeholder: None,
|
||||
show_numbers: true,
|
||||
header: Box::new(()),
|
||||
initial_selected_idx: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct WrappedText {
|
||||
text: String,
|
||||
dim: bool,
|
||||
}
|
||||
|
||||
impl WrappedText {
|
||||
fn dim(text: String) -> Self {
|
||||
Self { text, dim: true }
|
||||
}
|
||||
|
||||
fn wrapped_lines(&self, width: u16) -> Vec<Line<'static>> {
|
||||
let usable_width = width.max(1) as usize;
|
||||
textwrap::wrap(&self.text, usable_width)
|
||||
.into_iter()
|
||||
.map(|line| {
|
||||
let span = Span::from(line.into_owned());
|
||||
if self.dim {
|
||||
Line::from(span.dim())
|
||||
} else {
|
||||
Line::from(span)
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl Renderable for WrappedText {
|
||||
fn render(&self, area: Rect, buf: &mut Buffer) {
|
||||
Paragraph::new(self.wrapped_lines(area.width)).render(area, buf);
|
||||
}
|
||||
|
||||
fn desired_height(&self, width: u16) -> u16 {
|
||||
self.wrapped_lines(width)
|
||||
.len()
|
||||
.try_into()
|
||||
.unwrap_or(u16::MAX)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct ListSelectionView {
|
||||
footer_hint: Option<Line<'static>>,
|
||||
items: Vec<SelectionItem>,
|
||||
@@ -80,6 +121,7 @@ pub(crate) struct ListSelectionView {
|
||||
complete: bool,
|
||||
app_event_tx: AppEventSender,
|
||||
is_searchable: bool,
|
||||
show_numbers: bool,
|
||||
search_query: String,
|
||||
search_placeholder: Option<String>,
|
||||
filtered_indices: Vec<usize>,
|
||||
@@ -93,7 +135,7 @@ impl ListSelectionView {
|
||||
let mut header = params.header;
|
||||
if params.title.is_some() || params.subtitle.is_some() {
|
||||
let title = params.title.map(|title| Line::from(title.bold()));
|
||||
let subtitle = params.subtitle.map(|subtitle| Line::from(subtitle.dim()));
|
||||
let subtitle = params.subtitle.map(WrappedText::dim);
|
||||
header = Box::new(ColumnRenderable::with([
|
||||
header,
|
||||
Box::new(title),
|
||||
@@ -107,6 +149,7 @@ impl ListSelectionView {
|
||||
complete: false,
|
||||
app_event_tx,
|
||||
is_searchable: params.is_searchable,
|
||||
show_numbers: params.show_numbers,
|
||||
search_query: String::new(),
|
||||
search_placeholder: if params.is_searchable {
|
||||
params.search_placeholder
|
||||
@@ -198,13 +241,9 @@ impl ListSelectionView {
|
||||
};
|
||||
let name_with_marker = format!("{name}{marker}");
|
||||
let n = visible_idx + 1;
|
||||
let wrap_prefix = if self.is_searchable {
|
||||
// The number keys don't work when search is enabled (since we let the
|
||||
// numbers be used for the search query).
|
||||
format!("{prefix} ")
|
||||
} else {
|
||||
format!("{prefix} {n}. ")
|
||||
};
|
||||
let wrap_prefix = (self.show_numbers && !self.is_searchable)
|
||||
.then(|| format!("{prefix} {n}. "))
|
||||
.unwrap_or_else(|| format!("{prefix} "));
|
||||
let wrap_prefix_width = UnicodeWidthStr::width(wrap_prefix.as_str());
|
||||
let display_name = format!("{wrap_prefix}{name_with_marker}");
|
||||
let description = is_selected
|
||||
@@ -386,6 +425,7 @@ impl BottomPaneView for ListSelectionView {
|
||||
.to_digit(10)
|
||||
.map(|d| d as usize)
|
||||
.and_then(|d| d.checked_sub(1))
|
||||
.filter(|_| self.show_numbers)
|
||||
&& idx < self.items.len()
|
||||
&& self
|
||||
.items
|
||||
|
||||
@@ -26,6 +26,7 @@ mod chat_composer;
|
||||
mod chat_composer_history;
|
||||
mod command_popup;
|
||||
pub mod custom_prompt_view;
|
||||
mod eval_capture_view;
|
||||
mod experimental_features_view;
|
||||
mod file_search_popup;
|
||||
mod footer;
|
||||
@@ -34,6 +35,11 @@ mod prompt_args;
|
||||
mod skill_popup;
|
||||
pub(crate) use list_selection_view::SelectionViewParams;
|
||||
mod feedback_view;
|
||||
pub(crate) use eval_capture_view::EvalCaptureNotesView;
|
||||
pub(crate) use eval_capture_view::eval_capture_intro_params;
|
||||
pub(crate) use eval_capture_view::eval_capture_send_consent_params;
|
||||
pub(crate) use eval_capture_view::eval_capture_start_picker_params;
|
||||
pub(crate) use feedback_view::feedback_bad_result_fork_params;
|
||||
pub(crate) use feedback_view::feedback_selection_params;
|
||||
pub(crate) use feedback_view::feedback_upload_consent_params;
|
||||
mod paste_burst;
|
||||
|
||||
@@ -110,7 +110,10 @@ fn compute_desc_col(
|
||||
let max_name_width = rows_all
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| visible_range.contains(i))
|
||||
.filter(|(i, row)| {
|
||||
visible_range.contains(i)
|
||||
&& (row.description.is_some() || row.disabled_reason.is_some())
|
||||
})
|
||||
.map(|(_, r)| {
|
||||
let mut spans: Vec<Span> = vec![r.name.clone().into()];
|
||||
if r.disabled_reason.is_some() {
|
||||
|
||||
@@ -132,7 +132,9 @@ use self::session_header::SessionHeader;
|
||||
use crate::streaming::controller::StreamController;
|
||||
use std::path::Path;
|
||||
|
||||
use chrono::DateTime;
|
||||
use chrono::Local;
|
||||
use chrono::Utc;
|
||||
use codex_common::approval_presets::ApprovalPreset;
|
||||
use codex_common::approval_presets::builtin_approval_presets;
|
||||
use codex_core::AuthManager;
|
||||
@@ -368,6 +370,9 @@ pub(crate) struct ChatWidget {
|
||||
feedback: codex_feedback::CodexFeedback,
|
||||
// Current session rollout path (if known)
|
||||
current_rollout_path: Option<PathBuf>,
|
||||
// Whether the user consented to upload the captured eval sample to the team.
|
||||
// This is captured before the start picker so the flow is consented up front.
|
||||
eval_capture_send_to_team: Option<bool>,
|
||||
external_editor_state: ExternalEditorState,
|
||||
}
|
||||
|
||||
@@ -438,6 +443,9 @@ impl ChatWidget {
|
||||
self.set_skills(None);
|
||||
self.conversation_id = Some(event.session_id);
|
||||
self.current_rollout_path = Some(event.rollout_path.clone());
|
||||
// Repo snapshots are stored locally and are required for capturing eval cases
|
||||
// from arbitrary earlier user messages.
|
||||
self.submit_op(Op::SetRepoSnapshotting { enabled: true });
|
||||
let initial_messages = event.initial_messages.clone();
|
||||
let model_for_header = event.model.clone();
|
||||
self.session_header.set_model(&model_for_header);
|
||||
@@ -506,6 +514,479 @@ impl ChatWidget {
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
pub(crate) fn open_feedback_bad_result_fork(&mut self) {
|
||||
let params = crate::bottom_pane::feedback_bad_result_fork_params(self.app_event_tx.clone());
|
||||
self.bottom_pane.show_selection_view(params);
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
pub(crate) fn open_eval_capture_intro(&mut self) {
|
||||
self.eval_capture_send_to_team = None;
|
||||
if !crate::eval_capture_state::should_show_eval_capture_intro(&self.config.codex_home) {
|
||||
self.open_eval_capture_send_consent();
|
||||
return;
|
||||
}
|
||||
|
||||
let params = crate::bottom_pane::eval_capture_intro_params(self.app_event_tx.clone());
|
||||
self.bottom_pane.show_selection_view(params);
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
pub(crate) fn on_eval_capture_intro_continue(&mut self) {
|
||||
if let Err(err) =
|
||||
crate::eval_capture_state::persist_eval_capture_intro_dismissed(&self.config.codex_home)
|
||||
{
|
||||
self.add_error_message(format!("Failed to persist eval capture preference: {err}"));
|
||||
}
|
||||
|
||||
self.open_eval_capture_send_consent();
|
||||
}
|
||||
|
||||
fn open_eval_capture_send_consent(&mut self) {
|
||||
let params =
|
||||
crate::bottom_pane::eval_capture_send_consent_params(self.app_event_tx.clone());
|
||||
self.bottom_pane.show_selection_view(params);
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
pub(crate) fn on_eval_capture_send_consent(&mut self, upload: bool) {
|
||||
self.eval_capture_send_to_team = Some(upload);
|
||||
self.open_eval_capture_start_picker();
|
||||
}
|
||||
|
||||
fn open_eval_capture_start_picker(&mut self) {
|
||||
let Some(rollout_path) = self.current_rollout_path.as_ref() else {
|
||||
self.add_error_message(
|
||||
"No rollout file for this session; cannot capture eval sample.".to_string(),
|
||||
);
|
||||
return;
|
||||
};
|
||||
|
||||
let text = match std::fs::read_to_string(rollout_path) {
|
||||
Ok(text) => text,
|
||||
Err(err) => {
|
||||
self.add_error_message(format!(
|
||||
"Failed to read rollout file {}: {err}",
|
||||
rollout_path.display()
|
||||
));
|
||||
return;
|
||||
}
|
||||
};
|
||||
// List every user message as a candidate start point, and attach the next
|
||||
// ghost snapshot to the most recent user message. Markers without snapshots
|
||||
// will be disabled in the picker.
|
||||
let mut options = Vec::new();
|
||||
let mut last_user_marker_idx: Option<usize> = None;
|
||||
let mut max_timestamp: Option<DateTime<Utc>> = None;
|
||||
for (idx, line) in text.lines().enumerate() {
|
||||
let Ok(rollout_line) =
|
||||
serde_json::from_str::<codex_protocol::protocol::RolloutLine>(line)
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
if let Some(ts) = parse_rollout_timestamp_utc(&rollout_line.timestamp) {
|
||||
max_timestamp = Some(max_timestamp.map_or(ts, |cur| cur.max(ts)));
|
||||
}
|
||||
match rollout_line.item {
|
||||
codex_protocol::protocol::RolloutItem::ResponseItem(
|
||||
codex_protocol::models::ResponseItem::Message { role, content, .. },
|
||||
) if role == "user" => {
|
||||
let Some(message) = extract_user_message_text(&content) else {
|
||||
continue;
|
||||
};
|
||||
if !should_show_eval_capture_start_message(&message) {
|
||||
continue;
|
||||
}
|
||||
options.push(crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
|
||||
index: idx,
|
||||
timestamp: rollout_line.timestamp.clone(),
|
||||
// Filled in below once we know the latest timestamp in the rollout.
|
||||
display: String::new(),
|
||||
message,
|
||||
repo_snapshot: None,
|
||||
});
|
||||
last_user_marker_idx = Some(options.len().saturating_sub(1));
|
||||
}
|
||||
codex_protocol::protocol::RolloutItem::ResponseItem(
|
||||
codex_protocol::models::ResponseItem::GhostSnapshot { ghost_commit },
|
||||
) => {
|
||||
if let Some(marker_idx) = last_user_marker_idx
|
||||
&& let Some(base_sha) = ghost_commit.parent()
|
||||
{
|
||||
if let Some(crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
|
||||
repo_snapshot,
|
||||
..
|
||||
}) = options.get_mut(marker_idx)
|
||||
{
|
||||
*repo_snapshot = Some(crate::app_event::EvalCaptureRepoSnapshot {
|
||||
ghost_commit: ghost_commit.id().to_string(),
|
||||
base_sha: Some(base_sha.to_string()),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(now) = max_timestamp {
|
||||
for marker in &mut options {
|
||||
match marker {
|
||||
crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
|
||||
timestamp,
|
||||
display,
|
||||
message,
|
||||
..
|
||||
}
|
||||
| crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
|
||||
timestamp,
|
||||
display,
|
||||
message,
|
||||
..
|
||||
} => {
|
||||
let Some(then) = parse_rollout_timestamp_utc(timestamp) else {
|
||||
*display = message.clone();
|
||||
continue;
|
||||
};
|
||||
let delta = format_relative_time_ago(now, then);
|
||||
*display = format!("[{delta}] {message}");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for marker in &mut options {
|
||||
match marker {
|
||||
crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
|
||||
display,
|
||||
message,
|
||||
..
|
||||
}
|
||||
| crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
|
||||
display,
|
||||
message,
|
||||
..
|
||||
} => {
|
||||
*display = message.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
options.sort_by(|a, b| match (a, b) {
|
||||
(
|
||||
crate::app_event::EvalCaptureStartMarker::RolloutLineIndex { index: a, .. },
|
||||
crate::app_event::EvalCaptureStartMarker::RolloutLineIndex { index: b, .. },
|
||||
) => b.cmp(a),
|
||||
_ => std::cmp::Ordering::Equal,
|
||||
});
|
||||
|
||||
if !options.iter().any(|marker| match marker {
|
||||
crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
|
||||
repo_snapshot, ..
|
||||
}
|
||||
| crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
|
||||
repo_snapshot,
|
||||
..
|
||||
} => repo_snapshot.is_some(),
|
||||
}) {
|
||||
self.add_error_message(
|
||||
"No repo snapshots are available yet in this session. Try again after another message/turn (or in a new session)."
|
||||
.to_string(),
|
||||
);
|
||||
self.open_feedback_consent(crate::app_event::FeedbackCategory::BadResult);
|
||||
return;
|
||||
}
|
||||
|
||||
let initial_selected_idx = options.iter().position(|marker| match marker {
|
||||
crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
|
||||
repo_snapshot, ..
|
||||
}
|
||||
| crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
|
||||
repo_snapshot,
|
||||
..
|
||||
} => repo_snapshot.is_some(),
|
||||
});
|
||||
let params = crate::bottom_pane::eval_capture_start_picker_params(
|
||||
self.app_event_tx.clone(),
|
||||
options,
|
||||
initial_selected_idx,
|
||||
);
|
||||
self.bottom_pane.show_selection_view(params);
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
pub(crate) fn open_eval_capture_notes(
|
||||
&mut self,
|
||||
start_marker: crate::app_event::EvalCaptureStartMarker,
|
||||
) {
|
||||
let view =
|
||||
crate::bottom_pane::EvalCaptureNotesView::new(start_marker, self.app_event_tx.clone());
|
||||
self.bottom_pane.show_view(Box::new(view));
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
pub(crate) fn create_eval_capture_bundle(
|
||||
&mut self,
|
||||
start_marker: crate::app_event::EvalCaptureStartMarker,
|
||||
what_went_wrong: String,
|
||||
what_good_looks_like: String,
|
||||
) {
|
||||
let Some(conversation_id) = self.conversation_id else {
|
||||
self.add_error_message(
|
||||
"No active conversation; cannot capture eval sample.".to_string(),
|
||||
);
|
||||
return;
|
||||
};
|
||||
let Some(rollout_path) = self.current_rollout_path.clone() else {
|
||||
self.add_error_message(
|
||||
"No rollout file for this session; cannot capture eval sample.".to_string(),
|
||||
);
|
||||
return;
|
||||
};
|
||||
|
||||
let (start, repo_snapshot) = match start_marker {
|
||||
crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
|
||||
index,
|
||||
timestamp: _,
|
||||
display,
|
||||
message: _,
|
||||
repo_snapshot,
|
||||
} => {
|
||||
let Some(snapshot) = repo_snapshot else {
|
||||
self.add_error_message(
|
||||
"No repo snapshot available for the selected start marker; cannot capture eval sample."
|
||||
.to_string(),
|
||||
);
|
||||
return;
|
||||
};
|
||||
let Some(base_sha) = snapshot.base_sha else {
|
||||
self.add_error_message(
|
||||
"No base sha for selected repo snapshot; cannot capture eval sample."
|
||||
.to_string(),
|
||||
);
|
||||
return;
|
||||
};
|
||||
let repo_snapshot = Some(codex_eval_case::RepoSnapshot {
|
||||
base_sha,
|
||||
commit_sha: snapshot.ghost_commit,
|
||||
});
|
||||
(
|
||||
codex_eval_case::StartMarker {
|
||||
kind: codex_eval_case::StartMarkerKind::RolloutLineIndex,
|
||||
value: codex_eval_case::StartMarkerValue::LineIndex(index as u64),
|
||||
display,
|
||||
},
|
||||
repo_snapshot,
|
||||
)
|
||||
}
|
||||
crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
|
||||
timestamp,
|
||||
display,
|
||||
message: _,
|
||||
repo_snapshot,
|
||||
} => {
|
||||
let Some(snapshot) = repo_snapshot else {
|
||||
self.add_error_message(
|
||||
"No repo snapshot available for the selected start marker; cannot capture eval sample."
|
||||
.to_string(),
|
||||
);
|
||||
return;
|
||||
};
|
||||
let Some(base_sha) = snapshot.base_sha else {
|
||||
self.add_error_message(
|
||||
"No base sha for selected repo snapshot; cannot capture eval sample."
|
||||
.to_string(),
|
||||
);
|
||||
return;
|
||||
};
|
||||
let repo_snapshot = Some(codex_eval_case::RepoSnapshot {
|
||||
base_sha,
|
||||
commit_sha: snapshot.ghost_commit,
|
||||
});
|
||||
(
|
||||
codex_eval_case::StartMarker {
|
||||
kind: codex_eval_case::StartMarkerKind::RolloutLineTimestamp,
|
||||
value: codex_eval_case::StartMarkerValue::Timestamp(timestamp),
|
||||
display,
|
||||
},
|
||||
repo_snapshot,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let logs_bytes = Some(
|
||||
self.feedback
|
||||
.snapshot(Some(conversation_id))
|
||||
.as_bytes()
|
||||
.to_vec(),
|
||||
);
|
||||
|
||||
let args = codex_eval_case::CreateEvalCaseArgs {
|
||||
codex_home: self.config.codex_home.clone(),
|
||||
conversation_id: conversation_id.to_string(),
|
||||
rollout_path,
|
||||
start,
|
||||
repo_cwd: self.config.cwd.clone(),
|
||||
repo_snapshot,
|
||||
notes: codex_eval_case::Notes {
|
||||
what_went_wrong,
|
||||
what_good_looks_like,
|
||||
},
|
||||
include_logs: true,
|
||||
logs_bytes,
|
||||
};
|
||||
|
||||
match codex_eval_case::create_eval_case_bundle(&args) {
|
||||
Ok(result) => {
|
||||
use ratatui::style::Stylize as _;
|
||||
use ratatui::text::Line;
|
||||
|
||||
let case_id = result.case_id;
|
||||
let path = result.path.display().to_string();
|
||||
let lines = vec![
|
||||
Line::from("• Eval sample captured (local only).".to_string()),
|
||||
"".into(),
|
||||
Line::from(vec![" Case ID: ".into(), case_id.clone().bold()]),
|
||||
Line::from(vec![" Path: ".into(), path.clone().cyan().underlined()]),
|
||||
];
|
||||
self.app_event_tx
|
||||
.send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(
|
||||
crate::history_cell::PlainHistoryCell::new(lines),
|
||||
)));
|
||||
|
||||
if self.eval_capture_send_to_team.unwrap_or(false) {
|
||||
self.upload_eval_capture_bundle(case_id, path);
|
||||
} else {
|
||||
self.eval_capture_upload_skipped(case_id, path);
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
self.app_event_tx
|
||||
.send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(
|
||||
crate::history_cell::new_error_event(format!(
|
||||
"Failed to create eval sample bundle: {err}"
|
||||
)),
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn upload_eval_capture_bundle(&mut self, case_id: String, path: String) {
|
||||
let Some(conversation_id) = self.conversation_id else {
|
||||
self.add_error_message(
|
||||
"No active conversation; cannot upload eval sample.".to_string(),
|
||||
);
|
||||
return;
|
||||
};
|
||||
|
||||
let bundle_dir = std::path::PathBuf::from(&path);
|
||||
let manifest_path = bundle_dir.join("manifest.json");
|
||||
let rollout_path = bundle_dir.join("rollout.jsonl");
|
||||
let repo_patch_path = bundle_dir.join("repo.patch");
|
||||
let logs_path = bundle_dir.join("codex-logs.log");
|
||||
|
||||
let read_bytes = |p: &std::path::Path| std::fs::read(p).map_err(anyhow::Error::from);
|
||||
|
||||
let logs_bytes = match read_bytes(&logs_path) {
|
||||
Ok(bytes) => bytes,
|
||||
Err(err) => {
|
||||
self.add_error_message(format!(
|
||||
"Failed to read eval sample logs {}: {err}",
|
||||
logs_path.display()
|
||||
));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let snapshot = self.feedback.snapshot(Some(conversation_id));
|
||||
let snapshot =
|
||||
codex_feedback::CodexLogSnapshot::from_bytes(snapshot.thread_id.clone(), logs_bytes);
|
||||
|
||||
let extra_attachments = match (read_bytes(&manifest_path), read_bytes(&repo_patch_path)) {
|
||||
(Ok(manifest_bytes), Ok(repo_patch_bytes)) => vec![
|
||||
codex_feedback::FeedbackAttachment::new(
|
||||
"manifest.json".to_string(),
|
||||
Some("application/json".to_string()),
|
||||
manifest_bytes,
|
||||
),
|
||||
codex_feedback::FeedbackAttachment::new(
|
||||
"repo.patch".to_string(),
|
||||
Some("text/plain".to_string()),
|
||||
repo_patch_bytes,
|
||||
),
|
||||
],
|
||||
(manifest_err, repo_err) => {
|
||||
if let Err(err) = manifest_err {
|
||||
self.add_error_message(format!(
|
||||
"Failed to read eval sample manifest {}: {err}",
|
||||
manifest_path.display()
|
||||
));
|
||||
}
|
||||
if let Err(err) = repo_err {
|
||||
self.add_error_message(format!(
|
||||
"Failed to read eval sample repo.patch {}: {err}",
|
||||
repo_patch_path.display()
|
||||
));
|
||||
}
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let reason = format!("Eval sample {case_id}");
|
||||
let result = snapshot.upload_feedback_with_attachments(
|
||||
"bad_result",
|
||||
Some(reason.as_str()),
|
||||
true,
|
||||
Some(rollout_path.as_path()),
|
||||
Some(codex_core::protocol::SessionSource::Cli),
|
||||
extra_attachments,
|
||||
);
|
||||
|
||||
match result {
|
||||
Ok(()) => {
|
||||
use ratatui::style::Stylize as _;
|
||||
use ratatui::text::Line;
|
||||
|
||||
let lines = vec![
|
||||
Line::from("• Eval sample uploaded.".to_string()),
|
||||
"".into(),
|
||||
Line::from(vec![
|
||||
" Thread ID: ".into(),
|
||||
snapshot.thread_id.clone().bold(),
|
||||
]),
|
||||
];
|
||||
self.app_event_tx
|
||||
.send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(
|
||||
crate::history_cell::PlainHistoryCell::new(lines),
|
||||
)));
|
||||
}
|
||||
Err(err) => {
|
||||
self.app_event_tx
|
||||
.send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(
|
||||
crate::history_cell::new_error_event(format!(
|
||||
"Failed to upload eval sample: {err}"
|
||||
)),
|
||||
)));
|
||||
}
|
||||
}
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
pub(crate) fn eval_capture_upload_skipped(&mut self, case_id: String, path: String) {
|
||||
let _ = (case_id, path);
|
||||
use ratatui::text::Line;
|
||||
|
||||
// The "captured" message already prints the case id/path. Avoid duplicating it here.
|
||||
let lines = vec![Line::from(
|
||||
"• Eval sample saved locally (not uploaded).".to_string(),
|
||||
)];
|
||||
self.app_event_tx
|
||||
.send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(
|
||||
crate::history_cell::PlainHistoryCell::new(lines),
|
||||
)));
|
||||
self.request_redraw();
|
||||
}
|
||||
|
||||
fn on_agent_message(&mut self, message: String) {
|
||||
// If we have a stream_controller, then the final agent message is redundant and will be a
|
||||
// duplicate of what has already been streamed.
|
||||
@@ -1482,6 +1963,7 @@ impl ChatWidget {
|
||||
last_rendered_width: std::cell::Cell::new(None),
|
||||
feedback,
|
||||
current_rollout_path: None,
|
||||
eval_capture_send_to_team: None,
|
||||
external_editor_state: ExternalEditorState::Closed,
|
||||
};
|
||||
|
||||
@@ -1569,6 +2051,7 @@ impl ChatWidget {
|
||||
last_rendered_width: std::cell::Cell::new(None),
|
||||
feedback,
|
||||
current_rollout_path: None,
|
||||
eval_capture_send_to_team: None,
|
||||
external_editor_state: ExternalEditorState::Closed,
|
||||
};
|
||||
|
||||
@@ -3695,6 +4178,73 @@ async fn fetch_rate_limits(base_url: String, auth: CodexAuth) -> Option<RateLimi
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_user_message_text(content: &[codex_protocol::models::ContentItem]) -> Option<String> {
|
||||
let mut combined = String::new();
|
||||
for item in content {
|
||||
let text = match item {
|
||||
codex_protocol::models::ContentItem::InputText { text }
|
||||
| codex_protocol::models::ContentItem::OutputText { text } => text.as_str(),
|
||||
codex_protocol::models::ContentItem::InputImage { .. } => continue,
|
||||
};
|
||||
if !combined.is_empty() {
|
||||
combined.push(' ');
|
||||
}
|
||||
combined.push_str(text);
|
||||
}
|
||||
|
||||
let mut normalized = String::new();
|
||||
for part in combined.split_whitespace() {
|
||||
if !normalized.is_empty() {
|
||||
normalized.push(' ');
|
||||
}
|
||||
normalized.push_str(part);
|
||||
}
|
||||
|
||||
let trimmed = normalized.trim();
|
||||
if trimmed.is_empty() {
|
||||
None
|
||||
} else {
|
||||
// Keep the picker readable even for very long user messages.
|
||||
Some(truncate_text(trimmed, 800))
|
||||
}
|
||||
}
|
||||
|
||||
fn should_show_eval_capture_start_message(message: &str) -> bool {
|
||||
// The rollout includes a few synthetic "user" messages (environment context,
|
||||
// AGENTS instructions) that should not be presented back to the user as start markers.
|
||||
let trimmed = message.trim_start();
|
||||
if trimmed.starts_with("<environment_context>") {
|
||||
return false;
|
||||
}
|
||||
if trimmed.starts_with("# AGENTS.md instructions") {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
fn parse_rollout_timestamp_utc(timestamp: &str) -> Option<DateTime<Utc>> {
|
||||
DateTime::parse_from_rfc3339(timestamp)
|
||||
.ok()
|
||||
.map(|dt| dt.with_timezone(&Utc))
|
||||
}
|
||||
|
||||
fn format_relative_time_ago(now: DateTime<Utc>, then: DateTime<Utc>) -> String {
|
||||
let secs = now.signed_duration_since(then).num_seconds().max(0);
|
||||
if secs < 60 {
|
||||
return format!("{secs}s ago");
|
||||
}
|
||||
let mins = secs / 60;
|
||||
if mins < 60 {
|
||||
return format!("{mins}m ago");
|
||||
}
|
||||
let hours = mins / 60;
|
||||
if hours < 24 {
|
||||
return format!("{hours}h ago");
|
||||
}
|
||||
let days = hours / 24;
|
||||
format!("{days}d ago")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) fn show_review_commit_picker_with_entries(
|
||||
chat: &mut ChatWidget,
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
---
|
||||
source: tui/src/chatwidget/tests.rs
|
||||
assertion_line: 2190
|
||||
expression: popup
|
||||
---
|
||||
Capture eval sample
|
||||
|
||||
Here's everything we capture as an eval sample.
|
||||
It's stored locally on your computer:
|
||||
• manifest.json
|
||||
• rollout.jsonl
|
||||
• repo.patch
|
||||
• codex-logs.log
|
||||
|
||||
Next, you can optionally upload this bundle to the team.
|
||||
|
||||
› 1. Continue Create an eval sample bundle locally.
|
||||
2. Send basic feedback instead Skip eval capture and send basic feedback to
|
||||
the team.
|
||||
|
||||
Press enter to confirm or esc to go back
|
||||
@@ -0,0 +1,11 @@
|
||||
---
|
||||
source: tui/src/chatwidget/tests.rs
|
||||
expression: popup
|
||||
---
|
||||
Pick the start message
|
||||
Pick the first instruction for the specific code change or interaction you
|
||||
wanted to give feedback on.
|
||||
|
||||
› 1. [0s ago] Please help me fix the build errors
|
||||
|
||||
Press enter to confirm or esc to go back
|
||||
@@ -0,0 +1,20 @@
|
||||
---
|
||||
source: tui/src/chatwidget/tests.rs
|
||||
expression: popup
|
||||
---
|
||||
Send eval sample to the codex team?
|
||||
|
||||
If you choose Yes, it will upload the full bundle
|
||||
to the team:
|
||||
• manifest.json
|
||||
• rollout.jsonl
|
||||
• repo.patch
|
||||
• codex-logs.log
|
||||
|
||||
This may include file paths, code snippets, and tool outputs.
|
||||
|
||||
› 1. Yes Upload the full bundle to the team for troubleshooting and model
|
||||
improvement.
|
||||
2. No Keep it local only.
|
||||
|
||||
Press enter to confirm or esc to go back
|
||||
@@ -0,0 +1,11 @@
|
||||
---
|
||||
source: tui/src/chatwidget/tests.rs
|
||||
expression: popup
|
||||
---
|
||||
Bad result
|
||||
Choose the fastest way to help us improve.
|
||||
|
||||
› 1. Quick feedback Send a short note (and optionally logs) to
|
||||
maintainers.
|
||||
2. Capture eval sample Create a local bundle you can rerun as an eval case
|
||||
later.
|
||||
@@ -407,6 +407,7 @@ async fn make_chatwidget_manual(
|
||||
last_rendered_width: std::cell::Cell::new(None),
|
||||
feedback: codex_feedback::CodexFeedback::new(),
|
||||
current_rollout_path: None,
|
||||
eval_capture_send_to_team: None,
|
||||
external_editor_state: ExternalEditorState::Closed,
|
||||
};
|
||||
(widget, rx, op_rx)
|
||||
@@ -2167,6 +2168,88 @@ async fn feedback_upload_consent_popup_snapshot() {
|
||||
assert_snapshot!("feedback_upload_consent_popup", popup);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn feedback_bad_result_fork_popup_snapshot() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
|
||||
chat.open_feedback_bad_result_fork();
|
||||
|
||||
let popup = render_bottom_popup(&chat, 80);
|
||||
assert_snapshot!("feedback_bad_result_fork_popup", popup);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn eval_capture_intro_popup_snapshot() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
|
||||
// Ensure host state doesn't leak into snapshots.
|
||||
let _ = std::fs::remove_file(chat.config.codex_home.join("eval_capture.json"));
|
||||
|
||||
chat.open_eval_capture_intro();
|
||||
|
||||
let popup = render_bottom_popup(&chat, 80);
|
||||
assert_snapshot!("eval_capture_intro_popup", popup);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn eval_capture_upload_consent_popup_snapshot() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
|
||||
let params = crate::bottom_pane::eval_capture_send_consent_params(chat.app_event_tx.clone());
|
||||
chat.bottom_pane.show_selection_view(params);
|
||||
chat.request_redraw();
|
||||
|
||||
let popup = render_bottom_popup(&chat, 80);
|
||||
assert_snapshot!("eval_capture_upload_consent_popup", popup);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn eval_capture_start_picker_popup_snapshot() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
|
||||
let rollout_file = NamedTempFile::new().unwrap();
|
||||
let user_line = serde_json::json!({
|
||||
"timestamp": "2025-09-05T16:53:11.850Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [{
|
||||
"type": "input_text",
|
||||
"text": "Please help me fix the build errors",
|
||||
}],
|
||||
},
|
||||
});
|
||||
let ghost_line = serde_json::json!({
|
||||
"timestamp": "2025-09-05T16:53:12.000Z",
|
||||
"type": "response_item",
|
||||
"payload": {
|
||||
"type": "ghost_snapshot",
|
||||
"ghost_commit": {
|
||||
"id": "ghost-sha",
|
||||
"parent": "base-sha",
|
||||
"preexisting_untracked_files": [],
|
||||
"preexisting_untracked_dirs": [],
|
||||
},
|
||||
},
|
||||
});
|
||||
std::fs::write(
|
||||
rollout_file.path(),
|
||||
format!(
|
||||
"{}\n{}\n",
|
||||
serde_json::to_string(&user_line).unwrap(),
|
||||
serde_json::to_string(&ghost_line).unwrap()
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
chat.current_rollout_path = Some(rollout_file.path().to_path_buf());
|
||||
|
||||
chat.open_eval_capture_start_picker();
|
||||
|
||||
let popup = render_bottom_popup(&chat, 80);
|
||||
assert_snapshot!("eval_capture_start_picker_popup", popup);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn reasoning_popup_escape_returns_to_model_popup() {
|
||||
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("gpt-5.1-codex-max")).await;
|
||||
|
||||
40
codex-rs/tui/src/eval_capture_state.rs
Normal file
40
codex-rs/tui/src/eval_capture_state.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
struct EvalCaptureState {
|
||||
#[serde(default)]
|
||||
intro_dismissed: bool,
|
||||
}
|
||||
|
||||
const STATE_FILENAME: &str = "eval_capture.json";
|
||||
|
||||
fn state_path(codex_home: &Path) -> PathBuf {
|
||||
codex_home.join(STATE_FILENAME)
|
||||
}
|
||||
|
||||
pub(crate) fn should_show_eval_capture_intro(codex_home: &Path) -> bool {
|
||||
let path = state_path(codex_home);
|
||||
let Ok(contents) = std::fs::read_to_string(path) else {
|
||||
return true;
|
||||
};
|
||||
serde_json::from_str::<EvalCaptureState>(&contents)
|
||||
.ok()
|
||||
.map(|s| !s.intro_dismissed)
|
||||
.unwrap_or(true)
|
||||
}
|
||||
|
||||
pub(crate) fn persist_eval_capture_intro_dismissed(codex_home: &Path) -> anyhow::Result<()> {
|
||||
let path = state_path(codex_home);
|
||||
let state = EvalCaptureState {
|
||||
intro_dismissed: true,
|
||||
};
|
||||
let json_line = format!("{}\n", serde_json::to_string(&state)?);
|
||||
if let Some(parent) = path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
std::fs::write(path, json_line)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -45,6 +45,7 @@ mod clipboard_paste;
|
||||
mod color;
|
||||
pub mod custom_terminal;
|
||||
mod diff_render;
|
||||
mod eval_capture_state;
|
||||
mod exec_cell;
|
||||
mod exec_command;
|
||||
mod external_editor;
|
||||
|
||||
Reference in New Issue
Block a user