Compare commits

...

6 Commits

Author SHA1 Message Date
Noah MacCallum
ed7110c437 eval-case: add v1 manifest with repo git info and start selector 2026-01-03 22:00:52 -08:00
Noah MacCallum
e639300f79 tui: move eval capture send consent earlier 2026-01-01 15:53:23 -08:00
Noah MacCallum
8f25853583 tui: tweak eval capture notes copy 2026-01-01 15:35:24 -08:00
Noah MacCallum
8e551617d4 tui: refine eval capture start picker and notes 2026-01-01 15:18:53 -08:00
Noah MacCallum
049c395736 eval-case: implement eval capture bundles 2026-01-01 03:20:12 -08:00
Noah MacCallum
602e5fca05 tui: improve eval capture UX
Add eval capture flow to /feedback (bad result), including start picker with relative timestamps, updated notes prompts, and upload consent UI + snapshots.
2026-01-01 03:05:26 -08:00
31 changed files with 2923 additions and 50 deletions

15
codex-rs/Cargo.lock generated
View File

@@ -1000,6 +1000,7 @@ dependencies = [
"codex-backend-client",
"codex-common",
"codex-core",
"codex-eval-case",
"codex-feedback",
"codex-file-search",
"codex-login",
@@ -1350,6 +1351,19 @@ dependencies = [
"wiremock",
]
[[package]]
name = "codex-eval-case"
version = "0.0.0"
dependencies = [
"anyhow",
"pretty_assertions",
"serde",
"serde_json",
"tempfile",
"time",
"uuid",
]
[[package]]
name = "codex-exec"
version = "0.0.0"
@@ -1724,6 +1738,7 @@ dependencies = [
"codex-backend-client",
"codex-common",
"codex-core",
"codex-eval-case",
"codex-feedback",
"codex-file-search",
"codex-login",

View File

@@ -19,6 +19,7 @@ members = [
"exec-server",
"execpolicy",
"execpolicy-legacy",
"eval-case",
"keyring-store",
"file-search",
"linux-sandbox",
@@ -75,6 +76,7 @@ codex-common = { path = "common" }
codex-core = { path = "core" }
codex-exec = { path = "exec" }
codex-execpolicy = { path = "execpolicy" }
codex-eval-case = { path = "eval-case" }
codex-feedback = { path = "feedback" }
codex-file-search = { path = "file-search" }
codex-git = { path = "utils/git" }

View File

@@ -174,6 +174,11 @@ client_request_definitions! {
response: v2::FeedbackUploadResponse,
},
EvalCaseCreate => "evalCase/create" {
params: v2::EvalCaseCreateParams,
response: v2::EvalCaseCreateResponse,
},
/// Execute a command (argv vector) under the server's sandbox.
OneOffCommandExec => "command/exec" {
params: v2::CommandExecParams,

View File

@@ -930,6 +930,50 @@ pub struct FeedbackUploadResponse {
pub thread_id: String,
}
/// Discriminator for how an eval-case start marker addresses a rollout line.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(export_to = "v2/")]
pub enum EvalCaseStartMarkerKind {
    RolloutLineTimestamp,
    RolloutLineIndex,
}
/// Marker payload. Serialized untagged, so on the wire it is either a bare
/// string (timestamp) or a bare integer (line index); the matching
/// [`EvalCaseStartMarkerKind`] disambiguates.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(untagged)]
#[ts(export_to = "v2/")]
pub enum EvalCaseStartMarkerValue {
    Timestamp(String),
    LineIndex(u64),
}
/// Start marker chosen by the client: a kind/value pair plus the
/// human-readable label that was displayed for it.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct EvalCaseStartMarker {
    pub kind: EvalCaseStartMarkerKind,
    pub value: EvalCaseStartMarkerValue,
    pub display: String,
}
/// Parameters for the `evalCase/create` request.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct EvalCaseCreateParams {
    pub thread_id: String,
    pub start: EvalCaseStartMarker,
    pub what_went_wrong: String,
    pub what_good_looks_like: String,
    // NOTE(review): the server rejects `false` — bundles always include logs.
    pub include_logs: bool,
}
/// Response for `evalCase/create`: the new bundle's id and filesystem path.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct EvalCaseCreateResponse {
    pub case_id: String,
    pub path: String,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]

View File

@@ -21,6 +21,7 @@ codex-arg0 = { workspace = true }
codex-common = { workspace = true, features = ["cli"] }
codex-core = { workspace = true }
codex-backend-client = { workspace = true }
codex-eval-case = { workspace = true }
codex-file-search = { workspace = true }
codex-login = { workspace = true }
codex-protocol = { workspace = true }

View File

@@ -25,6 +25,8 @@ use codex_app_server_protocol::ClientRequest;
use codex_app_server_protocol::CommandExecParams;
use codex_app_server_protocol::ConversationGitInfo;
use codex_app_server_protocol::ConversationSummary;
use codex_app_server_protocol::EvalCaseCreateParams;
use codex_app_server_protocol::EvalCaseCreateResponse;
use codex_app_server_protocol::ExecOneOffCommandResponse;
use codex_app_server_protocol::FeedbackUploadParams;
use codex_app_server_protocol::FeedbackUploadResponse;
@@ -512,6 +514,9 @@ impl CodexMessageProcessor {
ClientRequest::FeedbackUpload { request_id, params } => {
self.upload_feedback(request_id, params).await;
}
ClientRequest::EvalCaseCreate { request_id, params } => {
self.eval_case_create(request_id, params).await;
}
}
}
@@ -3301,6 +3306,170 @@ impl CodexMessageProcessor {
}
}
/// Handle the `evalCase/create` JSON-RPC request: validate the params,
/// locate the thread's rollout file, translate the start marker, and build
/// an eval-case bundle on a blocking thread, replying with its id and path.
///
/// Every failure path sends a JSON-RPC error for `request_id` and returns.
async fn eval_case_create(&self, request_id: RequestId, params: EvalCaseCreateParams) {
    let EvalCaseCreateParams {
        thread_id,
        start,
        what_went_wrong,
        what_good_looks_like,
        include_logs,
    } = params;
    // Bundles always ship logs; reject opt-out requests so the contract is
    // explicit for callers.
    if !include_logs {
        let error = JSONRPCErrorError {
            code: INVALID_REQUEST_ERROR_CODE,
            message: "eval case bundles always include codex-logs.log; set include_logs=true"
                .to_string(),
            data: None,
        };
        self.outgoing.send_error(request_id, error).await;
        return;
    }
    // The wire format carries the thread id as a string; parse it before any
    // lookups.
    let conversation_id = match ConversationId::from_string(&thread_id) {
        Ok(id) => id,
        Err(err) => {
            let error = JSONRPCErrorError {
                code: INVALID_REQUEST_ERROR_CODE,
                message: format!("invalid thread id: {err}"),
                data: None,
            };
            self.outgoing.send_error(request_id, error).await;
            return;
        }
    };
    let Some(rollout_path) = self.resolve_rollout_path(conversation_id).await else {
        let error = JSONRPCErrorError {
            code: INVALID_REQUEST_ERROR_CODE,
            message: "could not resolve rollout path for thread".to_string(),
            data: None,
        };
        self.outgoing.send_error(request_id, error).await;
        return;
    };
    let rollout_text = match std::fs::read_to_string(&rollout_path) {
        Ok(text) => text,
        Err(err) => {
            let error = JSONRPCErrorError {
                code: INTERNAL_ERROR_CODE,
                message: format!(
                    "failed to read rollout file {}: {err}",
                    rollout_path.display()
                ),
                data: None,
            };
            self.outgoing.send_error(request_id, error).await;
            return;
        }
    };
    // Translate the protocol-level marker into the eval-case crate's type.
    // The kind and value variants must agree; a mismatched pair is a
    // malformed request.
    let start_marker = match (&start.kind, &start.value) {
        (
            codex_app_server_protocol::EvalCaseStartMarkerKind::RolloutLineTimestamp,
            codex_app_server_protocol::EvalCaseStartMarkerValue::Timestamp(value),
        ) => codex_eval_case::StartMarker {
            kind: codex_eval_case::StartMarkerKind::RolloutLineTimestamp,
            value: codex_eval_case::StartMarkerValue::Timestamp(value.clone()),
            display: start.display.clone(),
        },
        (
            codex_app_server_protocol::EvalCaseStartMarkerKind::RolloutLineIndex,
            codex_app_server_protocol::EvalCaseStartMarkerValue::LineIndex(value),
        ) => codex_eval_case::StartMarker {
            kind: codex_eval_case::StartMarkerKind::RolloutLineIndex,
            value: codex_eval_case::StartMarkerValue::LineIndex(*value),
            display: start.display.clone(),
        },
        _ => {
            let error = JSONRPCErrorError {
                code: INVALID_REQUEST_ERROR_CODE,
                message: "start marker kind/value mismatch".to_string(),
                data: None,
            };
            self.outgoing.send_error(request_id, error).await;
            return;
        }
    };
    // A "Start now" marker (by display label, or a line index pointing at
    // the final rollout line) starts the capture at the current state, where
    // no ghost snapshot exists yet — only then may the snapshot be missing.
    // NOTE(review): matching on the display string "Start now" couples this
    // to TUI copy — confirm against the picker that produces it.
    let last_line_index = rollout_text.lines().count().saturating_sub(1);
    let requested_start_is_now = start.display == "Start now"
        || matches!((&start.kind, &start.value),
            (codex_app_server_protocol::EvalCaseStartMarkerKind::RolloutLineIndex,
            codex_app_server_protocol::EvalCaseStartMarkerValue::LineIndex(index))
            if usize::try_from(*index).ok() == Some(last_line_index)
        );
    let repo_snapshot = repo_snapshot_from_rollout(&rollout_text, &start_marker);
    if repo_snapshot.is_none() && !requested_start_is_now {
        let error = JSONRPCErrorError {
            code: INVALID_REQUEST_ERROR_CODE,
            message: "no repo snapshot available for requested start marker".to_string(),
            data: None,
        };
        self.outgoing.send_error(request_id, error).await;
        return;
    }
    // include_logs is guaranteed true here (checked above), so this always
    // yields Some; the `.then` form simply mirrors the flag it derives from.
    let logs_bytes = include_logs.then(|| {
        self.feedback
            .snapshot(Some(conversation_id))
            .as_bytes()
            .to_vec()
    });
    let args = codex_eval_case::CreateEvalCaseArgs {
        codex_home: self.config.codex_home.clone(),
        conversation_id: thread_id.clone(),
        rollout_path,
        start: start_marker,
        repo_cwd: self.config.cwd.clone(),
        repo_snapshot,
        notes: codex_eval_case::Notes {
            what_went_wrong,
            what_good_looks_like,
        },
        include_logs,
        logs_bytes,
    };
    // Bundle creation does blocking filesystem and git work; keep it off the
    // async executor.
    let result =
        tokio::task::spawn_blocking(move || codex_eval_case::create_eval_case_bundle(&args))
            .await;
    // First unwrap the JoinHandle (task panic/cancellation), then the
    // bundle-creation result itself.
    let result = match result {
        Ok(outcome) => outcome,
        Err(join_err) => {
            let error = JSONRPCErrorError {
                code: INTERNAL_ERROR_CODE,
                message: format!("failed to create eval case bundle: {join_err}"),
                data: None,
            };
            self.outgoing.send_error(request_id, error).await;
            return;
        }
    };
    match result {
        Ok(outcome) => {
            let response = EvalCaseCreateResponse {
                case_id: outcome.case_id,
                path: outcome.path.display().to_string(),
            };
            self.outgoing.send_response(request_id, response).await;
        }
        Err(err) => {
            let error = JSONRPCErrorError {
                code: INTERNAL_ERROR_CODE,
                message: format!("failed to create eval case bundle: {err}"),
                data: None,
            };
            self.outgoing.send_error(request_id, error).await;
        }
    }
}
async fn resolve_rollout_path(&self, conversation_id: ConversationId) -> Option<PathBuf> {
match self
.conversation_manager
@@ -3313,6 +3482,56 @@ impl CodexMessageProcessor {
}
}
fn repo_snapshot_from_rollout(
rollout_text: &str,
start_marker: &codex_eval_case::StartMarker,
) -> Option<codex_eval_case::RepoSnapshot> {
match &start_marker.value {
codex_eval_case::StartMarkerValue::LineIndex(index) => {
let index = usize::try_from(*index).ok()?;
repo_snapshot_from_rollout_line_index(rollout_text, index)
}
codex_eval_case::StartMarkerValue::Timestamp(timestamp) => {
repo_snapshot_from_rollout_timestamp(rollout_text, timestamp)
}
}
}
/// Find the first rollout line whose recorded timestamp equals `timestamp`,
/// then search forward from that line for the next ghost snapshot. Lines
/// that fail to parse as `RolloutLine` are skipped; returns `None` when no
/// line carries the requested timestamp.
fn repo_snapshot_from_rollout_timestamp(
    rollout_text: &str,
    timestamp: &str,
) -> Option<codex_eval_case::RepoSnapshot> {
    let mut match_index = None;
    for (idx, line) in rollout_text.lines().enumerate() {
        if let Ok(parsed) = serde_json::from_str::<codex_protocol::protocol::RolloutLine>(line) {
            if parsed.timestamp == timestamp {
                match_index = Some(idx);
                break;
            }
        }
    }
    repo_snapshot_from_rollout_line_index(rollout_text, match_index?)
}
/// Scan the rollout strictly after `start_index` and return the base/commit
/// SHA pair from the first ghost snapshot found, or `None` if there is none.
fn repo_snapshot_from_rollout_line_index(
    rollout_text: &str,
    start_index: usize,
) -> Option<codex_eval_case::RepoSnapshot> {
    rollout_text
        .lines()
        .enumerate()
        // Start at the line *after* the marker; the snapshot for a turn is
        // recorded later in the rollout than the turn itself.
        .skip(start_index.saturating_add(1))
        .find_map(|(_idx, line)| {
            // Unparseable lines are skipped, not treated as errors.
            let rollout_line =
                serde_json::from_str::<codex_protocol::protocol::RolloutLine>(line).ok()?;
            match rollout_line.item {
                codex_protocol::protocol::RolloutItem::ResponseItem(
                    codex_protocol::models::ResponseItem::GhostSnapshot { ghost_commit },
                ) => Some(codex_eval_case::RepoSnapshot {
                    // The `?` exits only this closure, so a ghost snapshot
                    // with no parent is skipped and the scan continues.
                    base_sha: ghost_commit.parent()?.to_string(),
                    commit_sha: ghost_commit.id().to_string(),
                }),
                _ => None,
            }
        })
}
fn skills_to_info(
skills: &[codex_core::skills::SkillMetadata],
) -> Vec<codex_app_server_protocol::SkillMetadata> {
@@ -3619,4 +3838,58 @@ mod tests {
assert_eq!(summary, expected);
Ok(())
}
/// Verifies that `repo_snapshot_from_rollout`, given a line-index marker,
/// scans forward and returns the SHAs of the next ghost snapshot.
#[test]
fn repo_snapshot_from_rollout_line_index_finds_next_ghost_snapshot() -> Result<()> {
    use serde_json::json;
    // Line 0: a user message. The snapshot search starts strictly after the
    // marker line, so this line itself must not match.
    let user_line = json!({
        "timestamp": "2025-09-05T16:53:11.850Z",
        "type": "response_item",
        "payload": {
            "type": "message",
            "role": "user",
            "content": [{
                "type": "input_text",
                "text": "hi",
            }],
        },
    });
    // Line 1: a ghost snapshot with known parent/commit SHAs.
    let ghost_line = json!({
        "timestamp": "2025-09-05T16:53:12.000Z",
        "type": "response_item",
        "payload": {
            "type": "ghost_snapshot",
            "ghost_commit": {
                "id": "ghost-sha",
                "parent": "base-sha",
                "preexisting_untracked_files": [],
                "preexisting_untracked_dirs": [],
            },
        },
    });
    let rollout_text = format!(
        "{}\n{}\n",
        serde_json::to_string(&user_line)?,
        serde_json::to_string(&ghost_line)?
    );
    // Marker points at line 0 (the user message).
    let start = codex_eval_case::StartMarker {
        kind: codex_eval_case::StartMarkerKind::RolloutLineIndex,
        value: codex_eval_case::StartMarkerValue::LineIndex(0),
        display: "From: hi".to_string(),
    };
    let snapshot = repo_snapshot_from_rollout(&rollout_text, &start).expect("snapshot");
    assert_eq!(
        snapshot,
        codex_eval_case::RepoSnapshot {
            base_sha: "base-sha".to_string(),
            commit_sha: "ghost-sha".to_string(),
        }
    );
    Ok(())
}
}

View File

@@ -209,34 +209,19 @@ pub fn apply_hunks(
stdout: &mut impl std::io::Write,
stderr: &mut impl std::io::Write,
) -> Result<(), ApplyPatchError> {
let _existing_paths: Vec<&Path> = hunks
.iter()
.filter_map(|hunk| match hunk {
Hunk::AddFile { .. } => {
// The file is being added, so it doesn't exist yet.
None
}
Hunk::DeleteFile { path } => Some(path.as_path()),
Hunk::UpdateFile {
path, move_path, ..
} => match move_path {
Some(move_path) => {
if std::fs::metadata(move_path)
.map(|m| m.is_file())
.unwrap_or(false)
{
Some(move_path.as_path())
} else {
None
}
}
None => Some(path.as_path()),
},
})
.collect::<Vec<&Path>>();
apply_hunks_in_dir(hunks, Path::new("."), stdout, stderr)
}
/// Applies hunks relative to `cwd` (without mutating the process working directory) and writes the
/// same stdout/stderr output as `apply_hunks`.
pub fn apply_hunks_in_dir(
hunks: &[Hunk],
cwd: &Path,
stdout: &mut impl std::io::Write,
stderr: &mut impl std::io::Write,
) -> Result<(), ApplyPatchError> {
// Delegate to a helper that applies each hunk to the filesystem.
match apply_hunks_to_files(hunks) {
match apply_hunks_to_files_in_dir(hunks, cwd) {
Ok(affected) => {
print_summary(&affected, stdout).map_err(ApplyPatchError::from)?;
Ok(())
@@ -267,7 +252,7 @@ pub struct AffectedPaths {
/// Apply the hunks to the filesystem, returning which files were added, modified, or deleted.
/// Returns an error if the patch could not be applied.
fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result<AffectedPaths> {
fn apply_hunks_to_files_in_dir(hunks: &[Hunk], cwd: &Path) -> anyhow::Result<AffectedPaths> {
if hunks.is_empty() {
anyhow::bail!("No files were modified.");
}
@@ -278,19 +263,29 @@ fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result<AffectedPaths> {
for hunk in hunks {
match hunk {
Hunk::AddFile { path, contents } => {
if let Some(parent) = path.parent()
let target_path = if path.is_absolute() {
path.clone()
} else {
cwd.join(path)
};
if let Some(parent) = target_path.parent()
&& !parent.as_os_str().is_empty()
{
std::fs::create_dir_all(parent).with_context(|| {
format!("Failed to create parent directories for {}", path.display())
})?;
}
std::fs::write(path, contents)
std::fs::write(&target_path, contents)
.with_context(|| format!("Failed to write file {}", path.display()))?;
added.push(path.clone());
}
Hunk::DeleteFile { path } => {
std::fs::remove_file(path)
let target_path = if path.is_absolute() {
path.clone()
} else {
cwd.join(path)
};
std::fs::remove_file(&target_path)
.with_context(|| format!("Failed to delete file {}", path.display()))?;
deleted.push(path.clone());
}
@@ -300,22 +295,38 @@ fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result<AffectedPaths> {
chunks,
} => {
let AppliedPatch { new_contents, .. } =
derive_new_contents_from_chunks(path, chunks)?;
derive_new_contents_from_chunks_in_dir(path, chunks, cwd)?;
if let Some(dest) = move_path {
if let Some(parent) = dest.parent()
let dest_path = if dest.is_absolute() {
dest.clone()
} else {
cwd.join(dest)
};
if let Some(parent) = dest_path.parent()
&& !parent.as_os_str().is_empty()
{
std::fs::create_dir_all(parent).with_context(|| {
format!("Failed to create parent directories for {}", dest.display())
})?;
}
std::fs::write(dest, new_contents)
std::fs::write(&dest_path, new_contents)
.with_context(|| format!("Failed to write file {}", dest.display()))?;
std::fs::remove_file(path)
let src_path = if path.is_absolute() {
path.clone()
} else {
cwd.join(path)
};
std::fs::remove_file(&src_path)
.with_context(|| format!("Failed to remove original {}", path.display()))?;
modified.push(dest.clone());
} else {
std::fs::write(path, new_contents)
let target_path = if path.is_absolute() {
path.clone()
} else {
cwd.join(path)
};
std::fs::write(&target_path, new_contents)
.with_context(|| format!("Failed to write file {}", path.display()))?;
modified.push(path.clone());
}
@@ -340,7 +351,21 @@ fn derive_new_contents_from_chunks(
path: &Path,
chunks: &[UpdateFileChunk],
) -> std::result::Result<AppliedPatch, ApplyPatchError> {
let original_contents = match std::fs::read_to_string(path) {
derive_new_contents_from_chunks_in_dir(path, chunks, Path::new("."))
}
fn derive_new_contents_from_chunks_in_dir(
path: &Path,
chunks: &[UpdateFileChunk],
cwd: &Path,
) -> std::result::Result<AppliedPatch, ApplyPatchError> {
let target_path = if path.is_absolute() {
path.to_path_buf()
} else {
cwd.join(path)
};
let original_contents = match std::fs::read_to_string(&target_path) {
Ok(contents) => contents,
Err(err) => {
return Err(ApplyPatchError::IoError(IoError {

View File

@@ -343,6 +343,7 @@ pub(crate) struct Session {
/// The set of enabled features should be invariant for the lifetime of the
/// session.
features: Features,
repo_snapshotting_enabled: AtomicBool,
pub(crate) active_turn: Mutex<Option<ActiveTurn>>,
pub(crate) services: SessionServices,
next_internal_sub_id: AtomicU64,
@@ -673,6 +674,7 @@ impl Session {
tx_event: tx_event.clone(),
state: Mutex::new(state),
features: config.features.clone(),
repo_snapshotting_enabled: AtomicBool::new(false),
active_turn: Mutex::new(None),
services,
next_internal_sub_id: AtomicU64::new(0),
@@ -1439,7 +1441,9 @@ impl Session {
turn_context: Arc<TurnContext>,
cancellation_token: CancellationToken,
) {
if !self.enabled(Feature::GhostCommit) {
if !self.enabled(Feature::GhostCommit)
&& !self.repo_snapshotting_enabled.load(Ordering::Relaxed)
{
return;
}
let token = match turn_context.tool_call_gate.subscribe().await {
@@ -1609,6 +1613,9 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
)
.await;
}
Op::SetRepoSnapshotting { enabled } => {
handlers::set_repo_snapshotting(&sess, enabled).await;
}
Op::UserInput { .. } | Op::UserTurn { .. } => {
handlers::user_input_or_turn(&sess, sub.id.clone(), sub.op, &mut previous_context)
.await;
@@ -1707,6 +1714,7 @@ mod handlers {
use mcp_types::RequestId;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use tracing::info;
use tracing::warn;
@@ -1731,6 +1739,11 @@ mod handlers {
}
}
pub async fn set_repo_snapshotting(sess: &Session, enabled: bool) {
sess.repo_snapshotting_enabled
.store(enabled, Ordering::Relaxed);
}
pub async fn user_input_or_turn(
sess: &Arc<Session>,
sub_id: String,
@@ -3161,6 +3174,7 @@ mod tests {
tx_event,
state: Mutex::new(state),
features: config.features.clone(),
repo_snapshotting_enabled: AtomicBool::new(false),
active_turn: Mutex::new(None),
services,
next_internal_sub_id: AtomicU64::new(0),
@@ -3248,6 +3262,7 @@ mod tests {
tx_event,
state: Mutex::new(state),
features: config.features.clone(),
repo_snapshotting_enabled: AtomicBool::new(false),
active_turn: Mutex::new(None),
services,
next_internal_sub_id: AtomicU64::new(0),

View File

@@ -23,6 +23,7 @@ use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Instant;
#[derive(Clone, Debug)]
pub struct ApplyPatchRequest {
@@ -143,6 +144,48 @@ impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
attempt: &SandboxAttempt<'_>,
ctx: &ToolCtx<'_>,
) -> Result<ExecToolCallOutput, ToolError> {
// When there is no sandbox in play (DangerFullAccess / bypassed sandbox), apply the patch
// in-process. This avoids relying on the current executable implementing the arg0/argv1
// dispatch behavior (which is not true for unit/integration test binaries).
if attempt.sandbox == crate::exec::SandboxType::None {
let started = Instant::now();
let mut stdout = Vec::new();
let mut stderr = Vec::new();
// Apply the patch relative to `req.cwd` without mutating the process CWD.
let exit_code = match codex_apply_patch::parse_patch(&req.patch) {
Ok(args) => {
match codex_apply_patch::apply_hunks_in_dir(
&args.hunks,
&req.cwd,
&mut stdout,
&mut stderr,
) {
Ok(()) => 0,
Err(_) => 1,
}
}
// Reuse codex-apply-patch's error formatting for parse errors.
Err(_) => {
match codex_apply_patch::apply_patch(&req.patch, &mut stdout, &mut stderr) {
Ok(()) => 0,
Err(_) => 1,
}
}
};
let stdout = String::from_utf8_lossy(&stdout).to_string();
let stderr = String::from_utf8_lossy(&stderr).to_string();
let aggregated_output = format!("{stdout}{stderr}");
return Ok(ExecToolCallOutput {
exit_code,
stdout: crate::exec::StreamOutput::new(stdout),
stderr: crate::exec::StreamOutput::new(stderr),
aggregated_output: crate::exec::StreamOutput::new(aggregated_output),
duration: started.elapsed(),
timed_out: false,
});
}
let spec = Self::build_command_spec(req)?;
let env = attempt
.env_for(spec)

View File

@@ -8,6 +8,7 @@ use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ResponsesRequest;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
@@ -277,7 +278,7 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
ev_completed("resp-2"),
]),
];
mount_sse_sequence(harness.server(), responses).await;
let requests = mount_sse_sequence(harness.server(), responses).await;
let model = test.session_configured.model.clone();
codex
@@ -297,7 +298,14 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
assert_eq!(fs::read_to_string(&target).await?, "hello from snapshot\n");
let tool_output = extract_call_output_text(&requests.requests(), call_id);
let target_contents = fs::read_to_string(&target).await.unwrap_or_else(|err| {
panic!(
"expected patch to create {}: {err}; tool output={tool_output:?}",
target.display()
)
});
assert_eq!(target_contents, "hello from snapshot\n");
let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
let snapshot_path = entries
@@ -311,6 +319,35 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
Ok(())
}
/// Pull the tool-output text for `call_id` out of the captured requests.
///
/// Looks at the first `function_call_output` item matching `call_id` in each
/// request; accepts the output either as a plain string or as an object with
/// a string `content` field. Returns a placeholder when nothing matches.
fn extract_call_output_text(requests: &[ResponsesRequest], call_id: &str) -> String {
    requests
        .iter()
        .find_map(|req| {
            let input = req.input();
            let item = input.iter().find(|item| {
                item.get("type").and_then(serde_json::Value::as_str)
                    == Some("function_call_output")
                    && item.get("call_id").and_then(serde_json::Value::as_str) == Some(call_id)
            })?;
            match item.get("output") {
                Some(serde_json::Value::String(text)) => Some(text.clone()),
                Some(serde_json::Value::Object(obj)) => obj
                    .get("content")
                    .and_then(serde_json::Value::as_str)
                    .map(str::to_string),
                _ => None,
            }
        })
        .unwrap_or_else(|| "<missing tool output>".to_string())
}
#[cfg_attr(target_os = "windows", ignore)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_snapshot_deleted_after_shutdown_with_skills() -> Result<()> {

View File

@@ -0,0 +1,58 @@
# Eval Capture Bundles
Codex can capture "eval case" bundles from the `/feedback` flow (bad result -> capture eval sample).
These bundles are meant to turn real failures into reproducible, local-first artifacts.
## Where Bundles Are Written
Bundles are stored under:
`$CODEX_HOME/eval-case/<case-id>/`
## Bundle Contents
Each bundle contains:
- `manifest.json` - metadata about the capture (schema version, rollout start selector, repo git pointer).
- `rollout.jsonl` - the full session rollout (multi-turn trajectory).
- `repo.patch` - a git patch representing the repository state at the chosen start marker.
- `codex-logs.log` - tracing logs to help maintainers debug the session.
## Start Marker And Repo State
Bundles include the entire rollout, but also record a start marker to indicate where an eval
harness (or a human) should begin replaying/interpreting the trajectory.
`manifest.json` also records a `rollout.start_selector`, which is the preferred way to find the
intended starting turn when replaying/slicing (line numbers in `rollout.jsonl` are not stable).
The repository patch must match that chosen start marker:
- If the session has repo snapshots available, `repo.patch` is derived from the ghost snapshot
commit associated with the selected user turn (diff from the snapshot's base commit to the
snapshot commit).
- If no snapshot is available for a given start marker, the TUI disables that option (and may
fall back to the basic feedback flow instead).
For reproducibility outside your machine, the base commit recorded in `manifest.json` should be
reachable by maintainers (for example, pushed and available on the default branch).
## Reproducibility Contract
Reproducing the repository state in a bundle must be possible using only:
- `repo.git.canonical_remote` (clone URL)
- `repo.git.commit` (base commit to `git checkout`)
- `repo.patch` (apply if non-empty)
The manifest may also include machine-local paths (like `repo.root` / `repo.cwd_rel`) as debugging
hints, but reproduction should not depend on them.
## App-Server API (For Integrations)
Non-TUI clients can create bundles via the app-server JSON-RPC method:
- `evalCase/create`
The handler copies the rollout into the bundle and derives `repo.patch` based on the selected start
marker when repo snapshots are available.

View File

@@ -0,0 +1,19 @@
[package]
name = "codex-eval-case"
version.workspace = true
edition.workspace = true
license.workspace = true
[lints]
workspace = true
[dependencies]
anyhow = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
time = { workspace = true, features = ["formatting", "parsing"] }
uuid = { workspace = true, features = ["v4"] }
[dev-dependencies]
pretty_assertions = { workspace = true }
tempfile = { workspace = true }

View File

@@ -0,0 +1,993 @@
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use anyhow::Context as _;
use serde::Deserialize;
use serde::Serialize;
use time::OffsetDateTime;
use time::format_description::well_known::Rfc3339;
use uuid::Uuid;
/// Kind of start marker recorded in a bundle.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum StartMarkerKind {
    RolloutLineTimestamp,
    RolloutLineIndex,
}
/// Start marker payload. Untagged, so it serializes as a bare string
/// (timestamp) or bare integer (line index).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
pub enum StartMarkerValue {
    Timestamp(String),
    LineIndex(u64),
}
/// Where in the rollout a capture starts, plus the label shown to the user.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct StartMarker {
    pub kind: StartMarkerKind,
    pub value: StartMarkerValue,
    pub display: String,
}
/// Strategy for locating the starting turn in `rollout.jsonl` by content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum RolloutStartSelectorKind {
    /// Find the first `event_msg` line where `payload.type == "user_message"` and
    /// `payload.message` contains `contains`.
    EventMsgUserMessageContains,
    /// Find the first `response_item` line where `payload.role == "user"` and the combined text
    /// from `payload.content[*].text` contains `contains`.
    ResponseItemUserMessageContains,
}
/// Rollout metadata recorded in `manifest.json`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RolloutInfo {
    pub filename: String,
    /// Preferred, deterministic selector for slicing the rollout when reproducing a case.
    pub start_selector: RolloutStartSelector,
    /// Debugging hint only; do not use this for correctness when slicing.
    pub start: StartMarker,
}
/// Content-based selector paired with an optional timestamp lower bound.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RolloutStartSelector {
    pub kind: RolloutStartSelectorKind,
    pub contains: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub after_timestamp: Option<String>,
}
/// A configured git remote: its name and fetch/push URLs.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct GitRemote {
    pub name: String,
    pub fetch_url: String,
    pub push_url: String,
}
/// Git state recorded so the repository can be reconstructed elsewhere.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RepoGitInfo {
    /// Base commit to `git checkout` before applying `repo.patch`.
    pub commit: String,
    pub remotes: Vec<GitRemote>,
    // Remote URL chosen as the clone source, when one could be selected.
    pub canonical_remote: Option<String>,
    // Whether this capture is expected to be reproducible from the fields
    // above; `reproducible_reason` explains a `false` value.
    pub reproducible: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reproducible_reason: Option<String>,
    pub is_dirty: bool,
    /// Output of `git describe --tags --always --dirty`, when available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub describe: Option<String>,
}
/// Repository metadata recorded in `manifest.json`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct RepoInfo {
    /// Repository root as reported by `git rev-parse --show-toplevel` (optional).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub root: Option<String>,
    /// Relative path from `repo.root` to capture-time cwd (optional).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cwd_rel: Option<String>,
    pub git: RepoGitInfo,
    pub patch_filename: String,
}
/// Free-form user notes captured alongside the bundle.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Notes {
    pub what_went_wrong: String,
    pub what_good_looks_like: String,
}
/// Flags describing which optional artifacts the bundle carries.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Artifacts {
    pub include_logs: bool,
}
/// Top-level schema of `manifest.json` (version 1).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct EvalCaseManifestV1 {
    pub version: String,
    pub case_id: String,
    pub created_at: String,
    pub conversation_id: String,
    pub source: String,
    pub rollout: RolloutInfo,
    pub repo: RepoInfo,
    pub notes: Notes,
    pub artifacts: Artifacts,
}
/// Inputs for `create_eval_case_bundle`.
#[derive(Debug, Clone)]
pub struct CreateEvalCaseArgs {
    pub codex_home: PathBuf,
    pub conversation_id: String,
    pub rollout_path: PathBuf,
    pub start: StartMarker,
    pub repo_cwd: PathBuf,
    /// When present, derive `repo.patch` from the provided commit snapshot instead of the current
    /// working tree.
    pub repo_snapshot: Option<RepoSnapshot>,
    pub notes: Notes,
    pub include_logs: bool,
    /// Log file contents to write as `codex-logs.log`; an empty file is
    /// written when `include_logs` is set but this is `None`.
    pub logs_bytes: Option<Vec<u8>>,
}
/// Pair of commits (base and snapshot) used to derive `repo.patch`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoSnapshot {
    pub base_sha: String,
    pub commit_sha: String,
}
/// Outcome of bundle creation: the generated case id and bundle path.
#[derive(Debug, Clone)]
pub struct CreateEvalCaseResult {
    pub case_id: String,
    pub path: PathBuf,
}
/// Reduce `input` to a short, filesystem-friendly slug: ASCII letters and
/// digits are kept (lowercased), every run of other characters collapses to
/// a single `-`, and leading/trailing dashes are stripped. Falls back to
/// `"repo"` when nothing survives.
fn sanitize_repo_slug(input: &str) -> String {
    // Split on every non-alphanumeric character; empty segments correspond
    // to runs of separators (and leading/trailing ones), so dropping them
    // both collapses runs and trims the edges.
    let slug = input
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("-")
        .to_ascii_lowercase();
    if slug.is_empty() {
        "repo".to_string()
    } else {
        slug
    }
}
/// Derive a slug for the repository at `repo_cwd`.
///
/// Uses the git toplevel directory name when `git rev-parse --show-toplevel`
/// succeeds; otherwise falls back to the cwd basename, and finally to
/// `"repo"` when neither yields a name.
fn repo_slug(repo_cwd: &Path) -> String {
    let top_level = git_stdout(repo_cwd, &["rev-parse", "--show-toplevel"])
        .ok()
        .map(PathBuf::from);
    let name = match top_level.as_ref().and_then(|path| path.file_name()) {
        Some(name) => name.to_string_lossy().into_owned(),
        None => repo_cwd
            .file_name()
            .map(|name| name.to_string_lossy().into_owned())
            .unwrap_or_else(|| "repo".to_string()),
    };
    sanitize_repo_slug(&name)
}
/// Assemble a local eval-case bundle under `<codex_home>/eval-case/<case_id>`.
///
/// The bundle directory contains:
/// - `rollout.jsonl`  — copy of the session rollout
/// - `repo.patch`     — snapshot diff (`base..snapshot` commits) or a
///   worktree-vs-HEAD diff when no snapshot was recorded
/// - `codex-logs.log` — raw log bytes, written only when `include_logs`
/// - `manifest.json`  — v1 manifest with git metadata and a durable rollout
///   start selector
///
/// Returns the generated `case_id` and the bundle directory path.
///
/// # Errors
/// Fails on filesystem errors, on `created_at` formatting, or when patch
/// generation for a recorded snapshot fails. Most git metadata lookups are
/// best-effort and degrade to `None`/defaults instead of erroring.
pub fn create_eval_case_bundle(args: &CreateEvalCaseArgs) -> anyhow::Result<CreateEvalCaseResult> {
    let created_at = OffsetDateTime::now_utc();
    let created_at_rfc3339 = created_at.format(&Rfc3339).context("format created_at")?;
    // Colon-free timestamp so the id is safe in file names on all platforms.
    let ts_for_id = format!(
        "{:04}-{:02}-{:02}T{:02}-{:02}-{:02}",
        created_at.year(),
        u8::from(created_at.month()),
        created_at.day(),
        created_at.hour(),
        created_at.minute(),
        created_at.second()
    );
    // Short, human-scannable id: datetime + repo + 6-digit suffix.
    // Collision risk is low and acceptable for local bundles.
    let repo = repo_slug(&args.repo_cwd);
    let id6 = (Uuid::new_v4().as_u128() % 1_000_000) as u32;
    let case_id = format!("{ts_for_id}-{repo}-{id6:06}");
    let bundle_dir = args.codex_home.join("eval-case").join(&case_id);
    std::fs::create_dir_all(&bundle_dir)
        .with_context(|| format!("create eval bundle dir {}", bundle_dir.display()))?;
    let rollout_dst = bundle_dir.join("rollout.jsonl");
    std::fs::copy(&args.rollout_path, &rollout_dst).with_context(|| {
        format!(
            "copy rollout {} -> {}",
            args.rollout_path.display(),
            rollout_dst.display()
        )
    })?;
    // Re-read the rollout text to derive a line-number-independent start selector.
    let rollout_text = std::fs::read_to_string(&args.rollout_path).with_context(|| {
        format!(
            "read rollout for start selector {}",
            args.rollout_path.display()
        )
    })?;
    // Prefer a recorded repo snapshot (commit pair) so the patch is pinned to
    // what was actually captured; otherwise diff the live worktree against HEAD.
    let (base_sha, patch) = match args.repo_snapshot.as_ref() {
        Some(snapshot) => {
            git_patch_between_commits(&args.repo_cwd, &snapshot.base_sha, &snapshot.commit_sha)
                .with_context(|| {
                    format!(
                        "generate patch for repo snapshot {}..{}",
                        snapshot.base_sha, snapshot.commit_sha
                    )
                })?
        }
        None => git_patch_against_head(&args.repo_cwd)?,
    };
    let patch_path = bundle_dir.join("repo.patch");
    std::fs::write(&patch_path, patch)
        .with_context(|| format!("write patch {}", patch_path.display()))?;
    if args.include_logs {
        // Write the log file even when no bytes were captured so the
        // manifest's `include_logs` flag always matches files on disk.
        let logs_path = bundle_dir.join("codex-logs.log");
        let bytes = args
            .logs_bytes
            .clone()
            .unwrap_or_else(|| Vec::with_capacity(0));
        std::fs::write(&logs_path, bytes)
            .with_context(|| format!("write logs {}", logs_path.display()))?;
    }
    let repo_root = git_stdout(&args.repo_cwd, &["rev-parse", "--show-toplevel"])
        .ok()
        .filter(|s| !s.is_empty());
    // Record the cwd relative to the repo root; omitted when the cwd is the
    // root itself (strip_prefix yields an empty path) or outside the repo.
    let cwd_rel = repo_root
        .as_ref()
        .and_then(|root| {
            let root = PathBuf::from(root);
            args.repo_cwd
                .strip_prefix(&root)
                .ok()
                .map(|p| p.display().to_string())
        })
        .filter(|s| !s.is_empty());
    // `remotes_ok == false` means `git remote -v` itself failed (likely not a
    // git repo), which is distinct from "repo with zero remotes".
    let (git_remotes, remotes_ok) = match git_remotes(&args.repo_cwd) {
        Ok(remotes) => (remotes, true),
        Err(_) => (Vec::new(), false),
    };
    let canonical_remote = select_canonical_remote(&git_remotes);
    let git_is_dirty = git_is_dirty(&args.repo_cwd).unwrap_or(false);
    let git_describe = git_stdout(
        &args.repo_cwd,
        &["describe", "--tags", "--always", "--dirty"],
    )
    .ok()
    .filter(|s| !s.is_empty());
    // "Reproducible" means someone else could plausibly recreate the state:
    // the repo has a remote to fetch from and a full 40-char base commit SHA.
    let mut reproducible = true;
    let mut reproducible_reason: Option<String> = None;
    if !remotes_ok {
        reproducible = false;
        reproducible_reason = Some("not_a_git_repo".to_string());
    } else if canonical_remote.is_none() {
        reproducible = false;
        reproducible_reason = Some("no_git_remote".to_string());
    }
    if base_sha.len() != 40 || base_sha == "unknown" {
        reproducible = false;
        // Keep the first (more specific) reason when several apply.
        if reproducible_reason.is_none() {
            reproducible_reason = Some("unknown_commit".to_string());
        }
    }
    let start_selector = derive_rollout_start_selector(&args.start, &rollout_text);
    let manifest = EvalCaseManifestV1 {
        version: "v1".to_string(),
        case_id: case_id.clone(),
        created_at: created_at_rfc3339,
        conversation_id: args.conversation_id.clone(),
        source: "cli".to_string(),
        rollout: RolloutInfo {
            filename: "rollout.jsonl".to_string(),
            start_selector,
            start: args.start.clone(),
        },
        repo: RepoInfo {
            root: repo_root,
            cwd_rel,
            git: RepoGitInfo {
                commit: base_sha,
                remotes: git_remotes,
                canonical_remote,
                reproducible,
                reproducible_reason,
                is_dirty: git_is_dirty,
                describe: git_describe,
            },
            patch_filename: "repo.patch".to_string(),
        },
        notes: args.notes.clone(),
        artifacts: Artifacts {
            include_logs: args.include_logs,
        },
    };
    let manifest_path = bundle_dir.join("manifest.json");
    let manifest_json = serde_json::to_string_pretty(&manifest).context("serialize manifest")?;
    // Trailing newline keeps the file friendly to diff/cat tooling.
    std::fs::write(&manifest_path, format!("{manifest_json}\n"))
        .with_context(|| format!("write manifest {}", manifest_path.display()))?;
    Ok(CreateEvalCaseResult {
        case_id,
        path: bundle_dir,
    })
}
/// Collapse every run of whitespace in `s` into a single space and trim the
/// ends (empty input stays empty).
fn normalize_whitespace(s: &str) -> String {
    s.split_whitespace().collect::<Vec<_>>().join(" ")
}
/// Return at most the first `max_chars` characters of `s` as an owned string.
///
/// Counts Unicode scalar values (chars), not bytes, so multi-byte characters
/// are never split.
fn truncate_chars(s: &str, max_chars: usize) -> String {
    // The byte offset of char number `max_chars`, if the string is that long,
    // is exactly where the truncated prefix ends.
    match s.char_indices().nth(max_chars) {
        Some((byte_idx, _)) => s[..byte_idx].to_string(),
        None => s.to_string(),
    }
}
/// Whether a rollout "user" message is a real user instruction and may be
/// offered as a start selector.
///
/// The rollout can include synthetic "user" messages (environment context,
/// AGENTS instructions) that should not be used as start selectors; those are
/// recognized by their fixed leading markers.
fn should_show_start_message(message: &str) -> bool {
    const SYNTHETIC_PREFIXES: [&str; 2] = ["<environment_context>", "# AGENTS.md instructions"];
    let trimmed = message.trim_start();
    !SYNTHETIC_PREFIXES
        .iter()
        .any(|prefix| trimmed.starts_with(prefix))
}
/// Parse an RFC 3339 timestamp string, returning `None` on any parse failure.
fn parse_rfc3339(s: &str) -> Option<OffsetDateTime> {
    OffsetDateTime::parse(s, &Rfc3339).ok()
}
/// True when `timestamp` is at or after `after_timestamp`.
///
/// Both strings are compared as parsed RFC 3339 instants when possible; if
/// either fails to parse, falls back to a lexicographic comparison, which
/// still orders consistently-formatted RFC 3339 strings correctly.
fn is_after_timestamp(timestamp: &str, after_timestamp: &str) -> bool {
    if let (Some(ts), Some(after)) = (parse_rfc3339(timestamp), parse_rfc3339(after_timestamp)) {
        ts >= after
    } else {
        timestamp >= after_timestamp
    }
}
/// Turn a UI start marker into a durable selector for the bundled rollout.
///
/// Line indices are not stable if the rollout is edited, so the selector
/// stores a normalized message substring (truncated to 200 chars) plus a
/// timestamp to search from. A `LineIndex` marker is first resolved to that
/// line's timestamp by peeking at the rollout JSON. The message is taken from
/// the first matching `event_msg` user message, falling back to
/// `response_item` user content; when neither matches, `contains` is left
/// empty, which makes the selector match nothing downstream.
fn derive_rollout_start_selector(start: &StartMarker, rollout_text: &str) -> RolloutStartSelector {
    let after_timestamp = match &start.value {
        StartMarkerValue::Timestamp(ts) => Some(ts.clone()),
        StartMarkerValue::LineIndex(idx) => {
            // Resolve the line index to that line's own timestamp (if any).
            let idx = usize::try_from(*idx).ok();
            idx.and_then(|i| rollout_text.lines().nth(i))
                .and_then(|line| {
                    serde_json::from_str::<serde_json::Value>(line)
                        .ok()
                        .and_then(|v| {
                            v.get("timestamp")
                                .and_then(serde_json::Value::as_str)
                                .map(str::to_string)
                        })
                })
        }
    };
    let (kind, message) = if let Some(message) =
        find_first_event_msg_user_message(rollout_text, after_timestamp.as_deref())
    {
        (
            RolloutStartSelectorKind::EventMsgUserMessageContains,
            Some(message),
        )
    } else if let Some(message) =
        find_first_response_item_user_message(rollout_text, after_timestamp.as_deref())
    {
        (
            RolloutStartSelectorKind::ResponseItemUserMessageContains,
            Some(message),
        )
    } else {
        // No match: keep the default kind with an empty `contains`.
        (RolloutStartSelectorKind::EventMsgUserMessageContains, None)
    };
    let contains = message
        .map(|m| truncate_chars(&normalize_whitespace(&m), 200))
        .unwrap_or_default();
    RolloutStartSelector {
        kind,
        contains,
        after_timestamp,
    }
}
/// Find the first `event_msg`/`user_message` rollout line at/after
/// `after_timestamp` (when given) whose text is a real user instruction, and
/// return its message text.
fn find_first_event_msg_user_message(
    rollout_text: &str,
    after_timestamp: Option<&str>,
) -> Option<String> {
    rollout_text.lines().find_map(|line| {
        let v = serde_json::from_str::<serde_json::Value>(line).ok()?;
        let timestamp = v.get("timestamp")?.as_str()?;
        // Skip lines strictly before the requested start timestamp.
        if after_timestamp.is_some_and(|after| !is_after_timestamp(timestamp, after)) {
            return None;
        }
        if v.get("type")?.as_str()? != "event_msg" {
            return None;
        }
        let payload = v.get("payload")?;
        if payload.get("type")?.as_str()? != "user_message" {
            return None;
        }
        let message = payload.get("message")?.as_str()?;
        should_show_start_message(message).then(|| message.to_string())
    })
}
/// Find the first `response_item` rollout line with role `user` at/after
/// `after_timestamp` (when given), concatenating its `content[].text` parts
/// into one whitespace-normalized string. Empty or synthetic messages are
/// skipped.
fn find_first_response_item_user_message(
    rollout_text: &str,
    after_timestamp: Option<&str>,
) -> Option<String> {
    rollout_text.lines().find_map(|line| {
        let v = serde_json::from_str::<serde_json::Value>(line).ok()?;
        let timestamp = v.get("timestamp")?.as_str()?;
        if after_timestamp.is_some_and(|after| !is_after_timestamp(timestamp, after)) {
            return None;
        }
        if v.get("type")?.as_str()? != "response_item" {
            return None;
        }
        let payload = v.get("payload")?;
        if payload.get("role")?.as_str()? != "user" {
            return None;
        }
        let content = payload.get("content")?.as_array()?;
        // Join every text part with a space; non-text parts are ignored.
        let joined = content
            .iter()
            .filter_map(|item| item.get("text").and_then(serde_json::Value::as_str))
            .collect::<Vec<_>>()
            .join(" ");
        let joined = normalize_whitespace(&joined);
        (!joined.is_empty() && should_show_start_message(&joined)).then_some(joined)
    })
}
/// Locate the line index in `rollout_text` where replay should start.
///
/// Finds the first line matching `selector` — using the same matching rules
/// the selector was derived with (`event_msg` user message text, or the
/// concatenated `response_item` user text, containing `selector.contains`, at
/// or after `selector.after_timestamp`). Returns `None` for an empty selector
/// or when nothing matches. The matched index is then widened backward over
/// any contiguous `turn_context` lines so replay keeps the environment/config
/// that immediately preceded the matched message.
pub fn find_rollout_start_index(
    rollout_text: &str,
    selector: &RolloutStartSelector,
) -> Option<usize> {
    // An empty `contains` would match every line; treat it as "no selector".
    if selector.contains.trim().is_empty() {
        return None;
    }
    let lines = rollout_text.lines().collect::<Vec<_>>();
    let matched_idx = lines.iter().enumerate().find_map(|(idx, line)| {
        let v = serde_json::from_str::<serde_json::Value>(line).ok()?;
        let timestamp = v.get("timestamp")?.as_str()?;
        if let Some(after) = selector.after_timestamp.as_deref()
            && !is_after_timestamp(timestamp, after)
        {
            return None;
        }
        match selector.kind {
            RolloutStartSelectorKind::EventMsgUserMessageContains => {
                let ty = v.get("type")?.as_str()?;
                if ty != "event_msg" {
                    return None;
                }
                let payload = v.get("payload")?;
                let payload_ty = payload.get("type")?.as_str()?;
                if payload_ty != "user_message" {
                    return None;
                }
                let message = payload.get("message")?.as_str()?;
                message.contains(selector.contains.as_str()).then_some(idx)
            }
            RolloutStartSelectorKind::ResponseItemUserMessageContains => {
                let ty = v.get("type")?.as_str()?;
                if ty != "response_item" {
                    return None;
                }
                let payload = v.get("payload")?;
                let role = payload.get("role")?.as_str()?;
                if role != "user" {
                    return None;
                }
                let content = payload.get("content")?.as_array()?;
                // Rebuild the joined text the same way the selector was
                // derived, so `contains` matches against identical input.
                let mut out = String::new();
                for item in content {
                    let Some(text) = item.get("text").and_then(serde_json::Value::as_str) else {
                        continue;
                    };
                    if !out.is_empty() {
                        out.push(' ');
                    }
                    out.push_str(text);
                }
                let out = normalize_whitespace(out.as_str());
                out.contains(selector.contains.as_str()).then_some(idx)
            }
        }
    })?;
    // Include immediately preceding turn context lines so replay has the right environment/config.
    let mut start_idx = matched_idx;
    while start_idx > 0 {
        let prev_line = lines.get(start_idx.saturating_sub(1))?;
        let Ok(v) = serde_json::from_str::<serde_json::Value>(prev_line) else {
            break;
        };
        let Some(ty) = v.get("type").and_then(serde_json::Value::as_str) else {
            break;
        };
        if ty != "turn_context" {
            break;
        }
        start_idx = start_idx.saturating_sub(1);
    }
    Some(start_idx)
}
/// Slice the rollout so it begins at the line matched by `selector`
/// (including any immediately preceding `turn_context` lines), returning the
/// remaining lines joined with a trailing newline. `None` when the selector
/// matches nothing.
pub fn slice_rollout_from_selector(
    rollout_text: &str,
    selector: &RolloutStartSelector,
) -> Option<String> {
    let start = find_rollout_start_index(rollout_text, selector)?;
    let mut out = String::new();
    for (offset, line) in rollout_text.lines().skip(start).enumerate() {
        if offset > 0 {
            out.push('\n');
        }
        out.push_str(line);
    }
    out.push('\n');
    Some(out)
}
/// Capture the current working tree as a patch relative to `HEAD`.
///
/// Returns the HEAD SHA (or `"unknown"` when `rev-parse` fails) plus patch
/// bytes combining `git diff --binary HEAD` (tracked changes) with one
/// `--no-index` hunk per untracked, non-ignored file so brand-new files are
/// included too. Individual diff failures are skipped best-effort rather than
/// aborting bundle creation.
fn git_patch_against_head(repo_cwd: &Path) -> anyhow::Result<(String, Vec<u8>)> {
    let base_sha =
        git_stdout(repo_cwd, &["rev-parse", "HEAD"]).unwrap_or_else(|_| "unknown".to_string());
    let mut patch = Vec::new();
    if let Ok(mut bytes) = git_diff(
        repo_cwd,
        &["diff", "--no-textconv", "--no-ext-diff", "--binary", "HEAD"],
    ) {
        patch.append(&mut bytes);
    }
    let untracked =
        git_stdout(repo_cwd, &["ls-files", "--others", "--exclude-standard"]).unwrap_or_default();
    for file in untracked.lines().map(str::trim).filter(|s| !s.is_empty()) {
        // Diffing against the null device yields an "add this file" hunk.
        let null_device = if cfg!(windows) { "NUL" } else { "/dev/null" };
        let args = [
            "diff",
            "--no-textconv",
            "--no-ext-diff",
            "--binary",
            "--no-index",
            "--",
            null_device,
            file,
        ];
        if let Ok(mut bytes) = git_diff(repo_cwd, &args) {
            patch.append(&mut bytes);
        }
    }
    Ok((base_sha, patch))
}
/// Parse `git remote -v` output into a deterministic list of remotes.
///
/// Each remote may list several fetch/push URLs; the lexicographically
/// smallest fetch URL wins, and the push URL falls back to the fetch URL when
/// no push entry exists. Remotes without any fetch URL are dropped. Output is
/// ordered by remote name (BTreeMap iteration order).
///
/// # Errors
/// Fails when `git remote -v` itself fails (typically: not a git repo).
fn git_remotes(repo_cwd: &Path) -> anyhow::Result<Vec<GitRemote>> {
    let output = git_stdout(repo_cwd, &["remote", "-v"])?;
    // Use a BTreeMap so iteration is deterministic.
    let mut by_name: std::collections::BTreeMap<String, (Vec<String>, Vec<String>)> =
        std::collections::BTreeMap::new();
    for line in output.lines().map(str::trim).filter(|s| !s.is_empty()) {
        // Lines look like: `<name>\t<url> (fetch|push)`; split on whitespace.
        let mut parts = line.split_whitespace();
        let Some(name) = parts.next() else {
            continue;
        };
        let Some(url) = parts.next() else {
            continue;
        };
        let Some(kind) = parts.next() else {
            continue;
        };
        let entry = by_name
            .entry(name.to_string())
            .or_insert_with(|| (Vec::new(), Vec::new()));
        match kind {
            "(fetch)" => entry.0.push(url.to_string()),
            "(push)" => entry.1.push(url.to_string()),
            _ => {}
        }
    }
    let remotes = by_name
        .into_iter()
        .filter_map(|(name, (mut fetch_urls, mut push_urls))| {
            // Sort so the URL choice is stable regardless of git output order.
            fetch_urls.sort();
            push_urls.sort();
            let fetch_url = fetch_urls.into_iter().next()?;
            let push_url = push_urls
                .into_iter()
                .next()
                .unwrap_or_else(|| fetch_url.clone());
            Some(GitRemote {
                name,
                fetch_url,
                push_url,
            })
        })
        .collect::<Vec<_>>();
    Ok(remotes)
}
/// Choose the remote URL that best identifies the repository: `origin`'s
/// fetch URL when present, otherwise the fetch URL of the alphabetically
/// first remote. `None` when there are no remotes at all.
fn select_canonical_remote(remotes: &[GitRemote]) -> Option<String> {
    remotes
        .iter()
        .find(|r| r.name == "origin")
        .or_else(|| remotes.iter().min_by_key(|r| &r.name))
        .map(|r| r.fetch_url.clone())
}
fn git_is_dirty(repo_cwd: &Path) -> anyhow::Result<bool> {
let diff_unstaged = Command::new("git")
.args(["diff", "--quiet"])
.current_dir(repo_cwd)
.status()
.context("run git diff --quiet")?;
if !diff_unstaged.success() {
return Ok(true);
}
let diff_staged = Command::new("git")
.args(["diff", "--cached", "--quiet"])
.current_dir(repo_cwd)
.status()
.context("run git diff --cached --quiet")?;
if !diff_staged.success() {
return Ok(true);
}
let untracked =
git_stdout(repo_cwd, &["ls-files", "--others", "--exclude-standard"]).unwrap_or_default();
Ok(untracked.lines().any(|s| !s.trim().is_empty()))
}
/// Run `git <args>` in `repo_cwd` and return its trimmed stdout.
///
/// # Errors
/// Fails when the process cannot be spawned, exits non-zero, or prints
/// non-UTF-8 output.
fn git_stdout(repo_cwd: &Path, args: &[&str]) -> anyhow::Result<String> {
    let output = Command::new("git")
        .args(args)
        .current_dir(repo_cwd)
        .output()
        .with_context(|| format!("run git {}", args.join(" ")))?;
    anyhow::ensure!(
        output.status.success(),
        "git {} failed with {}",
        args.join(" "),
        output.status
    );
    let stdout = String::from_utf8(output.stdout).context("decode git stdout")?;
    Ok(stdout.trim().to_owned())
}
/// Run a `git diff`-style command and return its raw stdout bytes.
///
/// Exit code 1 is accepted as success because `git diff` uses it to signal
/// "differences found"; any other non-zero code (or death by signal, which
/// has no code) is an error.
fn git_diff(repo_cwd: &Path, args: &[&str]) -> anyhow::Result<Vec<u8>> {
    let output = Command::new("git")
        .args(args)
        .current_dir(repo_cwd)
        .output()
        .with_context(|| format!("run git {}", args.join(" ")))?;
    match output.status.code() {
        Some(0) | Some(1) => Ok(output.stdout),
        _ => anyhow::bail!("git {} failed with {}", args.join(" "), output.status),
    }
}
/// Produce a binary-safe patch between two commits.
///
/// Returns `base_sha` (echoed back for recording in the manifest) together
/// with the raw `git diff base..commit` output bytes.
fn git_patch_between_commits(
    repo_cwd: &Path,
    base_sha: &str,
    commit_sha: &str,
) -> anyhow::Result<(String, Vec<u8>)> {
    let args = [
        "diff",
        "--no-textconv",
        "--no-ext-diff",
        "--binary",
        base_sha,
        commit_sha,
    ];
    let patch = git_diff(repo_cwd, &args)?;
    Ok((base_sha.to_owned(), patch))
}
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use tempfile::TempDir;

    /// End-to-end bundle creation against a real (temp) git repo with a dirty
    /// worktree and no remotes: verifies the bundle layout, manifest fields,
    /// non-empty start selector, and that log bytes are written verbatim.
    #[test]
    fn creates_bundle_with_manifest_and_rollout() {
        let codex_home = TempDir::new().unwrap();
        let repo_dir = TempDir::new().unwrap();
        let repo_root = repo_dir.path().join("my-repo");
        std::fs::create_dir_all(&repo_root).unwrap();
        std::fs::write(repo_root.join("README.md"), "hi\n").unwrap();
        // Initialize a repo and commit the README so HEAD exists.
        let init_status = Command::new("git")
            .args(["init", "-q"])
            .current_dir(&repo_root)
            .status()
            .unwrap();
        assert!(init_status.success());
        let add_status = Command::new("git")
            .args(["add", "."])
            .current_dir(&repo_root)
            .status()
            .unwrap();
        assert!(add_status.success());
        // Inline identity config so the commit works without global git config.
        let commit_status = Command::new("git")
            .args([
                "-c",
                "user.name=codex",
                "-c",
                "user.email=codex@example.com",
                "commit",
                "-m",
                "init",
                "-q",
            ])
            .current_dir(&repo_root)
            .status()
            .unwrap();
        assert!(commit_status.success());
        // Dirty the worktree so `is_dirty` is exercised.
        std::fs::write(repo_root.join("README.md"), "changed\n").unwrap();
        // Minimal rollout: a non-message line, a user message, a reply.
        let rollout_path = repo_root.join("rollout.jsonl");
        let rollout = [
            r#"{"timestamp":"2024-01-01T00:00:00Z","type":"token_count","payload":{"total":1}}"#,
            r#"{"timestamp":"2024-01-01T00:00:00Z","type":"event_msg","payload":{"type":"user_message","message":"do the thing"}}"#,
            r#"{"timestamp":"2024-01-01T00:00:01Z","type":"event_msg","payload":{"type":"agent_message","message":"ok"}}"#,
        ]
        .join("\n");
        std::fs::write(&rollout_path, format!("{rollout}\n")).unwrap();
        let args = CreateEvalCaseArgs {
            codex_home: codex_home.path().to_path_buf(),
            conversation_id: "conv-1".to_string(),
            rollout_path,
            start: StartMarker {
                kind: StartMarkerKind::RolloutLineIndex,
                value: StartMarkerValue::LineIndex(1),
                display: "Start now".to_string(),
            },
            repo_cwd: repo_root,
            repo_snapshot: None,
            notes: Notes {
                what_went_wrong: "bad".to_string(),
                what_good_looks_like: "good".to_string(),
            },
            include_logs: true,
            logs_bytes: Some(b"logs".to_vec()),
        };
        let out = create_eval_case_bundle(&args).unwrap();
        // Bundle directory is `<codex_home>/eval-case/<case_id>` and the id
        // embeds the repo slug.
        assert!(!out.case_id.is_empty());
        assert!(out.path.exists());
        assert_eq!(out.path.file_name().unwrap(), out.case_id.as_str());
        assert!(out.path.starts_with(codex_home.path().join("eval-case")));
        assert!(out.case_id.contains("my-repo"));
        let manifest_text = std::fs::read_to_string(out.path.join("manifest.json")).unwrap();
        let manifest: EvalCaseManifestV1 = serde_json::from_str(&manifest_text).unwrap();
        assert_eq!(manifest.version, "v1");
        assert_eq!(manifest.conversation_id, "conv-1");
        assert_eq!(manifest.notes, args.notes);
        assert!(manifest.repo.git.commit != "unknown");
        // Repo has no remotes, so it is marked non-reproducible.
        assert_eq!(manifest.repo.git.canonical_remote, None);
        assert_eq!(manifest.repo.git.reproducible, false);
        assert_eq!(
            manifest.repo.git.reproducible_reason,
            Some("no_git_remote".to_string())
        );
        assert_eq!(manifest.repo.git.is_dirty, true);
        assert_eq!(manifest.artifacts.include_logs, true);
        assert!(!manifest.rollout.start_selector.contains.trim().is_empty());
        assert!(out.path.join("repo.patch").exists());
        assert!(out.path.join("rollout.jsonl").exists());
        // Log bytes are copied through unchanged.
        assert_eq!(
            std::fs::read(out.path.join("codex-logs.log")).unwrap(),
            b"logs".to_vec()
        );
    }

    /// When a repo snapshot (commit pair) is provided, the patch must cover
    /// exactly base..snapshot and ignore later working-tree edits.
    #[test]
    fn creates_bundle_from_repo_snapshot_commit() {
        let codex_home = TempDir::new().unwrap();
        let repo_dir = TempDir::new().unwrap();
        let repo_root = repo_dir.path().join("my-repo");
        std::fs::create_dir_all(&repo_root).unwrap();
        std::fs::write(repo_root.join("README.md"), "base\n").unwrap();
        let init_status = Command::new("git")
            .args(["init", "-q"])
            .current_dir(&repo_root)
            .status()
            .unwrap();
        assert!(init_status.success());
        let add_status = Command::new("git")
            .args(["add", "."])
            .current_dir(&repo_root)
            .status()
            .unwrap();
        assert!(add_status.success());
        let commit_status = Command::new("git")
            .args([
                "-c",
                "user.name=codex",
                "-c",
                "user.email=codex@example.com",
                "commit",
                "-m",
                "base",
                "-q",
            ])
            .current_dir(&repo_root)
            .status()
            .unwrap();
        assert!(commit_status.success());
        let base_sha = git_stdout(&repo_root, &["rev-parse", "HEAD"]).unwrap();
        // Create a snapshot commit.
        std::fs::write(repo_root.join("README.md"), "snapshot\n").unwrap();
        std::fs::write(repo_root.join("snap.txt"), "snap\n").unwrap();
        let add_status = Command::new("git")
            .args(["add", "."])
            .current_dir(&repo_root)
            .status()
            .unwrap();
        assert!(add_status.success());
        let commit_status = Command::new("git")
            .args([
                "-c",
                "user.name=codex",
                "-c",
                "user.email=codex@example.com",
                "commit",
                "-m",
                "snapshot",
                "-q",
            ])
            .current_dir(&repo_root)
            .status()
            .unwrap();
        assert!(commit_status.success());
        let snapshot_sha = git_stdout(&repo_root, &["rev-parse", "HEAD"]).unwrap();
        // Dirty the working tree after the snapshot commit; this should not affect repo.patch.
        std::fs::write(repo_root.join("README.md"), "worktree\n").unwrap();
        let rollout_path = repo_root.join("rollout.jsonl");
        let rollout = [
            r#"{"timestamp":"2024-01-01T00:00:00Z","type":"event_msg","payload":{"type":"user_message","message":"snapshot run"}}"#,
            r#"{"timestamp":"2024-01-01T00:00:01Z","type":"event_msg","payload":{"type":"agent_message","message":"ok"}}"#,
        ]
        .join("\n");
        std::fs::write(&rollout_path, format!("{rollout}\n")).unwrap();
        let args = CreateEvalCaseArgs {
            codex_home: codex_home.path().to_path_buf(),
            conversation_id: "conv-2".to_string(),
            rollout_path,
            start: StartMarker {
                kind: StartMarkerKind::RolloutLineIndex,
                value: StartMarkerValue::LineIndex(0),
                display: "From: test".to_string(),
            },
            repo_cwd: repo_root,
            repo_snapshot: Some(RepoSnapshot {
                base_sha: base_sha.clone(),
                commit_sha: snapshot_sha,
            }),
            notes: Notes {
                what_went_wrong: "bad".to_string(),
                what_good_looks_like: "good".to_string(),
            },
            include_logs: false,
            logs_bytes: None,
        };
        let out = create_eval_case_bundle(&args).unwrap();
        assert_eq!(out.path.file_name().unwrap(), out.case_id.as_str());
        assert!(out.path.starts_with(codex_home.path().join("eval-case")));
        assert!(out.case_id.contains("my-repo"));
        let manifest_text = std::fs::read_to_string(out.path.join("manifest.json")).unwrap();
        let manifest: EvalCaseManifestV1 = serde_json::from_str(&manifest_text).unwrap();
        // Manifest records the snapshot's base commit, not current HEAD.
        assert_eq!(manifest.repo.git.commit, base_sha);
        let patch_text =
            String::from_utf8(std::fs::read(out.path.join("repo.patch")).unwrap()).unwrap();
        assert!(
            patch_text.contains("snapshot"),
            "patch should include snapshot commit changes"
        );
        assert!(
            !patch_text.contains("worktree"),
            "patch should not include post-snapshot working tree changes"
        );
        assert!(patch_text.contains("snap.txt"));
    }

    /// The selector matches by message content + timestamp, so slicing must
    /// work even if the matched line's index differs from when it was derived,
    /// and must pull in the preceding `turn_context` line.
    #[test]
    fn slices_rollout_using_start_selector_even_when_line_numbers_shift() {
        let rollout = [
            r#"{"timestamp":"2024-01-01T00:00:00Z","type":"token_count","payload":{"total":1}}"#,
            r#"{"timestamp":"2024-01-01T00:00:00Z","type":"turn_context","payload":{"cwd":"/tmp","approval_policy":"never","sandbox_policy":"none","model":"x","summary":"none"}}"#,
            r#"{"timestamp":"2024-01-01T00:00:00Z","type":"event_msg","payload":{"type":"user_message","message":"run the tests please"}}"#,
            r#"{"timestamp":"2024-01-01T00:00:01Z","type":"event_msg","payload":{"type":"agent_message","message":"ok"}}"#,
        ]
        .join("\n");
        let selector = RolloutStartSelector {
            kind: RolloutStartSelectorKind::EventMsgUserMessageContains,
            contains: "run the tests".to_string(),
            after_timestamp: Some("2024-01-01T00:00:00Z".to_string()),
        };
        let sliced = slice_rollout_from_selector(&rollout, &selector).unwrap();
        // Should include the turn_context that immediately precedes the matched user_message.
        assert!(
            sliced
                .lines()
                .next()
                .unwrap()
                .contains("\"type\":\"turn_context\"")
        );
        assert!(sliced.contains("\"type\":\"user_message\""));
        assert!(!sliced.contains("\"type\":\"token_count\""));
    }
}

View File

@@ -155,8 +155,28 @@ pub struct CodexLogSnapshot {
pub thread_id: String,
}
/// An extra file to attach to a feedback upload envelope.
pub struct FeedbackAttachment {
    /// File name the attachment is sent under.
    pub filename: String,
    /// MIME type, when known; `None` leaves it unspecified.
    pub content_type: Option<String>,
    /// Raw attachment contents.
    pub bytes: Vec<u8>,
}
impl FeedbackAttachment {
    /// Build an attachment from its parts.
    pub fn new(filename: String, content_type: Option<String>, bytes: Vec<u8>) -> Self {
        Self {
            filename,
            content_type,
            bytes,
        }
    }
}
impl CodexLogSnapshot {
pub(crate) fn as_bytes(&self) -> &[u8] {
pub fn from_bytes(thread_id: String, bytes: Vec<u8>) -> Self {
Self { bytes, thread_id }
}
pub fn as_bytes(&self) -> &[u8] {
&self.bytes
}
@@ -176,6 +196,25 @@ impl CodexLogSnapshot {
include_logs: bool,
rollout_path: Option<&std::path::Path>,
session_source: Option<SessionSource>,
) -> Result<()> {
self.upload_feedback_with_attachments(
classification,
reason,
include_logs,
rollout_path,
session_source,
Vec::new(),
)
}
pub fn upload_feedback_with_attachments(
&self,
classification: &str,
reason: Option<&str>,
include_logs: bool,
rollout_path: Option<&std::path::Path>,
session_source: Option<SessionSource>,
extra_attachments: Vec<FeedbackAttachment>,
) -> Result<()> {
use std::collections::BTreeMap;
use std::fs;
@@ -265,6 +304,15 @@ impl CodexLogSnapshot {
}));
}
for attachment in extra_attachments {
envelope.add_item(EnvelopeItem::Attachment(Attachment {
buffer: attachment.bytes,
filename: attachment.filename,
content_type: attachment.content_type,
ty: None,
}));
}
client.send_envelope(envelope);
client.flush(Some(Duration::from_secs(UPLOAD_TIMEOUT_SECS)));
Ok(())

View File

@@ -141,6 +141,11 @@ pub enum Op {
summary: Option<ReasoningSummaryConfig>,
},
/// Enable or disable repository snapshots (ghost commits) for the remainder of the session.
///
/// When enabled, Codex will capture per-turn git snapshots and record them in the rollout.
SetRepoSnapshotting { enabled: bool },
/// Approve a command execution
ExecApproval {
/// The id of the submission we are approving

View File

@@ -36,6 +36,7 @@ codex-common = { workspace = true, features = [
"sandbox_summary",
] }
codex-core = { workspace = true }
codex-eval-case = { workspace = true }
codex-feedback = { workspace = true }
codex-file-search = { workspace = true }
codex-login = { workspace = true }

View File

@@ -806,6 +806,32 @@ impl App {
AppEvent::OpenFeedbackConsent { category } => {
self.chat_widget.open_feedback_consent(category);
}
AppEvent::OpenFeedbackBadResultFork => {
self.chat_widget.open_feedback_bad_result_fork();
}
AppEvent::OpenEvalCaptureIntro => {
self.chat_widget.open_eval_capture_intro();
}
AppEvent::EvalCaptureIntroContinue => {
self.chat_widget.on_eval_capture_intro_continue();
}
AppEvent::EvalCaptureSendConsent { upload } => {
self.chat_widget.on_eval_capture_send_consent(upload);
}
AppEvent::OpenEvalCaptureNotes { start_marker } => {
self.chat_widget.open_eval_capture_notes(start_marker);
}
AppEvent::CreateEvalCaptureBundle {
start_marker,
what_went_wrong,
what_good_looks_like,
} => {
self.chat_widget.create_eval_capture_bundle(
start_marker,
what_went_wrong,
what_good_looks_like,
);
}
AppEvent::LaunchExternalEditor => {
if self.chat_widget.external_editor_state() == ExternalEditorState::Active {
self.launch_external_editor(tui).await;

View File

@@ -182,6 +182,33 @@ pub(crate) enum AppEvent {
category: FeedbackCategory,
},
/// Open the bad-result fork chooser ("quick feedback" vs "capture eval sample").
OpenFeedbackBadResultFork,
/// Start the "capture eval sample" flow from /feedback.
OpenEvalCaptureIntro,
/// Dismiss the eval capture intro screen and continue to the start picker.
EvalCaptureIntroContinue,
/// Record whether the user consented to upload the eval sample bundle once captured.
/// This happens before the start message picker so the full flow is consented up front.
EvalCaptureSendConsent {
upload: bool,
},
/// Open the freeform notes entry for eval capture.
OpenEvalCaptureNotes {
start_marker: EvalCaptureStartMarker,
},
/// Create the eval bundle and report the output path/id in history.
CreateEvalCaptureBundle {
start_marker: EvalCaptureStartMarker,
what_went_wrong: String,
what_good_looks_like: String,
},
/// Launch the external editor after a normal draw has completed.
LaunchExternalEditor,
}
@@ -193,3 +220,27 @@ pub(crate) enum FeedbackCategory {
Bug,
Other,
}
/// Git snapshot recorded alongside a rollout turn: the ghost commit that
/// captured the worktree, plus (when recorded) the base commit it sits on.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct EvalCaptureRepoSnapshot {
    /// SHA of the ghost commit capturing the worktree at that turn.
    pub(crate) ghost_commit: String,
    /// Base commit the snapshot was taken against, if known.
    pub(crate) base_sha: Option<String>,
}
/// Identifies the user message an eval capture should start from — either by
/// rollout line position or by timestamp.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum EvalCaptureStartMarker {
    /// Start at a specific line of the rollout file.
    RolloutLineIndex {
        /// Line index into the rollout file.
        index: usize,
        /// Timestamp of that rollout line.
        timestamp: String,
        /// Human-readable label shown in the start picker.
        display: String,
        /// The user message text at that line.
        message: String,
        /// Per-turn git snapshot, when one was captured.
        repo_snapshot: Option<EvalCaptureRepoSnapshot>,
    },
    /// Start at a line identified by timestamp rather than index.
    #[allow(dead_code)]
    RolloutLineTimestamp {
        /// Timestamp identifying the rollout line.
        timestamp: String,
        /// Human-readable label shown in the start picker.
        display: String,
        /// The user message text at that line.
        message: String,
        /// Per-turn git snapshot, when one was captured.
        repo_snapshot: Option<EvalCaptureRepoSnapshot>,
    },
}

View File

@@ -0,0 +1,376 @@
use std::cell::RefCell;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
use crossterm::event::KeyModifiers;
use ratatui::buffer::Buffer;
use ratatui::layout::Rect;
use ratatui::style::Stylize;
use ratatui::text::Line;
use ratatui::widgets::Clear;
use ratatui::widgets::Paragraph;
use ratatui::widgets::StatefulWidgetRef;
use ratatui::widgets::Widget;
use crate::app_event::AppEvent;
use crate::app_event::EvalCaptureStartMarker;
use crate::app_event_sender::AppEventSender;
use crate::render::renderable::Renderable;
use super::CancellationEvent;
use super::bottom_pane_view::BottomPaneView;
use super::popup_consts::standard_popup_hint_line;
use super::textarea::TextArea;
use super::textarea::TextAreaState;
/// Build the selection-view params for the eval-capture intro screen: lists
/// the files that will be captured locally and offers either continuing the
/// capture flow or falling back to plain bad-result feedback.
pub(crate) fn eval_capture_intro_params(
    app_event_tx: AppEventSender,
) -> super::SelectionViewParams {
    // "Continue" advances to the next step of the capture flow.
    let continue_action: super::SelectionAction = Box::new({
        let tx = app_event_tx.clone();
        move |_sender: &AppEventSender| {
            tx.send(AppEvent::EvalCaptureIntroContinue);
        }
    });
    // Escape hatch back into the regular bad-result feedback consent flow.
    let basic_feedback_action: super::SelectionAction = Box::new({
        let tx = app_event_tx;
        move |_sender: &AppEventSender| {
            tx.send(AppEvent::OpenFeedbackConsent {
                category: crate::app_event::FeedbackCategory::BadResult,
            });
        }
    });
    let header_lines: Vec<Box<dyn crate::render::renderable::Renderable>> = vec![
        Line::from("Capture eval sample".bold()).into(),
        Line::from("").into(),
        Line::from("Here's everything we capture as an eval sample.".dim()).into(),
        Line::from("It's stored locally on your computer:".dim()).into(),
        Line::from(vec!["".into(), "manifest.json".into()]).into(),
        Line::from(vec!["".into(), "rollout.jsonl".into()]).into(),
        Line::from(vec!["".into(), "repo.patch".into()]).into(),
        Line::from(vec!["".into(), "codex-logs.log".into()]).into(),
        Line::from("").into(),
        Line::from("Next, you can optionally upload this bundle to the team.".dim()).into(),
    ];
    super::SelectionViewParams {
        footer_hint: Some(standard_popup_hint_line()),
        title: None,
        header: Box::new(crate::render::renderable::ColumnRenderable::with(
            header_lines,
        )),
        items: vec![
            super::SelectionItem {
                name: "Continue".to_string(),
                description: Some("Create an eval sample bundle locally.".to_string()),
                actions: vec![continue_action],
                dismiss_on_select: true,
                ..Default::default()
            },
            super::SelectionItem {
                name: "Send basic feedback instead".to_string(),
                description: Some(
                    "Skip eval capture and send basic feedback to the team.".to_string(),
                ),
                actions: vec![basic_feedback_action],
                dismiss_on_select: true,
                ..Default::default()
            },
        ],
        ..Default::default()
    }
}
/// Build the selection-view params for the upload-consent screen: a Yes/No
/// choice on whether the captured bundle should be uploaded to the team.
pub(crate) fn eval_capture_send_consent_params(
    app_event_tx: AppEventSender,
) -> super::SelectionViewParams {
    let upload_action: super::SelectionAction = Box::new({
        let tx = app_event_tx.clone();
        move |_sender: &AppEventSender| {
            tx.send(AppEvent::EvalCaptureSendConsent { upload: true });
        }
    });
    let skip_action: super::SelectionAction = Box::new({
        let tx = app_event_tx;
        move |_sender: &AppEventSender| {
            tx.send(AppEvent::EvalCaptureSendConsent { upload: false });
        }
    });
    let header_lines: Vec<Box<dyn crate::render::renderable::Renderable>> = vec![
        Line::from("Send eval sample to the codex team?".bold()).into(),
        Line::from("").into(),
        Line::from("If you choose Yes, it will upload the full bundle".dim()).into(),
        Line::from("to the team:".dim()).into(),
        Line::from(vec!["".into(), "manifest.json".into()]).into(),
        Line::from(vec!["".into(), "rollout.jsonl".into()]).into(),
        Line::from(vec!["".into(), "repo.patch".into()]).into(),
        Line::from(vec!["".into(), "codex-logs.log".into()]).into(),
        Line::from("").into(),
        Line::from("This may include file paths, code snippets, and tool outputs.".dim()).into(),
    ];
    super::SelectionViewParams {
        footer_hint: Some(standard_popup_hint_line()),
        title: None,
        header: Box::new(crate::render::renderable::ColumnRenderable::with(
            header_lines,
        )),
        items: vec![
            super::SelectionItem {
                name: "Yes".to_string(),
                description: Some(
                    "Upload the full bundle to the team for troubleshooting and model improvement."
                        .to_string(),
                ),
                actions: vec![upload_action],
                dismiss_on_select: true,
                ..Default::default()
            },
            super::SelectionItem {
                name: "No".to_string(),
                description: Some("Keep it local only.".to_string()),
                actions: vec![skip_action],
                dismiss_on_select: true,
                ..Default::default()
            },
        ],
        ..Default::default()
    }
}
/// Build the selection-view params for the start-message picker: one numbered
/// row per candidate start marker. Markers without a repo snapshot are shown
/// but disabled; selecting a row proceeds to the notes screen.
pub(crate) fn eval_capture_start_picker_params(
    app_event_tx: AppEventSender,
    options: Vec<EvalCaptureStartMarker>,
    initial_selected_idx: Option<usize>,
) -> super::SelectionViewParams {
    let items = options
        .into_iter()
        .map(|marker| {
            let action: super::SelectionAction = Box::new({
                let tx = app_event_tx.clone();
                let marker = marker.clone();
                move |_sender: &AppEventSender| {
                    tx.send(AppEvent::OpenEvalCaptureNotes {
                        start_marker: marker.clone(),
                    });
                }
            });
            // A bundle needs a repo snapshot to pin the code state; without
            // one the row stays visible but cannot be chosen.
            let disabled_reason = (!marker_has_repo_snapshot(&marker))
                .then_some("No repo snapshot available for this message.".to_string());
            super::SelectionItem {
                name: marker_display_name(&marker),
                // For the eval-capture start picker, we render the full message directly as the
                // row name (including a relative timestamp) to avoid duplicating it in the
                // description column.
                actions: vec![action],
                dismiss_on_select: true,
                disabled_reason,
                ..Default::default()
            }
        })
        .collect();
    super::SelectionViewParams {
        footer_hint: Some(standard_popup_hint_line()),
        title: Some("Pick the start message".to_string()),
        subtitle: Some(
            "Pick the first instruction for the specific code change or interaction you wanted to give feedback on."
                .to_string(),
        ),
        items,
        initial_selected_idx,
        show_numbers: true,
        ..Default::default()
    }
}
/// The picker-row label carried by a start marker (both variants have one).
fn marker_display_name(marker: &EvalCaptureStartMarker) -> String {
    match marker {
        EvalCaptureStartMarker::RolloutLineIndex { display, .. }
        | EvalCaptureStartMarker::RolloutLineTimestamp { display, .. } => display.clone(),
    }
}
/// Whether a start marker carries a per-turn repo snapshot (required to pin
/// the repository state for the bundle).
fn marker_has_repo_snapshot(marker: &EvalCaptureStartMarker) -> bool {
    match marker {
        EvalCaptureStartMarker::RolloutLineIndex { repo_snapshot, .. }
        | EvalCaptureStartMarker::RolloutLineTimestamp { repo_snapshot, .. } => {
            repo_snapshot.is_some()
        }
    }
}
/// Bottom-pane view that collects freeform "what went wrong" notes before the
/// eval bundle is created.
pub(crate) struct EvalCaptureNotesView {
    /// Start marker chosen in the picker; forwarded with the notes on submit.
    start_marker: EvalCaptureStartMarker,
    /// Channel used to emit the bundle-creation app event.
    app_event_tx: AppEventSender,
    /// Editable notes buffer.
    textarea: TextArea,
    /// Render-time textarea state; `RefCell` because rendering takes `&self`.
    textarea_state: RefCell<TextAreaState>,
    /// Set once the view should be dismissed.
    complete: bool,
}
impl EvalCaptureNotesView {
    /// Create an empty notes view for the given start marker.
    pub(crate) fn new(start_marker: EvalCaptureStartMarker, app_event_tx: AppEventSender) -> Self {
        Self {
            start_marker,
            app_event_tx,
            textarea: TextArea::new(),
            textarea_state: RefCell::new(TextAreaState::default()),
            complete: false,
        }
    }
    /// Send the bundle-creation event with the trimmed notes and mark this
    /// view done. `what_good_looks_like` is deliberately left empty: the
    /// current flow collects a single freeform note.
    fn submit(&mut self) {
        let what_went_wrong = self.textarea.text().trim().to_string();
        self.app_event_tx.send(AppEvent::CreateEvalCaptureBundle {
            start_marker: self.start_marker.clone(),
            what_went_wrong,
            what_good_looks_like: String::new(),
        });
        self.complete = true;
    }
}
impl BottomPaneView for EvalCaptureNotesView {
    fn handle_key_event(&mut self, key_event: KeyEvent) {
        // KeyCode/KeyModifiers are Copy, so the original event stays usable below.
        match (key_event.code, key_event.modifiers) {
            // Esc backs out regardless of modifiers.
            (KeyCode::Esc, _) => {
                self.on_ctrl_c();
            }
            // Plain Enter submits; modified Enter falls through to the textarea.
            (KeyCode::Enter, KeyModifiers::NONE) => self.submit(),
            _ => {
                self.textarea.input(key_event);
            }
        }
    }

    fn on_ctrl_c(&mut self) -> CancellationEvent {
        self.complete = true;
        CancellationEvent::Handled
    }

    fn is_complete(&self) -> bool {
        self.complete
    }

    fn handle_paste(&mut self, pasted: String) -> bool {
        if pasted.is_empty() {
            false
        } else {
            self.textarea.insert_str(&pasted);
            true
        }
    }
}
impl Renderable for EvalCaptureNotesView {
    // Total rows needed: wrapped header + input region + one spacer + one hint row.
    fn desired_height(&self, width: u16) -> u16 {
        self.header_lines(width).len() as u16 + self.input_height(width) + 2u16
    }

    // Terminal cursor position inside the textarea, if the layout leaves room for it.
    fn cursor_pos(&self, area: Rect) -> Option<(u16, u16)> {
        if area.height < 2 || area.width <= 2 {
            return None;
        }
        // The input region reserves one padding row above the textarea.
        let text_area_height = self.input_height(area.width).saturating_sub(1);
        if text_area_height == 0 {
            return None;
        }
        let top_line_count = self.header_lines(area.width).len() as u16;
        // Must mirror the rect computed in `render` so the cursor lines up.
        let textarea_rect = Rect {
            x: area.x.saturating_add(2),
            y: area.y.saturating_add(top_line_count).saturating_add(1),
            width: area.width.saturating_sub(2),
            height: text_area_height,
        };
        let state = *self.textarea_state.borrow();
        self.textarea.cursor_pos_with_state(textarea_rect, state)
    }

    fn render(&self, area: Rect, buf: &mut Buffer) {
        if area.height == 0 || area.width == 0 {
            return;
        }
        let placeholder = "Type here.";
        let input_height = self.input_height(area.width);
        let header_lines = self.header_lines(area.width);
        let header_height = header_lines.len() as u16;
        // Header (title + subtitle) sits at the top of the allotted area.
        let title_area = Rect {
            x: area.x,
            y: area.y,
            width: area.width,
            height: header_height,
        };
        let input_area = Rect {
            x: area.x,
            y: area.y.saturating_add(header_height),
            width: area.width,
            height: input_height,
        };
        Clear.render(area, buf);
        Paragraph::new(header_lines).render(title_area, buf);
        // Textarea is indented two columns and starts one row below the input area top;
        // this must match the rect in `cursor_pos`.
        let textarea_rect = Rect {
            x: input_area.x.saturating_add(2),
            y: input_area.y.saturating_add(1),
            width: input_area.width.saturating_sub(2),
            height: input_area.height.saturating_sub(1),
        };
        let mut state = self.textarea_state.borrow_mut();
        StatefulWidgetRef::render_ref(&(&self.textarea), textarea_rect, buf, &mut state);
        // Show a dim placeholder until the user has typed anything.
        if self.textarea.text().is_empty() {
            Paragraph::new(Line::from(placeholder.dim())).render(textarea_rect, buf);
        }
        // Key hint on the final row, after one blank spacer row.
        let hint_area = Rect {
            x: area.x,
            y: area
                .y
                .saturating_add(header_height)
                .saturating_add(input_height)
                .saturating_add(1),
            width: area.width,
            height: 1,
        };
        Paragraph::new(Line::from("Enter to continue • Esc to go back".dim()))
            .render(hint_area, buf);
    }
}
impl EvalCaptureNotesView {
    /// Wrapped title (bold) + subtitle (dim) lines shown above the input box.
    fn header_lines(&self, width: u16) -> Vec<Line<'static>> {
        let title = "What went wrong with this interaction, and what should have happened for this to be a correct, delightful response?";
        let subtitle = "Please be specific, and can refer to any aspect of model behavior, including tool calls, code changes, or messages.";
        let usable_width = width.saturating_sub(2).max(1) as usize;
        let bold_title = textwrap::wrap(title, usable_width)
            .into_iter()
            .map(|line| Line::from(line.into_owned().bold()));
        let dim_subtitle = textwrap::wrap(subtitle, usable_width)
            .into_iter()
            .map(|line| Line::from(line.into_owned().dim()));
        bold_title.chain(dim_subtitle).collect()
    }

    /// Input region height: the textarea (1..=8 rows) plus one padding row, capped at 9.
    fn input_height(&self, width: u16) -> u16 {
        let usable_width = width.saturating_sub(2);
        self.textarea
            .desired_height(usable_width)
            .clamp(1, 8)
            .saturating_add(1)
            .min(9)
    }
}

View File

@@ -387,7 +387,11 @@ fn make_feedback_item(
category: FeedbackCategory,
) -> super::SelectionItem {
let action: super::SelectionAction = Box::new(move |_sender: &AppEventSender| {
app_event_tx.send(AppEvent::OpenFeedbackConsent { category });
if category == FeedbackCategory::BadResult {
app_event_tx.send(AppEvent::OpenFeedbackBadResultFork);
} else {
app_event_tx.send(AppEvent::OpenFeedbackConsent { category });
}
});
super::SelectionItem {
name: name.to_string(),
@@ -398,6 +402,52 @@ fn make_feedback_item(
}
}
/// Popup offering the two "bad result" follow-ups: quick feedback or an eval capture.
pub(crate) fn feedback_bad_result_fork_params(
    app_event_tx: AppEventSender,
) -> super::SelectionViewParams {
    let quick_tx = app_event_tx.clone();
    let quick_action: super::SelectionAction = Box::new(move |_sender: &AppEventSender| {
        quick_tx.send(AppEvent::OpenFeedbackConsent {
            category: FeedbackCategory::BadResult,
        });
    });
    let capture_action: super::SelectionAction = Box::new(move |_sender: &AppEventSender| {
        app_event_tx.send(AppEvent::OpenEvalCaptureIntro);
    });
    let quick_item = super::SelectionItem {
        name: "Quick feedback".to_string(),
        description: Some(
            "Send a short note (and optionally logs) to maintainers.".to_string(),
        ),
        actions: vec![quick_action],
        dismiss_on_select: true,
        ..Default::default()
    };
    let capture_item = super::SelectionItem {
        name: "Capture eval sample".to_string(),
        description: Some(
            "Create a local bundle you can rerun as an eval case later.".to_string(),
        ),
        actions: vec![capture_action],
        dismiss_on_select: true,
        ..Default::default()
    };
    super::SelectionViewParams {
        title: Some("Bad result".to_string()),
        subtitle: Some("Choose the fastest way to help us improve.".to_string()),
        items: vec![quick_item, capture_item],
        ..Default::default()
    }
}
/// Build the upload consent popup params for a given feedback category.
pub(crate) fn feedback_upload_consent_params(
app_event_tx: AppEventSender,

View File

@@ -54,6 +54,7 @@ pub(crate) struct SelectionViewParams {
pub items: Vec<SelectionItem>,
pub is_searchable: bool,
pub search_placeholder: Option<String>,
pub show_numbers: bool,
pub header: Box<dyn Renderable>,
pub initial_selected_idx: Option<usize>,
}
@@ -67,12 +68,52 @@ impl Default for SelectionViewParams {
items: Vec::new(),
is_searchable: false,
search_placeholder: None,
show_numbers: true,
header: Box::new(()),
initial_selected_idx: None,
}
}
}
/// A text snippet that is word-wrapped to the available width at render time.
struct WrappedText {
    // Raw, unwrapped text.
    text: String,
    // When true, every wrapped line is rendered with the dim style.
    dim: bool,
}
impl WrappedText {
    /// Construct a snippet that renders dimmed.
    fn dim(text: String) -> Self {
        Self { text, dim: true }
    }

    /// Wrap the text to `width` columns (at least one), applying dim styling if set.
    fn wrapped_lines(&self, width: u16) -> Vec<Line<'static>> {
        let usable_width = usize::from(width.max(1));
        let mut lines = Vec::new();
        for wrapped in textwrap::wrap(&self.text, usable_width) {
            let span = Span::from(wrapped.into_owned());
            let line = if self.dim {
                Line::from(span.dim())
            } else {
                Line::from(span)
            };
            lines.push(line);
        }
        lines
    }
}
impl Renderable for WrappedText {
    // Height is the wrapped line count, saturating at u16::MAX.
    fn desired_height(&self, width: u16) -> u16 {
        u16::try_from(self.wrapped_lines(width).len()).unwrap_or(u16::MAX)
    }

    fn render(&self, area: Rect, buf: &mut Buffer) {
        let lines = self.wrapped_lines(area.width);
        Paragraph::new(lines).render(area, buf);
    }
}
pub(crate) struct ListSelectionView {
footer_hint: Option<Line<'static>>,
items: Vec<SelectionItem>,
@@ -80,6 +121,7 @@ pub(crate) struct ListSelectionView {
complete: bool,
app_event_tx: AppEventSender,
is_searchable: bool,
show_numbers: bool,
search_query: String,
search_placeholder: Option<String>,
filtered_indices: Vec<usize>,
@@ -93,7 +135,7 @@ impl ListSelectionView {
let mut header = params.header;
if params.title.is_some() || params.subtitle.is_some() {
let title = params.title.map(|title| Line::from(title.bold()));
let subtitle = params.subtitle.map(|subtitle| Line::from(subtitle.dim()));
let subtitle = params.subtitle.map(WrappedText::dim);
header = Box::new(ColumnRenderable::with([
header,
Box::new(title),
@@ -107,6 +149,7 @@ impl ListSelectionView {
complete: false,
app_event_tx,
is_searchable: params.is_searchable,
show_numbers: params.show_numbers,
search_query: String::new(),
search_placeholder: if params.is_searchable {
params.search_placeholder
@@ -198,13 +241,9 @@ impl ListSelectionView {
};
let name_with_marker = format!("{name}{marker}");
let n = visible_idx + 1;
let wrap_prefix = if self.is_searchable {
// The number keys don't work when search is enabled (since we let the
// numbers be used for the search query).
format!("{prefix} ")
} else {
format!("{prefix} {n}. ")
};
let wrap_prefix = (self.show_numbers && !self.is_searchable)
.then(|| format!("{prefix} {n}. "))
.unwrap_or_else(|| format!("{prefix} "));
let wrap_prefix_width = UnicodeWidthStr::width(wrap_prefix.as_str());
let display_name = format!("{wrap_prefix}{name_with_marker}");
let description = is_selected
@@ -386,6 +425,7 @@ impl BottomPaneView for ListSelectionView {
.to_digit(10)
.map(|d| d as usize)
.and_then(|d| d.checked_sub(1))
.filter(|_| self.show_numbers)
&& idx < self.items.len()
&& self
.items

View File

@@ -26,6 +26,7 @@ mod chat_composer;
mod chat_composer_history;
mod command_popup;
pub mod custom_prompt_view;
mod eval_capture_view;
mod experimental_features_view;
mod file_search_popup;
mod footer;
@@ -34,6 +35,11 @@ mod prompt_args;
mod skill_popup;
pub(crate) use list_selection_view::SelectionViewParams;
mod feedback_view;
pub(crate) use eval_capture_view::EvalCaptureNotesView;
pub(crate) use eval_capture_view::eval_capture_intro_params;
pub(crate) use eval_capture_view::eval_capture_send_consent_params;
pub(crate) use eval_capture_view::eval_capture_start_picker_params;
pub(crate) use feedback_view::feedback_bad_result_fork_params;
pub(crate) use feedback_view::feedback_selection_params;
pub(crate) use feedback_view::feedback_upload_consent_params;
mod paste_burst;

View File

@@ -110,7 +110,10 @@ fn compute_desc_col(
let max_name_width = rows_all
.iter()
.enumerate()
.filter(|(i, _)| visible_range.contains(i))
.filter(|(i, row)| {
visible_range.contains(i)
&& (row.description.is_some() || row.disabled_reason.is_some())
})
.map(|(_, r)| {
let mut spans: Vec<Span> = vec![r.name.clone().into()];
if r.disabled_reason.is_some() {

View File

@@ -132,7 +132,9 @@ use self::session_header::SessionHeader;
use crate::streaming::controller::StreamController;
use std::path::Path;
use chrono::DateTime;
use chrono::Local;
use chrono::Utc;
use codex_common::approval_presets::ApprovalPreset;
use codex_common::approval_presets::builtin_approval_presets;
use codex_core::AuthManager;
@@ -368,6 +370,9 @@ pub(crate) struct ChatWidget {
feedback: codex_feedback::CodexFeedback,
// Current session rollout path (if known)
current_rollout_path: Option<PathBuf>,
// Whether the user consented to upload the captured eval sample to the team.
// This is captured before the start picker so the flow is consented up front.
eval_capture_send_to_team: Option<bool>,
external_editor_state: ExternalEditorState,
}
@@ -438,6 +443,9 @@ impl ChatWidget {
self.set_skills(None);
self.conversation_id = Some(event.session_id);
self.current_rollout_path = Some(event.rollout_path.clone());
// Repo snapshots are stored locally and are required for capturing eval cases
// from arbitrary earlier user messages.
self.submit_op(Op::SetRepoSnapshotting { enabled: true });
let initial_messages = event.initial_messages.clone();
let model_for_header = event.model.clone();
self.session_header.set_model(&model_for_header);
@@ -506,6 +514,479 @@ impl ChatWidget {
self.request_redraw();
}
/// Show the fork between quick feedback and capturing an eval sample.
pub(crate) fn open_feedback_bad_result_fork(&mut self) {
    self.bottom_pane.show_selection_view(
        crate::bottom_pane::feedback_bad_result_fork_params(self.app_event_tx.clone()),
    );
    self.request_redraw();
}
/// Entry point of the eval-capture flow: show the one-time intro, or skip straight
/// to the upload-consent step if it was already dismissed.
pub(crate) fn open_eval_capture_intro(&mut self) {
    // Consent is re-collected on every capture, so clear any stale value.
    self.eval_capture_send_to_team = None;
    let show_intro =
        crate::eval_capture_state::should_show_eval_capture_intro(&self.config.codex_home);
    if show_intro {
        let params = crate::bottom_pane::eval_capture_intro_params(self.app_event_tx.clone());
        self.bottom_pane.show_selection_view(params);
        self.request_redraw();
    } else {
        self.open_eval_capture_send_consent();
    }
}
/// Persist the intro dismissal (best effort) and continue to the consent step.
pub(crate) fn on_eval_capture_intro_continue(&mut self) {
    let persisted = crate::eval_capture_state::persist_eval_capture_intro_dismissed(
        &self.config.codex_home,
    );
    // Persistence failures are surfaced but do not block the flow.
    if let Err(err) = persisted {
        self.add_error_message(format!("Failed to persist eval capture preference: {err}"));
    }
    self.open_eval_capture_send_consent();
}
/// Ask up front whether the finished bundle should be uploaded to the team.
fn open_eval_capture_send_consent(&mut self) {
    self.bottom_pane.show_selection_view(
        crate::bottom_pane::eval_capture_send_consent_params(self.app_event_tx.clone()),
    );
    self.request_redraw();
}
/// Record the user's upload decision, then move on to picking the start message.
pub(crate) fn on_eval_capture_send_consent(&mut self, upload: bool) {
    // Captured before the start picker so the rest of the flow needs no further prompt.
    self.eval_capture_send_to_team = Some(upload);
    self.open_eval_capture_start_picker();
}
/// Parse the session rollout file and show a picker of user messages that can serve
/// as the start of the captured eval case.
fn open_eval_capture_start_picker(&mut self) {
    let Some(rollout_path) = self.current_rollout_path.as_ref() else {
        self.add_error_message(
            "No rollout file for this session; cannot capture eval sample.".to_string(),
        );
        return;
    };
    let text = match std::fs::read_to_string(rollout_path) {
        Ok(text) => text,
        Err(err) => {
            self.add_error_message(format!(
                "Failed to read rollout file {}: {err}",
                rollout_path.display()
            ));
            return;
        }
    };
    // List every user message as a candidate start point, and attach the next
    // ghost snapshot to the most recent user message. Markers without snapshots
    // will be disabled in the picker.
    let mut options = Vec::new();
    let mut last_user_marker_idx: Option<usize> = None;
    let mut max_timestamp: Option<DateTime<Utc>> = None;
    for (idx, line) in text.lines().enumerate() {
        // Lines that fail to parse (e.g. partial writes) are skipped silently.
        let Ok(rollout_line) =
            serde_json::from_str::<codex_protocol::protocol::RolloutLine>(line)
        else {
            continue;
        };
        // Track the newest timestamp in the file; it is used as "now" for the
        // relative-age labels below instead of wall-clock time.
        if let Some(ts) = parse_rollout_timestamp_utc(&rollout_line.timestamp) {
            max_timestamp = Some(max_timestamp.map_or(ts, |cur| cur.max(ts)));
        }
        match rollout_line.item {
            codex_protocol::protocol::RolloutItem::ResponseItem(
                codex_protocol::models::ResponseItem::Message { role, content, .. },
            ) if role == "user" => {
                let Some(message) = extract_user_message_text(&content) else {
                    continue;
                };
                // Skip synthetic user messages (environment context, AGENTS.md).
                if !should_show_eval_capture_start_message(&message) {
                    continue;
                }
                options.push(crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
                    index: idx,
                    timestamp: rollout_line.timestamp.clone(),
                    // Filled in below once we know the latest timestamp in the rollout.
                    display: String::new(),
                    message,
                    repo_snapshot: None,
                });
                last_user_marker_idx = Some(options.len().saturating_sub(1));
            }
            codex_protocol::protocol::RolloutItem::ResponseItem(
                codex_protocol::models::ResponseItem::GhostSnapshot { ghost_commit },
            ) => {
                // A ghost snapshot is attributed to the most recent user message;
                // it needs a parent commit to serve as the base sha.
                if let Some(marker_idx) = last_user_marker_idx
                    && let Some(base_sha) = ghost_commit.parent()
                {
                    if let Some(crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
                        repo_snapshot,
                        ..
                    }) = options.get_mut(marker_idx)
                    {
                        *repo_snapshot = Some(crate::app_event::EvalCaptureRepoSnapshot {
                            ghost_commit: ghost_commit.id().to_string(),
                            base_sha: Some(base_sha.to_string()),
                        });
                    }
                }
            }
            _ => {}
        }
    }
    if let Some(now) = max_timestamp {
        // Prefix each message with a relative age, e.g. "[5m ago] ...".
        for marker in &mut options {
            match marker {
                crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
                    timestamp,
                    display,
                    message,
                    ..
                }
                | crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
                    timestamp,
                    display,
                    message,
                    ..
                } => {
                    // Unparseable timestamps fall back to the bare message.
                    let Some(then) = parse_rollout_timestamp_utc(timestamp) else {
                        *display = message.clone();
                        continue;
                    };
                    let delta = format_relative_time_ago(now, then);
                    *display = format!("[{delta}] {message}");
                }
            }
        }
    } else {
        // No timestamps at all: display the raw messages.
        for marker in &mut options {
            match marker {
                crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
                    display,
                    message,
                    ..
                }
                | crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
                    display,
                    message,
                    ..
                } => {
                    *display = message.clone();
                }
            }
        }
    }
    // Newest first: a larger rollout line index means a later message.
    options.sort_by(|a, b| match (a, b) {
        (
            crate::app_event::EvalCaptureStartMarker::RolloutLineIndex { index: a, .. },
            crate::app_event::EvalCaptureStartMarker::RolloutLineIndex { index: b, .. },
        ) => b.cmp(a),
        _ => std::cmp::Ordering::Equal,
    });
    // With no snapshots there is nothing to capture; fall back to plain feedback.
    if !options.iter().any(|marker| match marker {
        crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
            repo_snapshot, ..
        }
        | crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
            repo_snapshot,
            ..
        } => repo_snapshot.is_some(),
    }) {
        self.add_error_message(
            "No repo snapshots are available yet in this session. Try again after another message/turn (or in a new session)."
                .to_string(),
        );
        self.open_feedback_consent(crate::app_event::FeedbackCategory::BadResult);
        return;
    }
    // Preselect the first (i.e. newest) marker that actually has a snapshot.
    let initial_selected_idx = options.iter().position(|marker| match marker {
        crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
            repo_snapshot, ..
        }
        | crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
            repo_snapshot,
            ..
        } => repo_snapshot.is_some(),
    });
    let params = crate::bottom_pane::eval_capture_start_picker_params(
        self.app_event_tx.clone(),
        options,
        initial_selected_idx,
    );
    self.bottom_pane.show_selection_view(params);
    self.request_redraw();
}
/// Show the free-form notes prompt for the chosen start marker.
pub(crate) fn open_eval_capture_notes(
    &mut self,
    start_marker: crate::app_event::EvalCaptureStartMarker,
) {
    let notes_view = crate::bottom_pane::EvalCaptureNotesView::new(
        start_marker,
        self.app_event_tx.clone(),
    );
    self.bottom_pane.show_view(Box::new(notes_view));
    self.request_redraw();
}
/// Create a local eval-case bundle from the current session.
///
/// Requires an active conversation, a rollout file, and a start marker that carries
/// a repo snapshot with a base sha. On success the bundle location is echoed into
/// history and, depending on the up-front consent, the bundle is uploaded or kept
/// local. Errors are reported as history cells rather than returned.
pub(crate) fn create_eval_capture_bundle(
    &mut self,
    start_marker: crate::app_event::EvalCaptureStartMarker,
    what_went_wrong: String,
    what_good_looks_like: String,
) {
    let Some(conversation_id) = self.conversation_id else {
        self.add_error_message(
            "No active conversation; cannot capture eval sample.".to_string(),
        );
        return;
    };
    let Some(rollout_path) = self.current_rollout_path.clone() else {
        self.add_error_message(
            "No rollout file for this session; cannot capture eval sample.".to_string(),
        );
        return;
    };
    // Both marker variants carry the same snapshot payload; normalize them first so
    // the snapshot validation below is written only once.
    let (kind, value, display, snapshot) = match start_marker {
        crate::app_event::EvalCaptureStartMarker::RolloutLineIndex {
            index,
            display,
            repo_snapshot,
            ..
        } => (
            codex_eval_case::StartMarkerKind::RolloutLineIndex,
            codex_eval_case::StartMarkerValue::LineIndex(index as u64),
            display,
            repo_snapshot,
        ),
        crate::app_event::EvalCaptureStartMarker::RolloutLineTimestamp {
            timestamp,
            display,
            repo_snapshot,
            ..
        } => (
            codex_eval_case::StartMarkerKind::RolloutLineTimestamp,
            codex_eval_case::StartMarkerValue::Timestamp(timestamp),
            display,
            repo_snapshot,
        ),
    };
    let Some(snapshot) = snapshot else {
        self.add_error_message(
            "No repo snapshot available for the selected start marker; cannot capture eval sample."
                .to_string(),
        );
        return;
    };
    let Some(base_sha) = snapshot.base_sha else {
        self.add_error_message(
            "No base sha for selected repo snapshot; cannot capture eval sample."
                .to_string(),
        );
        return;
    };
    let repo_snapshot = Some(codex_eval_case::RepoSnapshot {
        base_sha,
        commit_sha: snapshot.ghost_commit,
    });
    let start = codex_eval_case::StartMarker { kind, value, display };
    // Include the current feedback log snapshot so the bundle is self-contained.
    let logs_bytes = Some(
        self.feedback
            .snapshot(Some(conversation_id))
            .as_bytes()
            .to_vec(),
    );
    let args = codex_eval_case::CreateEvalCaseArgs {
        codex_home: self.config.codex_home.clone(),
        conversation_id: conversation_id.to_string(),
        rollout_path,
        start,
        repo_cwd: self.config.cwd.clone(),
        repo_snapshot,
        notes: codex_eval_case::Notes {
            what_went_wrong,
            what_good_looks_like,
        },
        include_logs: true,
        logs_bytes,
    };
    match codex_eval_case::create_eval_case_bundle(&args) {
        Ok(result) => {
            use ratatui::style::Stylize as _;
            use ratatui::text::Line;
            let case_id = result.case_id;
            let path = result.path.display().to_string();
            let lines = vec![
                Line::from("• Eval sample captured (local only).".to_string()),
                "".into(),
                Line::from(vec![" Case ID: ".into(), case_id.clone().bold()]),
                Line::from(vec![" Path: ".into(), path.clone().cyan().underlined()]),
            ];
            self.app_event_tx
                .send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(
                    crate::history_cell::PlainHistoryCell::new(lines),
                )));
            // Honor the consent collected before the start picker.
            if self.eval_capture_send_to_team.unwrap_or(false) {
                self.upload_eval_capture_bundle(case_id, path);
            } else {
                self.eval_capture_upload_skipped(case_id, path);
            }
        }
        Err(err) => {
            self.app_event_tx
                .send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(
                    crate::history_cell::new_error_event(format!(
                        "Failed to create eval sample bundle: {err}"
                    )),
                )));
        }
    }
}
/// Upload a previously-created bundle directory through the feedback pipeline,
/// attaching the manifest and repo patch alongside the rollout and logs.
pub(crate) fn upload_eval_capture_bundle(&mut self, case_id: String, path: String) {
    let Some(conversation_id) = self.conversation_id else {
        self.add_error_message(
            "No active conversation; cannot upload eval sample.".to_string(),
        );
        return;
    };
    // Expected bundle layout as written by create_eval_case_bundle.
    let bundle_dir = std::path::PathBuf::from(&path);
    let manifest_path = bundle_dir.join("manifest.json");
    let rollout_path = bundle_dir.join("rollout.jsonl");
    let repo_patch_path = bundle_dir.join("repo.patch");
    let logs_path = bundle_dir.join("codex-logs.log");
    let read_bytes = |p: &std::path::Path| std::fs::read(p).map_err(anyhow::Error::from);
    let logs_bytes = match read_bytes(&logs_path) {
        Ok(bytes) => bytes,
        Err(err) => {
            self.add_error_message(format!(
                "Failed to read eval sample logs {}: {err}",
                logs_path.display()
            ));
            return;
        }
    };
    // Re-wrap the bundled log bytes in a snapshot keyed by the live thread id.
    let snapshot = self.feedback.snapshot(Some(conversation_id));
    let snapshot =
        codex_feedback::CodexLogSnapshot::from_bytes(snapshot.thread_id.clone(), logs_bytes);
    let extra_attachments = match (read_bytes(&manifest_path), read_bytes(&repo_patch_path)) {
        (Ok(manifest_bytes), Ok(repo_patch_bytes)) => vec![
            codex_feedback::FeedbackAttachment::new(
                "manifest.json".to_string(),
                Some("application/json".to_string()),
                manifest_bytes,
            ),
            codex_feedback::FeedbackAttachment::new(
                "repo.patch".to_string(),
                Some("text/plain".to_string()),
                repo_patch_bytes,
            ),
        ],
        // Report whichever reads failed, then bail without uploading.
        (manifest_err, repo_err) => {
            if let Err(err) = manifest_err {
                self.add_error_message(format!(
                    "Failed to read eval sample manifest {}: {err}",
                    manifest_path.display()
                ));
            }
            if let Err(err) = repo_err {
                self.add_error_message(format!(
                    "Failed to read eval sample repo.patch {}: {err}",
                    repo_patch_path.display()
                ));
            }
            return;
        }
    };
    let reason = format!("Eval sample {case_id}");
    let result = snapshot.upload_feedback_with_attachments(
        "bad_result",
        Some(reason.as_str()),
        true,
        Some(rollout_path.as_path()),
        Some(codex_core::protocol::SessionSource::Cli),
        extra_attachments,
    );
    match result {
        Ok(()) => {
            use ratatui::style::Stylize as _;
            use ratatui::text::Line;
            let lines = vec![
                Line::from("• Eval sample uploaded.".to_string()),
                "".into(),
                Line::from(vec![
                    " Thread ID: ".into(),
                    snapshot.thread_id.clone().bold(),
                ]),
            ];
            self.app_event_tx
                .send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(
                    crate::history_cell::PlainHistoryCell::new(lines),
                )));
        }
        Err(err) => {
            self.app_event_tx
                .send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(
                    crate::history_cell::new_error_event(format!(
                        "Failed to upload eval sample: {err}"
                    )),
                )));
        }
    }
    self.request_redraw();
}
/// Note in history that the bundle stays local; the user declined uploading.
pub(crate) fn eval_capture_upload_skipped(&mut self, _case_id: String, _path: String) {
    use ratatui::text::Line;
    // The "captured" message already prints the case id/path. Avoid duplicating it here.
    let cell = crate::history_cell::PlainHistoryCell::new(vec![Line::from(
        "• Eval sample saved locally (not uploaded).".to_string(),
    )]);
    self.app_event_tx
        .send(crate::app_event::AppEvent::InsertHistoryCell(Box::new(cell)));
    self.request_redraw();
}
fn on_agent_message(&mut self, message: String) {
// If we have a stream_controller, then the final agent message is redundant and will be a
// duplicate of what has already been streamed.
@@ -1482,6 +1963,7 @@ impl ChatWidget {
last_rendered_width: std::cell::Cell::new(None),
feedback,
current_rollout_path: None,
eval_capture_send_to_team: None,
external_editor_state: ExternalEditorState::Closed,
};
@@ -1569,6 +2051,7 @@ impl ChatWidget {
last_rendered_width: std::cell::Cell::new(None),
feedback,
current_rollout_path: None,
eval_capture_send_to_team: None,
external_editor_state: ExternalEditorState::Closed,
};
@@ -3695,6 +4178,73 @@ async fn fetch_rate_limits(base_url: String, auth: CodexAuth) -> Option<RateLimi
}
}
/// Flatten a user message's text parts into one whitespace-normalized line.
///
/// Image parts are skipped. Returns `None` when no visible text remains; very long
/// messages are truncated so the start picker stays readable.
fn extract_user_message_text(content: &[codex_protocol::models::ContentItem]) -> Option<String> {
    let pieces: Vec<&str> = content
        .iter()
        .filter_map(|item| match item {
            codex_protocol::models::ContentItem::InputText { text }
            | codex_protocol::models::ContentItem::OutputText { text } => Some(text.as_str()),
            codex_protocol::models::ContentItem::InputImage { .. } => None,
        })
        .collect();
    // Collapse every run of whitespace (including newlines) into a single space.
    let joined = pieces.join(" ");
    let normalized = joined.split_whitespace().collect::<Vec<_>>().join(" ");
    if normalized.is_empty() {
        None
    } else {
        // Keep the picker readable even for very long user messages.
        Some(truncate_text(&normalized, 800))
    }
}
/// Filter for the start picker: hide synthetic "user" messages.
///
/// The rollout includes a few synthetic "user" messages (environment context,
/// AGENTS instructions) that should not be presented back to the user as start markers.
fn should_show_eval_capture_start_message(message: &str) -> bool {
    const SYNTHETIC_PREFIXES: [&str; 2] = ["<environment_context>", "# AGENTS.md instructions"];
    let trimmed = message.trim_start();
    !SYNTHETIC_PREFIXES
        .iter()
        .any(|prefix| trimmed.starts_with(prefix))
}
/// Parse an RFC 3339 rollout timestamp and normalize it to UTC; `None` on failure.
fn parse_rollout_timestamp_utc(timestamp: &str) -> Option<DateTime<Utc>> {
    let parsed = DateTime::parse_from_rfc3339(timestamp).ok()?;
    Some(parsed.with_timezone(&Utc))
}
fn format_relative_time_ago(now: DateTime<Utc>, then: DateTime<Utc>) -> String {
let secs = now.signed_duration_since(then).num_seconds().max(0);
if secs < 60 {
return format!("{secs}s ago");
}
let mins = secs / 60;
if mins < 60 {
return format!("{mins}m ago");
}
let hours = mins / 60;
if hours < 24 {
return format!("{hours}h ago");
}
let days = hours / 24;
format!("{days}d ago")
}
#[cfg(test)]
pub(crate) fn show_review_commit_picker_with_entries(
chat: &mut ChatWidget,

View File

@@ -0,0 +1,21 @@
---
source: tui/src/chatwidget/tests.rs
assertion_line: 2190
expression: popup
---
Capture eval sample
Here's everything we capture as an eval sample.
It's stored locally on your computer:
• manifest.json
• rollout.jsonl
• repo.patch
• codex-logs.log
Next, you can optionally upload this bundle to the team.
1. Continue Create an eval sample bundle locally.
2. Send basic feedback instead Skip eval capture and send basic feedback to
the team.
Press enter to confirm or esc to go back

View File

@@ -0,0 +1,11 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
Pick the start message
Pick the first instruction for the specific code change or interaction you
wanted to give feedback on.
1. [0s ago] Please help me fix the build errors
Press enter to confirm or esc to go back

View File

@@ -0,0 +1,20 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
Send eval sample to the codex team?
If you choose Yes, it will upload the full bundle
to the team:
• manifest.json
• rollout.jsonl
• repo.patch
• codex-logs.log
This may include file paths, code snippets, and tool outputs.
1. Yes Upload the full bundle to the team for troubleshooting and model
improvement.
2. No Keep it local only.
Press enter to confirm or esc to go back

View File

@@ -0,0 +1,11 @@
---
source: tui/src/chatwidget/tests.rs
expression: popup
---
Bad result
Choose the fastest way to help us improve.
1. Quick feedback Send a short note (and optionally logs) to
maintainers.
2. Capture eval sample Create a local bundle you can rerun as an eval case
later.

View File

@@ -407,6 +407,7 @@ async fn make_chatwidget_manual(
last_rendered_width: std::cell::Cell::new(None),
feedback: codex_feedback::CodexFeedback::new(),
current_rollout_path: None,
eval_capture_send_to_team: None,
external_editor_state: ExternalEditorState::Closed,
};
(widget, rx, op_rx)
@@ -2167,6 +2168,88 @@ async fn feedback_upload_consent_popup_snapshot() {
assert_snapshot!("feedback_upload_consent_popup", popup);
}
#[tokio::test]
async fn feedback_bad_result_fork_popup_snapshot() {
    // Snapshot the two-way fork shown after choosing "bad result" feedback.
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await;
    chat.open_feedback_bad_result_fork();
    let popup = render_bottom_popup(&chat, 80);
    assert_snapshot!("feedback_bad_result_fork_popup", popup);
}
#[tokio::test]
async fn eval_capture_intro_popup_snapshot() {
    // Snapshot the one-time eval-capture intro popup.
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await;
    // Ensure host state doesn't leak into snapshots.
    let _ = std::fs::remove_file(chat.config.codex_home.join("eval_capture.json"));
    chat.open_eval_capture_intro();
    let popup = render_bottom_popup(&chat, 80);
    assert_snapshot!("eval_capture_intro_popup", popup);
}
#[tokio::test]
async fn eval_capture_upload_consent_popup_snapshot() {
    // Snapshot the upload-consent popup shown before the start picker.
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await;
    let params = crate::bottom_pane::eval_capture_send_consent_params(chat.app_event_tx.clone());
    chat.bottom_pane.show_selection_view(params);
    chat.request_redraw();
    let popup = render_bottom_popup(&chat, 80);
    assert_snapshot!("eval_capture_upload_consent_popup", popup);
}
#[tokio::test]
async fn eval_capture_start_picker_popup_snapshot() {
    // Snapshot the start picker built from a minimal rollout: one user message
    // followed by its ghost snapshot (which supplies the repo snapshot).
    let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await;
    let rollout_file = NamedTempFile::new().unwrap();
    let user_line = serde_json::json!({
        "timestamp": "2025-09-05T16:53:11.850Z",
        "type": "response_item",
        "payload": {
            "type": "message",
            "role": "user",
            "content": [{
                "type": "input_text",
                "text": "Please help me fix the build errors",
            }],
        },
    });
    let ghost_line = serde_json::json!({
        "timestamp": "2025-09-05T16:53:12.000Z",
        "type": "response_item",
        "payload": {
            "type": "ghost_snapshot",
            "ghost_commit": {
                "id": "ghost-sha",
                "parent": "base-sha",
                "preexisting_untracked_files": [],
                "preexisting_untracked_dirs": [],
            },
        },
    });
    std::fs::write(
        rollout_file.path(),
        format!(
            "{}\n{}\n",
            serde_json::to_string(&user_line).unwrap(),
            serde_json::to_string(&ghost_line).unwrap()
        ),
    )
    .unwrap();
    chat.current_rollout_path = Some(rollout_file.path().to_path_buf());
    chat.open_eval_capture_start_picker();
    let popup = render_bottom_popup(&chat, 80);
    assert_snapshot!("eval_capture_start_picker_popup", popup);
}
#[tokio::test]
async fn reasoning_popup_escape_returns_to_model_popup() {
let (mut chat, _rx, _op_rx) = make_chatwidget_manual(Some("gpt-5.1-codex-max")).await;

View File

@@ -0,0 +1,40 @@
use serde::Deserialize;
use serde::Serialize;
use std::path::Path;
use std::path::PathBuf;
/// Persisted UI state for the eval-capture flow, stored as JSON under the codex home.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
struct EvalCaptureState {
    // True once the user has seen (and dismissed) the intro screen.
    #[serde(default)]
    intro_dismissed: bool,
}
/// File name (relative to `codex_home`) where eval-capture UI state is stored.
const STATE_FILENAME: &str = "eval_capture.json";

/// Full path of the eval-capture state file inside `codex_home`.
fn state_path(codex_home: &Path) -> PathBuf {
    codex_home.join(STATE_FILENAME)
}
/// Whether the intro screen should be shown.
///
/// Defaults to `true` (show the intro) when the state file is missing or unparseable.
pub(crate) fn should_show_eval_capture_intro(codex_home: &Path) -> bool {
    match std::fs::read_to_string(state_path(codex_home)) {
        Ok(contents) => serde_json::from_str::<EvalCaptureState>(&contents)
            .map(|state| !state.intro_dismissed)
            .unwrap_or(true),
        Err(_) => true,
    }
}
/// Durably record that the intro was dismissed, so it is shown at most once per codex home.
pub(crate) fn persist_eval_capture_intro_dismissed(codex_home: &Path) -> anyhow::Result<()> {
    let state = EvalCaptureState {
        intro_dismissed: true,
    };
    // Serialize first so a (theoretical) serialization error creates no directories.
    let body = format!("{}\n", serde_json::to_string(&state)?);
    let path = state_path(codex_home);
    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)?;
    }
    std::fs::write(path, body)?;
    Ok(())
}

View File

@@ -45,6 +45,7 @@ mod clipboard_paste;
mod color;
pub mod custom_terminal;
mod diff_render;
mod eval_capture_state;
mod exec_cell;
mod exec_command;
mod external_editor;