Files
codex/prs/bolinfest/PR-1598.md
2025-09-02 15:17:45 -07:00

960 lines
34 KiB
Markdown
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# PR #1598: Record Git metadata to rollout
- URL: https://github.com/openai/codex/pull/1598
- Author: vishnu-oai
- Created: 2025-07-17 13:43:39 UTC
- Updated: 2025-07-24 18:35:35 UTC
- Changes: +475/-13, Files changed: 5, Commits: 18
## Description
# Summary
- Writing effective evals for codex sessions requires context of the overall repository state at the moment the session began
- This change adds this metadata (git repository URL, branch, commit hash) to the top of the session's rollout when it is available; if it is not available, nothing is added.
- Currently, this is only effective on a clean working tree, as we can't track uncommitted/untracked changes with the current metadata set. Ideally in the future we may want to track unclean changes somehow, or perhaps prompt the user to stash or commit them.
# Testing
- Added unit tests
- `cargo test && cargo clippy --tests && cargo fmt -- --config imports_granularity=Item`
### Resulting Rollout
<img width="1243" height="127" alt="Screenshot 2025-07-17 at 1 50 00PM" src="https://github.com/user-attachments/assets/68108941-f015-45b2-985c-ea315ce05415" />
## Full Diff
```diff
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 4cc888b62e..f35348b779 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -594,7 +594,7 @@ async fn submission_loop(
let mut restored_items: Option<Vec<ResponseItem>> = None;
let rollout_recorder: Option<RolloutRecorder> =
if let Some(path) = resume_path.as_ref() {
- match RolloutRecorder::resume(path).await {
+ match RolloutRecorder::resume(path, cwd.clone()).await {
Ok((rec, saved)) => {
session_id = saved.session_id;
if !saved.items.is_empty() {
diff --git a/codex-rs/core/src/git_info.rs b/codex-rs/core/src/git_info.rs
new file mode 100644
index 0000000000..cf959d32d1
--- /dev/null
+++ b/codex-rs/core/src/git_info.rs
@@ -0,0 +1,307 @@
+use std::path::Path;
+
+use serde::Deserialize;
+use serde::Serialize;
+use tokio::process::Command;
+use tokio::time::Duration as TokioDuration;
+use tokio::time::timeout;
+
+/// Timeout for git commands to prevent freezing on large repositories
+const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
+
+#[derive(Serialize, Deserialize, Clone)]
+pub struct GitInfo {
+ /// Current commit hash (SHA)
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub commit_hash: Option<String>,
+ /// Current branch name
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub branch: Option<String>,
+ /// Repository URL (if available from remote)
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub repository_url: Option<String>,
+}
+
+/// Collect git repository information from the given working directory using command-line git.
+/// Returns None if no git repository is found or if git operations fail.
+/// Uses timeouts to prevent freezing on large repositories.
+/// All git commands (except the initial repo check) run in parallel for better performance.
+pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
+ // Check if we're in a git repository first
+ let is_git_repo = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd)
+ .await?
+ .status
+ .success();
+
+ if !is_git_repo {
+ return None;
+ }
+
+ // Run all git info collection commands in parallel
+ let (commit_result, branch_result, url_result) = tokio::join!(
+ run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
+ run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
+ run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd)
+ );
+
+ let mut git_info = GitInfo {
+ commit_hash: None,
+ branch: None,
+ repository_url: None,
+ };
+
+ // Process commit hash
+ if let Some(output) = commit_result {
+ if output.status.success() {
+ if let Ok(hash) = String::from_utf8(output.stdout) {
+ git_info.commit_hash = Some(hash.trim().to_string());
+ }
+ }
+ }
+
+ // Process branch name
+ if let Some(output) = branch_result {
+ if output.status.success() {
+ if let Ok(branch) = String::from_utf8(output.stdout) {
+ let branch = branch.trim();
+ if branch != "HEAD" {
+ git_info.branch = Some(branch.to_string());
+ }
+ }
+ }
+ }
+
+ // Process repository URL
+ if let Some(output) = url_result {
+ if output.status.success() {
+ if let Ok(url) = String::from_utf8(output.stdout) {
+ git_info.repository_url = Some(url.trim().to_string());
+ }
+ }
+ }
+
+ Some(git_info)
+}
+
+/// Run a git command with a timeout to prevent blocking on large repositories
+async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
+ let result = timeout(
+ GIT_COMMAND_TIMEOUT,
+ Command::new("git").args(args).current_dir(cwd).output(),
+ )
+ .await;
+
+ match result {
+ Ok(Ok(output)) => Some(output),
+ _ => None, // Timeout or error
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::expect_used)]
+ #![allow(clippy::unwrap_used)]
+
+ use super::*;
+
+ use std::fs;
+ use std::path::PathBuf;
+ use tempfile::TempDir;
+
+ // Helper function to create a test git repository
+ async fn create_test_git_repo(temp_dir: &TempDir) -> PathBuf {
+ let repo_path = temp_dir.path().to_path_buf();
+
+ // Initialize git repo
+ Command::new("git")
+ .args(["init"])
+ .current_dir(&repo_path)
+ .output()
+ .await
+ .expect("Failed to init git repo");
+
+ // Configure git user (required for commits)
+ Command::new("git")
+ .args(["config", "user.name", "Test User"])
+ .current_dir(&repo_path)
+ .output()
+ .await
+ .expect("Failed to set git user name");
+
+ Command::new("git")
+ .args(["config", "user.email", "test@example.com"])
+ .current_dir(&repo_path)
+ .output()
+ .await
+ .expect("Failed to set git user email");
+
+ // Create a test file and commit it
+ let test_file = repo_path.join("test.txt");
+ fs::write(&test_file, "test content").expect("Failed to write test file");
+
+ Command::new("git")
+ .args(["add", "."])
+ .current_dir(&repo_path)
+ .output()
+ .await
+ .expect("Failed to add files");
+
+ Command::new("git")
+ .args(["commit", "-m", "Initial commit"])
+ .current_dir(&repo_path)
+ .output()
+ .await
+ .expect("Failed to commit");
+
+ repo_path
+ }
+
+ #[tokio::test]
+ async fn test_collect_git_info_non_git_directory() {
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
+ let result = collect_git_info(temp_dir.path()).await;
+ assert!(result.is_none());
+ }
+
+ #[tokio::test]
+ async fn test_collect_git_info_git_repository() {
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
+ let repo_path = create_test_git_repo(&temp_dir).await;
+
+ let git_info = collect_git_info(&repo_path)
+ .await
+ .expect("Should collect git info from repo");
+
+ // Should have commit hash
+ assert!(git_info.commit_hash.is_some());
+ let commit_hash = git_info.commit_hash.unwrap();
+ assert_eq!(commit_hash.len(), 40); // SHA-1 hash should be 40 characters
+ assert!(commit_hash.chars().all(|c| c.is_ascii_hexdigit()));
+
+ // Should have branch (likely "main" or "master")
+ assert!(git_info.branch.is_some());
+ let branch = git_info.branch.unwrap();
+ assert!(branch == "main" || branch == "master");
+
+ // Repository URL might be None for local repos without remote
+ // This is acceptable behavior
+ }
+
+ #[tokio::test]
+ async fn test_collect_git_info_with_remote() {
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
+ let repo_path = create_test_git_repo(&temp_dir).await;
+
+ // Add a remote origin
+ Command::new("git")
+ .args([
+ "remote",
+ "add",
+ "origin",
+ "https://github.com/example/repo.git",
+ ])
+ .current_dir(&repo_path)
+ .output()
+ .await
+ .expect("Failed to add remote");
+
+ let git_info = collect_git_info(&repo_path)
+ .await
+ .expect("Should collect git info from repo");
+
+ // Should have repository URL
+ assert_eq!(
+ git_info.repository_url,
+ Some("https://github.com/example/repo.git".to_string())
+ );
+ }
+
+ #[tokio::test]
+ async fn test_collect_git_info_detached_head() {
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
+ let repo_path = create_test_git_repo(&temp_dir).await;
+
+ // Get the current commit hash
+ let output = Command::new("git")
+ .args(["rev-parse", "HEAD"])
+ .current_dir(&repo_path)
+ .output()
+ .await
+ .expect("Failed to get HEAD");
+ let commit_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
+
+ // Checkout the commit directly (detached HEAD)
+ Command::new("git")
+ .args(["checkout", &commit_hash])
+ .current_dir(&repo_path)
+ .output()
+ .await
+ .expect("Failed to checkout commit");
+
+ let git_info = collect_git_info(&repo_path)
+ .await
+ .expect("Should collect git info from repo");
+
+ // Should have commit hash
+ assert!(git_info.commit_hash.is_some());
+ // Branch should be None for detached HEAD (since rev-parse --abbrev-ref HEAD returns "HEAD")
+ assert!(git_info.branch.is_none());
+ }
+
+ #[tokio::test]
+ async fn test_collect_git_info_with_branch() {
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
+ let repo_path = create_test_git_repo(&temp_dir).await;
+
+ // Create and checkout a new branch
+ Command::new("git")
+ .args(["checkout", "-b", "feature-branch"])
+ .current_dir(&repo_path)
+ .output()
+ .await
+ .expect("Failed to create branch");
+
+ let git_info = collect_git_info(&repo_path)
+ .await
+ .expect("Should collect git info from repo");
+
+ // Should have the new branch name
+ assert_eq!(git_info.branch, Some("feature-branch".to_string()));
+ }
+
+ #[test]
+ fn test_git_info_serialization() {
+ let git_info = GitInfo {
+ commit_hash: Some("abc123def456".to_string()),
+ branch: Some("main".to_string()),
+ repository_url: Some("https://github.com/example/repo.git".to_string()),
+ };
+
+ let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
+ let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
+
+ assert_eq!(parsed["commit_hash"], "abc123def456");
+ assert_eq!(parsed["branch"], "main");
+ assert_eq!(
+ parsed["repository_url"],
+ "https://github.com/example/repo.git"
+ );
+ }
+
+ #[test]
+ fn test_git_info_serialization_with_nones() {
+ let git_info = GitInfo {
+ commit_hash: None,
+ branch: None,
+ repository_url: None,
+ };
+
+ let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
+ let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
+
+ // Fields with None values should be omitted due to skip_serializing_if
+ assert!(!parsed.as_object().unwrap().contains_key("commit_hash"));
+ assert!(!parsed.as_object().unwrap().contains_key("branch"));
+ assert!(!parsed.as_object().unwrap().contains_key("repository_url"));
+ }
+}
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 6812260c97..4e69e94b55 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -19,6 +19,7 @@ pub mod error;
pub mod exec;
pub mod exec_env;
mod flags;
+pub mod git_info;
mod is_safe_command;
mod mcp_connection_manager;
mod mcp_tool_call;
diff --git a/codex-rs/core/src/rollout.rs b/codex-rs/core/src/rollout.rs
index 7f0f61b9eb..3e6de34d96 100644
--- a/codex-rs/core/src/rollout.rs
+++ b/codex-rs/core/src/rollout.rs
@@ -20,6 +20,8 @@ use tracing::warn;
use uuid::Uuid;
use crate::config::Config;
+use crate::git_info::GitInfo;
+use crate::git_info::collect_git_info;
use crate::models::ResponseItem;
const SESSIONS_SUBDIR: &str = "sessions";
@@ -31,6 +33,14 @@ pub struct SessionMeta {
pub instructions: Option<String>,
}
+#[derive(Serialize)]
+struct SessionMetaWithGit {
+ #[serde(flatten)]
+ meta: SessionMeta,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ git: Option<GitInfo>,
+}
+
#[derive(Serialize, Deserialize, Default, Clone)]
pub struct SessionStateSnapshot {}
@@ -86,15 +96,12 @@ impl RolloutRecorder {
.format(timestamp_format)
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
- let meta = SessionMeta {
- timestamp,
- id: session_id,
- instructions,
- };
+ // Clone the cwd for the spawned task to collect git info asynchronously
+ let cwd = config.cwd.clone();
// A reasonably-sized bounded channel. If the buffer fills up the send
// future will yield, which is fine we only need to ensure we do not
- // perform *blocking* I/O on the callers thread.
+ // perform *blocking* I/O on the caller's thread.
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
// Spawn a Tokio task that owns the file handle and performs async
@@ -103,7 +110,12 @@ impl RolloutRecorder {
tokio::task::spawn(rollout_writer(
tokio::fs::File::from_std(file),
rx,
- Some(meta),
+ Some(SessionMeta {
+ timestamp,
+ id: session_id,
+ instructions,
+ }),
+ cwd,
));
Ok(Self { tx })
@@ -143,7 +155,10 @@ impl RolloutRecorder {
.map_err(|e| IoError::other(format!("failed to queue rollout state: {e}")))
}
- pub async fn resume(path: &Path) -> std::io::Result<(Self, SavedSession)> {
+ pub async fn resume(
+ path: &Path,
+ cwd: std::path::PathBuf,
+ ) -> std::io::Result<(Self, SavedSession)> {
info!("Resuming rollout from {path:?}");
let text = tokio::fs::read_to_string(path).await?;
let mut lines = text.lines();
@@ -201,7 +216,12 @@ impl RolloutRecorder {
.open(path)?;
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
- tokio::task::spawn(rollout_writer(tokio::fs::File::from_std(file), rx, None));
+ tokio::task::spawn(rollout_writer(
+ tokio::fs::File::from_std(file),
+ rx,
+ None,
+ cwd,
+ ));
info!("Resumed rollout successfully from {path:?}");
Ok((Self { tx }, saved))
}
@@ -270,15 +290,26 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
async fn rollout_writer(
mut file: tokio::fs::File,
mut rx: mpsc::Receiver<RolloutCmd>,
- meta: Option<SessionMeta>,
+ mut meta: Option<SessionMeta>,
+ cwd: std::path::PathBuf,
) {
- if let Some(meta) = meta {
- if let Ok(json) = serde_json::to_string(&meta) {
+ // If we have a meta, collect git info asynchronously and write meta first
+ if let Some(session_meta) = meta.take() {
+ let git_info = collect_git_info(&cwd).await;
+ let session_meta_with_git = SessionMetaWithGit {
+ meta: session_meta,
+ git: git_info,
+ };
+
+ // Write the SessionMeta as the first item in the file
+ if let Ok(json) = serde_json::to_string(&session_meta_with_git) {
let _ = file.write_all(json.as_bytes()).await;
let _ = file.write_all(b"\n").await;
let _ = file.flush().await;
}
}
+
+ // Process rollout commands
while let Some(cmd) = rx.recv().await {
match cmd {
RolloutCmd::AddItems(items) => {
diff --git a/codex-rs/core/tests/cli_stream.rs b/codex-rs/core/tests/cli_stream.rs
index 567279ebd0..4694ba85ed 100644
--- a/codex-rs/core/tests/cli_stream.rs
+++ b/codex-rs/core/tests/cli_stream.rs
@@ -329,6 +329,7 @@ async fn integration_creates_and_checks_session_file() {
.env("OPENAI_API_KEY", "dummy")
.env("CODEX_RS_SSE_FIXTURE", &fixture)
.env("OPENAI_BASE_URL", "http://unused.local");
+
let output2 = cmd2.output().unwrap();
assert!(output2.status.success(), "resume codex-cli run failed");
@@ -359,3 +360,125 @@ async fn integration_creates_and_checks_session_file() {
"rollout missing resumed marker"
);
}
+
+/// Integration test to verify git info is collected and recorded in session files.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn integration_git_info_unit_test() {
+ // This test verifies git info collection works independently
+ // without depending on the full CLI integration
+
+ // 1. Create temp directory for git repo
+ let temp_dir = TempDir::new().unwrap();
+ let git_repo = temp_dir.path().to_path_buf();
+
+ // 2. Initialize a git repository with some content
+ let init_output = std::process::Command::new("git")
+ .args(["init"])
+ .current_dir(&git_repo)
+ .output()
+ .unwrap();
+ assert!(init_output.status.success(), "git init failed");
+
+ // Configure git user (required for commits)
+ std::process::Command::new("git")
+ .args(["config", "user.name", "Integration Test"])
+ .current_dir(&git_repo)
+ .output()
+ .unwrap();
+
+ std::process::Command::new("git")
+ .args(["config", "user.email", "test@example.com"])
+ .current_dir(&git_repo)
+ .output()
+ .unwrap();
+
+ // Create a test file and commit it
+ let test_file = git_repo.join("test.txt");
+ std::fs::write(&test_file, "integration test content").unwrap();
+
+ std::process::Command::new("git")
+ .args(["add", "."])
+ .current_dir(&git_repo)
+ .output()
+ .unwrap();
+
+ let commit_output = std::process::Command::new("git")
+ .args(["commit", "-m", "Integration test commit"])
+ .current_dir(&git_repo)
+ .output()
+ .unwrap();
+ assert!(commit_output.status.success(), "git commit failed");
+
+ // Create a branch to test branch detection
+ std::process::Command::new("git")
+ .args(["checkout", "-b", "integration-test-branch"])
+ .current_dir(&git_repo)
+ .output()
+ .unwrap();
+
+ // Add a remote to test repository URL detection
+ std::process::Command::new("git")
+ .args([
+ "remote",
+ "add",
+ "origin",
+ "https://github.com/example/integration-test.git",
+ ])
+ .current_dir(&git_repo)
+ .output()
+ .unwrap();
+
+ // 3. Test git info collection directly
+ let git_info = codex_core::git_info::collect_git_info(&git_repo).await;
+
+ // 4. Verify git info is present and contains expected data
+ assert!(git_info.is_some(), "Git info should be collected");
+
+ let git_info = git_info.unwrap();
+
+ // Check that we have a commit hash
+ assert!(
+ git_info.commit_hash.is_some(),
+ "Git info should contain commit_hash"
+ );
+ let commit_hash = git_info.commit_hash.as_ref().unwrap();
+ assert_eq!(commit_hash.len(), 40, "Commit hash should be 40 characters");
+ assert!(
+ commit_hash.chars().all(|c| c.is_ascii_hexdigit()),
+ "Commit hash should be hexadecimal"
+ );
+
+ // Check that we have the correct branch
+ assert!(git_info.branch.is_some(), "Git info should contain branch");
+ let branch = git_info.branch.as_ref().unwrap();
+ assert_eq!(
+ branch, "integration-test-branch",
+ "Branch should match what we created"
+ );
+
+ // Check that we have the repository URL
+ assert!(
+ git_info.repository_url.is_some(),
+ "Git info should contain repository_url"
+ );
+ let repo_url = git_info.repository_url.as_ref().unwrap();
+ assert_eq!(
+ repo_url, "https://github.com/example/integration-test.git",
+ "Repository URL should match what we configured"
+ );
+
+ println!("✅ Git info collection test passed!");
+ println!(" Commit: {commit_hash}");
+ println!(" Branch: {branch}");
+ println!(" Repo: {repo_url}");
+
+ // 5. Test serialization to ensure it works in SessionMeta
+ let serialized = serde_json::to_string(&git_info).unwrap();
+ let deserialized: codex_core::git_info::GitInfo = serde_json::from_str(&serialized).unwrap();
+
+ assert_eq!(git_info.commit_hash, deserialized.commit_hash);
+ assert_eq!(git_info.branch, deserialized.branch);
+ assert_eq!(git_info.repository_url, deserialized.repository_url);
+
+ println!("✅ Git info serialization test passed!");
+}
```
## Review Comments
### codex-rs/core/src/codex.rs
- Created: 2025-07-17 22:55:29 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214451100
```diff
@@ -652,7 +652,9 @@ async fn submission_loop(
// overlapping rollout file. Consider passing RolloutRecorder
// from above.
let rollout_recorder =
- match RolloutRecorder::new(&config, session_id, instructions.clone()).await {
+ match RolloutRecorder::new(&config, session_id, instructions.clone(), &cwd)
```
> `cwd` is available on `Config`?
### codex-rs/core/src/git_info.rs
- Created: 2025-07-22 15:26:26 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222934571
```diff
@@ -0,0 +1,307 @@
+use std::path::Path;
+
+use serde::Deserialize;
+use serde::Serialize;
+use tokio::process::Command;
+use tokio::time::Duration as TokioDuration;
+use tokio::time::timeout;
+
+/// Timeout for git commands to prevent freezing on large repositories
+const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
+
+#[derive(Serialize, Deserialize, Clone)]
+pub struct GitInfo {
+ /// Current commit hash (SHA)
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub commit_hash: Option<String>,
+ /// Current branch name
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub branch: Option<String>,
+ /// Repository URL (if available from remote)
+ #[serde(skip_serializing_if = "Option::is_none")]
+ pub repository_url: Option<String>,
+}
+
+/// Run a git command with a timeout to prevent blocking on large repositories
+async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
+ let result = timeout(
+ GIT_COMMAND_TIMEOUT,
+ Command::new("git").args(args).current_dir(cwd).output(),
+ )
+ .await;
+
+ match result {
+ Ok(Ok(output)) => Some(output),
+ _ => None, // Timeout or error
+ }
+}
+
+/// Collect git repository information from the given working directory using command-line git.
+/// Returns None if no git repository is found or if git operations fail.
+/// Uses timeouts to prevent freezing on large repositories.
+/// All git commands (except the initial repo check) run in parallel for better performance.
+pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
```
> Please list this after `pub struct GitInfo` so it is at the top since it's the main "export" after the struct.
### codex-rs/core/src/rollout.rs
- Created: 2025-07-17 19:05:42 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214082920
```diff
@@ -22,12 +27,105 @@ use crate::models::ResponseItem;
/// Folder inside `~/.codex` that holds saved rollouts.
const SESSIONS_SUBDIR: &str = "sessions";
+#[derive(Serialize)]
+struct GitInfo {
+ /// Current commit hash (SHA)
+ #[serde(skip_serializing_if = "Option::is_none")]
+ commit_hash: Option<String>,
+ /// Current branch name
+ #[serde(skip_serializing_if = "Option::is_none")]
+ branch: Option<String>,
+ /// Repository URL (if available from remote)
+ #[serde(skip_serializing_if = "Option::is_none")]
+ repository_url: Option<String>,
+}
+
#[derive(Serialize)]
struct SessionMeta {
id: String,
timestamp: String,
#[serde(skip_serializing_if = "Option::is_none")]
instructions: Option<String>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ git: Option<GitInfo>,
+}
+
+/// Timeout for git commands to prevent freezing on large repositories
+const GIT_COMMAND_TIMEOUT: Duration = Duration::from_secs(3);
+
+/// Run a git command with a timeout to prevent blocking on large repositories
+fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
```
> Can you use tokio::Command and make this `async` instead? It's cheaper to create tokio tasks than POSIX threads. You should then update `collect_git_info()` to make all these calls in parallel.
- Created: 2025-07-17 22:57:23 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214453622
```diff
@@ -22,12 +26,105 @@ use crate::models::ResponseItem;
/// Folder inside `~/.codex` that holds saved rollouts.
const SESSIONS_SUBDIR: &str = "sessions";
+#[derive(Serialize)]
```
> Can you please move all this new code to a separate file? `git_info.rs` perhaps?
- Created: 2025-07-17 22:58:16 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214454353
```diff
@@ -183,3 +285,212 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
timestamp,
})
}
+
+#[cfg(test)]
```
> Thanks for the nice test!
- Created: 2025-07-17 23:06:24 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214462855
```diff
@@ -67,16 +165,20 @@ impl RolloutRecorder {
.format(timestamp_format)
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
+ // Collect git repository information
+ let git_info = collect_git_info(cwd).await;
```
> I appreciate the timeouts in `collect_git_info()`, though if I am reading it correctly, I suppose this could add ~6s to startup in the worst case? It would be nice to figure out how to make this truly async, since `RolloutRecorder::new()` is on the critical path to startup.
>
> The challenge seems to be that we have these lines below:
>
> ```rust
> recorder.record_item(&meta).await?;
> Ok(recorder)
> ```
>
> That is, we don't want `new()` to exit until the first item has recorded and now that is dependent on `collect_git_info()`. Certainly this is fixable, but the bookkeeping may be a bit ugly. What do you think?
- Created: 2025-07-17 23:15:31 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214474763
```diff
@@ -67,16 +165,20 @@ impl RolloutRecorder {
.format(timestamp_format)
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
+ // Collect git repository information
+ let git_info = collect_git_info(cwd).await;
```
> Actually, what if we move `collect_git_info(cwd).await` into the lambda passed to `tokio::task::spawn` and then ensure it is written to `file` before the `while let Some(line) = rx.recv().await` loop starts?
>
> You could also increase the `git` timeout to 5s maybe?
- Created: 2025-07-22 15:29:16 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222944436
```diff
@@ -27,6 +29,8 @@ pub struct SessionMeta {
pub id: Uuid,
pub timestamp: String,
pub instructions: Option<String>,
+ #[serde(skip_serializing_if = "Option::is_none")]
```
> In general, it's preferable to avoid mutability, so I would rather keep `SessionMeta` as it was before... (see `rollout.rs`)
- Created: 2025-07-22 15:32:35 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222956970
```diff
@@ -196,7 +203,12 @@ impl RolloutRecorder {
.open(path)?;
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
- tokio::task::spawn(rollout_writer(tokio::fs::File::from_std(file), rx, None));
+ tokio::task::spawn(rollout_writer(
```
> Please update the callsite to `resume()` so that it passes `cwd` (it's available on `Op::ConfigureSession`) and then update the third arg to `rollout_writer` to be `PathBuf` instead of `Option<PathBuf>`.
- Created: 2025-07-22 15:33:02 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222958878
```diff
@@ -250,15 +262,36 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
async fn rollout_writer(
mut file: tokio::fs::File,
mut rx: mpsc::Receiver<RolloutCmd>,
- meta: Option<SessionMeta>,
+ mut meta: Option<SessionMeta>,
+ cwd: Option<std::path::PathBuf>,
) {
- if let Some(meta) = meta {
- if let Ok(json) = serde_json::to_string(&meta) {
+ // If we have a meta and cwd, collect git info asynchronously and write meta first
+ if let (Some(mut session_meta), Some(cwd)) = (meta.take(), cwd) {
+ // Skip git collection if disabled via environment variable (for tests)
+ let git_info = if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() {
```
> If increasing the number of threads makes the test pass reliably, then let's remove this?
- Created: 2025-07-22 15:35:51 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222969734
```diff
@@ -250,15 +262,36 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
async fn rollout_writer(
mut file: tokio::fs::File,
mut rx: mpsc::Receiver<RolloutCmd>,
- meta: Option<SessionMeta>,
+ mut meta: Option<SessionMeta>,
+ cwd: Option<std::path::PathBuf>,
) {
- if let Some(meta) = meta {
- if let Ok(json) = serde_json::to_string(&meta) {
+ // If we have a meta and cwd, collect git info asynchronously and write meta first
+ if let (Some(mut session_meta), Some(cwd)) = (meta.take(), cwd) {
+ // Skip git collection if disabled via environment variable (for tests)
+ let git_info = if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() {
+ None
+ } else {
+ // Collect git repository information asynchronously without blocking startup
+ collect_git_info(&cwd).await
+ };
+ session_meta.git = git_info;
```
> Because I suggested removing `git` from `SessionMeta` so we don't have an optional/mutable field, here you can do something like:
>
> ```suggestion
> let SessionMeta { id, timestamp, instructions } = session_meta;
> session_meta = json!({
> "id" : id,
> "timestamp": timestamp,
> "instructions": instructions,
> "git": git_info,
> });
> ```
>
> Or you could introduce a separate struct with the extra field and use `serde_json::to_string()` as before.
- Created: 2025-07-23 00:42:50 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2224080928
```diff
@@ -29,6 +31,14 @@ pub struct SessionMeta {
pub instructions: Option<String>,
}
+#[derive(Serialize)]
+struct SessionMetaWithGit {
+ #[serde(flatten)]
```
> 👍
- Created: 2025-07-23 00:43:10 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2224081213
```diff
@@ -297,3 +335,9 @@ async fn rollout_writer(
}
}
}
+
```
> Remove this block?
- Created: 2025-07-23 00:49:13 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2224085948
```diff
@@ -250,15 +262,36 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
async fn rollout_writer(
mut file: tokio::fs::File,
mut rx: mpsc::Receiver<RolloutCmd>,
- meta: Option<SessionMeta>,
+ mut meta: Option<SessionMeta>,
+ cwd: Option<std::path::PathBuf>,
) {
- if let Some(meta) = meta {
- if let Ok(json) = serde_json::to_string(&meta) {
+ // If we have a meta and cwd, collect git info asynchronously and write meta first
+ if let (Some(mut session_meta), Some(cwd)) = (meta.take(), cwd) {
+ // Skip git collection if disabled via environment variable (for tests)
+ let git_info = if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() {
```
> https://github.com/openai/codex/pull/1647 might be the fix for this? I would really like to avoid `experimental_disable_git_metadata`, if possible.
### codex-rs/core/tests/cli_stream.rs
- Created: 2025-07-22 15:24:13 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222926115
```diff
@@ -328,7 +330,9 @@ async fn integration_creates_and_checks_session_file() {
cmd2.env("CODEX_HOME", home.path())
.env("OPENAI_API_KEY", "dummy")
.env("CODEX_RS_SSE_FIXTURE", &fixture)
- .env("OPENAI_BASE_URL", "http://unused.local");
+ .env("OPENAI_BASE_URL", "http://unused.local")
+ // Disable git info collection to avoid blocking message processing in tests
+ .env("CODEX_DISABLE_GIT_INFO", "1");
```
> I have been trying to eliminate support for environment variables in favor of using configuration. Can we just add a config option (prefixed with "experimental" like `experimental_resume`) for this?
- Created: 2025-07-22 15:25:28 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222930978
```diff
@@ -161,7 +161,9 @@ async fn integration_creates_and_checks_session_file() {
.env("OPENAI_API_KEY", "dummy")
.env("CODEX_RS_SSE_FIXTURE", &fixture)
// Required for CLI arg parsing even though fixture short-circuits network usage.
- .env("OPENAI_BASE_URL", "http://unused.local");
+ .env("OPENAI_BASE_URL", "http://unused.local")
+ // Disable git info collection to avoid blocking message processing in tests
```
> Alternatively, should we increase the number of threads, i.e., increase the `2` in this line?
>
> ```
> #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
> ```