Files
codex/prs/bolinfest/PR-1598.md
2025-09-02 15:17:45 -07:00

34 KiB
Raw Blame History

PR #1598: Record Git metadata to rollout

Description

Summary

  • Writing effective evals for codex sessions requires context of the overall repository state at the moment the session began
  • This change adds this metadata (git repository, branch, commit hash) to the top of the rollout of the session (if available - if not it doesn't add anything)
  • Currently, this is only effective on a clean working tree, as we can't track uncommitted/untracked changes with the current metadata set. Ideally in the future we may want to track unclean changes somehow, or perhaps prompt the user to stash or commit them.

Testing

  • Added unit tests
  • cargo test && cargo clippy --tests && cargo fmt -- --config imports_granularity=Item

Resulting Rollout

Screenshot 2025-07-17 at 1 50 00 PM

Full Diff

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 4cc888b62e..f35348b779 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -594,7 +594,7 @@ async fn submission_loop(
                 let mut restored_items: Option<Vec<ResponseItem>> = None;
                 let rollout_recorder: Option<RolloutRecorder> =
                     if let Some(path) = resume_path.as_ref() {
-                        match RolloutRecorder::resume(path).await {
+                        match RolloutRecorder::resume(path, cwd.clone()).await {
                             Ok((rec, saved)) => {
                                 session_id = saved.session_id;
                                 if !saved.items.is_empty() {
diff --git a/codex-rs/core/src/git_info.rs b/codex-rs/core/src/git_info.rs
new file mode 100644
index 0000000000..cf959d32d1
--- /dev/null
+++ b/codex-rs/core/src/git_info.rs
@@ -0,0 +1,307 @@
+use std::path::Path;
+
+use serde::Deserialize;
+use serde::Serialize;
+use tokio::process::Command;
+use tokio::time::Duration as TokioDuration;
+use tokio::time::timeout;
+
+/// Timeout for git commands to prevent freezing on large repositories
+const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
+
+#[derive(Serialize, Deserialize, Clone)]
+pub struct GitInfo {
+    /// Current commit hash (SHA)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub commit_hash: Option<String>,
+    /// Current branch name
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub branch: Option<String>,
+    /// Repository URL (if available from remote)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub repository_url: Option<String>,
+}
+
+/// Collect git repository information from the given working directory using command-line git.
+/// Returns None if no git repository is found or if git operations fail.
+/// Uses timeouts to prevent freezing on large repositories.
+/// All git commands (except the initial repo check) run in parallel for better performance.
+pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
+    // Check if we're in a git repository first
+    let is_git_repo = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd)
+        .await?
+        .status
+        .success();
+
+    if !is_git_repo {
+        return None;
+    }
+
+    // Run all git info collection commands in parallel
+    let (commit_result, branch_result, url_result) = tokio::join!(
+        run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
+        run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
+        run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd)
+    );
+
+    let mut git_info = GitInfo {
+        commit_hash: None,
+        branch: None,
+        repository_url: None,
+    };
+
+    // Process commit hash
+    if let Some(output) = commit_result {
+        if output.status.success() {
+            if let Ok(hash) = String::from_utf8(output.stdout) {
+                git_info.commit_hash = Some(hash.trim().to_string());
+            }
+        }
+    }
+
+    // Process branch name
+    if let Some(output) = branch_result {
+        if output.status.success() {
+            if let Ok(branch) = String::from_utf8(output.stdout) {
+                let branch = branch.trim();
+                if branch != "HEAD" {
+                    git_info.branch = Some(branch.to_string());
+                }
+            }
+        }
+    }
+
+    // Process repository URL
+    if let Some(output) = url_result {
+        if output.status.success() {
+            if let Ok(url) = String::from_utf8(output.stdout) {
+                git_info.repository_url = Some(url.trim().to_string());
+            }
+        }
+    }
+
+    Some(git_info)
+}
+
+/// Run a git command with a timeout to prevent blocking on large repositories
+async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
+    let result = timeout(
+        GIT_COMMAND_TIMEOUT,
+        Command::new("git").args(args).current_dir(cwd).output(),
+    )
+    .await;
+
+    match result {
+        Ok(Ok(output)) => Some(output),
+        _ => None, // Timeout or error
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #![allow(clippy::expect_used)]
+    #![allow(clippy::unwrap_used)]
+
+    use super::*;
+
+    use std::fs;
+    use std::path::PathBuf;
+    use tempfile::TempDir;
+
+    // Helper function to create a test git repository
+    async fn create_test_git_repo(temp_dir: &TempDir) -> PathBuf {
+        let repo_path = temp_dir.path().to_path_buf();
+
+        // Initialize git repo
+        Command::new("git")
+            .args(["init"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to init git repo");
+
+        // Configure git user (required for commits)
+        Command::new("git")
+            .args(["config", "user.name", "Test User"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to set git user name");
+
+        Command::new("git")
+            .args(["config", "user.email", "test@example.com"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to set git user email");
+
+        // Create a test file and commit it
+        let test_file = repo_path.join("test.txt");
+        fs::write(&test_file, "test content").expect("Failed to write test file");
+
+        Command::new("git")
+            .args(["add", "."])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to add files");
+
+        Command::new("git")
+            .args(["commit", "-m", "Initial commit"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to commit");
+
+        repo_path
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_non_git_directory() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let result = collect_git_info(temp_dir.path()).await;
+        assert!(result.is_none());
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_git_repository() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let repo_path = create_test_git_repo(&temp_dir).await;
+
+        let git_info = collect_git_info(&repo_path)
+            .await
+            .expect("Should collect git info from repo");
+
+        // Should have commit hash
+        assert!(git_info.commit_hash.is_some());
+        let commit_hash = git_info.commit_hash.unwrap();
+        assert_eq!(commit_hash.len(), 40); // SHA-1 hash should be 40 characters
+        assert!(commit_hash.chars().all(|c| c.is_ascii_hexdigit()));
+
+        // Should have branch (likely "main" or "master")
+        assert!(git_info.branch.is_some());
+        let branch = git_info.branch.unwrap();
+        assert!(branch == "main" || branch == "master");
+
+        // Repository URL might be None for local repos without remote
+        // This is acceptable behavior
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_with_remote() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let repo_path = create_test_git_repo(&temp_dir).await;
+
+        // Add a remote origin
+        Command::new("git")
+            .args([
+                "remote",
+                "add",
+                "origin",
+                "https://github.com/example/repo.git",
+            ])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to add remote");
+
+        let git_info = collect_git_info(&repo_path)
+            .await
+            .expect("Should collect git info from repo");
+
+        // Should have repository URL
+        assert_eq!(
+            git_info.repository_url,
+            Some("https://github.com/example/repo.git".to_string())
+        );
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_detached_head() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let repo_path = create_test_git_repo(&temp_dir).await;
+
+        // Get the current commit hash
+        let output = Command::new("git")
+            .args(["rev-parse", "HEAD"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to get HEAD");
+        let commit_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
+
+        // Checkout the commit directly (detached HEAD)
+        Command::new("git")
+            .args(["checkout", &commit_hash])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to checkout commit");
+
+        let git_info = collect_git_info(&repo_path)
+            .await
+            .expect("Should collect git info from repo");
+
+        // Should have commit hash
+        assert!(git_info.commit_hash.is_some());
+        // Branch should be None for detached HEAD (since rev-parse --abbrev-ref HEAD returns "HEAD")
+        assert!(git_info.branch.is_none());
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_with_branch() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let repo_path = create_test_git_repo(&temp_dir).await;
+
+        // Create and checkout a new branch
+        Command::new("git")
+            .args(["checkout", "-b", "feature-branch"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to create branch");
+
+        let git_info = collect_git_info(&repo_path)
+            .await
+            .expect("Should collect git info from repo");
+
+        // Should have the new branch name
+        assert_eq!(git_info.branch, Some("feature-branch".to_string()));
+    }
+
+    #[test]
+    fn test_git_info_serialization() {
+        let git_info = GitInfo {
+            commit_hash: Some("abc123def456".to_string()),
+            branch: Some("main".to_string()),
+            repository_url: Some("https://github.com/example/repo.git".to_string()),
+        };
+
+        let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
+        let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
+
+        assert_eq!(parsed["commit_hash"], "abc123def456");
+        assert_eq!(parsed["branch"], "main");
+        assert_eq!(
+            parsed["repository_url"],
+            "https://github.com/example/repo.git"
+        );
+    }
+
+    #[test]
+    fn test_git_info_serialization_with_nones() {
+        let git_info = GitInfo {
+            commit_hash: None,
+            branch: None,
+            repository_url: None,
+        };
+
+        let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
+        let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
+
+        // Fields with None values should be omitted due to skip_serializing_if
+        assert!(!parsed.as_object().unwrap().contains_key("commit_hash"));
+        assert!(!parsed.as_object().unwrap().contains_key("branch"));
+        assert!(!parsed.as_object().unwrap().contains_key("repository_url"));
+    }
+}
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 6812260c97..4e69e94b55 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -19,6 +19,7 @@ pub mod error;
 pub mod exec;
 pub mod exec_env;
 mod flags;
+pub mod git_info;
 mod is_safe_command;
 mod mcp_connection_manager;
 mod mcp_tool_call;
diff --git a/codex-rs/core/src/rollout.rs b/codex-rs/core/src/rollout.rs
index 7f0f61b9eb..3e6de34d96 100644
--- a/codex-rs/core/src/rollout.rs
+++ b/codex-rs/core/src/rollout.rs
@@ -20,6 +20,8 @@ use tracing::warn;
 use uuid::Uuid;
 
 use crate::config::Config;
+use crate::git_info::GitInfo;
+use crate::git_info::collect_git_info;
 use crate::models::ResponseItem;
 
 const SESSIONS_SUBDIR: &str = "sessions";
@@ -31,6 +33,14 @@ pub struct SessionMeta {
     pub instructions: Option<String>,
 }
 
+#[derive(Serialize)]
+struct SessionMetaWithGit {
+    #[serde(flatten)]
+    meta: SessionMeta,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    git: Option<GitInfo>,
+}
+
 #[derive(Serialize, Deserialize, Default, Clone)]
 pub struct SessionStateSnapshot {}
 
@@ -86,15 +96,12 @@ impl RolloutRecorder {
             .format(timestamp_format)
             .map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
 
-        let meta = SessionMeta {
-            timestamp,
-            id: session_id,
-            instructions,
-        };
+        // Clone the cwd for the spawned task to collect git info asynchronously
+        let cwd = config.cwd.clone();
 
         // A reasonably-sized bounded channel. If the buffer fills up the send
         // future will yield, which is fine – we only need to ensure we do not
-        // perform *blocking* I/O on the caller’s thread.
+        // perform *blocking* I/O on the caller's thread.
         let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
 
         // Spawn a Tokio task that owns the file handle and performs async
@@ -103,7 +110,12 @@ impl RolloutRecorder {
         tokio::task::spawn(rollout_writer(
             tokio::fs::File::from_std(file),
             rx,
-            Some(meta),
+            Some(SessionMeta {
+                timestamp,
+                id: session_id,
+                instructions,
+            }),
+            cwd,
         ));
 
         Ok(Self { tx })
@@ -143,7 +155,10 @@ impl RolloutRecorder {
             .map_err(|e| IoError::other(format!("failed to queue rollout state: {e}")))
     }
 
-    pub async fn resume(path: &Path) -> std::io::Result<(Self, SavedSession)> {
+    pub async fn resume(
+        path: &Path,
+        cwd: std::path::PathBuf,
+    ) -> std::io::Result<(Self, SavedSession)> {
         info!("Resuming rollout from {path:?}");
         let text = tokio::fs::read_to_string(path).await?;
         let mut lines = text.lines();
@@ -201,7 +216,12 @@ impl RolloutRecorder {
             .open(path)?;
 
         let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
-        tokio::task::spawn(rollout_writer(tokio::fs::File::from_std(file), rx, None));
+        tokio::task::spawn(rollout_writer(
+            tokio::fs::File::from_std(file),
+            rx,
+            None,
+            cwd,
+        ));
         info!("Resumed rollout successfully from {path:?}");
         Ok((Self { tx }, saved))
     }
@@ -270,15 +290,26 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
 async fn rollout_writer(
     mut file: tokio::fs::File,
     mut rx: mpsc::Receiver<RolloutCmd>,
-    meta: Option<SessionMeta>,
+    mut meta: Option<SessionMeta>,
+    cwd: std::path::PathBuf,
 ) {
-    if let Some(meta) = meta {
-        if let Ok(json) = serde_json::to_string(&meta) {
+    // If we have a meta, collect git info asynchronously and write meta first
+    if let Some(session_meta) = meta.take() {
+        let git_info = collect_git_info(&cwd).await;
+        let session_meta_with_git = SessionMetaWithGit {
+            meta: session_meta,
+            git: git_info,
+        };
+
+        // Write the SessionMeta as the first item in the file
+        if let Ok(json) = serde_json::to_string(&session_meta_with_git) {
             let _ = file.write_all(json.as_bytes()).await;
             let _ = file.write_all(b"\n").await;
             let _ = file.flush().await;
         }
     }
+
+    // Process rollout commands
     while let Some(cmd) = rx.recv().await {
         match cmd {
             RolloutCmd::AddItems(items) => {
diff --git a/codex-rs/core/tests/cli_stream.rs b/codex-rs/core/tests/cli_stream.rs
index 567279ebd0..4694ba85ed 100644
--- a/codex-rs/core/tests/cli_stream.rs
+++ b/codex-rs/core/tests/cli_stream.rs
@@ -329,6 +329,7 @@ async fn integration_creates_and_checks_session_file() {
         .env("OPENAI_API_KEY", "dummy")
         .env("CODEX_RS_SSE_FIXTURE", &fixture)
         .env("OPENAI_BASE_URL", "http://unused.local");
+
     let output2 = cmd2.output().unwrap();
     assert!(output2.status.success(), "resume codex-cli run failed");
 
@@ -359,3 +360,125 @@ async fn integration_creates_and_checks_session_file() {
         "rollout missing resumed marker"
     );
 }
+
+/// Integration test to verify git info is collected and recorded in session files.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn integration_git_info_unit_test() {
+    // This test verifies git info collection works independently
+    // without depending on the full CLI integration
+
+    // 1. Create temp directory for git repo
+    let temp_dir = TempDir::new().unwrap();
+    let git_repo = temp_dir.path().to_path_buf();
+
+    // 2. Initialize a git repository with some content
+    let init_output = std::process::Command::new("git")
+        .args(["init"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+    assert!(init_output.status.success(), "git init failed");
+
+    // Configure git user (required for commits)
+    std::process::Command::new("git")
+        .args(["config", "user.name", "Integration Test"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    std::process::Command::new("git")
+        .args(["config", "user.email", "test@example.com"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    // Create a test file and commit it
+    let test_file = git_repo.join("test.txt");
+    std::fs::write(&test_file, "integration test content").unwrap();
+
+    std::process::Command::new("git")
+        .args(["add", "."])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    let commit_output = std::process::Command::new("git")
+        .args(["commit", "-m", "Integration test commit"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+    assert!(commit_output.status.success(), "git commit failed");
+
+    // Create a branch to test branch detection
+    std::process::Command::new("git")
+        .args(["checkout", "-b", "integration-test-branch"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    // Add a remote to test repository URL detection
+    std::process::Command::new("git")
+        .args([
+            "remote",
+            "add",
+            "origin",
+            "https://github.com/example/integration-test.git",
+        ])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    // 3. Test git info collection directly
+    let git_info = codex_core::git_info::collect_git_info(&git_repo).await;
+
+    // 4. Verify git info is present and contains expected data
+    assert!(git_info.is_some(), "Git info should be collected");
+
+    let git_info = git_info.unwrap();
+
+    // Check that we have a commit hash
+    assert!(
+        git_info.commit_hash.is_some(),
+        "Git info should contain commit_hash"
+    );
+    let commit_hash = git_info.commit_hash.as_ref().unwrap();
+    assert_eq!(commit_hash.len(), 40, "Commit hash should be 40 characters");
+    assert!(
+        commit_hash.chars().all(|c| c.is_ascii_hexdigit()),
+        "Commit hash should be hexadecimal"
+    );
+
+    // Check that we have the correct branch
+    assert!(git_info.branch.is_some(), "Git info should contain branch");
+    let branch = git_info.branch.as_ref().unwrap();
+    assert_eq!(
+        branch, "integration-test-branch",
+        "Branch should match what we created"
+    );
+
+    // Check that we have the repository URL
+    assert!(
+        git_info.repository_url.is_some(),
+        "Git info should contain repository_url"
+    );
+    let repo_url = git_info.repository_url.as_ref().unwrap();
+    assert_eq!(
+        repo_url, "https://github.com/example/integration-test.git",
+        "Repository URL should match what we configured"
+    );
+
+    println!("✅ Git info collection test passed!");
+    println!("   Commit: {commit_hash}");
+    println!("   Branch: {branch}");
+    println!("   Repo: {repo_url}");
+
+    // 5. Test serialization to ensure it works in SessionMeta
+    let serialized = serde_json::to_string(&git_info).unwrap();
+    let deserialized: codex_core::git_info::GitInfo = serde_json::from_str(&serialized).unwrap();
+
+    assert_eq!(git_info.commit_hash, deserialized.commit_hash);
+    assert_eq!(git_info.branch, deserialized.branch);
+    assert_eq!(git_info.repository_url, deserialized.repository_url);
+
+    println!("✅ Git info serialization test passed!");
+}

Review Comments

codex-rs/core/src/codex.rs

@@ -652,7 +652,9 @@ async fn submission_loop(
                 // overlapping rollout file. Consider passing RolloutRecorder
                 // from above.
                 let rollout_recorder =
-                    match RolloutRecorder::new(&config, session_id, instructions.clone()).await {
+                    match RolloutRecorder::new(&config, session_id, instructions.clone(), &cwd)

cwd is available on Config?

codex-rs/core/src/git_info.rs

@@ -0,0 +1,307 @@
+use std::path::Path;
+
+use serde::Deserialize;
+use serde::Serialize;
+use tokio::process::Command;
+use tokio::time::Duration as TokioDuration;
+use tokio::time::timeout;
+
+/// Timeout for git commands to prevent freezing on large repositories
+const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
+
+#[derive(Serialize, Deserialize, Clone)]
+pub struct GitInfo {
+    /// Current commit hash (SHA)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub commit_hash: Option<String>,
+    /// Current branch name
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub branch: Option<String>,
+    /// Repository URL (if available from remote)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub repository_url: Option<String>,
+}
+
+/// Run a git command with a timeout to prevent blocking on large repositories
+async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
+    let result = timeout(
+        GIT_COMMAND_TIMEOUT,
+        Command::new("git").args(args).current_dir(cwd).output(),
+    )
+    .await;
+
+    match result {
+        Ok(Ok(output)) => Some(output),
+        _ => None, // Timeout or error
+    }
+}
+
+/// Collect git repository information from the given working directory using command-line git.
+/// Returns None if no git repository is found or if git operations fail.
+/// Uses timeouts to prevent freezing on large repositories.
+/// All git commands (except the initial repo check) run in parallel for better performance.
+pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {

Please list this after pub struct GitInfo so it is at the top since it's the main "export" after the struct.

codex-rs/core/src/rollout.rs

@@ -22,12 +27,105 @@ use crate::models::ResponseItem;
 /// Folder inside `~/.codex` that holds saved rollouts.
 const SESSIONS_SUBDIR: &str = "sessions";
 
+#[derive(Serialize)]
+struct GitInfo {
+    /// Current commit hash (SHA)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    commit_hash: Option<String>,
+    /// Current branch name
+    #[serde(skip_serializing_if = "Option::is_none")]
+    branch: Option<String>,
+    /// Repository URL (if available from remote)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    repository_url: Option<String>,
+}
+
 #[derive(Serialize)]
 struct SessionMeta {
     id: String,
     timestamp: String,
     #[serde(skip_serializing_if = "Option::is_none")]
     instructions: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    git: Option<GitInfo>,
+}
+
+/// Timeout for git commands to prevent freezing on large repositories
+const GIT_COMMAND_TIMEOUT: Duration = Duration::from_secs(3);
+
+/// Run a git command with a timeout to prevent blocking on large repositories
+fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {

Can you use tokio::process::Command and make this async instead? It's cheaper to create tokio tasks than POSIX threads. You should then update collect_git_info() to make all these calls in parallel.

@@ -22,12 +26,105 @@ use crate::models::ResponseItem;
 /// Folder inside `~/.codex` that holds saved rollouts.
 const SESSIONS_SUBDIR: &str = "sessions";
 
+#[derive(Serialize)]

Can you please move all this new code to a separate file? git_info.rs perhaps?

@@ -183,3 +285,212 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
         timestamp,
     })
 }
+
+#[cfg(test)]

Thanks for the nice test!

@@ -67,16 +165,20 @@ impl RolloutRecorder {
             .format(timestamp_format)
             .map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
 
+        // Collect git repository information
+        let git_info = collect_git_info(cwd).await;

I appreciate the timeouts in collect_git_info(), though if I am reading it correctly, I suppose this could add ~6s to startup in the worst case? It would be nice to figure out how to make this truly async, since RolloutRecorder::new() is on the critical path to startup.

The challenge seems to be that we have these lines below:

    recorder.record_item(&meta).await?;
    Ok(recorder)

That is, we don't want new() to exit until the first item has recorded and now that is dependent on collect_git_info(). Certainly this is fixable, but the bookkeeping may be a bit ugly. What do you think?

@@ -67,16 +165,20 @@ impl RolloutRecorder {
             .format(timestamp_format)
             .map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
 
+        // Collect git repository information
+        let git_info = collect_git_info(cwd).await;

Actually, what if we move collect_git_info(cwd).await into the lambda passed to tokio::task::spawn and then ensure it is written to file before the while let Some(line) = rx.recv().await loop starts?

You could also increase the git timeout to 5s maybe?

@@ -27,6 +29,8 @@ pub struct SessionMeta {
     pub id: Uuid,
     pub timestamp: String,
     pub instructions: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]

In general, it's preferable to avoid mutability, so I would rather keep SessionMeta as it was before... (see rollout.rs)

@@ -196,7 +203,12 @@ impl RolloutRecorder {
             .open(path)?;
 
         let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
-        tokio::task::spawn(rollout_writer(tokio::fs::File::from_std(file), rx, None));
+        tokio::task::spawn(rollout_writer(

Please update the callsite to resume() so that it passes cwd (it's available on Op::ConfigureSession) and then update the third arg to rollout_writer to be PathBuf instead of Option<PathBuf>.

@@ -250,15 +262,36 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
 async fn rollout_writer(
     mut file: tokio::fs::File,
     mut rx: mpsc::Receiver<RolloutCmd>,
-    meta: Option<SessionMeta>,
+    mut meta: Option<SessionMeta>,
+    cwd: Option<std::path::PathBuf>,
 ) {
-    if let Some(meta) = meta {
-        if let Ok(json) = serde_json::to_string(&meta) {
+    // If we have a meta and cwd, collect git info asynchronously and write meta first
+    if let (Some(mut session_meta), Some(cwd)) = (meta.take(), cwd) {
+        // Skip git collection if disabled via environment variable (for tests)
+        let git_info = if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() {

If increasing the number of threads makes the test pass reliably, then let's remove this?

@@ -250,15 +262,36 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
 async fn rollout_writer(
     mut file: tokio::fs::File,
     mut rx: mpsc::Receiver<RolloutCmd>,
-    meta: Option<SessionMeta>,
+    mut meta: Option<SessionMeta>,
+    cwd: Option<std::path::PathBuf>,
 ) {
-    if let Some(meta) = meta {
-        if let Ok(json) = serde_json::to_string(&meta) {
+    // If we have a meta and cwd, collect git info asynchronously and write meta first
+    if let (Some(mut session_meta), Some(cwd)) = (meta.take(), cwd) {
+        // Skip git collection if disabled via environment variable (for tests)
+        let git_info = if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() {
+            None
+        } else {
+            // Collect git repository information asynchronously without blocking startup
+            collect_git_info(&cwd).await
+        };
+        session_meta.git = git_info;

Because I suggested removing git from SessionMeta so we don't have an optional/mutable field, here you can do something like:

        let SessionMeta { id, timestamp, instructions } = session_meta;
        session_meta = json!({
            "id" : id,
            "timestamp": timestamp,
            "instructions": instructions,
            "git": git_info,
        });

Or you could introduce a separate struct with the extra field and use serde_json::to_string() as before.

@@ -29,6 +31,14 @@ pub struct SessionMeta {
     pub instructions: Option<String>,
 }
 
+#[derive(Serialize)]
+struct SessionMetaWithGit {
+    #[serde(flatten)]

👍

@@ -297,3 +335,9 @@ async fn rollout_writer(
         }
     }
 }
+

Remove this block?

@@ -250,15 +262,36 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
 async fn rollout_writer(
     mut file: tokio::fs::File,
     mut rx: mpsc::Receiver<RolloutCmd>,
-    meta: Option<SessionMeta>,
+    mut meta: Option<SessionMeta>,
+    cwd: Option<std::path::PathBuf>,
 ) {
-    if let Some(meta) = meta {
-        if let Ok(json) = serde_json::to_string(&meta) {
+    // If we have a meta and cwd, collect git info asynchronously and write meta first
+    if let (Some(mut session_meta), Some(cwd)) = (meta.take(), cwd) {
+        // Skip git collection if disabled via environment variable (for tests)
+        let git_info = if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() {

https://github.com/openai/codex/pull/1647 might be the fix for this? I would really like to avoid experimental_disable_git_metadata, if possible.

codex-rs/core/tests/cli_stream.rs

@@ -328,7 +330,9 @@ async fn integration_creates_and_checks_session_file() {
     cmd2.env("CODEX_HOME", home.path())
         .env("OPENAI_API_KEY", "dummy")
         .env("CODEX_RS_SSE_FIXTURE", &fixture)
-        .env("OPENAI_BASE_URL", "http://unused.local");
+        .env("OPENAI_BASE_URL", "http://unused.local")
+        // Disable git info collection to avoid blocking message processing in tests
+        .env("CODEX_DISABLE_GIT_INFO", "1");

I have been trying to eliminate support for environment variables in favor of using configuration. Can we just add a config option (prefixed with "experimental" like experimental_resume) for this?

@@ -161,7 +161,9 @@ async fn integration_creates_and_checks_session_file() {
         .env("OPENAI_API_KEY", "dummy")
         .env("CODEX_RS_SSE_FIXTURE", &fixture)
         // Required for CLI arg parsing even though fixture short-circuits network usage.
-        .env("OPENAI_BASE_URL", "http://unused.local");
+        .env("OPENAI_BASE_URL", "http://unused.local")
+        // Disable git info collection to avoid blocking message processing in tests

Alternatively, should we increase the number of threads, i.e., increase the 2 in this line?

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]