mirror of
https://github.com/openai/codex.git
synced 2026-04-28 08:34:54 +00:00
960 lines
34 KiB
Markdown
960 lines
34 KiB
Markdown
# PR #1598: Record Git metadata to rollout
|
||
|
||
- URL: https://github.com/openai/codex/pull/1598
|
||
- Author: vishnu-oai
|
||
- Created: 2025-07-17 13:43:39 UTC
|
||
- Updated: 2025-07-24 18:35:35 UTC
|
||
- Changes: +475/-13, Files changed: 5, Commits: 18
|
||
|
||
## Description
|
||
|
||
# Summary
|
||
|
||
- Writing effective evals for codex sessions requires context of the overall repository state at the moment the session began
|
||
- This change adds this metadata (git repository URL, branch, commit hash) to the top of the session rollout when it is available; if it is not available, nothing is added
|
||
- Currently, this is only effective on a clean working tree, as we can't track uncommitted/untracked changes with the current metadata set. Ideally in the future we may want to track unclean changes somehow, or perhaps prompt the user to stash or commit them.
|
||
|
||
# Testing
|
||
- Added unit tests
|
||
- `cargo test && cargo clippy --tests && cargo fmt -- --config imports_granularity=Item`
|
||
|
||
### Resulting Rollout
|
||
<img width="1243" height="127" alt="Screenshot 2025-07-17 at 1 50 00 PM" src="https://github.com/user-attachments/assets/68108941-f015-45b2-985c-ea315ce05415" />
|
||
|
||
## Full Diff
|
||
|
||
```diff
|
||
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
|
||
index 4cc888b62e..f35348b779 100644
|
||
--- a/codex-rs/core/src/codex.rs
|
||
+++ b/codex-rs/core/src/codex.rs
|
||
@@ -594,7 +594,7 @@ async fn submission_loop(
|
||
let mut restored_items: Option<Vec<ResponseItem>> = None;
|
||
let rollout_recorder: Option<RolloutRecorder> =
|
||
if let Some(path) = resume_path.as_ref() {
|
||
- match RolloutRecorder::resume(path).await {
|
||
+ match RolloutRecorder::resume(path, cwd.clone()).await {
|
||
Ok((rec, saved)) => {
|
||
session_id = saved.session_id;
|
||
if !saved.items.is_empty() {
|
||
diff --git a/codex-rs/core/src/git_info.rs b/codex-rs/core/src/git_info.rs
|
||
new file mode 100644
|
||
index 0000000000..cf959d32d1
|
||
--- /dev/null
|
||
+++ b/codex-rs/core/src/git_info.rs
|
||
@@ -0,0 +1,307 @@
|
||
+use std::path::Path;
|
||
+
|
||
+use serde::Deserialize;
|
||
+use serde::Serialize;
|
||
+use tokio::process::Command;
|
||
+use tokio::time::Duration as TokioDuration;
|
||
+use tokio::time::timeout;
|
||
+
|
||
+/// Timeout for git commands to prevent freezing on large repositories
|
||
+const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
|
||
+
|
||
+#[derive(Serialize, Deserialize, Clone)]
|
||
+pub struct GitInfo {
|
||
+ /// Current commit hash (SHA)
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ pub commit_hash: Option<String>,
|
||
+ /// Current branch name
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ pub branch: Option<String>,
|
||
+ /// Repository URL (if available from remote)
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ pub repository_url: Option<String>,
|
||
+}
|
||
+
|
||
+/// Collect git repository information from the given working directory using command-line git.
|
||
+/// Returns None if no git repository is found or if git operations fail.
|
||
+/// Uses timeouts to prevent freezing on large repositories.
|
||
+/// All git commands (except the initial repo check) run in parallel for better performance.
|
||
+pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
|
||
+ // Check if we're in a git repository first
|
||
+ let is_git_repo = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd)
|
||
+ .await?
|
||
+ .status
|
||
+ .success();
|
||
+
|
||
+ if !is_git_repo {
|
||
+ return None;
|
||
+ }
|
||
+
|
||
+ // Run all git info collection commands in parallel
|
||
+ let (commit_result, branch_result, url_result) = tokio::join!(
|
||
+ run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
|
||
+ run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
|
||
+ run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd)
|
||
+ );
|
||
+
|
||
+ let mut git_info = GitInfo {
|
||
+ commit_hash: None,
|
||
+ branch: None,
|
||
+ repository_url: None,
|
||
+ };
|
||
+
|
||
+ // Process commit hash
|
||
+ if let Some(output) = commit_result {
|
||
+ if output.status.success() {
|
||
+ if let Ok(hash) = String::from_utf8(output.stdout) {
|
||
+ git_info.commit_hash = Some(hash.trim().to_string());
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ // Process branch name
|
||
+ if let Some(output) = branch_result {
|
||
+ if output.status.success() {
|
||
+ if let Ok(branch) = String::from_utf8(output.stdout) {
|
||
+ let branch = branch.trim();
|
||
+ if branch != "HEAD" {
|
||
+ git_info.branch = Some(branch.to_string());
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ // Process repository URL
|
||
+ if let Some(output) = url_result {
|
||
+ if output.status.success() {
|
||
+ if let Ok(url) = String::from_utf8(output.stdout) {
|
||
+ git_info.repository_url = Some(url.trim().to_string());
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ Some(git_info)
|
||
+}
|
||
+
|
||
+/// Run a git command with a timeout to prevent blocking on large repositories
|
||
+async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
|
||
+ let result = timeout(
|
||
+ GIT_COMMAND_TIMEOUT,
|
||
+ Command::new("git").args(args).current_dir(cwd).output(),
|
||
+ )
|
||
+ .await;
|
||
+
|
||
+ match result {
|
||
+ Ok(Ok(output)) => Some(output),
|
||
+ _ => None, // Timeout or error
|
||
+ }
|
||
+}
|
||
+
|
||
+#[cfg(test)]
|
||
+mod tests {
|
||
+ #![allow(clippy::expect_used)]
|
||
+ #![allow(clippy::unwrap_used)]
|
||
+
|
||
+ use super::*;
|
||
+
|
||
+ use std::fs;
|
||
+ use std::path::PathBuf;
|
||
+ use tempfile::TempDir;
|
||
+
|
||
+ // Helper function to create a test git repository
|
||
+ async fn create_test_git_repo(temp_dir: &TempDir) -> PathBuf {
|
||
+ let repo_path = temp_dir.path().to_path_buf();
|
||
+
|
||
+ // Initialize git repo
|
||
+ Command::new("git")
|
||
+ .args(["init"])
|
||
+ .current_dir(&repo_path)
|
||
+ .output()
|
||
+ .await
|
||
+ .expect("Failed to init git repo");
|
||
+
|
||
+ // Configure git user (required for commits)
|
||
+ Command::new("git")
|
||
+ .args(["config", "user.name", "Test User"])
|
||
+ .current_dir(&repo_path)
|
||
+ .output()
|
||
+ .await
|
||
+ .expect("Failed to set git user name");
|
||
+
|
||
+ Command::new("git")
|
||
+ .args(["config", "user.email", "test@example.com"])
|
||
+ .current_dir(&repo_path)
|
||
+ .output()
|
||
+ .await
|
||
+ .expect("Failed to set git user email");
|
||
+
|
||
+ // Create a test file and commit it
|
||
+ let test_file = repo_path.join("test.txt");
|
||
+ fs::write(&test_file, "test content").expect("Failed to write test file");
|
||
+
|
||
+ Command::new("git")
|
||
+ .args(["add", "."])
|
||
+ .current_dir(&repo_path)
|
||
+ .output()
|
||
+ .await
|
||
+ .expect("Failed to add files");
|
||
+
|
||
+ Command::new("git")
|
||
+ .args(["commit", "-m", "Initial commit"])
|
||
+ .current_dir(&repo_path)
|
||
+ .output()
|
||
+ .await
|
||
+ .expect("Failed to commit");
|
||
+
|
||
+ repo_path
|
||
+ }
|
||
+
|
||
+ #[tokio::test]
|
||
+ async fn test_collect_git_info_non_git_directory() {
|
||
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||
+ let result = collect_git_info(temp_dir.path()).await;
|
||
+ assert!(result.is_none());
|
||
+ }
|
||
+
|
||
+ #[tokio::test]
|
||
+ async fn test_collect_git_info_git_repository() {
|
||
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||
+ let repo_path = create_test_git_repo(&temp_dir).await;
|
||
+
|
||
+ let git_info = collect_git_info(&repo_path)
|
||
+ .await
|
||
+ .expect("Should collect git info from repo");
|
||
+
|
||
+ // Should have commit hash
|
||
+ assert!(git_info.commit_hash.is_some());
|
||
+ let commit_hash = git_info.commit_hash.unwrap();
|
||
+ assert_eq!(commit_hash.len(), 40); // SHA-1 hash should be 40 characters
|
||
+ assert!(commit_hash.chars().all(|c| c.is_ascii_hexdigit()));
|
||
+
|
||
+ // Should have branch (likely "main" or "master")
|
||
+ assert!(git_info.branch.is_some());
|
||
+ let branch = git_info.branch.unwrap();
|
||
+ assert!(branch == "main" || branch == "master");
|
||
+
|
||
+ // Repository URL might be None for local repos without remote
|
||
+ // This is acceptable behavior
|
||
+ }
|
||
+
|
||
+ #[tokio::test]
|
||
+ async fn test_collect_git_info_with_remote() {
|
||
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||
+ let repo_path = create_test_git_repo(&temp_dir).await;
|
||
+
|
||
+ // Add a remote origin
|
||
+ Command::new("git")
|
||
+ .args([
|
||
+ "remote",
|
||
+ "add",
|
||
+ "origin",
|
||
+ "https://github.com/example/repo.git",
|
||
+ ])
|
||
+ .current_dir(&repo_path)
|
||
+ .output()
|
||
+ .await
|
||
+ .expect("Failed to add remote");
|
||
+
|
||
+ let git_info = collect_git_info(&repo_path)
|
||
+ .await
|
||
+ .expect("Should collect git info from repo");
|
||
+
|
||
+ // Should have repository URL
|
||
+ assert_eq!(
|
||
+ git_info.repository_url,
|
||
+ Some("https://github.com/example/repo.git".to_string())
|
||
+ );
|
||
+ }
|
||
+
|
||
+ #[tokio::test]
|
||
+ async fn test_collect_git_info_detached_head() {
|
||
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||
+ let repo_path = create_test_git_repo(&temp_dir).await;
|
||
+
|
||
+ // Get the current commit hash
|
||
+ let output = Command::new("git")
|
||
+ .args(["rev-parse", "HEAD"])
|
||
+ .current_dir(&repo_path)
|
||
+ .output()
|
||
+ .await
|
||
+ .expect("Failed to get HEAD");
|
||
+ let commit_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
|
||
+
|
||
+ // Checkout the commit directly (detached HEAD)
|
||
+ Command::new("git")
|
||
+ .args(["checkout", &commit_hash])
|
||
+ .current_dir(&repo_path)
|
||
+ .output()
|
||
+ .await
|
||
+ .expect("Failed to checkout commit");
|
||
+
|
||
+ let git_info = collect_git_info(&repo_path)
|
||
+ .await
|
||
+ .expect("Should collect git info from repo");
|
||
+
|
||
+ // Should have commit hash
|
||
+ assert!(git_info.commit_hash.is_some());
|
||
+ // Branch should be None for detached HEAD (since rev-parse --abbrev-ref HEAD returns "HEAD")
|
||
+ assert!(git_info.branch.is_none());
|
||
+ }
|
||
+
|
||
+ #[tokio::test]
|
||
+ async fn test_collect_git_info_with_branch() {
|
||
+ let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||
+ let repo_path = create_test_git_repo(&temp_dir).await;
|
||
+
|
||
+ // Create and checkout a new branch
|
||
+ Command::new("git")
|
||
+ .args(["checkout", "-b", "feature-branch"])
|
||
+ .current_dir(&repo_path)
|
||
+ .output()
|
||
+ .await
|
||
+ .expect("Failed to create branch");
|
||
+
|
||
+ let git_info = collect_git_info(&repo_path)
|
||
+ .await
|
||
+ .expect("Should collect git info from repo");
|
||
+
|
||
+ // Should have the new branch name
|
||
+ assert_eq!(git_info.branch, Some("feature-branch".to_string()));
|
||
+ }
|
||
+
|
||
+ #[test]
|
||
+ fn test_git_info_serialization() {
|
||
+ let git_info = GitInfo {
|
||
+ commit_hash: Some("abc123def456".to_string()),
|
||
+ branch: Some("main".to_string()),
|
||
+ repository_url: Some("https://github.com/example/repo.git".to_string()),
|
||
+ };
|
||
+
|
||
+ let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
|
||
+ let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
|
||
+
|
||
+ assert_eq!(parsed["commit_hash"], "abc123def456");
|
||
+ assert_eq!(parsed["branch"], "main");
|
||
+ assert_eq!(
|
||
+ parsed["repository_url"],
|
||
+ "https://github.com/example/repo.git"
|
||
+ );
|
||
+ }
|
||
+
|
||
+ #[test]
|
||
+ fn test_git_info_serialization_with_nones() {
|
||
+ let git_info = GitInfo {
|
||
+ commit_hash: None,
|
||
+ branch: None,
|
||
+ repository_url: None,
|
||
+ };
|
||
+
|
||
+ let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
|
||
+ let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
|
||
+
|
||
+ // Fields with None values should be omitted due to skip_serializing_if
|
||
+ assert!(!parsed.as_object().unwrap().contains_key("commit_hash"));
|
||
+ assert!(!parsed.as_object().unwrap().contains_key("branch"));
|
||
+ assert!(!parsed.as_object().unwrap().contains_key("repository_url"));
|
||
+ }
|
||
+}
|
||
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
|
||
index 6812260c97..4e69e94b55 100644
|
||
--- a/codex-rs/core/src/lib.rs
|
||
+++ b/codex-rs/core/src/lib.rs
|
||
@@ -19,6 +19,7 @@ pub mod error;
|
||
pub mod exec;
|
||
pub mod exec_env;
|
||
mod flags;
|
||
+pub mod git_info;
|
||
mod is_safe_command;
|
||
mod mcp_connection_manager;
|
||
mod mcp_tool_call;
|
||
diff --git a/codex-rs/core/src/rollout.rs b/codex-rs/core/src/rollout.rs
|
||
index 7f0f61b9eb..3e6de34d96 100644
|
||
--- a/codex-rs/core/src/rollout.rs
|
||
+++ b/codex-rs/core/src/rollout.rs
|
||
@@ -20,6 +20,8 @@ use tracing::warn;
|
||
use uuid::Uuid;
|
||
|
||
use crate::config::Config;
|
||
+use crate::git_info::GitInfo;
|
||
+use crate::git_info::collect_git_info;
|
||
use crate::models::ResponseItem;
|
||
|
||
const SESSIONS_SUBDIR: &str = "sessions";
|
||
@@ -31,6 +33,14 @@ pub struct SessionMeta {
|
||
pub instructions: Option<String>,
|
||
}
|
||
|
||
+#[derive(Serialize)]
|
||
+struct SessionMetaWithGit {
|
||
+ #[serde(flatten)]
|
||
+ meta: SessionMeta,
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ git: Option<GitInfo>,
|
||
+}
|
||
+
|
||
#[derive(Serialize, Deserialize, Default, Clone)]
|
||
pub struct SessionStateSnapshot {}
|
||
|
||
@@ -86,15 +96,12 @@ impl RolloutRecorder {
|
||
.format(timestamp_format)
|
||
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
|
||
|
||
- let meta = SessionMeta {
|
||
- timestamp,
|
||
- id: session_id,
|
||
- instructions,
|
||
- };
|
||
+ // Clone the cwd for the spawned task to collect git info asynchronously
|
||
+ let cwd = config.cwd.clone();
|
||
|
||
// A reasonably-sized bounded channel. If the buffer fills up the send
|
||
// future will yield, which is fine – we only need to ensure we do not
|
||
- // perform *blocking* I/O on the caller’s thread.
|
||
+ // perform *blocking* I/O on the caller's thread.
|
||
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
|
||
|
||
// Spawn a Tokio task that owns the file handle and performs async
|
||
@@ -103,7 +110,12 @@ impl RolloutRecorder {
|
||
tokio::task::spawn(rollout_writer(
|
||
tokio::fs::File::from_std(file),
|
||
rx,
|
||
- Some(meta),
|
||
+ Some(SessionMeta {
|
||
+ timestamp,
|
||
+ id: session_id,
|
||
+ instructions,
|
||
+ }),
|
||
+ cwd,
|
||
));
|
||
|
||
Ok(Self { tx })
|
||
@@ -143,7 +155,10 @@ impl RolloutRecorder {
|
||
.map_err(|e| IoError::other(format!("failed to queue rollout state: {e}")))
|
||
}
|
||
|
||
- pub async fn resume(path: &Path) -> std::io::Result<(Self, SavedSession)> {
|
||
+ pub async fn resume(
|
||
+ path: &Path,
|
||
+ cwd: std::path::PathBuf,
|
||
+ ) -> std::io::Result<(Self, SavedSession)> {
|
||
info!("Resuming rollout from {path:?}");
|
||
let text = tokio::fs::read_to_string(path).await?;
|
||
let mut lines = text.lines();
|
||
@@ -201,7 +216,12 @@ impl RolloutRecorder {
|
||
.open(path)?;
|
||
|
||
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
|
||
- tokio::task::spawn(rollout_writer(tokio::fs::File::from_std(file), rx, None));
|
||
+ tokio::task::spawn(rollout_writer(
|
||
+ tokio::fs::File::from_std(file),
|
||
+ rx,
|
||
+ None,
|
||
+ cwd,
|
||
+ ));
|
||
info!("Resumed rollout successfully from {path:?}");
|
||
Ok((Self { tx }, saved))
|
||
}
|
||
@@ -270,15 +290,26 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
|
||
async fn rollout_writer(
|
||
mut file: tokio::fs::File,
|
||
mut rx: mpsc::Receiver<RolloutCmd>,
|
||
- meta: Option<SessionMeta>,
|
||
+ mut meta: Option<SessionMeta>,
|
||
+ cwd: std::path::PathBuf,
|
||
) {
|
||
- if let Some(meta) = meta {
|
||
- if let Ok(json) = serde_json::to_string(&meta) {
|
||
+ // If we have a meta, collect git info asynchronously and write meta first
|
||
+ if let Some(session_meta) = meta.take() {
|
||
+ let git_info = collect_git_info(&cwd).await;
|
||
+ let session_meta_with_git = SessionMetaWithGit {
|
||
+ meta: session_meta,
|
||
+ git: git_info,
|
||
+ };
|
||
+
|
||
+ // Write the SessionMeta as the first item in the file
|
||
+ if let Ok(json) = serde_json::to_string(&session_meta_with_git) {
|
||
let _ = file.write_all(json.as_bytes()).await;
|
||
let _ = file.write_all(b"\n").await;
|
||
let _ = file.flush().await;
|
||
}
|
||
}
|
||
+
|
||
+ // Process rollout commands
|
||
while let Some(cmd) = rx.recv().await {
|
||
match cmd {
|
||
RolloutCmd::AddItems(items) => {
|
||
diff --git a/codex-rs/core/tests/cli_stream.rs b/codex-rs/core/tests/cli_stream.rs
|
||
index 567279ebd0..4694ba85ed 100644
|
||
--- a/codex-rs/core/tests/cli_stream.rs
|
||
+++ b/codex-rs/core/tests/cli_stream.rs
|
||
@@ -329,6 +329,7 @@ async fn integration_creates_and_checks_session_file() {
|
||
.env("OPENAI_API_KEY", "dummy")
|
||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||
.env("OPENAI_BASE_URL", "http://unused.local");
|
||
+
|
||
let output2 = cmd2.output().unwrap();
|
||
assert!(output2.status.success(), "resume codex-cli run failed");
|
||
|
||
@@ -359,3 +360,125 @@ async fn integration_creates_and_checks_session_file() {
|
||
"rollout missing resumed marker"
|
||
);
|
||
}
|
||
+
|
||
+/// Integration test to verify git info is collected and recorded in session files.
|
||
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||
+async fn integration_git_info_unit_test() {
|
||
+ // This test verifies git info collection works independently
|
||
+ // without depending on the full CLI integration
|
||
+
|
||
+ // 1. Create temp directory for git repo
|
||
+ let temp_dir = TempDir::new().unwrap();
|
||
+ let git_repo = temp_dir.path().to_path_buf();
|
||
+
|
||
+ // 2. Initialize a git repository with some content
|
||
+ let init_output = std::process::Command::new("git")
|
||
+ .args(["init"])
|
||
+ .current_dir(&git_repo)
|
||
+ .output()
|
||
+ .unwrap();
|
||
+ assert!(init_output.status.success(), "git init failed");
|
||
+
|
||
+ // Configure git user (required for commits)
|
||
+ std::process::Command::new("git")
|
||
+ .args(["config", "user.name", "Integration Test"])
|
||
+ .current_dir(&git_repo)
|
||
+ .output()
|
||
+ .unwrap();
|
||
+
|
||
+ std::process::Command::new("git")
|
||
+ .args(["config", "user.email", "test@example.com"])
|
||
+ .current_dir(&git_repo)
|
||
+ .output()
|
||
+ .unwrap();
|
||
+
|
||
+ // Create a test file and commit it
|
||
+ let test_file = git_repo.join("test.txt");
|
||
+ std::fs::write(&test_file, "integration test content").unwrap();
|
||
+
|
||
+ std::process::Command::new("git")
|
||
+ .args(["add", "."])
|
||
+ .current_dir(&git_repo)
|
||
+ .output()
|
||
+ .unwrap();
|
||
+
|
||
+ let commit_output = std::process::Command::new("git")
|
||
+ .args(["commit", "-m", "Integration test commit"])
|
||
+ .current_dir(&git_repo)
|
||
+ .output()
|
||
+ .unwrap();
|
||
+ assert!(commit_output.status.success(), "git commit failed");
|
||
+
|
||
+ // Create a branch to test branch detection
|
||
+ std::process::Command::new("git")
|
||
+ .args(["checkout", "-b", "integration-test-branch"])
|
||
+ .current_dir(&git_repo)
|
||
+ .output()
|
||
+ .unwrap();
|
||
+
|
||
+ // Add a remote to test repository URL detection
|
||
+ std::process::Command::new("git")
|
||
+ .args([
|
||
+ "remote",
|
||
+ "add",
|
||
+ "origin",
|
||
+ "https://github.com/example/integration-test.git",
|
||
+ ])
|
||
+ .current_dir(&git_repo)
|
||
+ .output()
|
||
+ .unwrap();
|
||
+
|
||
+ // 3. Test git info collection directly
|
||
+ let git_info = codex_core::git_info::collect_git_info(&git_repo).await;
|
||
+
|
||
+ // 4. Verify git info is present and contains expected data
|
||
+ assert!(git_info.is_some(), "Git info should be collected");
|
||
+
|
||
+ let git_info = git_info.unwrap();
|
||
+
|
||
+ // Check that we have a commit hash
|
||
+ assert!(
|
||
+ git_info.commit_hash.is_some(),
|
||
+ "Git info should contain commit_hash"
|
||
+ );
|
||
+ let commit_hash = git_info.commit_hash.as_ref().unwrap();
|
||
+ assert_eq!(commit_hash.len(), 40, "Commit hash should be 40 characters");
|
||
+ assert!(
|
||
+ commit_hash.chars().all(|c| c.is_ascii_hexdigit()),
|
||
+ "Commit hash should be hexadecimal"
|
||
+ );
|
||
+
|
||
+ // Check that we have the correct branch
|
||
+ assert!(git_info.branch.is_some(), "Git info should contain branch");
|
||
+ let branch = git_info.branch.as_ref().unwrap();
|
||
+ assert_eq!(
|
||
+ branch, "integration-test-branch",
|
||
+ "Branch should match what we created"
|
||
+ );
|
||
+
|
||
+ // Check that we have the repository URL
|
||
+ assert!(
|
||
+ git_info.repository_url.is_some(),
|
||
+ "Git info should contain repository_url"
|
||
+ );
|
||
+ let repo_url = git_info.repository_url.as_ref().unwrap();
|
||
+ assert_eq!(
|
||
+ repo_url, "https://github.com/example/integration-test.git",
|
||
+ "Repository URL should match what we configured"
|
||
+ );
|
||
+
|
||
+ println!("✅ Git info collection test passed!");
|
||
+ println!(" Commit: {commit_hash}");
|
||
+ println!(" Branch: {branch}");
|
||
+ println!(" Repo: {repo_url}");
|
||
+
|
||
+ // 5. Test serialization to ensure it works in SessionMeta
|
||
+ let serialized = serde_json::to_string(&git_info).unwrap();
|
||
+ let deserialized: codex_core::git_info::GitInfo = serde_json::from_str(&serialized).unwrap();
|
||
+
|
||
+ assert_eq!(git_info.commit_hash, deserialized.commit_hash);
|
||
+ assert_eq!(git_info.branch, deserialized.branch);
|
||
+ assert_eq!(git_info.repository_url, deserialized.repository_url);
|
||
+
|
||
+ println!("✅ Git info serialization test passed!");
|
||
+}
|
||
```
|
||
|
||
## Review Comments
|
||
|
||
### codex-rs/core/src/codex.rs
|
||
|
||
- Created: 2025-07-17 22:55:29 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214451100
|
||
|
||
```diff
|
||
@@ -652,7 +652,9 @@ async fn submission_loop(
|
||
// overlapping rollout file. Consider passing RolloutRecorder
|
||
// from above.
|
||
let rollout_recorder =
|
||
- match RolloutRecorder::new(&config, session_id, instructions.clone()).await {
|
||
+ match RolloutRecorder::new(&config, session_id, instructions.clone(), &cwd)
|
||
```
|
||
|
||
> `cwd` is available on `Config`?
|
||
|
||
### codex-rs/core/src/git_info.rs
|
||
|
||
- Created: 2025-07-22 15:26:26 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222934571
|
||
|
||
```diff
|
||
@@ -0,0 +1,307 @@
|
||
+use std::path::Path;
|
||
+
|
||
+use serde::Deserialize;
|
||
+use serde::Serialize;
|
||
+use tokio::process::Command;
|
||
+use tokio::time::Duration as TokioDuration;
|
||
+use tokio::time::timeout;
|
||
+
|
||
+/// Timeout for git commands to prevent freezing on large repositories
|
||
+const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
|
||
+
|
||
+#[derive(Serialize, Deserialize, Clone)]
|
||
+pub struct GitInfo {
|
||
+ /// Current commit hash (SHA)
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ pub commit_hash: Option<String>,
|
||
+ /// Current branch name
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ pub branch: Option<String>,
|
||
+ /// Repository URL (if available from remote)
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ pub repository_url: Option<String>,
|
||
+}
|
||
+
|
||
+/// Run a git command with a timeout to prevent blocking on large repositories
|
||
+async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
|
||
+ let result = timeout(
|
||
+ GIT_COMMAND_TIMEOUT,
|
||
+ Command::new("git").args(args).current_dir(cwd).output(),
|
||
+ )
|
||
+ .await;
|
||
+
|
||
+ match result {
|
||
+ Ok(Ok(output)) => Some(output),
|
||
+ _ => None, // Timeout or error
|
||
+ }
|
||
+}
|
||
+
|
||
+/// Collect git repository information from the given working directory using command-line git.
|
||
+/// Returns None if no git repository is found or if git operations fail.
|
||
+/// Uses timeouts to prevent freezing on large repositories.
|
||
+/// All git commands (except the initial repo check) run in parallel for better performance.
|
||
+pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
|
||
```
|
||
|
||
> Please list this after `pub struct GitInfo` so it is at the top since it's the main "export" after the struct.
|
||
|
||
### codex-rs/core/src/rollout.rs
|
||
|
||
- Created: 2025-07-17 19:05:42 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214082920
|
||
|
||
```diff
|
||
@@ -22,12 +27,105 @@ use crate::models::ResponseItem;
|
||
/// Folder inside `~/.codex` that holds saved rollouts.
|
||
const SESSIONS_SUBDIR: &str = "sessions";
|
||
|
||
+#[derive(Serialize)]
|
||
+struct GitInfo {
|
||
+ /// Current commit hash (SHA)
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ commit_hash: Option<String>,
|
||
+ /// Current branch name
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ branch: Option<String>,
|
||
+ /// Repository URL (if available from remote)
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ repository_url: Option<String>,
|
||
+}
|
||
+
|
||
#[derive(Serialize)]
|
||
struct SessionMeta {
|
||
id: String,
|
||
timestamp: String,
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
instructions: Option<String>,
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
+ git: Option<GitInfo>,
|
||
+}
|
||
+
|
||
+/// Timeout for git commands to prevent freezing on large repositories
|
||
+const GIT_COMMAND_TIMEOUT: Duration = Duration::from_secs(3);
|
||
+
|
||
+/// Run a git command with a timeout to prevent blocking on large repositories
|
||
+fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
|
||
```
|
||
|
||
> Can you use tokio::Command and make this `async` instead? It's cheaper to create tokio tasks than POSIX threads. You should then update `collect_git_info()` to make all these calls in parallel.
|
||
|
||
- Created: 2025-07-17 22:57:23 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214453622
|
||
|
||
```diff
|
||
@@ -22,12 +26,105 @@ use crate::models::ResponseItem;
|
||
/// Folder inside `~/.codex` that holds saved rollouts.
|
||
const SESSIONS_SUBDIR: &str = "sessions";
|
||
|
||
+#[derive(Serialize)]
|
||
```
|
||
|
||
> Can you please move all this new code to a separate file? `git_info.rs` perhaps?
|
||
|
||
- Created: 2025-07-17 22:58:16 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214454353
|
||
|
||
```diff
|
||
@@ -183,3 +285,212 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
|
||
timestamp,
|
||
})
|
||
}
|
||
+
|
||
+#[cfg(test)]
|
||
```
|
||
|
||
> Thanks for the nice test!
|
||
|
||
- Created: 2025-07-17 23:06:24 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214462855
|
||
|
||
```diff
|
||
@@ -67,16 +165,20 @@ impl RolloutRecorder {
|
||
.format(timestamp_format)
|
||
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
|
||
|
||
+ // Collect git repository information
|
||
+ let git_info = collect_git_info(cwd).await;
|
||
```
|
||
|
||
> I appreciate the timeouts in `collect_git_info()`, though if I am reading it correctly, I suppose this could add ~6s to startup in the worst case? It would be nice to figure out how to make this truly async, since `RolloutRecorder::new()` is on the critical path to startup.
|
||
>
|
||
> The challenge seems to be that we have these lines below:
|
||
>
|
||
> ```rust
|
||
> recorder.record_item(&meta).await?;
|
||
> Ok(recorder)
|
||
> ```
|
||
>
|
||
> That is, we don't want `new()` to exit until the first item has recorded and now that is dependent on `collect_git_info()`. Certainly this is fixable, but the bookkeeping may be a bit ugly. What do you think?
|
||
|
||
- Created: 2025-07-17 23:15:31 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2214474763
|
||
|
||
```diff
|
||
@@ -67,16 +165,20 @@ impl RolloutRecorder {
|
||
.format(timestamp_format)
|
||
.map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;
|
||
|
||
+ // Collect git repository information
|
||
+ let git_info = collect_git_info(cwd).await;
|
||
```
|
||
|
||
> Actually, what if we move `collect_git_info(cwd).await` into the lambda passed to `tokio::task::spawn` and then ensure it is written to `file` before the `while let Some(line) = rx.recv().await` loop starts?
|
||
>
|
||
> You could also increase the `git` timeout to 5s maybe?
|
||
|
||
- Created: 2025-07-22 15:29:16 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222944436
|
||
|
||
```diff
|
||
@@ -27,6 +29,8 @@ pub struct SessionMeta {
|
||
pub id: Uuid,
|
||
pub timestamp: String,
|
||
pub instructions: Option<String>,
|
||
+ #[serde(skip_serializing_if = "Option::is_none")]
|
||
```
|
||
|
||
> In general, it's preferable to avoid mutability, so I would rather keep `SessionMeta` as it was before... (see `rollout.rs`)
|
||
|
||
- Created: 2025-07-22 15:32:35 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222956970
|
||
|
||
```diff
|
||
@@ -196,7 +203,12 @@ impl RolloutRecorder {
|
||
.open(path)?;
|
||
|
||
let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
|
||
- tokio::task::spawn(rollout_writer(tokio::fs::File::from_std(file), rx, None));
|
||
+ tokio::task::spawn(rollout_writer(
|
||
```
|
||
|
||
> Please update the callsite to `resume()` so that it passes `cwd` (it's available on `Op::ConfigureSession`) and then update the third arg to `rollout_writer` to be `PathBuf` instead of `Option<PathBuf>`.
|
||
|
||
- Created: 2025-07-22 15:33:02 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222958878
|
||
|
||
```diff
|
||
@@ -250,15 +262,36 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
|
||
async fn rollout_writer(
|
||
mut file: tokio::fs::File,
|
||
mut rx: mpsc::Receiver<RolloutCmd>,
|
||
- meta: Option<SessionMeta>,
|
||
+ mut meta: Option<SessionMeta>,
|
||
+ cwd: Option<std::path::PathBuf>,
|
||
) {
|
||
- if let Some(meta) = meta {
|
||
- if let Ok(json) = serde_json::to_string(&meta) {
|
||
+ // If we have a meta and cwd, collect git info asynchronously and write meta first
|
||
+ if let (Some(mut session_meta), Some(cwd)) = (meta.take(), cwd) {
|
||
+ // Skip git collection if disabled via environment variable (for tests)
|
||
+ let git_info = if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() {
|
||
```
|
||
|
||
> If increasing the number of threads makes the test pass reliably, then let's remove this?
|
||
|
||
- Created: 2025-07-22 15:35:51 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222969734
|
||
|
||
```diff
|
||
@@ -250,15 +262,36 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
|
||
async fn rollout_writer(
|
||
mut file: tokio::fs::File,
|
||
mut rx: mpsc::Receiver<RolloutCmd>,
|
||
- meta: Option<SessionMeta>,
|
||
+ mut meta: Option<SessionMeta>,
|
||
+ cwd: Option<std::path::PathBuf>,
|
||
) {
|
||
- if let Some(meta) = meta {
|
||
- if let Ok(json) = serde_json::to_string(&meta) {
|
||
+ // If we have a meta and cwd, collect git info asynchronously and write meta first
|
||
+ if let (Some(mut session_meta), Some(cwd)) = (meta.take(), cwd) {
|
||
+ // Skip git collection if disabled via environment variable (for tests)
|
||
+ let git_info = if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() {
|
||
+ None
|
||
+ } else {
|
||
+ // Collect git repository information asynchronously without blocking startup
|
||
+ collect_git_info(&cwd).await
|
||
+ };
|
||
+ session_meta.git = git_info;
|
||
```
|
||
|
||
> Because I suggested removing `git` from `SessionMeta` so we don't have an optional/mutable field, here you can do something like:
|
||
>
|
||
> ```suggestion
|
||
> let SessionMeta { id, timestamp, instructions } = session_meta;
|
||
> session_meta = json!({
|
||
> "id" : id,
|
||
> "timestamp": timestamp,
|
||
> "instructions": instructions,
|
||
> "git": git_info,
|
||
> });
|
||
> ```
|
||
>
|
||
> Or you could introduce a separate struct with the extra field and use `serde_json::to_string()` as before.
|
||
|
||
- Created: 2025-07-23 00:42:50 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2224080928
|
||
|
||
```diff
|
||
@@ -29,6 +31,14 @@ pub struct SessionMeta {
|
||
pub instructions: Option<String>,
|
||
}
|
||
|
||
+#[derive(Serialize)]
|
||
+struct SessionMetaWithGit {
|
||
+ #[serde(flatten)]
|
||
```
|
||
|
||
> 👍
|
||
|
||
- Created: 2025-07-23 00:43:10 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2224081213
|
||
|
||
```diff
|
||
@@ -297,3 +335,9 @@ async fn rollout_writer(
|
||
}
|
||
}
|
||
}
|
||
+
|
||
```
|
||
|
||
> Remove this block?
|
||
|
||
- Created: 2025-07-23 00:49:13 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2224085948
|
||
|
||
```diff
|
||
@@ -250,15 +262,36 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
|
||
async fn rollout_writer(
|
||
mut file: tokio::fs::File,
|
||
mut rx: mpsc::Receiver<RolloutCmd>,
|
||
- meta: Option<SessionMeta>,
|
||
+ mut meta: Option<SessionMeta>,
|
||
+ cwd: Option<std::path::PathBuf>,
|
||
) {
|
||
- if let Some(meta) = meta {
|
||
- if let Ok(json) = serde_json::to_string(&meta) {
|
||
+ // If we have a meta and cwd, collect git info asynchronously and write meta first
|
||
+ if let (Some(mut session_meta), Some(cwd)) = (meta.take(), cwd) {
|
||
+ // Skip git collection if disabled via environment variable (for tests)
|
||
+ let git_info = if std::env::var("CODEX_DISABLE_GIT_INFO").is_ok() {
|
||
```
|
||
|
||
> https://github.com/openai/codex/pull/1647 might be the fix for this? I would really like to avoid `experimental_disable_git_metadata`, if possible.
|
||
|
||
### codex-rs/core/tests/cli_stream.rs
|
||
|
||
- Created: 2025-07-22 15:24:13 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222926115
|
||
|
||
```diff
|
||
@@ -328,7 +330,9 @@ async fn integration_creates_and_checks_session_file() {
|
||
cmd2.env("CODEX_HOME", home.path())
|
||
.env("OPENAI_API_KEY", "dummy")
|
||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||
- .env("OPENAI_BASE_URL", "http://unused.local");
|
||
+ .env("OPENAI_BASE_URL", "http://unused.local")
|
||
+ // Disable git info collection to avoid blocking message processing in tests
|
||
+ .env("CODEX_DISABLE_GIT_INFO", "1");
|
||
```
|
||
|
||
> I have been trying to eliminate support for environment variables in favor of using configuration. Can we just add a config option (prefixed with "experimental" like `experimental_resume`) for this?
|
||
|
||
- Created: 2025-07-22 15:25:28 UTC | Link: https://github.com/openai/codex/pull/1598#discussion_r2222930978
|
||
|
||
```diff
|
||
@@ -161,7 +161,9 @@ async fn integration_creates_and_checks_session_file() {
|
||
.env("OPENAI_API_KEY", "dummy")
|
||
.env("CODEX_RS_SSE_FIXTURE", &fixture)
|
||
// Required for CLI arg parsing even though fixture short-circuits network usage.
|
||
- .env("OPENAI_BASE_URL", "http://unused.local");
|
||
+ .env("OPENAI_BASE_URL", "http://unused.local")
|
||
+ // Disable git info collection to avoid blocking message processing in tests
|
||
```
|
||
|
||
> Alternatively, should we increase the number of worker threads, i.e., raise the `2` in this line?
|
||
>
|
||
> ```
|
||
> #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||
> ``` |