Move git utilities into a dedicated crate (#15564)

- create `codex-git-utils` and move the shared git helpers into it with
file moves preserved for diff readability
- move the `GitInfo` helpers out of `core` so stacked rollout work can
depend on the shared crate without carrying its own git info module

---------

Co-authored-by: Ahmed Ibrahim <219906144+aibrahim-oai@users.noreply.github.com>
Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Ahmed Ibrahim
2026-03-24 13:26:23 -07:00
committed by GitHub
parent fc97092f75
commit 0f957a93cd
68 changed files with 206 additions and 134 deletions

View File

@@ -0,0 +1,847 @@
//! Helpers for applying unified diffs using the system `git` binary.
//!
//! The entry point is [`apply_git_patch`], which writes a diff to a temporary
//! file, shells out to `git apply` with the right flags, and then parses the
//! command's output into structured details. Callers can opt into dry-run
//! mode via [`ApplyGitRequest::preflight`] and inspect the resulting paths to
//! learn what would change before applying for real.
use once_cell::sync::Lazy;
use regex::Regex;
use std::ffi::OsStr;
use std::io;
use std::path::Path;
use std::path::PathBuf;
/// Parameters for invoking [`apply_git_patch`].
#[derive(Debug, Clone)]
pub struct ApplyGitRequest {
/// Directory from which the repository root is resolved
/// (`git rev-parse --show-toplevel` is run with this as its working directory).
pub cwd: PathBuf,
/// Unified diff text; it is written to a temporary patch file before git is invoked.
pub diff: String,
/// When `true`, `-R` is passed to `git apply` so the diff is applied in reverse.
pub revert: bool,
/// When `true`, only `git apply --check` is run instead of `git apply --3way`.
pub preflight: bool,
}
/// Result of running [`apply_git_patch`], including paths gleaned from stdout/stderr.
#[derive(Debug, Clone)]
pub struct ApplyGitResult {
/// Exit code of the git process, or `-1` when the process reported no exit code.
pub exit_code: i32,
/// Paths that [`parse_git_apply_output`] classified as applied.
pub applied_paths: Vec<String>,
/// Paths that [`parse_git_apply_output`] classified as skipped.
pub skipped_paths: Vec<String>,
/// Paths that [`parse_git_apply_output`] classified as conflicted.
pub conflicted_paths: Vec<String>,
/// Captured stdout of the git process, lossily decoded as UTF-8.
pub stdout: String,
/// Captured stderr of the git process, lossily decoded as UTF-8.
pub stderr: String,
/// Shell-style rendering of the executed command, intended for logs.
pub cmd_for_log: String,
}
/// Apply a unified diff to the target repository by shelling out to `git apply`.
///
/// When [`ApplyGitRequest::preflight`] is `true`, this behaves like `git apply --check` and
/// leaves the working tree untouched while still parsing the command output for diagnostics.
pub fn apply_git_patch(req: &ApplyGitRequest) -> io::Result<ApplyGitResult> {
let git_root = resolve_git_root(&req.cwd)?;
// Write unified diff into a temporary file
let (tmpdir, patch_path) = write_temp_patch(&req.diff)?;
// Keep tmpdir alive until function end to ensure the file exists
let _guard = tmpdir;
if req.revert && !req.preflight {
// Stage WT paths first to avoid index mismatch on revert.
stage_paths(&git_root, &req.diff)?;
}
// Build git args
let mut args: Vec<String> = vec!["apply".into(), "--3way".into()];
if req.revert {
args.push("-R".into());
}
// Optional: additional git config via env knob (defaults OFF)
let mut cfg_parts: Vec<String> = Vec::new();
if let Ok(cfg) = std::env::var("CODEX_APPLY_GIT_CFG") {
for pair in cfg.split(',') {
let p = pair.trim();
if p.is_empty() || !p.contains('=') {
continue;
}
cfg_parts.push("-c".into());
cfg_parts.push(p.to_string());
}
}
args.push(patch_path.to_string_lossy().to_string());
// Optional preflight: dry-run only; do not modify working tree
if req.preflight {
let mut check_args = vec!["apply".to_string(), "--check".to_string()];
if req.revert {
check_args.push("-R".to_string());
}
check_args.push(patch_path.to_string_lossy().to_string());
let rendered = render_command_for_log(&git_root, &cfg_parts, &check_args);
let (c_code, c_out, c_err) = run_git(&git_root, &cfg_parts, &check_args)?;
let (mut applied_paths, mut skipped_paths, mut conflicted_paths) =
parse_git_apply_output(&c_out, &c_err);
applied_paths.sort();
applied_paths.dedup();
skipped_paths.sort();
skipped_paths.dedup();
conflicted_paths.sort();
conflicted_paths.dedup();
return Ok(ApplyGitResult {
exit_code: c_code,
applied_paths,
skipped_paths,
conflicted_paths,
stdout: c_out,
stderr: c_err,
cmd_for_log: rendered,
});
}
let cmd_for_log = render_command_for_log(&git_root, &cfg_parts, &args);
let (code, stdout, stderr) = run_git(&git_root, &cfg_parts, &args)?;
let (mut applied_paths, mut skipped_paths, mut conflicted_paths) =
parse_git_apply_output(&stdout, &stderr);
applied_paths.sort();
applied_paths.dedup();
skipped_paths.sort();
skipped_paths.dedup();
conflicted_paths.sort();
conflicted_paths.dedup();
Ok(ApplyGitResult {
exit_code: code,
applied_paths,
skipped_paths,
conflicted_paths,
stdout,
stderr,
cmd_for_log,
})
}
/// Resolve the top-level directory of the git work tree that contains `cwd`.
///
/// Runs `git rev-parse --show-toplevel` with `cwd` as the working directory
/// and returns its trimmed stdout as a path.
///
/// # Errors
///
/// Fails when git cannot be spawned, or with a "not a git repository" error
/// (carrying the exit code and stderr) when git exits non-zero.
fn resolve_git_root(cwd: &Path) -> io::Result<PathBuf> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--show-toplevel"])
        .current_dir(cwd)
        .output()?;

    match output.status.code().unwrap_or(-1) {
        0 => {
            let toplevel = String::from_utf8_lossy(&output.stdout);
            Ok(PathBuf::from(toplevel.trim()))
        }
        code => Err(io::Error::other(format!(
            "not a git repository (exit {}): {}",
            code,
            String::from_utf8_lossy(&output.stderr)
        ))),
    }
}
/// Write `diff` to a file named `patch.diff` inside a freshly created temporary directory.
///
/// Returns the directory handle together with the patch path. The caller must
/// keep the handle alive for as long as the file is needed (presumably the
/// directory is removed when the handle is dropped — see the `tempfile` docs).
fn write_temp_patch(diff: &str) -> io::Result<(tempfile::TempDir, PathBuf)> {
    let holder = tempfile::tempdir()?;
    let target = holder.path().join("patch.diff");
    std::fs::write(&target, diff).map(|()| (holder, target))
}
/// Run `git <git_cfg...> <args...>` in `cwd` and capture its result.
///
/// Returns `(exit_code, stdout, stderr)`. The exit code is `-1` when the
/// process reported none; both streams are decoded lossily as UTF-8.
///
/// # Errors
///
/// Fails only when the process cannot be spawned or its output cannot be
/// collected; a non-zero exit status is returned as data.
fn run_git(cwd: &Path, git_cfg: &[String], args: &[String]) -> io::Result<(i32, String, String)> {
    let output = std::process::Command::new("git")
        .args(git_cfg)
        .args(args)
        .current_dir(cwd)
        .output()?;

    let decode = |bytes: &[u8]| String::from_utf8_lossy(bytes).into_owned();
    Ok((
        output.status.code().unwrap_or(-1),
        decode(&output.stdout),
        decode(&output.stderr),
    ))
}
/// Quote `s` so it can be pasted into a POSIX shell as a single argument.
///
/// Non-empty strings made only of ASCII alphanumerics and `-_.:/@%+` are
/// returned unchanged. Everything else is wrapped in single quotes, with each
/// embedded `'` rewritten as `'\''`.
///
/// The empty string is rendered as `''`; leaving it bare would make the
/// argument disappear from the rendered command line.
fn quote_shell(s: &str) -> String {
    let is_safe = |c: char| c.is_ascii_alphanumeric() || "-_.:/@%+".contains(c);
    if !s.is_empty() && s.chars().all(is_safe) {
        s.to_string()
    } else {
        format!("'{}'", s.replace('\'', "'\\''"))
    }
}
/// Render the git invocation as a copy-pasteable shell snippet for logging.
///
/// The result has the shape `(cd <cwd> && git <git_cfg...> <args...>)`, with
/// the directory and every argument passed through [`quote_shell`].
fn render_command_for_log(cwd: &Path, git_cfg: &[String], args: &[String]) -> String {
    let command = std::iter::once("git".to_string())
        .chain(git_cfg.iter().chain(args).map(|part| quote_shell(part)))
        .collect::<Vec<String>>()
        .join(" ");
    let dir = quote_shell(&cwd.display().to_string());
    format!("(cd {dir} && {command})")
}
/// Collect every path referenced by the `diff --git` headers of `diff_text`.
///
/// Both sides of each header are considered. The `a/` and `b/` prefixes are
/// stripped and `/dev/null` placeholders are dropped (see
/// `normalize_diff_path`). The result is sorted and free of duplicates.
///
/// Only lines that *start* with `diff --git ` count as headers. Hunk content
/// is always prefixed with ` `, `+` or `-`, so leading whitespace must not be
/// stripped: otherwise a context line such as ` diff --git a/x b/y` (for
/// example in a patch that edits another patch file) would be mistaken for a
/// header.
pub fn extract_paths_from_patch(diff_text: &str) -> Vec<String> {
    let mut paths = std::collections::BTreeSet::new();
    for line in diff_text.lines() {
        // Strip trailing whitespace only (e.g. a stray `\r` from CRLF input).
        let Some(rest) = line.trim_end().strip_prefix("diff --git ") else {
            continue;
        };
        let Some((old_side, new_side)) = parse_diff_git_paths(rest) else {
            continue;
        };
        paths.extend(normalize_diff_path(&old_side, "a/"));
        paths.extend(normalize_diff_path(&new_side, "b/"));
    }
    paths.into_iter().collect()
}
/// Split the remainder of a `diff --git` header into its two path tokens.
///
/// `line` is the text after the `diff --git ` prefix. Returns `None` when
/// fewer than two tokens can be read.
fn parse_diff_git_paths(line: &str) -> Option<(String, String)> {
    let mut cursor = line.chars().peekable();
    read_diff_git_token(&mut cursor).and_then(|old_side| {
        read_diff_git_token(&mut cursor).map(|new_side| (old_side, new_side))
    })
}
/// Read the next path token from a `diff --git` header.
///
/// Leading whitespace is skipped. Then:
///
/// * If the token starts with `"` or `'`, everything up to the matching
///   closing quote is collected (a backslash always takes the following
///   character with it, so escaped quotes do not end the token) and the
///   result is passed through `unescape_c_string`. A quoted token is always
///   `Some`, even when empty.
/// * Otherwise the token runs up to the next whitespace character, which is
///   consumed. An empty unquoted token yields `None`.
fn read_diff_git_token(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> Option<String> {
    while chars.next_if(|c| c.is_whitespace()).is_some() {}

    match chars.next_if(|c| matches!(c, '"' | '\'')) {
        Some(quote) => {
            let mut raw = String::new();
            while let Some(c) = chars.next() {
                if c == quote {
                    break;
                }
                raw.push(c);
                if c == '\\' {
                    // Keep the escaped character verbatim; it is decoded below.
                    if let Some(escaped) = chars.next() {
                        raw.push(escaped);
                    }
                }
            }
            Some(unescape_c_string(&raw))
        }
        None => {
            let mut token = String::new();
            for c in chars.by_ref() {
                if c.is_whitespace() {
                    break;
                }
                token.push(c);
            }
            if token.is_empty() { None } else { Some(token) }
        }
    }
}
/// Turn one side of a `diff --git` header into a repository-relative path.
///
/// Surrounding whitespace is trimmed and a leading `prefix` (`a/` or `b/`) is
/// removed when present. Returns `None` for empty input, for `/dev/null`, for
/// `<prefix>dev/null`, and when nothing remains after stripping the prefix.
fn normalize_diff_path(raw: &str, prefix: &str) -> Option<String> {
    let candidate = raw.trim();
    let without_prefix = candidate.strip_prefix(prefix);

    if candidate == "/dev/null" || without_prefix == Some("dev/null") {
        return None;
    }

    match without_prefix.unwrap_or(candidate) {
        "" => None,
        path => Some(path.to_string()),
    }
}
/// Decode the C-style escapes git uses when it quotes a path.
///
/// Supported escapes: `\n \r \t \b \f \a \v \\ \" \'` and octal escapes of one
/// to three digits. Any other escaped character is kept as-is without the
/// backslash, and a trailing lone backslash is preserved.
///
/// Git emits non-ASCII paths as one octal escape *per byte* (`é` becomes
/// `\303\251`). Octal escapes are therefore collected as raw bytes and the
/// whole buffer is decoded as UTF-8 at the end; mapping each escape to its own
/// code point would produce mojibake (`Ã©`). Byte sequences that are not valid
/// UTF-8 are replaced with U+FFFD.
fn unescape_c_string(input: &str) -> String {
    /// Append the UTF-8 encoding of `c` to `buf`.
    fn push_char(buf: &mut Vec<u8>, c: char) {
        let mut scratch = [0u8; 4];
        buf.extend_from_slice(c.encode_utf8(&mut scratch).as_bytes());
    }

    let mut bytes: Vec<u8> = Vec::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    while let Some(c) = chars.next() {
        if c != '\\' {
            push_char(&mut bytes, c);
            continue;
        }
        let Some(next) = chars.next() else {
            // Trailing backslash with nothing to escape: keep it literally.
            bytes.push(b'\\');
            break;
        };
        match next {
            'n' => bytes.push(b'\n'),
            'r' => bytes.push(b'\r'),
            't' => bytes.push(b'\t'),
            'b' => bytes.push(0x08),
            'f' => bytes.push(0x0C),
            'a' => bytes.push(0x07),
            'v' => bytes.push(0x0B),
            '\\' => bytes.push(b'\\'),
            '"' => bytes.push(b'"'),
            '\'' => bytes.push(b'\''),
            '0'..='7' => {
                let mut value = next.to_digit(8).unwrap_or(0);
                // Up to two further octal digits belong to the same escape.
                for _ in 0..2 {
                    let Some(digit) = chars.peek().and_then(|d| d.to_digit(8)) else {
                        break;
                    };
                    value = value * 8 + digit;
                    chars.next();
                }
                match u8::try_from(value) {
                    Ok(byte) => bytes.push(byte),
                    // `\400`..=`\777` do not fit in a byte; git never emits
                    // them, so fall back to treating the value as a code point.
                    Err(_) => {
                        if let Some(ch) = char::from_u32(value) {
                            push_char(&mut bytes, ch);
                        }
                    }
                }
            }
            other => push_char(&mut bytes, other),
        }
    }
    String::from_utf8_lossy(&bytes).into_owned()
}
/// Stage the paths named by `diff` that currently exist under `git_root`.
///
/// Paths are taken from the diff's `diff --git` headers. A path counts as
/// existing when `symlink_metadata` succeeds for it, so symlinks themselves
/// qualify. If none exist, git is not invoked.
///
/// Staging is best-effort: a non-zero exit status from `git add` is ignored on
/// purpose.
///
/// # Errors
///
/// Fails only when the `git add` process cannot be spawned.
pub fn stage_paths(git_root: &Path, diff: &str) -> io::Result<()> {
    let present: Vec<String> = extract_paths_from_patch(diff)
        .into_iter()
        .filter(|rel| std::fs::symlink_metadata(git_root.join(rel)).is_ok())
        .collect();
    if present.is_empty() {
        return Ok(());
    }

    // The exit status is deliberately not inspected; see the doc comment.
    std::process::Command::new("git")
        .arg("add")
        .arg("--")
        .args(present.iter().map(|rel| OsStr::new(rel)))
        .current_dir(git_root)
        .output()?;
    Ok(())
}
// ============ Parser ported from VS Code (TS) ============
/// Parse `git apply` output into `(applied, skipped, conflicted)` path lists.
///
/// `stdout` and `stderr` are concatenated (stdout first) and scanned line by
/// line against the known `git apply` messages. Each recognised line moves the
/// path it mentions into the matching group and removes it from the other
/// two, so the most recent status wins while scanning. Messages that carry no
/// path ("Failed to perform three-way merge...", "repository lacks the
/// necessary blob ...") are attributed to the most recently mentioned path.
///
/// Paths are trimmed; when wrapped in matching quotes they are unquoted and
/// C-unescaped. After the scan a final precedence pass makes the groups
/// disjoint: conflicts > applied > skipped. Each returned list is sorted and
/// free of duplicates.
pub fn parse_git_apply_output(
    stdout: &str,
    stderr: &str,
) -> (Vec<String>, Vec<String>, Vec<String>) {
    type PathSet = std::collections::BTreeSet<String>;

    /// Trim `raw` and, if it is wrapped in matching quotes, unquote and
    /// unescape it. Returns `None` when nothing usable remains.
    fn normalize(raw: &str) -> Option<String> {
        let trimmed = raw.trim();
        if trimmed.is_empty() {
            return None;
        }
        let first = trimmed.chars().next().unwrap_or('\0');
        let last = trimmed.chars().last().unwrap_or('\0');
        let unquoted = if (first == '"' || first == '\'') && last == first && trimmed.len() >= 2 {
            unescape_c_string(&trimmed[1..trimmed.len() - 1])
        } else {
            trimmed.to_string()
        };
        if unquoted.is_empty() { None } else { Some(unquoted) }
    }

    /// Put the path named by `raw` into `target`, evict it from the two other
    /// groups, and remember it as the last path seen.
    ///
    /// The normalized path itself is used for the eviction and for
    /// `last_seen`. It must not be re-read from `target`: the set is ordered
    /// by value, so its last element is the largest path, not the newest.
    fn classify(
        raw: &str,
        target: &mut PathSet,
        stale_a: &mut PathSet,
        stale_b: &mut PathSet,
        last_seen: &mut Option<String>,
    ) {
        let Some(path) = normalize(raw) else {
            return;
        };
        stale_a.remove(&path);
        stale_b.remove(&path);
        target.insert(path.clone());
        *last_seen = Some(path);
    }

    let combined = [stdout, stderr]
        .iter()
        .filter(|s| !s.is_empty())
        .cloned()
        .collect::<Vec<&str>>()
        .join("\n");

    let mut applied = PathSet::new();
    let mut skipped = PathSet::new();
    let mut conflicted = PathSet::new();
    // Always stored in normalized (unquoted) form so it matches set entries.
    let mut last_seen_path: Option<String> = None;

    static APPLIED_CLEAN: Lazy<Regex> =
        Lazy::new(|| regex_ci("^Applied patch(?: to)?\\s+(?P<path>.+?)\\s+cleanly\\.?$"));
    static APPLIED_CONFLICTS: Lazy<Regex> =
        Lazy::new(|| regex_ci("^Applied patch(?: to)?\\s+(?P<path>.+?)\\s+with conflicts\\.?$"));
    static APPLYING_WITH_REJECTS: Lazy<Regex> = Lazy::new(|| {
        regex_ci("^Applying patch\\s+(?P<path>.+?)\\s+with\\s+\\d+\\s+rejects?\\.{0,3}$")
    });
    static CHECKING_PATCH: Lazy<Regex> =
        Lazy::new(|| regex_ci("^Checking patch\\s+(?P<path>.+?)\\.\\.\\.$"));
    static UNMERGED_LINE: Lazy<Regex> = Lazy::new(|| regex_ci("^U\\s+(?P<path>.+)$"));
    static PATCH_FAILED: Lazy<Regex> =
        Lazy::new(|| regex_ci("^error:\\s+patch failed:\\s+(?P<path>.+?)(?::\\d+)?(?:\\s|$)"));
    static DOES_NOT_APPLY: Lazy<Regex> =
        Lazy::new(|| regex_ci("^error:\\s+(?P<path>.+?):\\s+patch does not apply$"));
    static THREE_WAY_START: Lazy<Regex> = Lazy::new(|| {
        regex_ci("^(?:Performing three-way merge|Falling back to three-way merge)\\.\\.\\.$")
    });
    static THREE_WAY_FAILED: Lazy<Regex> =
        Lazy::new(|| regex_ci("^Failed to perform three-way merge\\.\\.\\.$"));
    static FALLBACK_DIRECT: Lazy<Regex> =
        Lazy::new(|| regex_ci("^Falling back to direct application\\.\\.\\.$"));
    static LACKS_BLOB: Lazy<Regex> = Lazy::new(|| {
        regex_ci(
            "^(?:error: )?repository lacks the necessary blob to (?:perform|fall back on) 3-?way merge\\.?$",
        )
    });
    static INDEX_MISMATCH: Lazy<Regex> =
        Lazy::new(|| regex_ci("^error:\\s+(?P<path>.+?):\\s+does not match index\\b"));
    static NOT_IN_INDEX: Lazy<Regex> =
        Lazy::new(|| regex_ci("^error:\\s+(?P<path>.+?):\\s+does not exist in index\\b"));
    static ALREADY_EXISTS_WT: Lazy<Regex> = Lazy::new(|| {
        regex_ci("^error:\\s+(?P<path>.+?)\\s+already exists in (?:the )?working directory\\b")
    });
    static FILE_EXISTS: Lazy<Regex> =
        Lazy::new(|| regex_ci("^error:\\s+patch failed:\\s+(?P<path>.+?)\\s+File exists"));
    static RENAMED_DELETED: Lazy<Regex> =
        Lazy::new(|| regex_ci("^error:\\s+path\\s+(?P<path>.+?)\\s+has been renamed\\/deleted"));
    static CANNOT_APPLY_BINARY: Lazy<Regex> = Lazy::new(|| {
        regex_ci(
            "^error:\\s+cannot apply binary patch to\\s+['\\\"]?(?P<path>.+?)['\\\"]?\\s+without full index line$",
        )
    });
    static BINARY_DOES_NOT_APPLY: Lazy<Regex> = Lazy::new(|| {
        regex_ci("^error:\\s+binary patch does not apply to\\s+['\\\"]?(?P<path>.+?)['\\\"]?$")
    });
    static BINARY_INCORRECT_RESULT: Lazy<Regex> = Lazy::new(|| {
        regex_ci(
            "^error:\\s+binary patch to\\s+['\\\"]?(?P<path>.+?)['\\\"]?\\s+creates incorrect result\\b",
        )
    });
    static CANNOT_READ_CURRENT: Lazy<Regex> = Lazy::new(|| {
        regex_ci("^error:\\s+cannot read the current contents of\\s+['\\\"]?(?P<path>.+?)['\\\"]?$")
    });
    static SKIPPED_PATCH: Lazy<Regex> =
        Lazy::new(|| regex_ci("^Skipped patch\\s+['\\\"]?(?P<path>.+?)['\\\"]\\.$"));
    static CANNOT_MERGE_BINARY_WARN: Lazy<Regex> = Lazy::new(|| {
        regex_ci(
            "^warning:\\s*Cannot merge binary files:\\s+(?P<path>.+?)\\s+\\(ours\\s+vs\\.\\s+theirs\\)",
        )
    });

    for raw_line in combined.lines() {
        let line = raw_line.trim();
        if line.is_empty() {
            continue;
        }

        // === "Checking patch <path>..." only updates the tracking cursor ===
        if let Some(c) = CHECKING_PATCH.captures(line) {
            if let Some(path) = c.name("path").and_then(|m| normalize(m.as_str())) {
                last_seen_path = Some(path);
            }
            continue;
        }

        // === Status lines: applied cleanly ===
        if let Some(c) = APPLIED_CLEAN.captures(line) {
            if let Some(m) = c.name("path") {
                classify(
                    m.as_str(),
                    &mut applied,
                    &mut conflicted,
                    &mut skipped,
                    &mut last_seen_path,
                );
            }
            continue;
        }

        // === Status lines: conflicts, rejects, and "U <path>" entries ===
        if let Some(c) = APPLIED_CONFLICTS
            .captures(line)
            .or_else(|| APPLYING_WITH_REJECTS.captures(line))
            .or_else(|| UNMERGED_LINE.captures(line))
        {
            if let Some(m) = c.name("path") {
                classify(
                    m.as_str(),
                    &mut conflicted,
                    &mut applied,
                    &mut skipped,
                    &mut last_seen_path,
                );
            }
            continue;
        }

        // === Early hints: provisionally skipped. A later status line for the
        // same path may still override this, so nothing is evicted here. ===
        if let Some(c) = PATCH_FAILED
            .captures(line)
            .or_else(|| DOES_NOT_APPLY.captures(line))
        {
            if let Some(path) = c.name("path").and_then(|m| normalize(m.as_str())) {
                skipped.insert(path.clone());
                last_seen_path = Some(path);
            }
            continue;
        }

        // === Ignore narration ===
        if THREE_WAY_START.is_match(line) || FALLBACK_DIRECT.is_match(line) {
            continue;
        }

        // === 3-way failed entirely; attribute to the last path seen ===
        if THREE_WAY_FAILED.is_match(line) || LACKS_BLOB.is_match(line) {
            if let Some(path) = last_seen_path.as_ref() {
                applied.remove(path);
                conflicted.remove(path);
                skipped.insert(path.clone());
            }
            continue;
        }

        // === Skips / I/O problems ===
        if let Some(c) = INDEX_MISMATCH
            .captures(line)
            .or_else(|| NOT_IN_INDEX.captures(line))
            .or_else(|| ALREADY_EXISTS_WT.captures(line))
            .or_else(|| FILE_EXISTS.captures(line))
            .or_else(|| RENAMED_DELETED.captures(line))
            .or_else(|| CANNOT_APPLY_BINARY.captures(line))
            .or_else(|| BINARY_DOES_NOT_APPLY.captures(line))
            .or_else(|| BINARY_INCORRECT_RESULT.captures(line))
            .or_else(|| CANNOT_READ_CURRENT.captures(line))
            .or_else(|| SKIPPED_PATCH.captures(line))
        {
            if let Some(m) = c.name("path") {
                classify(
                    m.as_str(),
                    &mut skipped,
                    &mut applied,
                    &mut conflicted,
                    &mut last_seen_path,
                );
            }
            continue;
        }

        // === Warnings that imply conflicts ===
        if let Some(c) = CANNOT_MERGE_BINARY_WARN.captures(line) {
            if let Some(m) = c.name("path") {
                classify(
                    m.as_str(),
                    &mut conflicted,
                    &mut applied,
                    &mut skipped,
                    &mut last_seen_path,
                );
            }
            continue;
        }
    }

    // Final precedence: conflicts > applied > skipped
    applied.retain(|p| !conflicted.contains(p));
    skipped.retain(|p| !conflicted.contains(p) && !applied.contains(p));

    (
        applied.into_iter().collect(),
        skipped.into_iter().collect(),
        conflicted.into_iter().collect(),
    )
}
/// Compile `pat` as a case-insensitive regex by prefixing it with the `(?i)` flag.
///
/// # Panics
///
/// Panics with `invalid regex: <error>` when the pattern does not compile.
/// Every caller in this file passes a hard-coded pattern, so a panic here
/// means a bug in that pattern.
fn regex_ci(pat: &str) -> Regex {
    let pattern = format!("(?i){pat}");
    match Regex::new(&pattern) {
        Ok(compiled) => compiled,
        Err(e) => panic!("invalid regex: {e}"),
    }
}
// Tests for the patch helpers. The `apply_*` / `*preflight*` tests shell out
// to the real `git` binary inside throwaway repositories; the `extract_*` and
// `parse_*` tests exercise the pure parsing helpers only.
#[cfg(test)]
mod tests {
use super::*;
use std::path::Path;
use std::sync::Mutex;
use std::sync::OnceLock;
// Process-wide lock taken by every test that calls `apply_git_patch`.
// NOTE(review): presumably this serializes them because `apply_git_patch`
// reads the `CODEX_APPLY_GIT_CFG` environment variable — confirm.
fn env_lock() -> &'static Mutex<()> {
static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
LOCK.get_or_init(|| Mutex::new(()))
}
// Runs `args[0]` with the remaining arguments in `cwd` and returns
// `(exit_code, stdout, stderr)`; the exit code is -1 when none is reported.
fn run(cwd: &Path, args: &[&str]) -> (i32, String, String) {
let out = std::process::Command::new(args[0])
.args(&args[1..])
.current_dir(cwd)
.output()
.expect("spawn ok");
(
out.status.code().unwrap_or(-1),
String::from_utf8_lossy(&out.stdout).into_owned(),
String::from_utf8_lossy(&out.stderr).into_owned(),
)
}
// Creates a temporary directory holding a fresh git repository with a
// committer identity configured. The results of the git commands are ignored.
fn init_repo() -> tempfile::TempDir {
let dir = tempfile::tempdir().expect("tempdir");
let root = dir.path();
// git init and minimal identity
let _ = run(root, &["git", "init"]);
let _ = run(root, &["git", "config", "user.email", "codex@example.com"]);
let _ = run(root, &["git", "config", "user.name", "Codex"]);
dir
}
// Reads a file and converts CRLF line endings to LF so content assertions
// do not depend on the line-ending style on disk.
fn read_file_normalized(path: &Path) -> String {
std::fs::read_to_string(path)
.expect("read file")
.replace("\r\n", "\n")
}
// Quoted header paths containing spaces are parsed as a single path.
#[test]
fn extract_paths_handles_quoted_headers() {
let diff = "diff --git \"a/hello world.txt\" \"b/hello world.txt\"\nnew file mode 100644\n--- /dev/null\n+++ b/hello world.txt\n@@ -0,0 +1 @@\n+hi\n";
let paths = extract_paths_from_patch(diff);
assert_eq!(paths, vec!["hello world.txt".to_string()]);
}
// An `a/dev/null` header side is treated as "no file" and dropped.
#[test]
fn extract_paths_ignores_dev_null_header() {
let diff = "diff --git a/dev/null b/ok.txt\nnew file mode 100644\n--- /dev/null\n+++ b/ok.txt\n@@ -0,0 +1 @@\n+hi\n";
let paths = extract_paths_from_patch(diff);
assert_eq!(paths, vec!["ok.txt".to_string()]);
}
// C-style escapes inside quoted header paths are decoded (`\t` becomes a tab).
#[test]
fn extract_paths_unescapes_c_style_in_quoted_headers() {
let diff = "diff --git \"a/hello\\tworld.txt\" \"b/hello\\tworld.txt\"\nnew file mode 100644\n--- /dev/null\n+++ b/hello\tworld.txt\n@@ -0,0 +1 @@\n+hi\n";
let paths = extract_paths_from_patch(diff);
assert_eq!(paths, vec!["hello\tworld.txt".to_string()]);
}
// Quoted paths in git's error output are unquoted and unescaped as well.
#[test]
fn parse_output_unescapes_quoted_paths() {
let stderr = "error: patch failed: \"hello\\tworld.txt\":1\n";
let (applied, skipped, conflicted) = parse_git_apply_output("", stderr);
assert_eq!(applied, Vec::<String>::new());
assert_eq!(conflicted, Vec::<String>::new());
assert_eq!(skipped, vec!["hello\tworld.txt".to_string()]);
}
// Adding a new file through a patch succeeds and creates the file.
#[test]
fn apply_add_success() {
let _g = env_lock().lock().unwrap();
let repo = init_repo();
let root = repo.path();
let diff = "diff --git a/hello.txt b/hello.txt\nnew file mode 100644\n--- /dev/null\n+++ b/hello.txt\n@@ -0,0 +1,2 @@\n+hello\n+world\n";
let req = ApplyGitRequest {
cwd: root.to_path_buf(),
diff: diff.to_string(),
revert: false,
preflight: false,
};
let r = apply_git_patch(&req).expect("run apply");
assert_eq!(r.exit_code, 0, "exit code 0");
// File exists now
assert!(root.join("hello.txt").exists());
}
// A patch that touches a line with an uncommitted local edit must exit non-zero.
#[test]
fn apply_modify_conflict() {
let _g = env_lock().lock().unwrap();
let repo = init_repo();
let root = repo.path();
// seed file and commit
std::fs::write(root.join("file.txt"), "line1\nline2\nline3\n").unwrap();
let _ = run(root, &["git", "add", "file.txt"]);
let _ = run(root, &["git", "commit", "-m", "seed"]);
// local edit (unstaged)
std::fs::write(root.join("file.txt"), "line1\nlocal2\nline3\n").unwrap();
// patch wants to change the same line differently
let diff = "diff --git a/file.txt b/file.txt\n--- a/file.txt\n+++ b/file.txt\n@@ -1,3 +1,3 @@\n line1\n-line2\n+remote2\n line3\n";
let req = ApplyGitRequest {
cwd: root.to_path_buf(),
diff: diff.to_string(),
revert: false,
preflight: false,
};
let r = apply_git_patch(&req).expect("run apply");
assert_ne!(r.exit_code, 0, "non-zero exit on conflict");
}
// Modifying a file that is neither on disk nor in the index must exit non-zero.
#[test]
fn apply_modify_skipped_missing_index() {
let _g = env_lock().lock().unwrap();
let repo = init_repo();
let root = repo.path();
// Try to modify a file that is not in the index
let diff = "diff --git a/ghost.txt b/ghost.txt\n--- a/ghost.txt\n+++ b/ghost.txt\n@@ -1,1 +1,1 @@\n-old\n+new\n";
let req = ApplyGitRequest {
cwd: root.to_path_buf(),
diff: diff.to_string(),
revert: false,
preflight: false,
};
let r = apply_git_patch(&req).expect("run apply");
assert_ne!(r.exit_code, 0, "non-zero exit on missing index");
}
// Applying a patch and then reverting the same patch restores the original content.
#[test]
fn apply_then_revert_success() {
let _g = env_lock().lock().unwrap();
let repo = init_repo();
let root = repo.path();
// Seed file and commit original content
std::fs::write(root.join("file.txt"), "orig\n").unwrap();
let _ = run(root, &["git", "add", "file.txt"]);
let _ = run(root, &["git", "commit", "-m", "seed"]);
// Forward patch: orig -> ORIG
let diff = "diff --git a/file.txt b/file.txt\n--- a/file.txt\n+++ b/file.txt\n@@ -1,1 +1,1 @@\n-orig\n+ORIG\n";
let apply_req = ApplyGitRequest {
cwd: root.to_path_buf(),
diff: diff.to_string(),
revert: false,
preflight: false,
};
let res_apply = apply_git_patch(&apply_req).expect("apply ok");
assert_eq!(res_apply.exit_code, 0, "forward apply succeeded");
let after_apply = read_file_normalized(&root.join("file.txt"));
assert_eq!(after_apply, "ORIG\n");
// Revert patch: ORIG -> orig (stage paths first; engine handles it)
let revert_req = ApplyGitRequest {
cwd: root.to_path_buf(),
diff: diff.to_string(),
revert: true,
preflight: false,
};
let res_revert = apply_git_patch(&revert_req).expect("revert ok");
assert_eq!(res_revert.exit_code, 0, "revert apply succeeded");
let after_revert = read_file_normalized(&root.join("file.txt"));
assert_eq!(after_revert, "orig\n");
}
// A revert preflight must leave both the index and the working tree untouched.
#[test]
fn revert_preflight_does_not_stage_index() {
let _g = env_lock().lock().unwrap();
let repo = init_repo();
let root = repo.path();
// Seed repo and apply forward patch so the working tree reflects the change.
std::fs::write(root.join("file.txt"), "orig\n").unwrap();
let _ = run(root, &["git", "add", "file.txt"]);
let _ = run(root, &["git", "commit", "-m", "seed"]);
let diff = "diff --git a/file.txt b/file.txt\n--- a/file.txt\n+++ b/file.txt\n@@ -1,1 +1,1 @@\n-orig\n+ORIG\n";
let apply_req = ApplyGitRequest {
cwd: root.to_path_buf(),
diff: diff.to_string(),
revert: false,
preflight: false,
};
let res_apply = apply_git_patch(&apply_req).expect("apply ok");
assert_eq!(res_apply.exit_code, 0, "forward apply succeeded");
let (commit_code, _, commit_err) = run(root, &["git", "commit", "-am", "apply change"]);
assert_eq!(commit_code, 0, "commit applied change: {commit_err}");
// Snapshot the staged file list before the preflight for later comparison.
let (_code_before, staged_before, _stderr_before) =
run(root, &["git", "diff", "--cached", "--name-only"]);
let preflight_req = ApplyGitRequest {
cwd: root.to_path_buf(),
diff: diff.to_string(),
revert: true,
preflight: true,
};
let res_preflight = apply_git_patch(&preflight_req).expect("preflight ok");
assert_eq!(res_preflight.exit_code, 0, "revert preflight succeeded");
let (_code_after, staged_after, _stderr_after) =
run(root, &["git", "diff", "--cached", "--name-only"]);
assert_eq!(
staged_after.trim(),
staged_before.trim(),
"preflight should not stage new paths",
);
let after_preflight = read_file_normalized(&root.join("file.txt"));
assert_eq!(after_preflight, "ORIG\n");
}
// With one valid and one invalid file in the diff, preflight reports failure
// and changes nothing; the logged command reflects whether `--check` was used.
#[test]
fn preflight_blocks_partial_changes() {
let _g = env_lock().lock().unwrap();
let repo = init_repo();
let root = repo.path();
// Build a multi-file diff: one valid add (ok.txt) and one invalid modify (ghost.txt)
let diff = "diff --git a/ok.txt b/ok.txt\nnew file mode 100644\n--- /dev/null\n+++ b/ok.txt\n@@ -0,0 +1,2 @@\n+alpha\n+beta\n\n\
diff --git a/ghost.txt b/ghost.txt\n--- a/ghost.txt\n+++ b/ghost.txt\n@@ -1,1 +1,1 @@\n-old\n+new\n";
// 1) With preflight enabled, nothing should be changed (even though ok.txt could be added)
let req1 = ApplyGitRequest {
cwd: root.to_path_buf(),
diff: diff.to_string(),
revert: false,
preflight: true,
};
let r1 = apply_git_patch(&req1).expect("preflight apply");
assert_ne!(r1.exit_code, 0, "preflight reports failure");
assert!(
!root.join("ok.txt").exists(),
"preflight must prevent adding ok.txt"
);
assert!(
r1.cmd_for_log.contains("--check"),
"preflight path recorded --check"
);
// 2) Without preflight, we should see no --check in the executed command
let req2 = ApplyGitRequest {
cwd: root.to_path_buf(),
diff: diff.to_string(),
revert: false,
preflight: false,
};
let r2 = apply_git_patch(&req2).expect("direct apply");
assert_ne!(r2.exit_code, 0, "apply is expected to fail overall");
assert!(
!r2.cmd_for_log.contains("--check"),
"non-preflight path should not use --check"
);
}
}

View File

@@ -0,0 +1,256 @@
use std::ffi::OsString;
use std::path::Path;
use crate::GitToolingError;
use crate::operations::ensure_git_repository;
use crate::operations::resolve_head;
use crate::operations::resolve_repository_root;
use crate::operations::run_git_for_stdout;
/// Compute the merge base between `HEAD` and `branch`.
///
/// The comparison target is the local `branch`, unless its upstream has
/// commits the local branch lacks *and* that upstream ref resolves; in that
/// case the upstream is used instead.
///
/// This mirrors `git merge-base HEAD <branch>`, except that `Ok(None)` is
/// returned when the repository has no `HEAD` yet or when `branch` cannot be
/// resolved.
///
/// # Errors
///
/// Propagates failures from the repository checks and from the final
/// `git merge-base` invocation.
pub fn merge_base_with_head(
    repo_path: &Path,
    branch: &str,
) -> Result<Option<String>, GitToolingError> {
    ensure_git_repository(repo_path)?;
    let repo_root = resolve_repository_root(repo_path)?;
    let root = repo_root.as_path();

    let Some(head) = resolve_head(root)? else {
        return Ok(None);
    };
    let Some(local_ref) = resolve_branch_ref(root, branch)? else {
        return Ok(None);
    };

    // Prefer the upstream only when it is ahead and actually resolves.
    let target_ref = match resolve_upstream_if_remote_ahead(root, branch)? {
        Some(upstream) => resolve_branch_ref(root, &upstream)?.unwrap_or(local_ref),
        None => local_ref,
    };

    let args = vec![
        OsString::from("merge-base"),
        OsString::from(head),
        OsString::from(target_ref),
    ];
    run_git_for_stdout(root, args, /*env*/ None).map(Some)
}
/// Resolve `branch` via `git rev-parse --verify`.
///
/// Returns `Ok(None)` when the git command itself fails (treated here as
/// "the ref does not exist"); any other kind of error is propagated.
fn resolve_branch_ref(repo_root: &Path, branch: &str) -> Result<Option<String>, GitToolingError> {
    let args = vec![
        OsString::from("rev-parse"),
        OsString::from("--verify"),
        OsString::from(branch),
    ];
    run_git_for_stdout(repo_root, args, /*env*/ None)
        .map(Some)
        .or_else(|err| match err {
            GitToolingError::GitCommand { .. } => Ok(None),
            other => Err(other),
        })
}
/// Return the upstream name of `branch` when that upstream has commits the
/// local branch lacks.
///
/// Two git queries are made:
///
/// 1. `git rev-parse --abbrev-ref --symbolic-full-name <branch>@{upstream}`
///    to find the upstream name.
/// 2. `git rev-list --left-right --count <branch>...<upstream>`, whose second
///    number is read as the count of commits present only on the upstream.
///
/// `Ok(None)` is returned when there is no upstream, when either git command
/// fails, or when the upstream-only count is zero or cannot be parsed. Errors
/// other than a failed git command are propagated.
fn resolve_upstream_if_remote_ahead(
    repo_root: &Path,
    branch: &str,
) -> Result<Option<String>, GitToolingError> {
    let upstream_query = vec![
        OsString::from("rev-parse"),
        OsString::from("--abbrev-ref"),
        OsString::from("--symbolic-full-name"),
        OsString::from(format!("{branch}@{{upstream}}")),
    ];
    let upstream = match run_git_for_stdout(repo_root, upstream_query, /*env*/ None) {
        Ok(name) if name.trim().is_empty() => return Ok(None),
        Ok(name) => name.trim().to_string(),
        Err(GitToolingError::GitCommand { .. }) => return Ok(None),
        Err(other) => return Err(other),
    };

    let count_query = vec![
        OsString::from("rev-list"),
        OsString::from("--left-right"),
        OsString::from("--count"),
        OsString::from(format!("{branch}...{upstream}")),
    ];
    let counts = match run_git_for_stdout(repo_root, count_query, /*env*/ None) {
        Ok(counts) => counts,
        Err(GitToolingError::GitCommand { .. }) => return Ok(None),
        Err(other) => return Err(other),
    };

    // Output is "<left> <right>"; only the right-hand (upstream-only) count
    // matters. A missing or unparsable value counts as zero.
    let upstream_only: i64 = counts
        .split_whitespace()
        .nth(1)
        .and_then(|count| count.parse().ok())
        .unwrap_or(0);

    Ok(if upstream_only > 0 { Some(upstream) } else { None })
}
// Integration tests for `merge_base_with_head`; each one builds a throwaway
// repository by driving the real `git` binary.
#[cfg(test)]
mod tests {
use super::merge_base_with_head;
use crate::GitToolingError;
use pretty_assertions::assert_eq;
use std::path::Path;
use std::process::Command;
use tempfile::tempdir;
// Runs `git <args>` in `repo_path` and panics if the command fails.
fn run_git_in(repo_path: &Path, args: &[&str]) {
let status = Command::new("git")
.current_dir(repo_path)
.args(args)
.status()
.expect("git command");
assert!(status.success(), "git command failed: {args:?}");
}
// Runs `git <args>` in `repo_path`, panics on failure, and returns trimmed stdout.
fn run_git_stdout(repo_path: &Path, args: &[&str]) -> String {
let output = Command::new("git")
.current_dir(repo_path)
.args(args)
.output()
.expect("git command");
assert!(output.status.success(), "git command failed: {args:?}");
String::from_utf8_lossy(&output.stdout).trim().to_string()
}
// Initializes a repository whose initial branch is `main`, with `core.autocrlf` disabled.
fn init_test_repo(repo_path: &Path) {
run_git_in(repo_path, &["init", "--initial-branch=main"]);
run_git_in(repo_path, &["config", "core.autocrlf", "false"]);
}
// Commits the staged changes with a fixed identity passed via `-c`, so the
// identity is supplied on the command line rather than from git config.
fn commit(repo_path: &Path, message: &str) {
run_git_in(
repo_path,
&[
"-c",
"user.name=Tester",
"-c",
"user.email=test@example.com",
"commit",
"-m",
message,
],
);
}
// `main` and `feature` diverge after a shared base commit; the result must
// match what `git merge-base HEAD main` reports.
#[test]
fn merge_base_returns_shared_commit() -> Result<(), GitToolingError> {
let temp = tempdir()?;
let repo = temp.path();
init_test_repo(repo);
std::fs::write(repo.join("base.txt"), "base\n")?;
run_git_in(repo, &["add", "base.txt"]);
commit(repo, "base commit");
run_git_in(repo, &["checkout", "-b", "feature"]);
std::fs::write(repo.join("feature.txt"), "feature change\n")?;
run_git_in(repo, &["add", "feature.txt"]);
commit(repo, "feature commit");
run_git_in(repo, &["checkout", "main"]);
std::fs::write(repo.join("main.txt"), "main change\n")?;
run_git_in(repo, &["add", "main.txt"]);
commit(repo, "main commit");
run_git_in(repo, &["checkout", "feature"]);
let expected = run_git_stdout(repo, &["merge-base", "HEAD", "main"]);
let merge_base = merge_base_with_head(repo, "main")?;
assert_eq!(merge_base, Some(expected));
Ok(())
}
// Local `main` is replaced by an unrelated orphan history while `origin/main`
// still holds the original base commit; the merge base must be computed
// against `origin/main`.
#[test]
fn merge_base_prefers_upstream_when_remote_ahead() -> Result<(), GitToolingError> {
let temp = tempdir()?;
let repo = temp.path().join("repo");
let remote = temp.path().join("remote.git");
std::fs::create_dir_all(&repo)?;
std::fs::create_dir_all(&remote)?;
run_git_in(&remote, &["init", "--bare"]);
run_git_in(&repo, &["init", "--initial-branch=main"]);
run_git_in(&repo, &["config", "core.autocrlf", "false"]);
std::fs::write(repo.join("base.txt"), "base\n")?;
run_git_in(&repo, &["add", "base.txt"]);
commit(&repo, "base commit");
run_git_in(
&repo,
&["remote", "add", "origin", remote.to_str().unwrap()],
);
run_git_in(&repo, &["push", "-u", "origin", "main"]);
run_git_in(&repo, &["checkout", "-b", "feature"]);
std::fs::write(repo.join("feature.txt"), "feature change\n")?;
run_git_in(&repo, &["add", "feature.txt"]);
commit(&repo, "feature commit");
// Rewrite local `main` as an orphan history that shares nothing with `feature`.
run_git_in(&repo, &["checkout", "--orphan", "rewrite"]);
run_git_in(&repo, &["rm", "-rf", "."]);
std::fs::write(repo.join("new-main.txt"), "rewritten main\n")?;
run_git_in(&repo, &["add", "new-main.txt"]);
commit(&repo, "rewrite main");
run_git_in(&repo, &["branch", "-M", "rewrite", "main"]);
run_git_in(&repo, &["branch", "--set-upstream-to=origin/main", "main"]);
run_git_in(&repo, &["checkout", "feature"]);
run_git_in(&repo, &["fetch", "origin"]);
let expected = run_git_stdout(&repo, &["merge-base", "HEAD", "origin/main"]);
let merge_base = merge_base_with_head(&repo, "main")?;
assert_eq!(merge_base, Some(expected));
Ok(())
}
// A branch name that does not resolve yields `Ok(None)` rather than an error.
#[test]
fn merge_base_returns_none_when_branch_missing() -> Result<(), GitToolingError> {
let temp = tempdir()?;
let repo = temp.path();
init_test_repo(repo);
std::fs::write(repo.join("tracked.txt"), "tracked\n")?;
run_git_in(repo, &["add", "tracked.txt"]);
commit(repo, "initial");
let merge_base = merge_base_with_head(repo, "missing-branch")?;
assert_eq!(merge_base, None);
Ok(())
}
}

View File

@@ -0,0 +1,35 @@
use std::path::PathBuf;
use std::process::ExitStatus;
use std::string::FromUtf8Error;
use thiserror::Error;
use walkdir::Error as WalkdirError;
/// Errors returned while managing git worktree snapshots.
#[derive(Debug, Error)]
pub enum GitToolingError {
/// A `git` invocation exited with a non-success status; carries the
/// rendered command line, the exit status and the captured stderr text.
#[error("git command `{command}` failed with status {status}: {stderr}")]
GitCommand {
command: String,
status: ExitStatus,
stderr: String,
},
/// A `git` invocation succeeded but its stdout was not valid UTF-8.
#[error("git command `{command}` produced non-UTF-8 output")]
GitOutputUtf8 {
command: String,
#[source]
source: FromUtf8Error,
},
/// The given path is not inside a git repository.
#[error("{path:?} is not a git repository")]
NotAGitRepository { path: PathBuf },
/// A path that was required to be repository-relative was not.
#[error("path {path:?} must be relative to the repository root")]
NonRelativePath { path: PathBuf },
/// A relative path climbed above the repository root via `..`.
#[error("path {path:?} escapes the repository root")]
PathEscapesRepository { path: PathBuf },
/// Stripping an expected prefix from a path failed.
#[error("failed to process path inside worktree")]
PathPrefix(#[from] std::path::StripPrefixError),
/// Error bubbled up from a `walkdir` traversal.
#[error(transparent)]
Walkdir(#[from] WalkdirError),
/// Underlying I/O failure (including failure to spawn `git`).
#[error(transparent)]
Io(#[from] std::io::Error),
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,710 @@
use std::collections::BTreeMap;
use std::collections::HashSet;
use std::ffi::OsStr;
use std::path::Path;
use std::path::PathBuf;
use codex_utils_absolute_path::AbsolutePathBuf;
use futures::future::join_all;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use tokio::process::Command;
use tokio::time::Duration as TokioDuration;
use tokio::time::timeout;
use ts_rs::TS;
use crate::GitSha;
/// Return the root of the Git checkout that contains `base_dir`, if any.
///
/// The lookup walks up the directory hierarchy and returns the first
/// directory holding a `.git` entry. Both a `.git` directory and a `.git`
/// file (one that contains a `gitdir` pointer) count as a match, so the
/// returned path is the directory that owns that entry.
///
/// When `base_dir` is not a directory, the search starts at its parent.
///
/// This is a pure filesystem check: it requires neither the `git` binary nor
/// the `git2` crate and is therefore fairly lightweight. Returns `None` when
/// no ancestor contains a `.git` entry.
pub fn get_git_repo_root(base_dir: &Path) -> Option<PathBuf> {
    let start = if base_dir.is_dir() {
        base_dir
    } else {
        base_dir.parent()?
    };
    let (repo_root, _dot_git) = find_ancestor_git_entry(start)?;
    Some(repo_root)
}
/// Timeout for git commands to prevent freezing on large repositories.
/// Each git subprocess is given at most this long (5 seconds) to finish.
const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
/// Summary of a repository's current state: HEAD commit, branch and origin URL.
///
/// Every field is optional and is omitted from the serialized form when
/// it is `None`.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, TS)]
pub struct GitInfo {
/// Current commit hash (SHA)
#[serde(skip_serializing_if = "Option::is_none")]
pub commit_hash: Option<GitSha>,
/// Current branch name
#[serde(skip_serializing_if = "Option::is_none")]
pub branch: Option<String>,
/// Repository URL (if available from remote)
#[serde(skip_serializing_if = "Option::is_none")]
pub repository_url: Option<String>,
}
/// A base commit together with the textual diff of the working tree against it.
#[derive(Serialize, Deserialize, Clone, Debug)]
pub struct GitDiffToRemote {
/// Commit the diff was computed against.
pub sha: GitSha,
/// Output of `git diff` against `sha`.
pub diff: String,
}
/// Gather commit hash, branch name and `origin` URL for the repository at `cwd`.
///
/// Uses command-line git with a per-command timeout. Returns `None` when the
/// initial `git rev-parse --git-dir` probe cannot run, times out, or reports
/// failure. After the probe, the three lookups run concurrently; each field
/// whose lookup fails is simply left as `None`. A detached HEAD (git reports
/// the branch as the literal `HEAD`) yields `branch: None`.
pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
    // Bail out early when we are not inside a repository.
    let probe = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd).await?;
    if !probe.status.success() {
        return None;
    }

    // The three lookups are independent, so issue them together.
    let (commit_result, branch_result, url_result) = tokio::join!(
        run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
        run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
        run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd)
    );

    let commit_hash = commit_result
        .filter(|out| out.status.success())
        .and_then(|out| String::from_utf8(out.stdout).ok())
        .map(|hash| GitSha::new(hash.trim()));

    let branch = branch_result
        .filter(|out| out.status.success())
        .and_then(|out| String::from_utf8(out.stdout).ok())
        .map(|name| name.trim().to_string())
        // `--abbrev-ref` prints the literal "HEAD" when detached.
        .filter(|name| name != "HEAD");

    let repository_url = url_result
        .filter(|out| out.status.success())
        .and_then(|out| String::from_utf8(out.stdout).ok())
        .map(|url| url.trim().to_string());

    Some(GitInfo {
        commit_hash,
        branch,
        repository_url,
    })
}
/// Collect fetch remotes in a multi-root-friendly format: {"origin": "https://..."}.
///
/// Returns `None` when `cwd` is not inside a git repository (the
/// `git rev-parse --git-dir` probe fails or cannot run) or when no fetch
/// remote could be parsed.
pub async fn get_git_remote_urls(cwd: &Path) -> Option<BTreeMap<String, String>> {
    let probe = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd).await?;
    if probe.status.success() {
        get_git_remote_urls_assume_git_repo(cwd).await
    } else {
        None
    }
}
/// Collect fetch remotes without checking whether `cwd` is in a git repo.
///
/// Runs `git remote -v` and parses its output. Returns `None` if the command
/// fails, times out, prints non-UTF-8 text, or lists no fetch remotes.
pub async fn get_git_remote_urls_assume_git_repo(cwd: &Path) -> Option<BTreeMap<String, String>> {
    let output = run_git_command_with_timeout(&["remote", "-v"], cwd)
        .await
        .filter(|out| out.status.success())?;
    let listing = String::from_utf8(output.stdout).ok()?;
    parse_git_remote_urls(&listing)
}
/// Return the current HEAD commit hash without checking whether `cwd` is in a git repo.
///
/// Yields `None` when `git rev-parse HEAD` fails, times out, prints
/// non-UTF-8 text, or prints nothing but whitespace.
pub async fn get_head_commit_hash(cwd: &Path) -> Option<GitSha> {
    let output = run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd)
        .await
        .filter(|out| out.status.success())?;
    let stdout = String::from_utf8(output.stdout).ok()?;
    Some(stdout.trim())
        .filter(|hash| !hash.is_empty())
        .map(GitSha::new)
}
/// Report whether `git status --porcelain` prints anything for `cwd`.
///
/// Returns `Some(true)` when the command prints any output, `Some(false)`
/// when it prints nothing, and `None` when the command fails or times out.
pub async fn get_has_changes(cwd: &Path) -> Option<bool> {
    let status = run_git_command_with_timeout(&["status", "--porcelain"], cwd).await?;
    status
        .status
        .success()
        .then(|| !status.stdout.is_empty())
}
/// Parse `git remote -v` output into a map of remote name to fetch URL.
///
/// Only `(fetch)` entries are kept; `(push)` entries and malformed lines are
/// ignored. The name and URL may be separated by a tab (what git prints) or
/// by a space. Git appends ` [<filter>]` after `(fetch)` for remotes that
/// have a partial-clone filter configured (for example
/// `origin\t<url> (fetch) [blob:none]`); that trailing annotation is accepted
/// and discarded so such remotes are not silently dropped.
///
/// Returns `None` when no fetch remote with a non-empty URL is found.
fn parse_git_remote_urls(stdout: &str) -> Option<BTreeMap<String, String>> {
    let mut remotes = BTreeMap::new();
    for line in stdout.lines() {
        // Plain form first, then the partial-clone form with a bracketed filter.
        let fetch_line = line.strip_suffix(" (fetch)").or_else(|| {
            line.strip_suffix(']')
                .and_then(|rest| rest.rsplit_once(" (fetch) ["))
                .map(|(head, _filter)| head)
        });
        let Some(fetch_line) = fetch_line else {
            continue;
        };

        let Some((name, url_part)) = fetch_line
            .split_once('\t')
            .or_else(|| fetch_line.split_once(' '))
        else {
            continue;
        };

        let url = url_part.trim_start();
        if !url.is_empty() {
            remotes.insert(name.to_string(), url.to_string());
        }
    }

    if remotes.is_empty() {
        None
    } else {
        Some(remotes)
    }
}
/// A minimal commit summary entry used for pickers (subject + timestamp + sha).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct CommitLogEntry {
/// Commit hash of the entry.
pub sha: String,
/// Unix timestamp (seconds since epoch) of the commit time (committer time).
pub timestamp: i64,
/// Single-line subject of the commit message.
pub subject: String,
}
/// Return the last `limit` commits reachable from HEAD for the current branch.
///
/// Each entry carries the SHA, the commit timestamp (seconds) and the subject
/// line. A `limit` of `0` passes no `-n` flag to `git log`. Lines lacking a
/// SHA or timestamp are skipped, and an unparsable timestamp becomes `0`.
/// Returns an empty vector if not in a git repo or on error/timeout.
pub async fn recent_commits(cwd: &Path, limit: usize) -> Vec<CommitLogEntry> {
    // Probe for a repository first to avoid noisy errors from `git log`.
    let in_repo = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd)
        .await
        .is_some_and(|out| out.status.success());
    if !in_repo {
        return Vec::new();
    }

    let fmt = "%H%x1f%ct%x1f%s"; // <sha> <US> <commit_time> <US> <subject>
    let pretty = format!("--pretty=format:{fmt}");
    let count = limit.to_string();

    let mut args: Vec<&str> = vec!["log"];
    if limit > 0 {
        args.extend(["-n", count.as_str()]);
    }
    args.push(pretty.as_str());

    let Some(log_out) = run_git_command_with_timeout(&args, cwd).await else {
        return Vec::new();
    };
    if !log_out.status.success() {
        return Vec::new();
    }

    let text = String::from_utf8_lossy(&log_out.stdout);
    text.lines()
        .filter_map(|line| {
            // Fields are separated by the ASCII unit separator (0x1f).
            let mut fields = line.split('\u{001f}').map(str::trim);
            let sha = fields.next().unwrap_or("");
            let ts_s = fields.next().unwrap_or("");
            let subject = fields.next().unwrap_or("");
            if sha.is_empty() || ts_s.is_empty() {
                return None;
            }
            Some(CommitLogEntry {
                sha: sha.to_string(),
                timestamp: ts_s.parse::<i64>().unwrap_or(0),
                subject: subject.to_string(),
            })
        })
        .collect()
}
/// Returns the closest git sha to HEAD that is on a remote as well as the diff to that sha.
///
/// Yields `None` when `cwd` is not inside a repository, or when any step
/// (listing remotes, building the branch ancestry, locating a remote SHA,
/// computing the diff) fails.
pub async fn git_diff_to_remote(cwd: &Path) -> Option<GitDiffToRemote> {
    // Cheap filesystem check before spawning any git process.
    get_git_repo_root(cwd)?;

    let remotes = get_git_remotes(cwd).await?;
    let branches = branch_ancestry(cwd).await?;
    let sha = find_closest_sha(cwd, &branches, &remotes).await?;
    let diff = diff_against_sha(cwd, &sha).await?;

    Some(GitDiffToRemote { sha, diff })
}
/// Run `git <args>` in `cwd`, giving up after [`GIT_COMMAND_TIMEOUT`].
///
/// The child runs with `GIT_OPTIONAL_LOCKS=0` and is killed if the future is
/// dropped. Returns `None` on timeout or when the process could not be run;
/// a non-zero exit status is still returned as `Some(output)` for the caller
/// to inspect.
async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
    let mut git = Command::new("git");
    git.env("GIT_OPTIONAL_LOCKS", "0")
        .args(args)
        .current_dir(cwd)
        .kill_on_drop(true);

    // Outer `Err` = timed out, inner `Err` = spawn/IO failure; both map to `None`.
    timeout(GIT_COMMAND_TIMEOUT, git.output()).await.ok()?.ok()
}
/// List the configured remote names, with `origin` moved to the front when present.
///
/// Returns `None` if `git remote` fails, times out, or prints non-UTF-8 text.
async fn get_git_remotes(cwd: &Path) -> Option<Vec<String>> {
    let output = run_git_command_with_timeout(&["remote"], cwd)
        .await
        .filter(|out| out.status.success())?;
    let listing = String::from_utf8(output.stdout).ok()?;
    let mut remotes: Vec<String> = listing.lines().map(String::from).collect();

    if let Some(pos) = remotes.iter().position(|name| name == "origin") {
        // Rotating the prefix moves `origin` to index 0 and shifts the
        // remotes that preceded it one slot to the right.
        remotes[..=pos].rotate_right(1);
    }
    Some(remotes)
}
/// Attempt to determine the repository's default branch name.
///
/// Preference order, tried per remote (origin prioritized):
/// 1) The symbolic ref at `refs/remotes/<remote>/HEAD`
/// 2) `git remote show <remote>` parsed for "HEAD branch: <name>"
///
/// If no remote yields a name:
/// 3) Local fallback to existing `main` or `master` if present
///
/// Branch names containing `/` (e.g. `release/1.0`) are returned in full: the
/// `refs/remotes/<remote>/` prefix is stripped from the symbolic ref target
/// instead of keeping only the last path segment.
async fn get_default_branch(cwd: &Path) -> Option<String> {
    // Prefer the first remote (with origin prioritized)
    let remotes = get_git_remotes(cwd).await.unwrap_or_default();
    for remote in remotes {
        let remote_head = format!("refs/remotes/{remote}/HEAD");
        let remote_prefix = format!("refs/remotes/{remote}/");

        // Try symbolic-ref, which returns something like: refs/remotes/origin/main
        if let Some(symref_output) =
            run_git_command_with_timeout(&["symbolic-ref", "--quiet", &remote_head], cwd).await
            && symref_output.status.success()
            && let Ok(sym) = String::from_utf8(symref_output.stdout)
        {
            let trimmed = sym.trim();
            // Strip the full remote prefix so slashes inside the branch name
            // survive; fall back to the last segment if the target has an
            // unexpected shape.
            let name = trimmed
                .strip_prefix(remote_prefix.as_str())
                .or_else(|| trimmed.rsplit_once('/').map(|(_, last)| last));
            if let Some(name) = name {
                return Some(name.to_string());
            }
        }

        // Fall back to parsing `git remote show <remote>` output
        if let Some(show_output) =
            run_git_command_with_timeout(&["remote", "show", &remote], cwd).await
            && show_output.status.success()
            && let Ok(text) = String::from_utf8(show_output.stdout)
        {
            for line in text.lines() {
                let line = line.trim();
                if let Some(rest) = line.strip_prefix("HEAD branch:") {
                    let name = rest.trim();
                    if !name.is_empty() {
                        return Some(name.to_string());
                    }
                }
            }
        }
    }

    // No remote-derived default; try common local defaults if they exist
    get_default_branch_local(cwd).await
}
/// Determine the repository's default branch name, if available.
///
/// This inspects remote configuration first (including the symbolic `HEAD`
/// reference) and falls back to common local defaults such as `main` or
/// `master`. Returns `None` when the information cannot be determined, for
/// example when the current directory is not inside a Git repository.
///
/// This is the public entry point; it delegates directly to the private
/// `get_default_branch`.
pub async fn default_branch_name(cwd: &Path) -> Option<String> {
get_default_branch(cwd).await
}
/// Attempt to determine the repository's default branch name from local branches.
async fn get_default_branch_local(cwd: &Path) -> Option<String> {
for candidate in ["main", "master"] {
if let Some(verify) = run_git_command_with_timeout(
&[
"rev-parse",
"--verify",
"--quiet",
&format!("refs/heads/{candidate}"),
],
cwd,
)
.await
&& verify.status.success()
{
return Some(candidate.to_string());
}
}
None
}
/// Build an ancestry of branches starting at the current branch and ending at the
/// repository's default branch (if determinable)..
async fn branch_ancestry(cwd: &Path) -> Option<Vec<String>> {
// Discover current branch (ignore detached HEAD by treating it as None)
let current_branch = run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd)
.await
.and_then(|o| {
if o.status.success() {
String::from_utf8(o.stdout).ok()
} else {
None
}
})
.map(|s| s.trim().to_string())
.filter(|s| s != "HEAD");
// Discover default branch
let default_branch = get_default_branch(cwd).await;
let mut ancestry: Vec<String> = Vec::new();
let mut seen: HashSet<String> = HashSet::new();
if let Some(cb) = current_branch.clone() {
seen.insert(cb.clone());
ancestry.push(cb);
}
if let Some(db) = default_branch
&& !seen.contains(&db)
{
seen.insert(db.clone());
ancestry.push(db);
}
// Expand candidates: include any remote branches that already contain HEAD.
// This addresses cases where we're on a new local-only branch forked from a
// remote branch that isn't the repository default. We prioritize remotes in
// the order returned by get_git_remotes (origin first).
let remotes = get_git_remotes(cwd).await.unwrap_or_default();
for remote in remotes {
if let Some(output) = run_git_command_with_timeout(
&[
"for-each-ref",
"--format=%(refname:short)",
"--contains=HEAD",
&format!("refs/remotes/{remote}"),
],
cwd,
)
.await
&& output.status.success()
&& let Ok(text) = String::from_utf8(output.stdout)
{
for line in text.lines() {
let short = line.trim();
// Expect format like: "origin/feature"; extract the branch path after "remote/"
if let Some(stripped) = short.strip_prefix(&format!("{remote}/"))
&& !stripped.is_empty()
&& !seen.contains(stripped)
{
seen.insert(stripped.to_string());
ancestry.push(stripped.to_string());
}
}
}
}
// Ensure we return Some vector, even if empty, to allow caller logic to proceed
Some(ancestry)
}
/// For one branch, find its SHA on a remote and how far HEAD is from it.
///
/// The first tuple item is the SHA of the first remote (in the given order)
/// that has `refs/remotes/<remote>/<branch>`, or `None` when no remote has
/// the branch. The second item is `git rev-list --count <ref>..HEAD`,
/// computed against the local branch when that works and against the remote
/// ref otherwise.
///
/// Returns `None` when a verify call fails at the process level or prints
/// non-UTF-8 output, or when no distance could be computed.
async fn branch_remote_and_distance(
    cwd: &Path,
    branch: &str,
    remotes: &[String],
) -> Option<(Option<GitSha>, usize)> {
    // First remote that knows this branch, as (sha, full ref name).
    let mut remote_hit: Option<(GitSha, String)> = None;
    for remote in remotes {
        let candidate = format!("refs/remotes/{remote}/{branch}");
        // A timeout/spawn failure here makes the whole branch unusable.
        let verified =
            run_git_command_with_timeout(&["rev-parse", "--verify", "--quiet", &candidate], cwd)
                .await?;
        if !verified.status.success() {
            continue;
        }
        // Likewise give up on the branch if the SHA is not valid UTF-8.
        let sha_text = String::from_utf8(verified.stdout).ok()?;
        remote_hit = Some((GitSha::new(sha_text.trim()), candidate));
        break;
    }

    // Prefer counting against the local branch; if that call cannot run or
    // fails, retry against the remote ref (when one was found).
    let local_range = format!("{branch}..HEAD");
    let local_attempt =
        run_git_command_with_timeout(&["rev-list", "--count", &local_range], cwd).await;
    let count_output = match local_attempt {
        Some(out) if out.status.success() => out,
        _ => {
            let (_, remote_ref) = remote_hit.as_ref()?;
            let remote_range = format!("{remote_ref}..HEAD");
            run_git_command_with_timeout(&["rev-list", "--count", &remote_range], cwd).await?
        }
    };
    if !count_output.status.success() {
        return None;
    }

    let distance = String::from_utf8(count_output.stdout)
        .ok()?
        .trim()
        .parse::<usize>()
        .ok()?;

    Some((remote_hit.map(|(sha, _)| sha), distance))
}
/// Among `branches`, pick the remote SHA with the smallest distance to HEAD.
///
/// Branches whose distance cannot be computed, or that are not present on any
/// remote, are skipped. On a tie the earliest branch in `branches` wins.
/// Returns `None` when no branch qualifies.
async fn find_closest_sha(cwd: &Path, branches: &[String], remotes: &[String]) -> Option<GitSha> {
    // Best candidate so far, paired with its distance from HEAD.
    let mut best: Option<(GitSha, usize)> = None;

    for branch in branches {
        let Some((Some(remote_sha), distance)) =
            branch_remote_and_distance(cwd, branch, remotes).await
        else {
            continue;
        };

        let is_closer = match &best {
            Some((_, best_distance)) => distance < *best_distance,
            None => true,
        };
        if is_closer {
            best = Some((remote_sha, distance));
        }
    }

    best.map(|(sha, _)| sha)
}
/// Produce the diff of the working tree against `sha`, with untracked files appended.
///
/// Runs `git diff` against the given commit, then lists untracked,
/// non-ignored files and appends a `--no-index` diff against the null device
/// for each. Returns `None` if the main diff fails or is not UTF-8, or if the
/// untracked listing is not UTF-8. A per-file diff that fails is skipped.
async fn diff_against_sha(cwd: &Path, sha: &GitSha) -> Option<String> {
let output =
run_git_command_with_timeout(&["diff", "--no-textconv", "--no-ext-diff", &sha.0], cwd)
.await?;
// 0 is success and no diff.
// 1 is success but there is a diff.
let exit_ok = output.status.code().is_some_and(|c| c == 0 || c == 1);
if !exit_ok {
return None;
}
let mut diff = String::from_utf8(output.stdout).ok()?;
// Untracked files are not part of `git diff <sha>`; list them separately.
if let Some(untracked_output) =
run_git_command_with_timeout(&["ls-files", "--others", "--exclude-standard"], cwd).await
&& untracked_output.status.success()
{
let untracked: Vec<String> = String::from_utf8(untracked_output.stdout)
.ok()?
.lines()
.map(str::to_string)
.filter(|s| !s.is_empty())
.collect();
if !untracked.is_empty() {
// Use platform-appropriate null device and guard paths with `--`.
let null_device: &str = if cfg!(windows) { "NUL" } else { "/dev/null" };
// One `git diff --no-index` per untracked file, all awaited together.
let futures_iter = untracked.into_iter().map(|file| async move {
let file_owned = file;
let args_vec: Vec<&str> = vec![
"diff",
"--no-textconv",
"--no-ext-diff",
"--binary",
"--no-index",
// -- ensures that filenames that start with - are not treated as options.
"--",
null_device,
&file_owned,
];
run_git_command_with_timeout(&args_vec, cwd).await
});
let results = join_all(futures_iter).await;
// `flatten` drops files whose diff timed out or could not be run.
for extra in results.into_iter().flatten() {
if extra.status.code().is_some_and(|c| c == 0 || c == 1)
&& let Ok(s) = String::from_utf8(extra.stdout)
{
diff.push_str(&s);
}
}
}
}
Some(diff)
}
/// Resolve the path that should be used for trust checks.
///
/// Similar to [`get_git_repo_root`], but resolves to the root of the main
/// repository. Worktrees are handled via filesystem inspection without
/// invoking the `git` executable:
///
/// * If the nearest `.git` entry is a directory, its parent is the answer.
/// * If it is a file, its `gitdir:` pointer is resolved against the checkout
///   directory. The resolved git dir must sit directly inside a directory
///   named `worktrees`; the result is then the parent of the directory that
///   contains `worktrees`.
///
/// Returns `None` when no `.git` entry is found, the pointer file cannot be
/// read or parsed, or the pointer does not lead into a `worktrees` directory.
/// Results are canonicalized when possible.
pub fn resolve_root_git_project_for_trust(cwd: &Path) -> Option<PathBuf> {
    let start = if cwd.is_dir() { cwd } else { cwd.parent()? };
    let (repo_root, dot_git) = find_ancestor_git_entry(start)?;
    if dot_git.is_dir() {
        return Some(canonicalize_or_raw(repo_root));
    }

    // `.git` is a file: expect a single `gitdir: <path>` pointer.
    let pointer = std::fs::read_to_string(&dot_git).ok()?;
    let git_dir_rel = pointer
        .trim()
        .strip_prefix("gitdir:")
        .map(str::trim)
        .filter(|target| !target.is_empty())?;

    let resolved = AbsolutePathBuf::resolve_path_against_base(git_dir_rel, &repo_root)
        .ok()?
        .into_path_buf();
    let git_dir_path = canonicalize_or_raw(resolved);

    // Expected layout: <main root>/<common dir>/worktrees/<name>.
    let worktrees_dir = git_dir_path
        .parent()
        .filter(|dir| dir.file_name() == Some(OsStr::new("worktrees")))?;
    let main_repo_root = worktrees_dir.parent()?.parent()?;
    Some(canonicalize_or_raw(main_repo_root.to_path_buf()))
}
/// Walk from `base_dir` towards the filesystem root looking for a `.git` entry.
///
/// Returns `(directory, directory/.git)` for the nearest directory (starting
/// with `base_dir` itself) that has a `.git` entry of any kind, or `None`
/// when no ancestor has one.
fn find_ancestor_git_entry(base_dir: &Path) -> Option<(PathBuf, PathBuf)> {
    // `ancestors` yields `base_dir`, then each successive parent.
    base_dir.ancestors().find_map(|dir| {
        let dot_git = dir.join(".git");
        dot_git.exists().then(|| (dir.to_path_buf(), dot_git))
    })
}
/// Canonicalize `path`, returning it unchanged if canonicalization fails
/// (for example because it does not exist).
fn canonicalize_or_raw(path: PathBuf) -> PathBuf {
    match std::fs::canonicalize(&path) {
        Ok(canonical) => canonical,
        Err(_) => path,
    }
}
/// Returns the local git branches, sorted by name.
///
/// If a local `main` or `master` branch exists (checked in that order), it is
/// moved to the front of the list. Returns an empty vector when the branch
/// listing fails or times out.
pub async fn local_git_branches(cwd: &Path) -> Vec<String> {
    let listing = run_git_command_with_timeout(&["branch", "--format=%(refname:short)"], cwd)
        .await
        .filter(|out| out.status.success());

    let mut branches: Vec<String> = match listing {
        Some(out) => String::from_utf8_lossy(&out.stdout)
            .lines()
            .map(str::trim)
            .filter(|name| !name.is_empty())
            .map(str::to_string)
            .collect(),
        None => Vec::new(),
    };
    branches.sort_unstable();

    if let Some(base) = get_default_branch_local(cwd).await
        && let Some(pos) = branches.iter().position(|name| *name == base)
    {
        // Move the default branch to index 0, keeping the rest in order.
        branches[..=pos].rotate_right(1);
    }
    branches
}
/// Returns the current checked out branch name.
///
/// Uses `git branch --show-current`. Yields `None` when the command fails,
/// times out, prints non-UTF-8 text, or prints an empty name.
pub async fn current_branch_name(cwd: &Path) -> Option<String> {
    let out = run_git_command_with_timeout(&["branch", "--show-current"], cwd)
        .await
        .filter(|out| out.status.success())?;
    let raw = String::from_utf8(out.stdout).ok()?;
    let name = raw.trim();
    (!name.is_empty()).then(|| name.to_string())
}

View File

@@ -0,0 +1,116 @@
use std::fmt;
use std::path::PathBuf;
mod apply;
mod branch;
mod errors;
mod ghost_commits;
mod info;
mod operations;
mod platform;
pub use apply::ApplyGitRequest;
pub use apply::ApplyGitResult;
pub use apply::apply_git_patch;
pub use apply::extract_paths_from_patch;
pub use apply::parse_git_apply_output;
pub use apply::stage_paths;
pub use branch::merge_base_with_head;
pub use errors::GitToolingError;
pub use ghost_commits::CreateGhostCommitOptions;
pub use ghost_commits::GhostSnapshotConfig;
pub use ghost_commits::GhostSnapshotReport;
pub use ghost_commits::IgnoredUntrackedFile;
pub use ghost_commits::LargeUntrackedDir;
pub use ghost_commits::RestoreGhostCommitOptions;
pub use ghost_commits::capture_ghost_snapshot_report;
pub use ghost_commits::create_ghost_commit;
pub use ghost_commits::create_ghost_commit_with_report;
pub use ghost_commits::restore_ghost_commit;
pub use ghost_commits::restore_ghost_commit_with_options;
pub use ghost_commits::restore_to_commit;
pub use info::CommitLogEntry;
pub use info::GitDiffToRemote;
pub use info::GitInfo;
pub use info::collect_git_info;
pub use info::current_branch_name;
pub use info::default_branch_name;
pub use info::get_git_remote_urls;
pub use info::get_git_remote_urls_assume_git_repo;
pub use info::get_git_repo_root;
pub use info::get_has_changes;
pub use info::get_head_commit_hash;
pub use info::git_diff_to_remote;
pub use info::local_git_branches;
pub use info::recent_commits;
pub use info::resolve_root_git_project_for_trust;
pub use platform::create_symlink;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use ts_rs::TS;
/// Commit identifier held as a plain string.
type CommitID = String;
/// Newtype around a commit SHA string.
///
/// Serializes transparently as the inner string and is exported to
/// TypeScript as `string`.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, JsonSchema, TS)]
#[serde(transparent)]
#[ts(type = "string")]
pub struct GitSha(pub String);
impl GitSha {
/// Wrap `sha` as a [`GitSha`]; the text is copied as-is, without validation.
pub fn new(sha: &str) -> Self {
Self(sha.to_string())
}
}
/// Details of a ghost commit created from a repository state.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, TS)]
pub struct GhostCommit {
// Commit ID for the snapshot.
id: CommitID,
// Parent commit ID, if the repository had a `HEAD` at creation time.
parent: Option<CommitID>,
// Untracked or ignored files that already existed when the snapshot was captured.
preexisting_untracked_files: Vec<PathBuf>,
// Untracked or ignored directories that already existed when the snapshot was captured.
preexisting_untracked_dirs: Vec<PathBuf>,
}
impl GhostCommit {
/// Create a new ghost commit wrapper from a raw commit ID and optional parent.
pub fn new(
id: CommitID,
parent: Option<CommitID>,
preexisting_untracked_files: Vec<PathBuf>,
preexisting_untracked_dirs: Vec<PathBuf>,
) -> Self {
Self {
id,
parent,
preexisting_untracked_files,
preexisting_untracked_dirs,
}
}
/// Commit ID for the snapshot.
pub fn id(&self) -> &str {
&self.id
}
/// Parent commit ID, if the repository had a `HEAD` at creation time.
pub fn parent(&self) -> Option<&str> {
self.parent.as_deref()
}
/// Untracked or ignored files that already existed when the snapshot was captured.
pub fn preexisting_untracked_files(&self) -> &[PathBuf] {
&self.preexisting_untracked_files
}
/// Untracked or ignored directories that already existed when the snapshot was captured.
pub fn preexisting_untracked_dirs(&self) -> &[PathBuf] {
&self.preexisting_untracked_dirs
}
}
/// Displays a ghost commit as its bare commit ID.
impl fmt::Display for GhostCommit {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Written verbatim; width/precision flags on `f` are not applied.
        f.write_str(&self.id)
    }
}

View File

@@ -0,0 +1,239 @@
use std::ffi::OsStr;
use std::ffi::OsString;
use std::path::Component;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use crate::GitToolingError;
/// Verify that `path` is inside a git work tree.
///
/// Runs `git rev-parse --is-inside-work-tree`. Any answer other than `true`,
/// or a git failure with exit code 128, becomes
/// [`GitToolingError::NotAGitRepository`]; other errors are passed through.
pub(crate) fn ensure_git_repository(path: &Path) -> Result<(), GitToolingError> {
    let not_a_repository = || GitToolingError::NotAGitRepository {
        path: path.to_path_buf(),
    };

    let inside_work_tree = run_git_for_stdout(
        path,
        ["rev-parse", "--is-inside-work-tree"],
        /*env*/ None,
    )
    .map(|answer| answer.trim() == "true");

    match inside_work_tree {
        Ok(true) => Ok(()),
        Ok(false) => Err(not_a_repository()),
        // Exit code 128 from this probe is treated as "not a repository".
        Err(GitToolingError::GitCommand { status, .. }) if status.code() == Some(128) => {
            Err(not_a_repository())
        }
        Err(err) => Err(err),
    }
}
/// Resolve `HEAD` to a commit ID.
///
/// Returns `Ok(None)` when `git rev-parse --verify HEAD` exits with code 128;
/// any other failure is returned as an error.
pub(crate) fn resolve_head(path: &Path) -> Result<Option<String>, GitToolingError> {
    run_git_for_stdout(path, ["rev-parse", "--verify", "HEAD"], /*env*/ None)
        .map(Some)
        .or_else(|err| match err {
            GitToolingError::GitCommand { status, .. } if status.code() == Some(128) => Ok(None),
            other => Err(other),
        })
}
/// Lexically normalize a repository-relative path.
///
/// `.` components are dropped and `..` components cancel the preceding
/// component. No filesystem access is performed.
///
/// # Errors
/// * [`GitToolingError::NonRelativePath`] if the path is empty, or contains a
///   root or prefix component.
/// * [`GitToolingError::PathEscapesRepository`] if a `..` has nothing left to cancel.
pub(crate) fn normalize_relative_path(path: &Path) -> Result<PathBuf, GitToolingError> {
    let mut components = path.components().peekable();
    // A path with no components at all is rejected as non-relative.
    if components.peek().is_none() {
        return Err(GitToolingError::NonRelativePath {
            path: path.to_path_buf(),
        });
    }

    let mut normalized = PathBuf::new();
    for component in components {
        match component {
            Component::CurDir => {}
            Component::Normal(part) => normalized.push(part),
            Component::ParentDir => {
                let popped = normalized.pop();
                if !popped {
                    return Err(GitToolingError::PathEscapesRepository {
                        path: path.to_path_buf(),
                    });
                }
            }
            Component::Prefix(_) | Component::RootDir => {
                return Err(GitToolingError::NonRelativePath {
                    path: path.to_path_buf(),
                });
            }
        }
    }
    Ok(normalized)
}
/// Ask git for the top-level directory of the work tree containing `path`
/// (`git rev-parse --show-toplevel`).
pub(crate) fn resolve_repository_root(path: &Path) -> Result<PathBuf, GitToolingError> {
    run_git_for_stdout(path, ["rev-parse", "--show-toplevel"], /*env*/ None).map(PathBuf::from)
}
/// Join each of `paths` onto `prefix`.
///
/// With no prefix the paths are returned as an unchanged copy. An empty
/// input yields an empty vector.
pub(crate) fn apply_repo_prefix_to_force_include(
    prefix: Option<&Path>,
    paths: &[PathBuf],
) -> Vec<PathBuf> {
    let Some(prefix) = prefix else {
        return paths.to_vec();
    };
    paths.iter().map(|path| prefix.join(path)).collect()
}
/// Compute `repo_path` relative to `repo_root`, if it is a strict subdirectory.
///
/// Tries a lexical prefix strip first, then retries with both paths
/// canonicalized. Returns `None` when the paths are equal, when the relative
/// part would be empty, or when `repo_path` is not under `repo_root`.
pub(crate) fn repo_subdir(repo_root: &Path, repo_path: &Path) -> Option<PathBuf> {
    if repo_root == repo_path {
        return None;
    }

    let lexical = repo_path
        .strip_prefix(repo_root)
        .ok()
        .and_then(non_empty_path);
    if let Some(subdir) = lexical {
        return Some(subdir);
    }

    // Retry on canonical forms of both paths.
    let canonical_root = repo_root.canonicalize().ok()?;
    let canonical_path = repo_path.canonicalize().ok()?;
    canonical_path
        .strip_prefix(&canonical_root)
        .ok()
        .and_then(non_empty_path)
}
/// Return an owned copy of `path`, or `None` if it is the empty path.
fn non_empty_path(path: &Path) -> Option<PathBuf> {
    let is_empty = path.as_os_str().is_empty();
    (!is_empty).then(|| path.to_path_buf())
}
/// Run `git` with `args` in `dir` and report only success or failure.
///
/// Captured output is discarded; a non-zero exit status is returned as an error.
pub(crate) fn run_git_for_status<I, S>(
    dir: &Path,
    args: I,
    env: Option<&[(OsString, OsString)]>,
) -> Result<(), GitToolingError>
where
    I: IntoIterator<Item = S>,
    S: AsRef<OsStr>,
{
    run_git(dir, args, env).map(|_run| ())
}
pub(crate) fn run_git_for_stdout<I, S>(
dir: &Path,
args: I,
env: Option<&[(OsString, OsString)]>,
) -> Result<String, GitToolingError>
where
I: IntoIterator<Item = S>,
S: AsRef<OsStr>,
{
let run = run_git(dir, args, env)?;
String::from_utf8(run.output.stdout)
.map(|value| value.trim().to_string())
.map_err(|source| GitToolingError::GitOutputUtf8 {
command: run.command,
source,
})
}
/// Executes `git` and returns the full stdout without trimming so callers
/// can parse delimiter-sensitive output, propagating UTF-8 errors with context.
pub(crate) fn run_git_for_stdout_all<I, S>(
dir: &Path,
args: I,
env: Option<&[(OsString, OsString)]>,
) -> Result<String, GitToolingError>
where
I: IntoIterator<Item = S>,
S: AsRef<OsStr>,
{
// Keep the raw stdout untouched so callers can parse delimiter-sensitive
// output (e.g. NUL-separated paths) without trimming artefacts.
let run = run_git(dir, args, env)?;
// Propagate UTF-8 conversion failures with the command context for debugging.
String::from_utf8(run.output.stdout).map_err(|source| GitToolingError::GitOutputUtf8 {
command: run.command,
source,
})
}
/// Spawn `git` with `args` in `dir`, wait for it, and capture its output.
///
/// Any `env` pairs are added to the child's environment. A non-zero exit
/// status becomes [`GitToolingError::GitCommand`] carrying the rendered
/// command, the status and the trimmed (lossily decoded) stderr. Spawn
/// failures surface as I/O errors.
fn run_git<I, S>(
    dir: &Path,
    args: I,
    env: Option<&[(OsString, OsString)]>,
) -> Result<GitRun, GitToolingError>
where
    I: IntoIterator<Item = S>,
    S: AsRef<OsStr>,
{
    let args_vec: Vec<OsString> = args
        .into_iter()
        .map(|arg| arg.as_ref().to_os_string())
        .collect();
    // Rendered once up front so it can be attached to either outcome.
    let command_string = build_command_string(&args_vec);

    let mut command = Command::new("git");
    command.current_dir(dir);
    for (key, value) in env.into_iter().flatten() {
        command.env(key, value);
    }
    command.args(&args_vec);

    let output = command.output()?;
    if output.status.success() {
        return Ok(GitRun {
            command: command_string,
            output,
        });
    }

    let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string();
    Err(GitToolingError::GitCommand {
        command: command_string,
        status: output.status,
        stderr,
    })
}
/// Render a git invocation for log and error messages: `git` followed by each
/// argument (lossily decoded), separated by single spaces. No quoting is applied.
fn build_command_string(args: &[OsString]) -> String {
    args.iter().fold(String::from("git"), |mut rendered, arg| {
        rendered.push(' ');
        rendered.push_str(&arg.to_string_lossy());
        rendered
    })
}
/// Result of a successful `git` invocation: the rendered command line (kept
/// for error context) and the captured process output.
struct GitRun {
command: String,
output: std::process::Output,
}

View File

@@ -0,0 +1,37 @@
use std::path::Path;
use crate::GitToolingError;
#[cfg(unix)]
pub fn create_symlink(
_source: &Path,
link_target: &Path,
destination: &Path,
) -> Result<(), GitToolingError> {
use std::os::unix::fs::symlink;
symlink(link_target, destination)?;
Ok(())
}
/// Create a symbolic link at `destination` pointing to `link_target`.
///
/// Windows distinguishes directory and file symlinks, so the metadata of
/// `source` (read without following links) decides which kind is created.
#[cfg(windows)]
pub fn create_symlink(
source: &Path,
link_target: &Path,
destination: &Path,
) -> Result<(), GitToolingError> {
use std::os::windows::fs::FileTypeExt;
use std::os::windows::fs::symlink_dir;
use std::os::windows::fs::symlink_file;
let metadata = std::fs::symlink_metadata(source)?;
// Create a directory link only when `source` is itself a directory symlink.
if metadata.file_type().is_symlink_dir() {
symlink_dir(link_target, destination)?;
} else {
symlink_file(link_target, destination)?;
}
Ok(())
}
#[cfg(not(any(unix, windows)))]
compile_error!("codex-git symlink support is only implemented for Unix and Windows");