mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
feat: warning large commits (#6838)
This commit is contained in:
@@ -1,10 +1,14 @@
|
||||
use crate::codex::TurnContext;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::WarningEvent;
|
||||
use crate::state::TaskKind;
|
||||
use crate::tasks::SessionTask;
|
||||
use crate::tasks::SessionTaskContext;
|
||||
use async_trait::async_trait;
|
||||
use codex_git::CreateGhostCommitOptions;
|
||||
use codex_git::GhostSnapshotReport;
|
||||
use codex_git::GitToolingError;
|
||||
use codex_git::capture_ghost_snapshot_report;
|
||||
use codex_git::create_ghost_commit;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
@@ -39,6 +43,27 @@ impl SessionTask for GhostSnapshotTask {
|
||||
_ = cancellation_token.cancelled() => true,
|
||||
_ = async {
|
||||
let repo_path = ctx_for_task.cwd.clone();
|
||||
// First, compute a snapshot report so we can warn about
|
||||
// large untracked directories before running the heavier
|
||||
// snapshot logic.
|
||||
if let Ok(Ok(report)) = tokio::task::spawn_blocking({
|
||||
let repo_path = repo_path.clone();
|
||||
move || {
|
||||
let options = CreateGhostCommitOptions::new(&repo_path);
|
||||
capture_ghost_snapshot_report(&options)
|
||||
}
|
||||
})
|
||||
.await
|
||||
&& let Some(message) = format_large_untracked_warning(&report) {
|
||||
session
|
||||
.session
|
||||
.send_event(
|
||||
&ctx_for_task,
|
||||
EventMsg::Warning(WarningEvent { message }),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
// Required to run in a dedicated blocking pool.
|
||||
match tokio::task::spawn_blocking(move || {
|
||||
let options = CreateGhostCommitOptions::new(&repo_path);
|
||||
@@ -103,3 +128,22 @@ impl GhostSnapshotTask {
|
||||
Self { token }
|
||||
}
|
||||
}
|
||||
|
||||
fn format_large_untracked_warning(report: &GhostSnapshotReport) -> Option<String> {
|
||||
if report.large_untracked_dirs.is_empty() {
|
||||
return None;
|
||||
}
|
||||
const MAX_DIRS: usize = 3;
|
||||
let mut parts: Vec<String> = Vec::new();
|
||||
for dir in report.large_untracked_dirs.iter().take(MAX_DIRS) {
|
||||
parts.push(format!("{} ({} files)", dir.path.display(), dir.file_count));
|
||||
}
|
||||
if report.large_untracked_dirs.len() > MAX_DIRS {
|
||||
let remaining = report.large_untracked_dirs.len() - MAX_DIRS;
|
||||
parts.push(format!("{remaining} more"));
|
||||
}
|
||||
Some(format!(
|
||||
"Repository snapshot encountered large untracked directories: {}. This can slow Codex; consider adding these paths to .gitignore or disabling undo in your config.",
|
||||
parts.join(", ")
|
||||
))
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashSet;
|
||||
use std::ffi::OsString;
|
||||
use std::fs;
|
||||
@@ -21,6 +22,8 @@ use crate::operations::run_git_for_stdout_all;
|
||||
|
||||
/// Default commit message used for ghost commits when none is provided.
|
||||
const DEFAULT_COMMIT_MESSAGE: &str = "codex snapshot";
|
||||
/// Default threshold that triggers a warning about large untracked directories.
|
||||
const LARGE_UNTRACKED_WARNING_THRESHOLD: usize = 200;
|
||||
|
||||
/// Options to control ghost commit creation.
|
||||
pub struct CreateGhostCommitOptions<'a> {
|
||||
@@ -29,6 +32,19 @@ pub struct CreateGhostCommitOptions<'a> {
|
||||
pub force_include: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
/// Summary produced alongside a ghost snapshot.
|
||||
#[derive(Debug, Default, Clone, PartialEq, Eq)]
|
||||
pub struct GhostSnapshotReport {
|
||||
pub large_untracked_dirs: Vec<LargeUntrackedDir>,
|
||||
}
|
||||
|
||||
/// A single location holding a large number of untracked files.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LargeUntrackedDir {
    /// Location the untracked files were attributed to.
    pub path: PathBuf,
    /// Number of untracked files counted for `path`.
    pub file_count: usize,
}
|
||||
|
||||
impl<'a> CreateGhostCommitOptions<'a> {
|
||||
/// Creates options scoped to the provided repository path.
|
||||
pub fn new(repo_path: &'a Path) -> Self {
|
||||
@@ -64,10 +80,94 @@ impl<'a> CreateGhostCommitOptions<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn detect_large_untracked_dirs(files: &[PathBuf], dirs: &[PathBuf]) -> Vec<LargeUntrackedDir> {
|
||||
let mut counts: BTreeMap<PathBuf, usize> = BTreeMap::new();
|
||||
|
||||
let mut sorted_dirs: Vec<&PathBuf> = dirs.iter().collect();
|
||||
sorted_dirs.sort_by(|a, b| {
|
||||
let a_components = a.components().count();
|
||||
let b_components = b.components().count();
|
||||
b_components.cmp(&a_components).then_with(|| a.cmp(b))
|
||||
});
|
||||
|
||||
for file in files {
|
||||
let mut key: Option<PathBuf> = None;
|
||||
for dir in &sorted_dirs {
|
||||
if file.starts_with(dir.as_path()) {
|
||||
key = Some((*dir).clone());
|
||||
break;
|
||||
}
|
||||
}
|
||||
let key = key.unwrap_or_else(|| {
|
||||
file.parent()
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|| PathBuf::from("."))
|
||||
});
|
||||
let entry = counts.entry(key).or_insert(0);
|
||||
*entry += 1;
|
||||
}
|
||||
|
||||
let mut result: Vec<LargeUntrackedDir> = counts
|
||||
.into_iter()
|
||||
.filter(|(_, count)| *count >= LARGE_UNTRACKED_WARNING_THRESHOLD)
|
||||
.map(|(path, file_count)| LargeUntrackedDir { path, file_count })
|
||||
.collect();
|
||||
result.sort_by(|a, b| {
|
||||
b.file_count
|
||||
.cmp(&a.file_count)
|
||||
.then_with(|| a.path.cmp(&b.path))
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
/// Returns `path` with `repo_prefix` stripped from its front.
///
/// When no prefix is supplied, or `path` does not start with it, the path is
/// returned unchanged.
fn to_session_relative_path(path: &Path, repo_prefix: Option<&Path>) -> PathBuf {
    let relative = repo_prefix
        .and_then(|prefix| path.strip_prefix(prefix).ok())
        .unwrap_or(path);
    relative.to_path_buf()
}
|
||||
|
||||
/// Create a ghost commit capturing the current state of the repository's working tree.
|
||||
pub fn create_ghost_commit(
|
||||
options: &CreateGhostCommitOptions<'_>,
|
||||
) -> Result<GhostCommit, GitToolingError> {
|
||||
create_ghost_commit_with_report(options).map(|(commit, _)| commit)
|
||||
}
|
||||
|
||||
/// Compute a report describing the working tree for a ghost snapshot without creating a commit.
|
||||
pub fn capture_ghost_snapshot_report(
|
||||
options: &CreateGhostCommitOptions<'_>,
|
||||
) -> Result<GhostSnapshotReport, GitToolingError> {
|
||||
ensure_git_repository(options.repo_path)?;
|
||||
|
||||
let repo_root = resolve_repository_root(options.repo_path)?;
|
||||
let repo_prefix = repo_subdir(repo_root.as_path(), options.repo_path);
|
||||
let existing_untracked =
|
||||
capture_existing_untracked(repo_root.as_path(), repo_prefix.as_deref())?;
|
||||
|
||||
let warning_files = existing_untracked
|
||||
.files
|
||||
.iter()
|
||||
.map(|path| to_session_relative_path(path, repo_prefix.as_deref()))
|
||||
.collect::<Vec<_>>();
|
||||
let warning_dirs = existing_untracked
|
||||
.dirs
|
||||
.iter()
|
||||
.map(|path| to_session_relative_path(path, repo_prefix.as_deref()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(GhostSnapshotReport {
|
||||
large_untracked_dirs: detect_large_untracked_dirs(&warning_files, &warning_dirs),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a ghost commit capturing the current state of the repository's working tree along with a report.
|
||||
pub fn create_ghost_commit_with_report(
|
||||
options: &CreateGhostCommitOptions<'_>,
|
||||
) -> Result<(GhostCommit, GhostSnapshotReport), GitToolingError> {
|
||||
ensure_git_repository(options.repo_path)?;
|
||||
|
||||
let repo_root = resolve_repository_root(options.repo_path)?;
|
||||
@@ -76,6 +176,18 @@ pub fn create_ghost_commit(
|
||||
let existing_untracked =
|
||||
capture_existing_untracked(repo_root.as_path(), repo_prefix.as_deref())?;
|
||||
|
||||
let warning_files = existing_untracked
|
||||
.files
|
||||
.iter()
|
||||
.map(|path| to_session_relative_path(path, repo_prefix.as_deref()))
|
||||
.collect::<Vec<_>>();
|
||||
let warning_dirs = existing_untracked
|
||||
.dirs
|
||||
.iter()
|
||||
.map(|path| to_session_relative_path(path, repo_prefix.as_deref()))
|
||||
.collect::<Vec<_>>();
|
||||
let large_untracked_dirs = detect_large_untracked_dirs(&warning_files, &warning_dirs);
|
||||
|
||||
let normalized_force = options
|
||||
.force_include
|
||||
.iter()
|
||||
@@ -143,11 +255,18 @@ pub fn create_ghost_commit(
|
||||
Some(commit_env.as_slice()),
|
||||
)?;
|
||||
|
||||
Ok(GhostCommit::new(
|
||||
let ghost_commit = GhostCommit::new(
|
||||
commit_id,
|
||||
parent,
|
||||
existing_untracked.files,
|
||||
existing_untracked.dirs,
|
||||
);
|
||||
|
||||
Ok((
|
||||
ghost_commit,
|
||||
GhostSnapshotReport {
|
||||
large_untracked_dirs,
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
@@ -460,6 +579,95 @@ mod tests {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
fn create_snapshot_reports_large_untracked_dirs() -> Result<(), GitToolingError> {
    let temp = tempfile::tempdir()?;
    let repo = temp.path();
    init_test_repo(repo);

    // Commit a single tracked file so the ghost commit has a parent.
    std::fs::write(repo.join("tracked.txt"), "contents\n")?;
    run_git_in(repo, &["add", "tracked.txt"]);
    run_git_in(
        repo,
        &[
            "-c",
            "user.name=Tester",
            "-c",
            "user.email=test@example.com",
            "commit",
            "-m",
            "initial",
        ],
    );

    // Populate an untracked directory with one file more than the threshold.
    let expected_count = LARGE_UNTRACKED_WARNING_THRESHOLD + 1;
    let models = repo.join("models");
    std::fs::create_dir(&models)?;
    for idx in 0..expected_count {
        std::fs::write(models.join(format!("weights-{idx}.bin")), "data\n")?;
    }

    let options = CreateGhostCommitOptions::new(repo);
    let (ghost, report) = create_ghost_commit_with_report(&options)?;

    assert!(ghost.parent().is_some());
    let expected = vec![LargeUntrackedDir {
        path: PathBuf::from("models"),
        file_count: expected_count,
    }];
    assert_eq!(report.large_untracked_dirs, expected);

    Ok(())
}
|
||||
|
||||
#[test]
fn create_snapshot_reports_nested_large_untracked_dirs_under_tracked_parent()
-> Result<(), GitToolingError> {
    let temp = tempfile::tempdir()?;
    let repo = temp.path();
    init_test_repo(repo);

    // Commit `src/main.rs` so that `src` itself is a tracked directory.
    let src = repo.join("src");
    std::fs::create_dir(&src)?;
    std::fs::write(src.join("main.rs"), "fn main() {}\n")?;
    run_git_in(repo, &["add", "src/main.rs"]);
    run_git_in(
        repo,
        &[
            "-c",
            "user.name=Tester",
            "-c",
            "user.email=test@example.com",
            "commit",
            "-m",
            "initial",
        ],
    );

    // Drop a large untracked tree two levels below the tracked directory.
    let expected_count = LARGE_UNTRACKED_WARNING_THRESHOLD + 1;
    let generated = src.join("generated").join("cache");
    std::fs::create_dir_all(&generated)?;
    for idx in 0..expected_count {
        std::fs::write(generated.join(format!("file-{idx}.bin")), "data\n")?;
    }

    let options = CreateGhostCommitOptions::new(repo);
    let (_, report) = create_ghost_commit_with_report(&options)?;

    // Exactly one entry, and it must point below `src`, not at `src` itself.
    assert_eq!(report.large_untracked_dirs.len(), 1);
    let entry = &report.large_untracked_dirs[0];
    assert_ne!(entry.path, PathBuf::from("src"));
    assert!(
        entry.path.starts_with(Path::new("src/generated")),
        "unexpected path for large untracked directory: {}",
        entry.path.display()
    );
    assert_eq!(entry.file_count, expected_count);

    Ok(())
}
|
||||
|
||||
#[test]
|
||||
/// Ensures ghost commits succeed in repositories without an existing HEAD.
|
||||
fn create_snapshot_without_existing_head() -> Result<(), GitToolingError> {
|
||||
|
||||
@@ -17,7 +17,11 @@ pub use apply::stage_paths;
|
||||
pub use branch::merge_base_with_head;
|
||||
pub use errors::GitToolingError;
|
||||
pub use ghost_commits::CreateGhostCommitOptions;
|
||||
pub use ghost_commits::GhostSnapshotReport;
|
||||
pub use ghost_commits::LargeUntrackedDir;
|
||||
pub use ghost_commits::capture_ghost_snapshot_report;
|
||||
pub use ghost_commits::create_ghost_commit;
|
||||
pub use ghost_commits::create_ghost_commit_with_report;
|
||||
pub use ghost_commits::restore_ghost_commit;
|
||||
pub use ghost_commits::restore_to_commit;
|
||||
pub use platform::create_symlink;
|
||||
|
||||
Reference in New Issue
Block a user