From 90bd67cfa4338bc40028541d7600b4651537b8d7 Mon Sep 17 00:00:00 2001 From: Daniel Edrisian Date: Tue, 2 Sep 2025 18:47:10 -0700 Subject: [PATCH] aaaa --- codex-rs/lastprs | 58 ++++++---- codex-rs/review | 137 ++++++++++++++++++----- prs/bolinfest/review/PR-2696-review.json | 25 ++++- 3 files changed, 168 insertions(+), 52 deletions(-) diff --git a/codex-rs/lastprs b/codex-rs/lastprs index fa09252777..86412536d6 100755 --- a/codex-rs/lastprs +++ b/codex-rs/lastprs @@ -7,7 +7,7 @@ import shutil import subprocess import sys from datetime import datetime, timedelta, timezone -from typing import Iterable, List, Optional, Set, Tuple +from typing import Iterable, List, Optional, Set, Tuple, Dict, Any def _run(cmd: List[str]) -> Tuple[int, str, str]: @@ -146,19 +146,16 @@ def ensure_dir(path: str): os.makedirs(path, exist_ok=True) -def run_pr2md(pr2md_path: str, repo: str, pr_number: int, reviewer: str, out_dir: str) -> Tuple[int, str]: - out_file = None +def run_pr2md(pr2md_path: str, repo: str, pr_number: int, reviewer: str) -> Tuple[int, str, Optional[str]]: + """Return (pr_number, status, markdown_or_none).""" try: - out_file = os.path.join(out_dir, f"PR-{pr_number}.md") cmd = [pr2md_path, str(pr_number), repo, "--reviewer", reviewer] code, out, err = _run(cmd) if code != 0: - return pr_number, f"error: {err.strip() or 'pr2md failed'}" - with open(out_file, "w", encoding="utf-8") as f: - f.write(out) - return pr_number, "ok" + return pr_number, f"error: {err.strip() or 'pr2md failed'}", None + return pr_number, "ok", out except Exception as e: - return pr_number, f"error: {e}" + return pr_number, f"error: {e}", None def dedupe(seq: Iterable[int]) -> List[int]: @@ -175,8 +172,8 @@ def main(): parser = argparse.ArgumentParser( prog="lastprs", description=( - "Generate Markdown via pr2md for PRs a reviewer commented on in the last N days.\n" - "Outputs files under prs// in the current repo." + "Fetch PRs a reviewer commented on in the last N days and render each via pr2md.\n" + "Writes a consolidated reviewers/.json with all raw PR markdowns." ), ) parser.add_argument("days", type=int, help="Number of days to look back (N)") @@ -262,27 +259,46 @@ def main(): ) return - # Determine output directory under the repo root + # Determine output path under the repo root repo_root = detect_repo_root() or os.getcwd() - out_dir = os.path.join(repo_root, "prs", args.reviewer) - ensure_dir(out_dir) + reviewers_dir = os.path.join(repo_root, "reviewers") + ensure_dir(reviewers_dir) + out_json = os.path.join(reviewers_dir, f"{args.reviewer}.json") - # Run pr2md in parallel - print(f"Found {len(prs)} PR(s). Writing Markdown to {out_dir}") - results: List[Tuple[int, str]] = [] + # Run pr2md in parallel and collect markdown + print(f"Found {len(prs)} PR(s). Rendering to reviewers/{args.reviewer}.json") + results: List[Tuple[int, str, Optional[str]]] = [] with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, args.jobs)) as ex: futs = [ - ex.submit(run_pr2md, pr2md_path, repo, pr_num, args.reviewer, out_dir) + ex.submit(run_pr2md, pr2md_path, repo, pr_num, args.reviewer) for pr_num in prs ] for fut in concurrent.futures.as_completed(futs): results.append(fut.result()) - ok = sum(1 for _, s in results if s == "ok") - failures = [(n, s) for n, s in results if s != "ok"] + ok = sum(1 for _, s, _ in results if s == "ok") + failures = [(n, s) for n, s, m in results if s != "ok"] for n, s in failures: print(f"PR {n}: {s}", file=sys.stderr) - print(f"Done. {ok}/{len(prs)} succeeded.") + + # Build JSON document + now = iso8601(datetime.now(timezone.utc)) + prs_json: List[Dict[str, Any]] = [] + for pr_number, status, md in sorted(results, key=lambda t: t[0]): + if status == "ok" and md is not None: + prs_json.append({"number": pr_number, "markdown": md}) + + data: Dict[str, Any] = { + "repo": repo, + "reviewer": args.reviewer, + "generated_at": now, + "days": args.days, + "prs": prs_json, + } + with open(out_json, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + f.write("\n") + print(f"Done. {ok}/{len(prs)} succeeded. Wrote {out_json}") if __name__ == "__main__": diff --git a/codex-rs/review b/codex-rs/review index 9cc61b7ed6..ccb7e47699 100755 --- a/codex-rs/review +++ b/codex-rs/review @@ -37,6 +37,61 @@ def detect_repo_root() -> Optional[str]: return out.strip() +def parse_repo_from_url(url: str) -> Optional[str]: + u = url.strip() + if not u: + return None + if "github.com:" in u: + path = u.split("github.com:", 1)[1] + elif "github.com/" in u: + path = u.split("github.com/", 1)[1] + elif u.startswith("github.com/"): + path = u.split("github.com/", 1)[1] + else: + return None + if path.endswith(".git"): + path = path[:-4] + parts = path.strip("/").split("/") + if len(parts) >= 2: + return f"{parts[0]}/{parts[1]}" + return None + + +def detect_repo_from_git() -> Optional[str]: + code, out, _ = _run(["git", "rev-parse", "--is-inside-work-tree"]) + if code != 0 or out.strip() != "true": + return None + code, origin_url, _ = _run(["git", "config", "--get", "remote.origin.url"]) + if code != 0: + return None + return parse_repo_from_url(origin_url) + + +def reviewers_json_path(reviewer: str) -> str: + root = detect_repo_root() or os.getcwd() + path = os.path.join(root, "reviewers") + os.makedirs(path, exist_ok=True) + return os.path.join(path, f"{reviewer}.json") + + +def load_reviewer_json(reviewer: str) -> Optional[Dict[str, Any]]: + p = reviewers_json_path(reviewer) + if not os.path.isfile(p): + return None + try: + with open(p, "r", encoding="utf-8") as f: + return json.load(f) + except Exception: + return None + + +def save_reviewer_json(reviewer: str, data: Dict[str, Any]): + p = reviewers_json_path(reviewer) + with open(p, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + f.write("\n") + + def get_current_branch() -> str: code, out, _ = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"]) return out.strip() if code == 0 else "HEAD" @@ -80,13 +135,13 @@ def estimate_tokens_approx(text: str) -> int: def study_files_in_dir(base: str) -> List[str]: + # Legacy helper (not used in JSON mode); preserved for compatibility. if not os.path.isdir(base): return [] files = [] for name in os.listdir(base): if re.match(r"PR-\d+-study\.md$", name): files.append(os.path.join(base, name)) - # Sort by PR number for determinism def prnum(p: str) -> int: m = re.search(r"(\d+)", os.path.basename(p)) return int(m.group(1)) if m else 0 @@ -382,11 +437,7 @@ def main(): action="store_true", help="Recompute review JSONs even if cached results exist", ) - parser.add_argument( - "--clear", - action="store_true", - help="Clear the output directory (review folder) before running", - ) + parser.add_argument("--debug", action="store_true", help="Do not clear reviewers/dump/ before running") args = parser.parse_args() @@ -394,11 +445,49 @@ def main(): repo_root = detect_repo_root() or os.getcwd() reviewer = args.reviewer - study_dir = os.path.abspath(args.study_dir) if args.study_dir else os.path.join(repo_root, "prs", reviewer, "study") - guides = study_files_in_dir(study_dir) - if not guides: - print(f"No studyguides found in {study_dir}.", file=sys.stderr) - sys.exit(0) + # Preferred source of truth: reviewers/.json + data = load_reviewer_json(reviewer) + if data is None: + repo = detect_repo_from_git() or "owner/repo" + print(f"No reviewers/{reviewer}.json found.") + try: + resp = input(f"Study {reviewer}'s last 100 days of PRs under {repo}? [y/N] ").strip().lower() + except EOFError: + resp = "n" + if resp == "y": + # Call lastprs to generate the JSON, then run study to populate studyguides + code, out, err = _run([os.path.join(repo_root, "lastprs"), "100", reviewer]) + if code != 0: + print(f"lastprs failed: {err.strip()}", file=sys.stderr) + sys.exit(2) + data = load_reviewer_json(reviewer) + if data is None: + print("Failed to load reviewers JSON after lastprs.", file=sys.stderr) + sys.exit(2) + # Auto-run study for all PRs (no limit) to populate studyguide fields + study_dump_dir = os.path.join(repo_root, "reviewers", "dump", reviewer) + if not args.debug and os.path.isdir(study_dump_dir): + shutil.rmtree(study_dump_dir, ignore_errors=True) + _study_fill_studyguides(data, reviewer, jobs=args.jobs, limit=args.limit, debug=args.debug) + save_reviewer_json(reviewer, data) + else: + print("Aborting: reviewer dataset not found.", file=sys.stderr) + sys.exit(2) + + # Build list of (pr_number, studyguide) from JSON + prs = list(data.get("prs") or []) + # Filter those that have a non-empty studyguide + pairs = [(int(p.get("number")), p.get("studyguide", "")) for p in prs] + pairs = [(n, sg) for (n, sg) in pairs if isinstance(n, int) and isinstance(sg, str) and sg.strip()] + if not pairs: + print(f"No studyguides present in reviewers/{reviewer}.json. Try: ./review study {reviewer}", file=sys.stderr) + sys.exit(2) + total_available = len(pairs) + if args.limit is not None: + if args.limit <= 0: + print("Error: --limit must be a positive integer.", file=sys.stderr) + sys.exit(2) + pairs = pairs[: args.limit] total_available = len(guides) if args.limit is not None: @@ -415,21 +504,13 @@ def main(): if not diff_text.strip(): print("Warning: empty diff vs base; all guides may be irrelevant or pass.", file=sys.stderr) - if args.out_dir: - out_dir = os.path.abspath(args.out_dir) - else: - # Default: sibling 'review' next to the study folder - out_dir = os.path.join(os.path.dirname(study_dir), "review") - if args.clear and os.path.isdir(out_dir): - # Danger: delete the review folder to start fresh - try: - shutil.rmtree(out_dir) - except Exception as e: - print(f"Failed to clear output dir {out_dir}: {e}", file=sys.stderr) - sys.exit(2) + # Output dir: reviewers/dump/ + out_dir = os.path.join(repo_root, "reviewers", "dump", reviewer) + if not args.debug and os.path.isdir(out_dir): + shutil.rmtree(out_dir, ignore_errors=True) os.makedirs(out_dir, exist_ok=True) - total = len(guides) + total = len(pairs) passed = 0 completed = 0 lock = threading.Lock() @@ -445,17 +526,17 @@ def main(): file=sys.stderr, ) sys.exit(2) - print(f"Study dir: {study_dir}") print(f"Output dir: {out_dir}") if args.limit is not None and args.limit < total_available: print(f"Limit: using first {total} of {total_available} guides") print_progress(passed, completed, total, lock) - def task(p: str): - return review_one(p, diff_text, branch, base_ref, out_dir, force=args.force) + def task(item: Tuple[int, str]): + pr_number, studyguide = item + return review_one_from_json(pr_number, studyguide, diff_text, branch, base_ref, out_dir, force=args.force) with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, args.jobs)) as ex: - futs = [ex.submit(task, p) for p in guides] + futs = [ex.submit(task, it) for it in pairs] for fut in concurrent.futures.as_completed(futs): guide_name, ok, failures_display, failures_structured, err = fut.result() with lock: diff --git a/prs/bolinfest/review/PR-2696-review.json b/prs/bolinfest/review/PR-2696-review.json index 34f9e0c74c..96be0b4d2d 100644 --- a/prs/bolinfest/review/PR-2696-review.json +++ b/prs/bolinfest/review/PR-2696-review.json @@ -1,5 +1,24 @@ { - "relevant": false, - "passes": true, - "failures": [] + "relevant": true, + "passes": false, + "failures": [ + { + "issue": "Uses blocking std::fs; prefer async I/O with tokio::fs for file reads.", + "file": "codex-rs/core/src/config_edit.rs", + "line": 47, + "excerpt": "let mut doc = match std::fs::read_to_string(&config_path) {" + }, + { + "issue": "Uses blocking std::fs; prefer async I/O with tokio::fs for directory creation.", + "file": "codex-rs/core/src/config_edit.rs", + "line": 78, + "excerpt": "std::fs::create_dir_all(codex_home)?;" + }, + { + "issue": "Uses blocking std::fs; prefer async I/O with tokio::fs for file writes.", + "file": "codex-rs/core/src/config_edit.rs", + "line": 80, + "excerpt": "std::fs::write(tmp_file.path(), doc.to_string())?;" + } + ] }