This commit is contained in:
Daniel Edrisian
2025-09-02 15:10:54 -07:00
parent 7adacf7aae
commit 64751867ac

274
codex-rs/review Executable file
View File

@@ -0,0 +1,274 @@
#!/usr/bin/env python3
import argparse
import concurrent.futures
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
import threading
from typing import Dict, List, Optional, Tuple
def _run(cmd: List[str], input_text: Optional[str] = None) -> Tuple[int, str, str]:
proc = subprocess.run(
cmd,
input=input_text,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
return proc.returncode, proc.stdout, proc.stderr
def require(cmd: str, hint: str):
    """Exit with status 1 (printing *hint* to stderr) unless *cmd* is on PATH."""
    if shutil.which(cmd) is not None:
        return
    print(f"Error: required command '{cmd}' not found. {hint}", file=sys.stderr)
    sys.exit(1)
def detect_repo_root() -> Optional[str]:
    """Return the git worktree's top-level directory, or None outside a repo."""
    code, out, _ = _run(["git", "rev-parse", "--show-toplevel"])
    return None if code != 0 else out.strip()
def get_current_branch() -> str:
    """Name of the checked-out branch; 'HEAD' when detached or on git error."""
    code, out, _ = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"])
    if code == 0:
        return out.strip()
    return "HEAD"
def resolve_base_ref() -> str:
    """Return the first base ref that resolves locally, defaulting to 'main'.

    Remote-tracking refs are preferred so the diff is computed against the
    freshest upstream state available.
    """
    candidates = ("origin/main", "upstream/main", "main")
    for candidate in candidates:
        code, _, _ = _run(["git", "rev-parse", "--verify", candidate])
        if code == 0:
            return candidate
    return "main"
def get_diff_text(base_ref: str, head_ref: str) -> str:
    """Unified diff of *head_ref* against its merge-base with *base_ref*.

    Uses the three-dot form so only changes introduced on the branch appear,
    and disables color so the output parses cleanly downstream. Exits with
    status 2 if git fails.
    """
    cmd = ["git", "diff", "--no-color", f"{base_ref}...{head_ref}"]
    code, out, err = _run(cmd)
    if code == 0:
        return out
    print(f"Error: failed to compute git diff: {err.strip()}", file=sys.stderr)
    sys.exit(2)
def study_files_for_reviewer(repo_root: str, reviewer: str) -> List[str]:
    """Collect prs/<reviewer>/study/PR-<n>-study.md paths, sorted by PR number.

    Returns an empty list when the study directory does not exist.
    """
    base = os.path.join(repo_root, "prs", reviewer, "study")
    if not os.path.isdir(base):
        return []
    matches = [
        os.path.join(base, entry)
        for entry in os.listdir(base)
        if re.match(r"PR-\d+-study\.md$", entry)
    ]

    def pr_number(path: str) -> int:
        # Sort on the first integer in the filename so ordering is deterministic.
        found = re.search(r"(\d+)", os.path.basename(path))
        return int(found.group(1)) if found else 0

    return sorted(matches, key=pr_number)
def build_prompt(studyguide: str, diff_text: str, branch: str, base_ref: str) -> str:
    """Assemble the reviewer prompt: preamble, study guide, diff, task, and
    a strict raw-JSON output contract."""
    segments = [
        "You are a senior code reviewer. Evaluate the current branch diff against a study guide.\n\n",
        f"Branch: {branch}\nBase: {base_ref}\n\n",
        "STUDYGUIDE (Markdown):\n",
        studyguide,
        "\n\n",
        "DIFF (unified):\n```diff\n",
        diff_text,
        "\n```\n\n",
        "Task: Determine whether this diff adheres to the DOs and DON'Ts from the studyguide.\n",
        "- The studyguide might be irrelevant to this diff; mark that clearly.\n",
        "- If relevant and the diff violates items, list each failing point.\n",
        "- If everything passes, return a single green check.\n\n",
        "Output: Respond with EXACTLY one JSON object as RAW JSON (no Markdown, no backticks). Nothing else.\n",
        "Schema: {\n \"relevant\": boolean,\n \"passes\": boolean,\n \"failures\": string[] // human-readable descriptions of each violation, empty if passes or irrelevant\n}\n",
        "Rules:\n- Use true/false for booleans.\n- When irrelevant, set relevant=false and passes=true and failures=[].\n- Do not wrap output in code fences.\n",
    ]
    return "".join(segments)
def run_codex_exec(prompt: str, last_message_file: Optional[str] = None) -> Tuple[int, str, str]:
    """Invoke `codex exec`, feeding *prompt* on stdin.

    Prefers a globally installed `codex` binary; otherwise falls back to
    building and running it via `cargo run`. When *last_message_file* is
    given, codex is asked to write its final message there.
    Returns (exit_code, stdout, stderr).
    """
    if shutil.which("codex") is not None:
        cmd = ["codex", "-c", "model_reasoning_effort=high", "exec"]
    else:
        cmd = [
            "cargo",
            "run",
            "--quiet",
            "--bin",
            "codex",
            "--",
            "-c",
            "model_reasoning_effort=high",
            "exec",
        ]
    if last_message_file:
        cmd.extend(["--output-last-message", last_message_file])
    return _run(cmd, input_text=prompt)
def parse_json_from_text(text: str) -> Optional[Dict]:
    """Extract a JSON object from model output.

    Accepts either raw JSON or a fenced code block (```json ... ``` or a
    bare ``` ... ``` fence — models frequently omit the language tag).
    Returns the parsed dict, or None when no JSON object can be recovered.
    """
    text = text.strip()
    # Prefer raw JSON: attempt a direct parse instead of a brittle
    # startswith("{")/endswith("}") check, but only accept objects.
    try:
        obj = json.loads(text)
        if isinstance(obj, dict):
            return obj
    except ValueError:
        pass
    # Fallback: first fenced code block containing an object; the `json`
    # language tag is optional.
    m = re.search(r"```(?:json)?\s*(\{[\s\S]*?\})\s*```", text, re.IGNORECASE)
    if m:
        try:
            obj = json.loads(m.group(1))
            return obj if isinstance(obj, dict) else None
        except ValueError:
            return None
    return None
def review_one(study_path: str, diff_text: str, branch: str, base_ref: str, out_dir: str) -> Tuple[str, bool, List[str], Optional[str]]:
    """Run one codex review of the branch diff against a single study guide.

    Writes the model's JSON verdict to `out_dir` and returns
    (study_filename, passes, failures, error), where `error` is None on
    success and `failures` lists human-readable violations.
    """
    # Returns (study_filename, passes, failures, error)
    try:
        with open(study_path, "r", encoding="utf-8") as f:
            studyguide = f.read()
        prompt = build_prompt(studyguide, diff_text, branch, base_ref)
        os.makedirs(out_dir, exist_ok=True)
        # PR-<n>-study.md -> PR-<n>-review.json alongside the other reviews.
        tmp_outfile = os.path.join(out_dir, os.path.basename(study_path).replace("-study.md", "-review.json"))
        code, out, err = run_codex_exec(prompt, last_message_file=tmp_outfile)
        if code != 0:
            return (os.path.basename(study_path), False, [], f"codex exec failed (exit {code}): {err.strip()}")
        # Prefer file written by codex; fall back to captured stdout
        content = None
        try:
            if os.path.isfile(tmp_outfile) and os.path.getsize(tmp_outfile) > 0:
                with open(tmp_outfile, "r", encoding="utf-8") as f:
                    content = f.read()
        except Exception:
            # Best-effort read; stdout is used below when the file is unusable.
            pass
        if content is None:
            content = out
        data = parse_json_from_text(content)
        if not data:
            return (os.path.basename(study_path), False, [], "could not parse JSON from model output")
        # Normalize file on disk to pretty-printed raw JSON for future reuse.
        try:
            with open(tmp_outfile, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
                f.write("\n")
        except Exception:
            # Non-fatal
            pass
        relevant = bool(data.get("relevant", True))
        passes = bool(data.get("passes", False))
        failures = list(data.get("failures") or [])
        # If irrelevant, treat as pass-by-default (per schema instructions)
        if not relevant:
            passes = True
            failures = []
        return (os.path.basename(study_path), passes, failures, None)
    except Exception as e:
        # Catch-all so one bad guide surfaces as an error tuple instead of
        # killing the whole worker pool.
        return (os.path.basename(study_path), False, [], str(e))
def print_progress(passed: int, completed: int, total: int, lock: threading.Lock):
    """Print a one-line progress bar; the bar and percentage track *passed*.

    Acquires *lock* around the print so concurrent callers don't interleave
    output. Callers must NOT already hold the lock (it is not reentrant).
    """
    width = 30
    if total:
        pct = int((passed / total) * 100)
        filled = int((passed / total) * width)
    else:
        pct = 0
        filled = 0
    bar = "#" * filled + "-" * (width - filled)
    with lock:
        print(f"[{bar}] {passed}/{total} passed ({pct}%), {completed}/{total} completed")
def main():
    """CLI entry point: fan out one codex review per study guide, then print
    a progress bar, a pass-rate summary, and every failed point."""
    parser = argparse.ArgumentParser(
        prog="review",
        description=(
            "Run codex checks of current branch diff against each studyguide in prs/<reviewer>/study.\n"
            "Aggregates results, prints a progress bar and a summary of failed points."
        ),
    )
    parser.add_argument("reviewer", help="GitHub login whose studyguides to use (prs/<reviewer>/study)")
    parser.add_argument("--jobs", "-j", type=int, default=10, help="Parallel jobs (default: 10)")
    parser.add_argument("--base", default=None, help="Base ref to diff against (default: auto: origin/main or main)")
    parser.add_argument("--show-errors", action="store_true", help="Print per-guide errors encountered")
    args = parser.parse_args()
    require("gh", "Install GitHub CLI: https://cli.github.com (used by other tools in this repo)")
    repo_root = detect_repo_root() or os.getcwd()
    reviewer = args.reviewer
    guides = study_files_for_reviewer(repo_root, reviewer)
    if not guides:
        # Nothing to review is not an error: exit 0.
        print(f"No studyguides found in prs/{reviewer}/study.", file=sys.stderr)
        sys.exit(0)
    branch = get_current_branch()
    base_ref = args.base or resolve_base_ref()
    diff_text = get_diff_text(base_ref, "HEAD")
    if not diff_text.strip():
        print("Warning: empty diff vs base; all guides may be irrelevant or pass.", file=sys.stderr)
    out_dir = os.path.join(repo_root, "prs", reviewer, "review")
    os.makedirs(out_dir, exist_ok=True)
    total = len(guides)
    passed = 0
    completed = 0
    # Guards the counters/lists below; print_progress also acquires it.
    lock = threading.Lock()
    failures_all: List[Tuple[str, str]] = []  # (guide, failure)
    errors_all: List[Tuple[str, str]] = []  # (guide, error)
    print(f"Running {total} review(s) against {branch} vs {base_ref}…")
    print_progress(passed, completed, total, lock)

    def task(p: str):
        # One worker: review the diff against a single study guide.
        return review_one(p, diff_text, branch, base_ref, out_dir)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, args.jobs)) as ex:
        futs = [ex.submit(task, p) for p in guides]
        for fut in concurrent.futures.as_completed(futs):
            guide_name, ok, failures, err = fut.result()
            with lock:
                completed += 1
                if ok:
                    passed += 1
                else:
                    if err:
                        errors_all.append((guide_name, err))
                    for f in failures:
                        failures_all.append((guide_name, f))
            # NOTE(review): must stay OUTSIDE the `with lock:` block above —
            # print_progress re-acquires this non-reentrant lock itself.
            print_progress(passed, completed, total, lock)
    print("")
    print(f"Summary: {passed}/{total} guides passing ({int((passed/total)*100) if total else 0}%)")
    if args.show_errors and errors_all:
        print("\nErrors:")
        for g, e in errors_all:
            print(f"- {g}: {e}")
    if failures_all:
        print("\nFailed points:")
        for g, f in failures_all:
            print(f"- [{g}] {f}")
    else:
        print("\nNo failed points detected.")
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()