Files
codex/codex-rs/lastprs
Daniel Edrisian af3ccfd50a lastprs
2025-09-02 14:23:04 -07:00

290 lines
8.9 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import concurrent.futures
import json
import os
import shutil
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from typing import Iterable, List, Optional, Set, Tuple
def _run(cmd: List[str]) -> Tuple[int, str, str]:
proc = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
return proc.returncode, proc.stdout, proc.stderr
def require_gh():
    """Exit with status 1 unless the GitHub CLI ('gh') is available on PATH."""
    if shutil.which("gh") is not None:
        return
    print("Error: GitHub CLI 'gh' not found. Please install and authenticate.", file=sys.stderr)
    sys.exit(1)
def require_pr2md(script_dir: str) -> str:
    """Locate the 'pr2md' helper and return an invocable path for it.

    An executable named 'pr2md' sitting next to this script takes priority;
    otherwise fall back to a PATH lookup. Exits with status 1 when neither
    location has it.
    """
    sibling = os.path.join(script_dir, "pr2md")
    if os.path.isfile(sibling) and os.access(sibling, os.X_OK):
        return sibling
    if shutil.which("pr2md"):
        return "pr2md"
    print("Error: 'pr2md' not found next to this script or in PATH.", file=sys.stderr)
    sys.exit(1)
def parse_repo_from_url(url: str) -> Optional[str]:
    """Extract 'owner/repo' from a GitHub remote URL.

    Handles both SSH ('git@github.com:owner/repo.git') and HTTP(S)/bare
    ('https://github.com/owner/repo', 'github.com/owner/repo') forms.
    Returns None for empty input, non-GitHub hosts, or a path without at
    least an owner and a repo segment.
    """
    u = url.strip()
    if not u:
        return None
    # SSH remotes separate host and path with ':', web URLs with '/'.
    # (The original also tested u.startswith("github.com/"), but the 'in'
    # check below already covers that case — the branch was unreachable.)
    if "github.com:" in u:
        path = u.split("github.com:", 1)[1]
    elif "github.com/" in u:
        path = u.split("github.com/", 1)[1]
    else:
        return None
    if path.endswith(".git"):
        path = path[:-4]
    parts = path.strip("/").split("/")
    if len(parts) >= 2:
        return f"{parts[0]}/{parts[1]}"
    return None
def detect_repo_from_git() -> Optional[str]:
    """Infer 'owner/repo' from the current checkout's origin remote, if any."""
    code, out, _ = _run(["git", "rev-parse", "--is-inside-work-tree"])
    inside_work_tree = code == 0 and out.strip() == "true"
    if not inside_work_tree:
        return None
    code, origin_url, _ = _run(["git", "config", "--get", "remote.origin.url"])
    return parse_repo_from_url(origin_url) if code == 0 else None
def detect_repo_root() -> Optional[str]:
    """Return the absolute git repo root, or None when not inside a repo."""
    code, toplevel, _ = _run(["git", "rev-parse", "--show-toplevel"])
    if code == 0:
        return toplevel.strip()
    return None
def iso8601(dt: datetime) -> str:
    """Render *dt* as a UTC 'YYYY-MM-DDTHH:MM:SSZ' timestamp (seconds precision)."""
    as_utc = dt.astimezone(timezone.utc)
    # strftime (not isoformat) so sub-second precision is always dropped.
    return as_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
def list_review_comment_prs(repo: str, reviewer: str, since_iso: str) -> Set[int]:
    """Return numbers of PRs on which *reviewer* left review comments since *since_iso*.

    Pages through the repo-wide review-comments REST endpoint via `gh api`
    and exits the process (status 1) on a gh failure or unparseable JSON.
    """
    prs: Set[int] = set()
    page = 1
    reviewer_lc = reviewer.lower()
    while True:
        # per_page=100 is GitHub's maximum; 'since' filters server-side.
        path = f"/repos/{repo}/pulls/comments?per_page=100&page={page}&since={since_iso}"
        code, out, err = _run(["gh", "api", path])
        if code != 0:
            print(f"Error: failed to fetch review comments: {err.strip()}", file=sys.stderr)
            sys.exit(1)
        try:
            batch = json.loads(out)
        except json.JSONDecodeError as e:
            print(f"Error: could not parse review comments JSON: {e}", file=sys.stderr)
            sys.exit(1)
        if not batch:
            break
        for c in batch:
            # 'user' can be null (e.g. deleted account); compare logins case-insensitively.
            user = (c.get("user") or {}).get("login", "").lower()
            if user != reviewer_lc:
                continue
            pr_url = c.get("pull_request_url") or ""
            # Expect .../pulls/<number>
            try:
                pr_number = int(pr_url.rstrip("/").split("/")[-1])
                prs.add(pr_number)
            except Exception:
                # Malformed/missing URL: skip this comment rather than abort.
                continue
        # A short page means we've reached the last one.
        if len(batch) < 100:
            break
        page += 1
        if page > 50:
            # Safety cap: at most 50 pages (5000 comments) per run.
            break
    return prs
def list_recent_prs(repo: str, days: int) -> List[int]:
    """Return numbers of PRs updated within the last *days* days.

    Fallback discovery path: shells out to `gh pr list --search` with a
    GitHub search qualifier. Exits the process on a gh failure; returns []
    when the JSON payload cannot be parsed.
    """
    # As a fallback: list PRs updated in the window via gh and parse numbers.
    # Uses GitHub search qualifiers supported by `gh pr list --search`.
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    since_date = cutoff.strftime("%Y-%m-%d")
    cmd = [
        "gh",
        "pr",
        "list",
        "-R",
        repo,
        "--state",
        "all",
        "--search",
        f"updated:>={since_date}",
        "--json",
        "number",
    ]
    code, out, err = _run(cmd)
    if code != 0:
        print(f"Error: failed to list recent PRs: {err.strip()}", file=sys.stderr)
        sys.exit(1)
    try:
        data = json.loads(out)
    except json.JSONDecodeError:
        return []
    numbers: List[int] = []
    for entry in data:
        number = entry.get("number")
        if isinstance(number, int):
            numbers.append(int(number))
    return numbers
def ensure_dir(path: str):
    """Create *path* (and any missing parents); a no-op if it already exists."""
    os.makedirs(path, exist_ok=True)
def run_pr2md(pr2md_path: str, repo: str, pr_number: int, reviewer: str, out_dir: str) -> Tuple[int, str]:
    """Invoke pr2md for one PR and write its Markdown to out_dir/PR-<n>.md.

    Returns (pr_number, "ok") on success or (pr_number, "error: ...") on
    failure. Never raises, so it is safe to fan out from a thread pool.
    """
    # Compute the destination up front; only the subprocess and file write
    # belong inside the try (the original pre-assigned out_file = None for
    # no reason and wrapped the path join too).
    out_file = os.path.join(out_dir, f"PR-{pr_number}.md")
    try:
        cmd = [pr2md_path, str(pr_number), repo, "--reviewer", reviewer]
        code, out, err = _run(cmd)
        if code != 0:
            return pr_number, f"error: {err.strip() or 'pr2md failed'}"
        with open(out_file, "w", encoding="utf-8") as f:
            f.write(out)
        return pr_number, "ok"
    except Exception as e:
        # Deliberate catch-all: one PR failing must not abort the batch.
        return pr_number, f"error: {e}"
def dedupe(seq: Iterable[int]) -> List[int]:
    """Return seq's elements in first-seen order with duplicates removed."""
    # dict preserves insertion order (3.7+), so its keys give a stable dedupe.
    return list(dict.fromkeys(seq))
def _pr_has_reviewer_comment_since(repo: str, pr_num: int, reviewer_lc: str, since_iso: str) -> bool:
    """True when PR #pr_num has a review comment by *reviewer_lc* at/after *since_iso*.

    Pages through the per-PR review-comments endpoint; any gh/JSON failure
    is treated as "no match" (best-effort fallback path).
    """
    page = 1
    while True:
        path = f"/repos/{repo}/pulls/{pr_num}/comments?per_page=100&page={page}"
        code, out, _ = _run(["gh", "api", path])
        if code != 0:
            return False
        try:
            batch = json.loads(out)
        except json.JSONDecodeError:
            return False
        if not batch:
            return False
        for c in batch:
            user = (c.get("user") or {}).get("login", "").lower()
            created_at = c.get("created_at") or c.get("updated_at") or ""
            # ISO-8601 UTC timestamps compare correctly as plain strings.
            if user == reviewer_lc and created_at >= since_iso:
                return True
        if len(batch) < 100:
            return False
        page += 1
        if page > 20:
            # Safety cap on pagination per PR.
            return False


def main():
    """CLI entry point: find PRs the reviewer commented on and render each via pr2md."""
    parser = argparse.ArgumentParser(
        prog="lastprs",
        description=(
            "Generate Markdown via pr2md for PRs a reviewer commented on in the last N days.\n"
            "Outputs files under prs/<reviewer>/ in the current repo."
        ),
    )
    parser.add_argument("days", type=int, help="Number of days to look back (N)")
    parser.add_argument("reviewer", help="GitHub login of the reviewer")
    parser.add_argument(
        "repo",
        nargs="?",
        help="Repository in 'owner/repo' form; inferred from git origin if omitted",
    )
    parser.add_argument(
        "--jobs",
        "-j",
        type=int,
        default=min(8, (os.cpu_count() or 4)),
        help="Parallel jobs when invoking pr2md (default: min(8, CPUs))",
    )
    args = parser.parse_args()
    if args.days <= 0:
        print("Error: days must be a positive integer.", file=sys.stderr)
        sys.exit(2)
    require_gh()
    script_dir = os.path.dirname(os.path.abspath(__file__))
    pr2md_path = require_pr2md(script_dir)
    repo = args.repo or detect_repo_from_git()
    if not repo:
        print(
            "Error: Could not determine repository from git origin. Pass repo as 'owner/repo'.",
            file=sys.stderr,
        )
        sys.exit(2)
    # Compute the lookback window once; all comparisons use ISO-8601 UTC strings.
    since = datetime.now(timezone.utc) - timedelta(days=args.days)
    since_iso = iso8601(since)
    # Fast path: one repo-wide listing of review comments filtered by reviewer.
    pr_set = list_review_comment_prs(repo, args.reviewer, since_iso)
    if not pr_set:
        # Fallback: scan recently updated PRs and check their comments per PR.
        reviewer_lc = args.reviewer.lower()
        pr_set = {
            pr_num
            for pr_num in list_recent_prs(repo, args.days)
            if _pr_has_reviewer_comment_since(repo, pr_num, reviewer_lc, since_iso)
        }
    prs = sorted(dedupe(pr_set))
    if not prs:
        print(
            f"No PRs in {repo} with review comments from {args.reviewer} in the last {args.days} days.",
            file=sys.stderr,
        )
        return
    # Determine output directory under the repo root (cwd when outside a repo).
    repo_root = detect_repo_root() or os.getcwd()
    out_dir = os.path.join(repo_root, "prs", args.reviewer)
    ensure_dir(out_dir)
    print(f"Found {len(prs)} PR(s). Writing Markdown to {out_dir}")
    results: List[Tuple[int, str]] = []
    # pr2md calls are I/O-bound, so a thread pool parallelizes them cheaply.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, args.jobs)) as ex:
        futs = [
            ex.submit(run_pr2md, pr2md_path, repo, pr_num, args.reviewer, out_dir)
            for pr_num in prs
        ]
        for fut in concurrent.futures.as_completed(futs):
            results.append(fut.result())
    ok = sum(1 for _, s in results if s == "ok")
    failures = [(n, s) for n, s in results if s != "ok"]
    for n, s in failures:
        print(f"PR {n}: {s}", file=sys.stderr)
    print(f"Done. {ok}/{len(prs)} succeeded.")
# Standard script guard: run the CLI only when executed directly.
if __name__ == "__main__":
    main()