Files
codex/codex-rs/lastprs
Daniel Edrisian af3ccfd50a lastprs
2025-09-02 14:23:04 -07:00

290 lines
8.9 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import concurrent.futures
import json
import os
import shutil
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from typing import Iterable, List, Optional, Set, Tuple
def _run(cmd: List[str]) -> Tuple[int, str, str]:
proc = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
return proc.returncode, proc.stdout, proc.stderr
def require_gh():
    """Exit with status 1 unless the GitHub CLI ('gh') is available on PATH."""
    if shutil.which("gh") is not None:
        return
    print("Error: GitHub CLI 'gh' not found. Please install and authenticate.", file=sys.stderr)
    sys.exit(1)
def require_pr2md(script_dir: str) -> str:
    """Locate the 'pr2md' helper and return an invocable path for it.

    An executable named 'pr2md' sitting next to this script takes priority;
    otherwise fall back to a PATH lookup. Exits with status 1 when neither
    location has it.
    """
    sibling = os.path.join(script_dir, "pr2md")
    if os.path.isfile(sibling) and os.access(sibling, os.X_OK):
        return sibling
    if shutil.which("pr2md"):
        return "pr2md"
    print("Error: 'pr2md' not found next to this script or in PATH.", file=sys.stderr)
    sys.exit(1)
def parse_repo_from_url(url: str) -> Optional[str]:
    """Extract 'owner/repo' from a GitHub remote URL.

    Handles both SSH ('git@github.com:owner/repo.git') and HTTP(S)/bare
    ('https://github.com/owner/repo', 'github.com/owner/repo') forms.
    Returns None for empty input, non-GitHub hosts, or a path without at
    least an owner and a repo segment.
    """
    u = url.strip()
    if not u:
        return None
    # SSH remotes separate host and path with ':', web URLs with '/'.
    # (The original also tested u.startswith("github.com/"), but the 'in'
    # check below already covers that case — the branch was unreachable.)
    if "github.com:" in u:
        path = u.split("github.com:", 1)[1]
    elif "github.com/" in u:
        path = u.split("github.com/", 1)[1]
    else:
        return None
    if path.endswith(".git"):
        path = path[:-4]
    parts = path.strip("/").split("/")
    if len(parts) >= 2:
        return f"{parts[0]}/{parts[1]}"
    return None
def detect_repo_from_git() -> Optional[str]:
    """Infer 'owner/repo' from the current checkout's origin remote, if any."""
    code, out, _ = _run(["git", "rev-parse", "--is-inside-work-tree"])
    inside_work_tree = code == 0 and out.strip() == "true"
    if not inside_work_tree:
        return None
    code, origin_url, _ = _run(["git", "config", "--get", "remote.origin.url"])
    return parse_repo_from_url(origin_url) if code == 0 else None
def detect_repo_root() -> Optional[str]:
    """Return the absolute git repo root, or None when not inside a repo."""
    code, toplevel, _ = _run(["git", "rev-parse", "--show-toplevel"])
    if code == 0:
        return toplevel.strip()
    return None
def iso8601(dt: datetime) -> str:
    """Render *dt* as a UTC 'YYYY-MM-DDTHH:MM:SSZ' timestamp (seconds precision)."""
    as_utc = dt.astimezone(timezone.utc)
    # strftime (not isoformat) so sub-second precision is always dropped.
    return as_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
def list_review_comment_prs(repo: str, reviewer: str, since_iso: str) -> Set[int]:
    """Return numbers of PRs on which *reviewer* left review comments since *since_iso*.

    Pages through the repo-wide review-comments REST endpoint via `gh api`
    and exits the process (status 1) on a gh failure or unparseable JSON.
    """
    prs: Set[int] = set()
    page = 1
    reviewer_lc = reviewer.lower()
    while True:
        # per_page=100 is GitHub's maximum; 'since' filters server-side.
        path = f"/repos/{repo}/pulls/comments?per_page=100&page={page}&since={since_iso}"
        code, out, err = _run(["gh", "api", path])
        if code != 0:
            print(f"Error: failed to fetch review comments: {err.strip()}", file=sys.stderr)
            sys.exit(1)
        try:
            batch = json.loads(out)
        except json.JSONDecodeError as e:
            print(f"Error: could not parse review comments JSON: {e}", file=sys.stderr)
            sys.exit(1)
        if not batch:
            break
        for c in batch:
            # 'user' can be null (e.g. deleted account); compare logins case-insensitively.
            user = (c.get("user") or {}).get("login", "").lower()
            if user != reviewer_lc:
                continue
            pr_url = c.get("pull_request_url") or ""
            # Expect .../pulls/<number>
            try:
                pr_number = int(pr_url.rstrip("/").split("/")[-1])
                prs.add(pr_number)
            except Exception:
                # Malformed/missing URL: skip this comment rather than abort.
                continue
        # A short page means we've reached the last one.
        if len(batch) < 100:
            break
        page += 1
        if page > 50:
            # Safety cap: at most 50 pages (5000 comments) per run.
            break
    return prs
def list_recent_prs(repo: str, days: int) -> List[int]:
    """Return numbers of PRs updated within the last *days* days.

    Fallback discovery path: shells out to `gh pr list --search` with a
    GitHub search qualifier. Exits the process on a gh failure; returns []
    when the JSON payload cannot be parsed.
    """
    # As a fallback: list PRs updated in the window via gh and parse numbers.
    # Uses GitHub search qualifiers supported by `gh pr list --search`.
    cutoff = datetime.now(timezone.utc) - timedelta(days=days)
    since_date = cutoff.strftime("%Y-%m-%d")
    cmd = [
        "gh",
        "pr",
        "list",
        "-R",
        repo,
        "--state",
        "all",
        "--search",
        f"updated:>={since_date}",
        "--json",
        "number",
    ]
    code, out, err = _run(cmd)
    if code != 0:
        print(f"Error: failed to list recent PRs: {err.strip()}", file=sys.stderr)
        sys.exit(1)
    try:
        data = json.loads(out)
    except json.JSONDecodeError:
        return []
    numbers: List[int] = []
    for entry in data:
        number = entry.get("number")
        if isinstance(number, int):
            numbers.append(int(number))
    return numbers
def ensure_dir(path: str):
    """Create *path* (and any missing parents); a no-op if it already exists."""
    os.makedirs(path, exist_ok=True)
def run_pr2md(pr2md_path: str, repo: str, pr_number: int, reviewer: str, out_dir: str) -> Tuple[int, str]:
    """Invoke pr2md for one PR and write its Markdown to out_dir/PR-<n>.md.

    Returns (pr_number, "ok") on success or (pr_number, "error: ...") on
    failure. Never raises, so it is safe to fan out from a thread pool.
    """
    # Compute the destination up front; only the subprocess and file write
    # belong inside the try (the original pre-assigned out_file = None for
    # no reason and wrapped the path join too).
    out_file = os.path.join(out_dir, f"PR-{pr_number}.md")
    try:
        cmd = [pr2md_path, str(pr_number), repo, "--reviewer", reviewer]
        code, out, err = _run(cmd)
        if code != 0:
            return pr_number, f"error: {err.strip() or 'pr2md failed'}"
        with open(out_file, "w", encoding="utf-8") as f:
            f.write(out)
        return pr_number, "ok"
    except Exception as e:
        # Deliberate catch-all: one PR failing must not abort the batch.
        return pr_number, f"error: {e}"
def dedupe(seq: Iterable[int]) -> List[int]:
    """Return seq's elements in first-seen order with duplicates removed."""
    # dict preserves insertion order (3.7+), so its keys give a stable dedupe.
    return list(dict.fromkeys(seq))
def _pr_has_reviewer_comment_since(repo: str, pr_num: int, reviewer_lc: str, since_iso: str) -> bool:
    """True when PR #pr_num has a review comment by *reviewer_lc* at/after *since_iso*.

    Pages through the per-PR review-comments endpoint; any gh/JSON failure
    is treated as "no match" (best-effort fallback path).
    """
    page = 1
    while True:
        path = f"/repos/{repo}/pulls/{pr_num}/comments?per_page=100&page={page}"
        code, out, _ = _run(["gh", "api", path])
        if code != 0:
            return False
        try:
            batch = json.loads(out)
        except json.JSONDecodeError:
            return False
        if not batch:
            return False
        for c in batch:
            user = (c.get("user") or {}).get("login", "").lower()
            created_at = c.get("created_at") or c.get("updated_at") or ""
            # ISO-8601 UTC timestamps compare correctly as plain strings.
            if user == reviewer_lc and created_at >= since_iso:
                return True
        if len(batch) < 100:
            return False
        page += 1
        if page > 20:
            # Safety cap on pagination per PR.
            return False


def main():
    """CLI entry point: find PRs the reviewer commented on and render each via pr2md."""
    parser = argparse.ArgumentParser(
        prog="lastprs",
        description=(
            "Generate Markdown via pr2md for PRs a reviewer commented on in the last N days.\n"
            "Outputs files under prs/<reviewer>/ in the current repo."
        ),
    )
    parser.add_argument("days", type=int, help="Number of days to look back (N)")
    parser.add_argument("reviewer", help="GitHub login of the reviewer")
    parser.add_argument(
        "repo",
        nargs="?",
        help="Repository in 'owner/repo' form; inferred from git origin if omitted",
    )
    parser.add_argument(
        "--jobs",
        "-j",
        type=int,
        default=min(8, (os.cpu_count() or 4)),
        help="Parallel jobs when invoking pr2md (default: min(8, CPUs))",
    )
    args = parser.parse_args()
    if args.days <= 0:
        print("Error: days must be a positive integer.", file=sys.stderr)
        sys.exit(2)
    require_gh()
    script_dir = os.path.dirname(os.path.abspath(__file__))
    pr2md_path = require_pr2md(script_dir)
    repo = args.repo or detect_repo_from_git()
    if not repo:
        print(
            "Error: Could not determine repository from git origin. Pass repo as 'owner/repo'.",
            file=sys.stderr,
        )
        sys.exit(2)
    # Compute the lookback window once; all comparisons use ISO-8601 UTC strings.
    since = datetime.now(timezone.utc) - timedelta(days=args.days)
    since_iso = iso8601(since)
    # Fast path: one repo-wide listing of review comments filtered by reviewer.
    pr_set = list_review_comment_prs(repo, args.reviewer, since_iso)
    if not pr_set:
        # Fallback: scan recently updated PRs and check their comments per PR.
        reviewer_lc = args.reviewer.lower()
        pr_set = {
            pr_num
            for pr_num in list_recent_prs(repo, args.days)
            if _pr_has_reviewer_comment_since(repo, pr_num, reviewer_lc, since_iso)
        }
    prs = sorted(dedupe(pr_set))
    if not prs:
        print(
            f"No PRs in {repo} with review comments from {args.reviewer} in the last {args.days} days.",
            file=sys.stderr,
        )
        return
    # Determine output directory under the repo root (cwd when outside a repo).
    repo_root = detect_repo_root() or os.getcwd()
    out_dir = os.path.join(repo_root, "prs", args.reviewer)
    ensure_dir(out_dir)
    print(f"Found {len(prs)} PR(s). Writing Markdown to {out_dir}")
    results: List[Tuple[int, str]] = []
    # pr2md calls are I/O-bound, so a thread pool parallelizes them cheaply.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, args.jobs)) as ex:
        futs = [
            ex.submit(run_pr2md, pr2md_path, repo, pr_num, args.reviewer, out_dir)
            for pr_num in prs
        ]
        for fut in concurrent.futures.as_completed(futs):
            results.append(fut.result())
    ok = sum(1 for _, s in results if s == "ok")
    failures = [(n, s) for n, s in results if s != "ok"]
    for n, s in failures:
        print(f"PR {n}: {s}", file=sys.stderr)
    print(f"Done. {ok}/{len(prs)} succeeded.")
# Standard script guard: run the CLI only when executed directly.
if __name__ == "__main__":
    main()