mirror of
https://github.com/openai/codex.git
synced 2026-05-27 06:25:48 +00:00
513 lines
16 KiB
Python
Executable File
513 lines
16 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import argparse
|
|
import json
|
|
import re
|
|
import shlex
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import List, Optional, Sequence, Tuple
|
|
|
|
|
|
def header(msg: str) -> None:
    """Emit a progress banner of the form ``==> <msg>`` on standard error."""
    banner = "==> {}".format(msg)
    print(banner, file=sys.stderr)
|
|
|
|
|
|
USAGE_TEXT = """
|
|
Usage: scripts/release_gen.py [--dump-only] [-q|--quiet] [--repo <owner/repo>] [--repo-dir <path>] [--gh-timeout-secs N] [--codex-timeout-secs N] [owner/repo] <from_tag> <to_tag> [version]
|
|
|
|
Examples:
|
|
scripts/release_gen.py --repo openai/codex v0.23.0 v0.24.0
|
|
scripts/release_gen.py --repo-dir ../codex-repo v0.23.0 v0.24.0 # detect repo from this directory's git remote
|
|
scripts/release_gen.py v0.23.0 v0.24.0 # auto-detect repo from current dir's git remote
|
|
scripts/release_gen.py v0.23.0 v0.24.0 0.24.0 # auto-detect with explicit version
|
|
scripts/release_gen.py --dump-only v0.23.0 v0.24.0 # only generate releases/release_dump_<ver>.txt
|
|
scripts/release_gen.py -q v0.23.0 v0.24.0 # quiet Codex call with progress dots
|
|
|
|
Notes:
|
|
- Requires: gh (GitHub CLI) for dump generation; codex CLI for note generation.
|
|
- If release_dump_<ver>.txt does not exist, it will be created automatically.
|
|
- Then runs codex to generate <ver>.txt based on the dump (unless --dump-only).
|
|
- If you omit tags, the script lists the last 20 releases for the repo.
|
|
- Timeouts: set with --gh-timeout-secs (default 60) and --codex-timeout-secs (default 300).
|
|
""".strip()
|
|
|
|
|
|
# -------- argument parsing (shell-like) --------
|
|
|
|
|
|
@dataclass
class Args:
    """Command-line options after parsing, plus the leftover positionals."""

    # True when --dump-only was given.
    dump_only: bool
    # True when -q/--quiet was given.
    quiet: bool
    # Value of --repo, or None when the flag was not given.
    repo: Optional[str]
    # Resolved path given to --repo-dir, or None when the flag was not given.
    repo_dir: Optional[Path]
    # Value of --gh-timeout-secs.
    gh_timeout_secs: int
    # Value of --codex-timeout-secs.
    codex_timeout_secs: int
    # Arguments that were not recognised as options, in command-line order.
    rest: List[str]
|
|
|
|
|
|
def parse_args(argv: Sequence[str]) -> Args:
    """Parse the command line into an ``Args`` record.

    The parser is deliberately small and shell-like: recognised options may
    appear anywhere, value-taking options consume the following argument, and
    every other argument is collected, in order, into ``Args.rest``.  A bare
    ``--`` ends option processing so that everything after it is positional.

    Exits with status 2 (after printing an error to stderr) when an option is
    missing its value, a timeout is not an integer, or an unrecognised option
    is supplied.  ``-h``/``--help`` prints the usage text and exits with 0.
    """
    dump_only = False
    quiet = False
    repo: Optional[str] = None
    repo_dir: Optional[Path] = None
    gh_timeout_secs = 60
    codex_timeout_secs = 300
    rest: List[str] = []

    it = iter(argv)

    def usage_error(message: str) -> SystemExit:
        # Report the problem and hand back the exception for the caller to raise.
        print(f"Error: {message}", file=sys.stderr)
        return SystemExit(2)

    def take_value(flag: str, kind: str) -> str:
        # Consume the argument following ``flag`` from the shared iterator.
        try:
            return next(it)
        except StopIteration:
            raise usage_error(f"{flag} requires {kind}") from None

    def take_int(flag: str) -> int:
        raw = take_value(flag, "an integer")
        try:
            return int(raw)
        except ValueError:
            raise usage_error(f"{flag} requires an integer") from None

    for a in it:
        if a == "--dump-only":
            dump_only = True
        elif a in ("-q", "--quiet"):
            quiet = True
        elif a == "--repo":
            repo = take_value(a, "a value")
        elif a == "--repo-dir":
            repo_dir = Path(take_value(a, "a path")).resolve()
        elif a == "--gh-timeout-secs":
            gh_timeout_secs = take_int(a)
        elif a == "--codex-timeout-secs":
            codex_timeout_secs = take_int(a)
        elif a in ("-h", "--help"):
            print(USAGE_TEXT)
            sys.exit(0)
        elif a == "--":
            # Everything after the terminator is positional, even if it starts with '-'.
            rest.extend(it)
        elif a.startswith("-") and a != "-":
            # A mistyped flag must not silently become a tag name.
            raise usage_error(f"unknown option {a} (see --help)")
        else:
            rest.append(a)

    return Args(
        dump_only=dump_only,
        quiet=quiet,
        repo=repo,
        repo_dir=repo_dir,
        gh_timeout_secs=gh_timeout_secs,
        codex_timeout_secs=codex_timeout_secs,
        rest=rest,
    )
|
|
|
|
|
|
# -------- helpers --------
|
|
|
|
|
|
def run(
    cmd: Sequence[str],
    check: bool = True,
    capture: bool = True,
    text: bool = True,
    env: Optional[dict] = None,
    cwd: Optional[Path] = None,
    timeout: Optional[float] = None,
) -> subprocess.CompletedProcess:
    """Thin wrapper over ``subprocess.run`` with this script's defaults.

    By default the command's output is captured as text and a non-zero exit
    status raises ``subprocess.CalledProcessError``.  ``cwd`` may be a
    ``Path``; it is converted to ``str`` before being handed on.
    """
    working_dir = None if cwd is None else str(cwd)
    options = dict(
        check=check,
        capture_output=capture,
        text=text,
        env=env,
        cwd=working_dir,
        timeout=timeout,
    )
    return subprocess.run(cmd, **options)
|
|
|
|
|
|
def which(name: str) -> Optional[str]:
    """Return the path of executable ``name`` as located by ``shutil.which``, or None."""
    import shutil

    located = shutil.which(name)
    return located
|
|
|
|
|
|
def abspath(p: Path) -> Path:
    """Return ``p`` resolved via ``Path.resolve()``."""
    resolved = p.resolve()
    return resolved
|
|
|
|
|
|
def _owner_repo_from_remote_url(url: str) -> Optional[str]:
    """Extract ``owner/repo`` from a git remote URL, or None if it has no such path.

    Handles ``scheme://[user@]host[:port]/owner/repo[.git]`` and the scp-like
    ``[user@]host:owner/repo[.git]`` form.  The host (and port) are discarded
    so they can never be mistaken for the owner.
    """
    url = url.strip()
    # scheme://[user@]host[:port]/path  (also tolerates "host:path" after a scheme)
    m = re.match(
        r"^[A-Za-z][A-Za-z0-9+.-]*://(?:[^/@]+@)?[^/:]+(?::\d+)?[:/](.+)$", url
    )
    if m is None:
        # scp-like syntax: [user@]host:path (path must not start with '/')
        m = re.match(r"^(?:[^/@]+@)?[^/:]+:(?!/)(.+)$", url)
    if m is None:
        return None
    path = m.group(1).strip("/")
    if path.endswith(".git"):
        path = path[: -len(".git")]
    parts = [segment for segment in path.split("/") if segment]
    if len(parts) >= 2:
        return f"{parts[0]}/{parts[1]}"
    return None


def detect_repo_from_git(repo_dir: Optional[Path] = None) -> Optional[str]:
    """Guess ``owner/repo`` from the git remotes of ``repo_dir`` (or the cwd).

    The ``origin`` remote is tried first, then ``upstream``; the first remote
    that reports a non-empty URL is parsed.  Returns None when no remote is
    configured, the URL has no ``owner/repo`` path, git cannot be executed, or
    ``repo_dir`` cannot be used as a working directory.
    """
    for remote in ("origin", "upstream"):
        try:
            cp = run(["git", "remote", "get-url", remote], cwd=repo_dir)
        except subprocess.CalledProcessError:
            # This remote is not configured; try the next one.
            continue
        except OSError:
            # git is not installed or repo_dir does not exist: let the caller
            # report "failed to auto-detect" instead of crashing.
            return None
        url = cp.stdout.strip()
        if url:
            return _owner_repo_from_remote_url(url)
    return None
|
|
|
|
|
|
def show_recent_releases_and_exit(repo: str, gh_timeout_secs: int) -> None:
    """Print a usage hint and the repo's 20 most recent release tags, then exit.

    Everything is written to stderr.  The process always exits with status 1:
    this is the "required tags were not supplied" path.  Failures to query
    ``gh`` (non-zero exit, timeout, or gh not being installed) are reported as
    a one-line error instead of a traceback.
    """
    hint = (
        "",
        "Please pass a source/target release.",
        "",
        "e.g.: ./scripts/release_gen.py -q rust-v0.23.0 rust-v0.24.0",
        "",
    )
    for line in hint:
        print(line, file=sys.stderr)
    header(f"Recent releases for {repo}:")
    print("", file=sys.stderr)

    try:
        cp = run(
            ["gh", "release", "list", "--repo", repo, "--limit", "20"],
            timeout=gh_timeout_secs,
        )
    except subprocess.CalledProcessError:
        print(f"Error: unable to fetch releases for {repo}", file=sys.stderr)
        sys.exit(1)
    except subprocess.TimeoutExpired:
        print(
            f"Error: timed out after {gh_timeout_secs}s fetching releases for {repo}",
            file=sys.stderr,
        )
        sys.exit(1)
    except OSError:
        print("Error: gh (GitHub CLI) is required", file=sys.stderr)
        sys.exit(1)

    for line in cp.stdout.splitlines():
        if not line.strip():
            continue
        # When piped, gh prints tab-separated columns: title, type, tag, date.
        # Titles may contain spaces and need not equal the tag, so prefer the
        # tag column and fall back to the first token for other layouts.
        columns = line.split("\t")
        if len(columns) >= 3 and columns[2].strip():
            tag = columns[2].strip()
        else:
            tag = line.split()[0]
        print(f"- {tag}", file=sys.stderr)
    sys.exit(1)
|
|
|
|
|
|
# -------- dump generation (ported) --------
|
|
|
|
|
|
def gh_json(args: Sequence[str], gh_timeout_secs: int) -> dict:
    """Run ``gh`` with ``args`` and return its stdout decoded as JSON."""
    completed = run(["gh"] + list(args), timeout=gh_timeout_secs)
    payload = json.loads(completed.stdout)
    return payload
|
|
|
|
|
|
def get_tag_datetime_iso(repo: str, tag: str, gh_timeout_secs: int) -> str:
    """Return an ISO timestamp for ``tag`` in ``repo``.

    The release's ``publishedAt`` is used when ``gh release view`` succeeds
    and reports one.  Otherwise the tag ref is resolved to a commit through
    the API (following one level of annotated tag object) and that commit's
    committer date is returned, or ``""`` if the date is absent.

    Raises RuntimeError when no commit SHA can be found for the tag.
    """
    release_view = [
        "gh", "release", "view", tag,
        "--repo", repo,
        "--json", "publishedAt",
        "--jq", ".publishedAt",
    ]
    try:
        published = run(release_view, timeout=gh_timeout_secs).stdout.strip()
    except subprocess.CalledProcessError:
        # No release for this tag (or gh failed): use the commit date instead.
        published = ""
    if published and published != "null":
        return published

    ref = gh_json(["api", f"repos/{repo}/git/ref/tags/{tag}"], gh_timeout_secs)
    target = ref.get("object", {})
    target_type = target.get("type")
    target_url = target.get("url")

    if target_type == "tag" and target_url:
        # The ref points at a tag object; the commit is one hop further.
        tag_object = gh_json(["api", target_url], gh_timeout_secs)
        sha = (tag_object.get("object") or {}).get("sha")
    else:
        sha = (target or {}).get("sha")
    if not sha:
        raise RuntimeError(f"Failed to resolve commit for tag {tag}")

    commit = gh_json(["api", f"repos/{repo}/commits/{sha}"], gh_timeout_secs)
    committer = (commit.get("commit") or {}).get("committer") or {}
    return committer.get("date") or ""
|
|
|
|
|
|
def _parse_iso_to_utc(ts: str) -> Optional[datetime]:
|
|
"""Parse an ISO-8601 timestamp into an aware UTC datetime.
|
|
|
|
Accepts inputs like "2024-08-01T12:34:56Z" or with offsets like
|
|
"2024-08-01T22:34:56+10:00" and normalizes them to UTC for safe
|
|
chronological comparisons.
|
|
|
|
Returns None if parsing fails or the input is empty.
|
|
"""
|
|
if not ts:
|
|
return None
|
|
s = ts.strip()
|
|
# Python's fromisoformat doesn't accept trailing 'Z'; map it to +00:00
|
|
if s.endswith("Z"):
|
|
s = s[:-1] + "+00:00"
|
|
try:
|
|
dt = datetime.fromisoformat(s)
|
|
except ValueError:
|
|
return None
|
|
# If naive, assume UTC; otherwise convert to UTC
|
|
if dt.tzinfo is None:
|
|
dt = dt.replace(tzinfo=timezone.utc)
|
|
else:
|
|
dt = dt.astimezone(timezone.utc)
|
|
return dt
|
|
|
|
|
|
def collect_prs_within_range(repo: str, from_iso: str, to_iso: str, gh_timeout_secs: int) -> List[dict]:
    """List merged PRs of ``repo`` whose merge time lies within the given bounds.

    Up to 1000 merged PRs are fetched with ``gh pr list``; those with
    ``from_iso <= mergedAt <= to_iso`` (compared as UTC datetimes, both ends
    inclusive) are kept.  If either bound cannot be parsed, nothing is kept.

    Each result is a dict with the keys ``number``, ``title``, ``merged_at``,
    ``author`` (login, or "-" when missing) and ``body``, ordered newest
    merge first.
    """
    lower = _parse_iso_to_utc(from_iso)
    upper = _parse_iso_to_utc(to_iso)

    listing = run(
        [
            "gh", "pr", "list",
            "--repo", repo,
            "--state", "merged",
            "--limit", "1000",
            "--json", "number,title,mergedAt,author,body",
        ],
        timeout=gh_timeout_secs,
    )
    fetched = json.loads(listing.stdout)

    def in_window(pr: dict) -> bool:
        if not (lower and upper):
            return False
        merged = _parse_iso_to_utc(pr.get("mergedAt") or "")
        return bool(merged and lower <= merged <= upper)

    def merge_time(row: dict):
        # Rows without a parsable time get the epoch so they end up last
        # in the descending sort.
        return _parse_iso_to_utc(row.get("merged_at") or "") or datetime(
            1970, 1, 1, tzinfo=timezone.utc
        )

    rows = [
        {
            "number": pr.get("number"),
            "title": pr.get("title") or "",
            "merged_at": pr.get("mergedAt") or "",
            "author": ((pr.get("author") or {}).get("login")) or "-",
            "body": pr.get("body") or "",
        }
        for pr in fetched
        if in_window(pr)
    ]
    return sorted(rows, key=merge_time, reverse=True)
|
|
|
|
|
|
# Closing keyword, then whitespace and/or colons, then an optional owner/repo
# qualifier and "#<number>".  The leading \b keeps words that merely end in a
# keyword ("prefix #7", "suffix: #3") from being read as "fix #7".
_ISSUE_CLOSING_RE = re.compile(
    r"(?i)\b(?:close|closed|closes|fix|fixed|fixes|resolve|resolved|resolves)"
    r"[\s:]+(?:[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+)?#(\d+)"
)


def format_related_issues(body: str) -> str:
    """Summarise the issues a PR body closes, e.g. ``"#3, #12"``.

    Issue numbers following a closing keyword are de-duplicated and listed in
    ascending numeric order.  Returns ``"-"`` when the body names none.
    """
    body = body.replace("\r", "")
    numbers = sorted({int(n) for n in _ISSUE_CLOSING_RE.findall(body)})
    if not numbers:
        return "-"
    return ", ".join(f"#{n}" for n in numbers)
|
|
|
|
|
|
def generate_dump(repo: str, from_tag: str, to_tag: str, out_file: Path, gh_timeout_secs: int) -> None:
    """Write a text dump of the PRs merged between two tags to ``out_file``.

    The dump starts with a short header (repository, tag range with resolved
    timestamps, generation time, PR count) followed by one entry per PR.  PR
    descriptions are cut to 2000 characters and are left out entirely for
    Dependabot authors.  Parent directories of ``out_file`` are created.

    Exits with status 1 when ``gh`` is not on PATH or a tag date cannot be
    resolved.  Progress is reported on stderr.
    """
    if not which("gh"):
        print("Error: gh (GitHub CLI) is required", file=sys.stderr)
        sys.exit(1)

    header(f"Resolving tag dates ({from_tag} -> {to_tag})")
    from_iso = get_tag_datetime_iso(repo, from_tag, gh_timeout_secs)
    to_iso = get_tag_datetime_iso(repo, to_tag, gh_timeout_secs)
    if not from_iso or not to_iso:
        print(
            f"Error: failed to resolve tag dates. from={from_tag} ({from_iso}) to={to_tag} ({to_iso})",
            file=sys.stderr,
        )
        sys.exit(1)

    header("Collecting merged PRs via gh pr list")
    prs = collect_prs_within_range(repo, from_iso, to_iso, gh_timeout_secs)
    total = len(prs)

    header(f"Writing {out_file} (Total PRs: {total})")
    out_file.parent.mkdir(parents=True, exist_ok=True)

    description_limit = 2000
    dependabot_authors = ("app/dependabot", "dependabot[bot]")
    entry_fields = ("title", "number", "merged_at", "author", "body")

    with out_file.open("w", encoding="utf-8") as sink:
        generated = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        sink.writelines(
            [
                f"Repository: {repo}\n",
                f"Range: {from_tag} ({from_iso}) -> {to_tag} ({to_iso})\n",
                f"Generated: {generated}\n",
                f"Total PRs: {total}\n\n",
            ]
        )

        for pr in prs:
            title, number, merged_at, author, body = (pr[key] for key in entry_fields)
            issues = format_related_issues(body)
            entry = [
                f"PR #{number}: {title}\n",
                f"Merged: {merged_at} | Author: {author}\n",
                f"Related issues: {issues}\n\n",
            ]

            from_dependabot = (
                author in dependabot_authors
                or re.search(r"[Dd]ependabot", author) is not None
            )
            if not from_dependabot:
                excerpt = body[:description_limit]
                if len(body) > description_limit:
                    excerpt += "..."
                entry.append("Description:\n")
                entry.append(excerpt + "\n\n")

            entry.append("-----\n\n")
            sink.writelines(entry)

    header(f"Done -> {out_file}")
|
|
|
|
|
|
def build_prompt(dump_path: Path) -> str:
    """Assemble the codex prompt: the dump, the request, then the example notes.

    The example is read from ``prompts/release_notes_example.md`` next to this
    script; when that file does not exist the example part is left empty.
    """
    example_file = Path(__file__).resolve().parent / "prompts" / "release_notes_example.md"
    dump_text = dump_path.read_text(encoding="utf-8")
    try:
        example_text = example_file.read_text(encoding="utf-8")
    except FileNotFoundError:
        example_text = ""

    request = (
        "Please generate a summarized release note based on the list of PRs above. "
        "Then, write your suggested release notes. "
        "It should follow this structure (+ the style/tone/brevity in this example):"
    )
    sections = [dump_text, "", "---", "", request, "", example_text, ""]
    return "\n".join(sections)
|
|
|
|
|
|
def run_codex(prompt: str, quiet: bool, gen_file: str, timeout_secs: int) -> int:
    """Run ``codex exec`` on ``prompt`` and return a shell-style exit status.

    codex is asked (``--output-last-message``) to write its final message to
    ``gen_file``.  In quiet mode codex's output is discarded and a dot is
    printed to stderr roughly once per second; otherwise codex's stdout and
    stderr are both streamed to this process's stderr.

    Returns codex's exit status, 127 when the codex executable cannot be
    found or started, or 124 when ``timeout_secs`` elapsed and codex was
    killed.  A ``timeout_secs`` of 0 disables the limit in both modes.
    """
    if not which("codex"):
        print(
            "Error: codex CLI is required for generation. Use --dump-only to skip.",
            file=sys.stderr,
        )
        return 127

    # NOTE(review): the whole prompt travels as one argv entry; a very large
    # dump may hit the OS per-argument size limit - confirm whether codex can
    # take the prompt on stdin instead.
    # str() so that a pathlib.Path passed by a caller becomes a plain argument.
    cmd = ["codex", "exec", "--sandbox", "read-only", "--output-last-message", str(gen_file), prompt]
    limit = timeout_secs or None

    if not quiet:
        # Stream to stderr like the shell script did.
        try:
            done = subprocess.run(
                cmd, stdout=sys.stderr, stderr=sys.stderr, text=True, timeout=limit
            )
        except subprocess.TimeoutExpired:
            return 124
        except FileNotFoundError:
            return 127
        return done.returncode

    try:
        child = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        return 127

    started = time.monotonic()
    status = 124
    while True:
        try:
            # Doubles as the one-second tick between progress dots.
            status = child.wait(timeout=1)
            break
        except subprocess.TimeoutExpired:
            pass
        if limit is not None and (time.monotonic() - started) > limit:
            child.kill()
            child.wait()  # reap the killed process so it does not linger as a zombie
            status = 124
            break
        print(".", end="", file=sys.stderr, flush=True)
    # Terminate the line of dots on every exit path.
    print("", file=sys.stderr)
    return status
|
|
|
|
|
|
def main(argv: Sequence[str]) -> int:
    """Entry point: build the PR dump if needed, then have codex write the notes.

    Positional arguments are ``[owner/repo] <from_tag> <to_tag> [version]``.
    The repository comes from ``--repo``, else from a leading positional that
    contains a ``/``, else from the git remotes.  The version defaults to
    ``to_tag`` with leading ``v`` characters removed and names the files
    ``releases/release_dump_<ver>.txt`` and ``releases/<ver>.txt`` next to
    this script.

    On success the generated notes are written to stdout and 0 is returned.
    A non-zero value is returned when the repository cannot be determined,
    the dump cannot be generated, codex fails, or codex produces no file.
    """
    pargs = parse_args(argv)

    rest = pargs.rest
    # repo optional first arg unless --repo provided
    repo: Optional[str]
    if pargs.repo:
        repo = pargs.repo
    elif rest and "/" in rest[0]:
        repo = rest[0]
        rest = rest[1:]
    else:
        repo = detect_repo_from_git(pargs.repo_dir) or ""
    if not repo:
        print(
            "Error: failed to auto-detect repository from git remote. Provide --repo <owner/repo> explicitly.",
            file=sys.stderr,
        )
        return 1

    if len(rest) < 2:
        show_recent_releases_and_exit(repo, pargs.gh_timeout_secs)
        return 1  # unreachable

    from_tag, to_tag = rest[0], rest[1]
    ver = rest[2] if len(rest) >= 3 else to_tag
    ver = ver.lstrip("v")

    script_dir = Path(__file__).resolve().parent
    releases_dir = script_dir / "releases"
    dump_file = releases_dir / f"release_dump_{ver}.txt"
    gen_file = releases_dir / f"{ver}.txt"

    # Create dump if missing
    if not dump_file.exists():
        header(f"Dump not found: {dump_file}. Generating...")
        try:
            generate_dump(repo, from_tag, to_tag, dump_file, pargs.gh_timeout_secs)
        except subprocess.TimeoutExpired:
            print(
                f"Error: gh timed out after {pargs.gh_timeout_secs}s while generating the dump.",
                file=sys.stderr,
            )
            return 1
        except subprocess.CalledProcessError as exc:
            command = " ".join(str(part) for part in exc.cmd)
            detail = (exc.stderr or "").strip()
            print(f"Error: command failed ({exc.returncode}): {command}", file=sys.stderr)
            if detail:
                print(detail, file=sys.stderr)
            return 1
        except (RuntimeError, ValueError) as exc:
            # Unresolvable tag, or gh returned something that is not JSON.
            print(f"Error: {exc}", file=sys.stderr)
            return 1
    else:
        header(f"Using existing dump: {dump_file}")

    if pargs.dump_only:
        return 0

    dump_path = abspath(dump_file)
    prompt = build_prompt(dump_path)
    header(f"Calling codex to generate {gen_file}")
    status = run_codex(prompt, pargs.quiet, str(gen_file), pargs.codex_timeout_secs)

    if status != 0:
        # gen_file may be left over from an earlier run; do not present stale
        # notes as the result of a failed or timed-out generation.
        print(
            f"Error: codex exited with status {status}; not using {gen_file}.",
            file=sys.stderr,
        )
        return status if status > 0 else 1

    if not gen_file.exists():
        print(f"Warning: {gen_file} not created. Check codex output.", file=sys.stderr)
        return 1

    # Output only the generated release notes to stdout
    sys.stdout.write(gen_file.read_text(encoding="utf-8"))
    return 0
|
|
|
|
|
|
# Script entry point: forward the CLI arguments (without the program name)
# to main() and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    raise SystemExit(exit_status)
|