mirror of
https://github.com/openai/codex.git
synced 2026-05-03 10:56:37 +00:00
## Why

The `codex-issue-digest` skill was producing more detail than the daily digest needed, and broad all-area digests could miss active issues. In particular, issue #16088 had substantial recent comments and reactions but did not appear in the weekly all-areas output because GitHub search was using default relevance ranking and the collector could exhaust its candidate cap before later search queries got a fair sample. That made the digest look quieter than the underlying user activity and made threshold tuning misleading.

## What changed

- Make the digest summary headline-first and summary-only by default.
- Add an explicit opt-in flow for `## Details`, so the issue table is shown only when requested or when the prompt asks for details upfront.
- Update the collector to request GitHub issue search results with `sort=updated` and `order=desc`.
- Apply the search candidate cap per query instead of globally across all queries.
- Bump the collector script version to `3`.
- Add tests that cover updated sorting and per-query candidate limits.

## Verification

- `pytest .codex/skills/codex-issue-digest/scripts/test_collect_issue_digest.py`
- `ruff check .codex/skills/codex-issue-digest/scripts/collect_issue_digest.py .codex/skills/codex-issue-digest/scripts/test_collect_issue_digest.py`
- `git diff --check`
- Reran the all-areas weekly collector and confirmed #16088 is now included with `55` interactions.
995 lines
34 KiB
Python
Executable File
995 lines
34 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Collect recent openai/codex issue activity for owner-focused digests."""
|
|
|
|
import argparse
|
|
import json
|
|
import math
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
from urllib.parse import quote
|
|
|
|
# Version stamp reported under source.script_version in the digest output.
SCRIPT_VERSION = 4

# An issue must carry at least one of these labels to be summarized.
QUALIFYING_KIND_LABELS = (
    "bug",
    "enhancement",
)

# Per-content keys read from an item's "reactions" object.
REACTION_KEYS = (
    "+1",
    "-1",
    "laugh",
    "hooray",
    "confused",
    "heart",
    "rocket",
    "eyes",
)

# The interaction thresholds below are defined for this window length and are
# scaled proportionally for longer or shorter windows.
BASE_ATTENTION_WINDOW_HOURS = 24.0
ONE_ATTENTION_INTERACTION_THRESHOLD = 5
TWO_ATTENTION_INTERACTION_THRESHOLD = 10

# Case-folded label phrases treated as a request for every feature area.
ALL_LABEL_PHRASES = {
    "all",
    "all areas",
    "all labels",
    "all-areas",
    "all-labels",
    "*",
}
|
|
|
|
|
|
class GhCommandError(RuntimeError):
    """Raised when a `gh` invocation fails or yields an unusable payload."""
|
|
|
|
|
|
def parse_args():
    """Parse the collector's command-line options from ``sys.argv``.

    Returns:
        argparse.Namespace: The parsed options (repo, label selection, window
        bounds, and hydration limits).
    """
    cli = argparse.ArgumentParser(
        description="Collect recent GitHub issue activity for a Codex owner digest."
    )
    # Each entry is (flag, add_argument keyword arguments), registered in order.
    option_specs = (
        (
            "--repo",
            {"default": "openai/codex", "help": "OWNER/REPO, default openai/codex"},
        ),
        (
            "--labels",
            {
                "nargs": "+",
                "default": [],
                "help": "Feature-area labels owned by the digest recipient, for example: tui exec",
            },
        ),
        (
            "--all-labels",
            {
                "action": "store_true",
                "help": "Collect bug/enhancement issues across all feature-area labels",
            },
        ),
        (
            "--window",
            {"help": 'Lookback duration such as "24h", "7d", "1w", or "past week"'},
        ),
        (
            "--window-hours",
            {"type": float, "default": 24.0, "help": "Lookback window"},
        ),
        (
            "--since",
            {"help": "UTC ISO timestamp override for the window start"},
        ),
        (
            "--until",
            {"help": "UTC ISO timestamp override for the window end"},
        ),
        (
            "--limit-issues",
            {
                "type": int,
                "default": 200,
                "help": "Maximum candidate issues to hydrate after search",
            },
        ),
        (
            "--body-chars",
            {"type": int, "default": 1200, "help": "Issue body excerpt length"},
        ),
        (
            "--comment-chars",
            {"type": int, "default": 900, "help": "Comment excerpt length"},
        ),
        (
            "--max-comment-pages",
            {
                "type": int,
                "default": 3,
                "help": (
                    "Maximum pages of issue comments to hydrate per issue after applying the "
                    "window filter. Use 0 with --fetch-all-comments for no page cap."
                ),
            },
        ),
        (
            "--fetch-all-comments",
            {
                "action": "store_true",
                "help": "Hydrate complete issue comment histories instead of only window-updated comments.",
            },
        ),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
|
|
|
|
|
|
def parse_timestamp(value, arg_name):
    """Parse an ISO-8601 timestamp string into an aware UTC datetime.

    Args:
        value: Timestamp text, or None.
        arg_name: Name used in the error message when parsing fails.

    Returns:
        A timezone-aware datetime in UTC, or None when ``value`` is None or
        blank.

    Raises:
        ValueError: If the text is not a valid ISO timestamp.
    """
    text = value.strip() if value is not None else ""
    if not text:
        return None
    # Rewrite a trailing "Z" as an explicit UTC offset before parsing.
    iso_text = f"{text[:-1]}+00:00" if text.endswith("Z") else text
    try:
        moment = datetime.fromisoformat(iso_text)
    except ValueError as err:
        raise ValueError(f"{arg_name} must be an ISO timestamp") from err
    # Naive timestamps are interpreted as UTC.
    if moment.tzinfo is None:
        moment = moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(timezone.utc)
|
|
|
|
|
|
def format_timestamp(value):
    """Render an aware datetime as a second-precision UTC string ending in "Z".

    Args:
        value: A timezone-aware datetime.

    Returns:
        The ISO-8601 text in UTC with microseconds dropped and the "+00:00"
        offset replaced by "Z".
    """
    utc_value = value.astimezone(timezone.utc)
    whole_seconds = utc_value.replace(microsecond=0)
    rendered = whole_seconds.isoformat()
    return rendered.replace("+00:00", "Z")
|
|
|
|
|
|
def resolve_window(args):
    """Resolve the digest's [since, until) window from parsed CLI options.

    ``--until`` defaults to the current UTC time. ``--since`` wins when given;
    otherwise the window start is ``until`` minus the ``--window`` duration,
    falling back to ``--window-hours`` (24.0 when the attribute is absent).

    Args:
        args: Namespace-like object with ``since`` and ``until`` attributes and
            optional ``window`` / ``window_hours`` attributes.

    Returns:
        A ``(since, until)`` tuple of timezone-aware UTC datetimes.

    Raises:
        ValueError: If a timestamp or duration cannot be parsed, the duration
            is not a positive finite number of hours, the duration is too large
            to represent, or ``since`` is not strictly before ``until``.
    """
    until = parse_timestamp(args.until, "--until") or datetime.now(timezone.utc)
    since = parse_timestamp(args.since, "--since")
    if since is None:
        hours = parse_duration_hours(getattr(args, "window", None))
        if hours is None:
            hours = getattr(args, "window_hours", 24.0)
        # argparse's float type accepts "nan" and "inf"; reject them here so
        # the failure is a ValueError the CLI reports cleanly.
        if not math.isfinite(hours):
            raise ValueError("window duration must be a finite number of hours")
        if hours <= 0:
            raise ValueError("window duration must be > 0")
        try:
            since = until - timedelta(hours=hours)
        except OverflowError as err:
            # Raised when the duration or the resulting date is out of range.
            raise ValueError("window duration is too large") from err
    if since >= until:
        raise ValueError("--since must be before --until")
    return since, until
|
|
|
|
|
|
def parse_duration_hours(value):
    """Convert a human-friendly lookback duration into hours.

    Accepts the aliases "day", "24h", "week" and "7d", or a number followed by
    an hour, day, or week unit. A leading "past" or "last" is ignored and
    underscores are treated as spaces.

    Args:
        value: Duration text, or None.

    Returns:
        The duration in hours as a float, or None when ``value`` is None or
        blank.

    Raises:
        ValueError: If the text is not a supported duration.
    """
    if value is None:
        return None
    text = value.strip().casefold().replace("_", " ")
    if not text:
        return None
    text = re.sub(r"^(past|last)\s+", "", text)

    named = {"day": 24.0, "24h": 24.0, "week": 168.0, "7d": 168.0}
    if text in named:
        return named[text]

    # (unit alternation, hours per unit), tried in order.
    unit_table = (
        ("h|hr|hrs|hour|hours", 1.0),
        ("d|day|days", 24.0),
        ("w|week|weeks", 168.0),
    )
    for unit_pattern, hours_per_unit in unit_table:
        found = re.fullmatch(rf"(\d+(?:\.\d+)?)\s*({unit_pattern})", text)
        if found is not None:
            return float(found.group(1)) * hours_per_unit
    raise ValueError(f"Unsupported duration: {value}")
|
|
|
|
|
|
def normalize_requested_labels(labels, all_labels=False):
    """Split, trim, and de-duplicate the requested feature-area labels.

    Each raw value may hold several comma-separated labels. Duplicates are
    dropped case-insensitively, keeping the first spelling seen.

    Args:
        labels: Iterable of raw label strings.
        all_labels: True when every feature area was requested explicitly.

    Returns:
        ``(labels, all_labels)``: the cleaned label list and False, or an empty
        list and True when all areas are requested (by flag, or because the
        labels spell one of ALL_LABEL_PHRASES).

    Raises:
        ValueError: If no label remains and all areas were not requested.
    """
    # Keyed by case-folded name; insertion order keeps first-seen ordering.
    first_spelling = {}
    for raw in labels:
        for piece in raw.split(","):
            name = piece.strip()
            if name:
                first_spelling.setdefault(name.casefold(), name)

    phrase = " ".join(first_spelling)
    if all_labels or phrase in ALL_LABEL_PHRASES:
        return [], True

    cleaned = list(first_spelling.values())
    if not cleaned:
        raise ValueError(
            "At least one feature-area label is required, or use --all-labels"
        )
    return cleaned, False
|
|
|
|
|
|
def quote_label(label):
    """Build a ``label:`` search qualifier for a label name.

    Args:
        label: The label name.

    Returns:
        ``label:NAME`` when the name only uses letters, digits, and ``_.:-``;
        otherwise ``label:"NAME"`` with embedded double quotes
        backslash-escaped.
    """
    if re.fullmatch(r"[A-Za-z0-9_.:-]+", label) is None:
        escaped = label.replace('"', '\\"')
        return f'label:"{escaped}"'
    return f"label:{label}"
|
|
|
|
|
|
def build_search_queries(
    repo, owner_labels, since, kind_labels=QUALIFYING_KIND_LABELS, all_labels=False
):
    """Build the issue-search query strings for the digest.

    Args:
        repo: ``OWNER/REPO`` to search.
        owner_labels: Feature-area labels; ignored when ``all_labels`` is true.
        since: Window start; only its calendar date is used in the query.
        kind_labels: Kind labels to pair with each owner label.
        all_labels: When true, emit one query per kind label with no
            owner-label qualifier.

    Returns:
        A list of query strings: one per kind label when ``all_labels`` is
        true, otherwise one per (owner label, kind label) pair in
        owner-major order.
    """
    shared_terms = [
        f"repo:{repo}",
        "is:issue",
        f"updated:>={since.date().isoformat()}",
    ]
    # A single empty group produces kind-only queries for the all-areas case.
    if all_labels:
        owner_term_groups = [[]]
    else:
        owner_term_groups = [[quote_label(owner)] for owner in owner_labels]
    return [
        " ".join([*shared_terms, *owner_terms, quote_label(kind)])
        for owner_terms in owner_term_groups
        for kind in kind_labels
    ]
|
|
|
|
|
|
def _format_gh_error(cmd, err):
    """Compose a multi-line message describing a failed `gh` command.

    Args:
        cmd: The command as a list of strings.
        err: An object with ``stdout`` and ``stderr`` attributes (each a string
            or None).

    Returns:
        The failure headline followed by ``stdout:`` and ``stderr:`` lines for
        whichever streams have non-blank content.
    """
    lines = [f"GitHub CLI command failed: {' '.join(cmd)}"]
    for stream in ("stdout", "stderr"):
        captured = (getattr(err, stream) or "").strip()
        if captured:
            lines.append(f"{stream}: {captured}")
    return "\n".join(lines)
|
|
|
|
|
|
def gh_json(args):
    """Run ``gh`` with the given arguments and decode its stdout as JSON.

    Args:
        args: Arguments to pass after the ``gh`` executable name.

    Returns:
        The decoded JSON value, or None when the command prints nothing.

    Raises:
        GhCommandError: If ``gh`` is not installed, exits non-zero, or prints
            output that is not valid JSON.
    """
    command = ["gh", *args]
    try:
        completed = subprocess.run(
            command, check=True, capture_output=True, text=True
        )
    except FileNotFoundError as err:
        raise GhCommandError("`gh` command not found") from err
    except subprocess.CalledProcessError as err:
        raise GhCommandError(_format_gh_error(command, err)) from err

    output = completed.stdout.strip()
    if output:
        try:
            return json.loads(output)
        except json.JSONDecodeError as err:
            raise GhCommandError(
                f"Failed to parse JSON from gh output for {' '.join(args)}"
            ) from err
    return None
|
|
|
|
|
|
def gh_text(args):
    """Run ``gh`` and return its trimmed stdout, or "" on any failure.

    This is the best-effort counterpart to ``gh_json``: a missing executable
    or a non-zero exit yields an empty string instead of an exception.

    Args:
        args: Arguments to pass after the ``gh`` executable name.

    Returns:
        The command's stdout with surrounding whitespace removed, or "".
    """
    command = ["gh", *args]
    try:
        completed = subprocess.run(
            command, check=True, capture_output=True, text=True
        )
    except (FileNotFoundError, subprocess.CalledProcessError):
        return ""
    else:
        return completed.stdout.strip()
|
|
|
|
|
|
def git_head():
    """Return the 12-character abbreviated hash of ``HEAD``, if available.

    Returns:
        The short commit hash printed by ``git rev-parse``, or None when git
        is missing, the command fails, or it prints nothing.
    """
    command = ["git", "rev-parse", "--short=12", "HEAD"]
    try:
        completed = subprocess.run(
            command, check=True, capture_output=True, text=True
        )
    except (FileNotFoundError, subprocess.CalledProcessError):
        return None
    short_hash = completed.stdout.strip()
    return short_hash if short_hash else None
|
|
|
|
|
|
def skill_relative_path():
    """Return this script's path, relative to the working directory if possible.

    Returns:
        The resolved script path expressed relative to the resolved current
        working directory, or the absolute resolved path when the script does
        not live under it.
    """
    script_path = Path(__file__).resolve()
    working_dir = Path.cwd().resolve()
    try:
        shown = script_path.relative_to(working_dir)
    except ValueError:
        # Not under the working directory; fall back to the absolute path.
        shown = script_path
    return str(shown)
|
|
|
|
|
|
def gh_api_list_paginated(endpoint, per_page=100, max_pages=None, with_metadata=False):
    """Fetch a list-valued ``gh api`` endpoint page by page.

    Paging stops at the first empty response, at the first page shorter than
    ``per_page``, or once ``max_pages`` full pages have been read.

    Args:
        endpoint: API path, with or without an existing query string.
        per_page: Page size requested from the API.
        max_pages: Optional cap on the number of pages to read.
        with_metadata: When true, return a dict with paging details instead of
            the bare item list.

    Returns:
        The concatenated items, or (with ``with_metadata``) a dict with
        ``items``, ``truncated`` (the page cap was hit on a full page),
        ``pages`` (number of the last page requested), and ``max_pages``.

    Raises:
        GhCommandError: If a page's payload is not a list, or ``gh`` fails.
    """
    joiner = "&" if "?" in endpoint else "?"
    collected = []
    hit_page_cap = False
    page_number = 1
    while True:
        batch = gh_json(
            ["api", f"{endpoint}{joiner}per_page={per_page}&page={page_number}"]
        )
        if batch is None:
            break
        if not isinstance(batch, list):
            raise GhCommandError(f"Unexpected paginated payload from gh api {endpoint}")
        collected.extend(batch)
        if len(batch) < per_page:
            # A short page is the last page.
            break
        if max_pages is not None and page_number >= max_pages:
            hit_page_cap = True
            break
        page_number += 1

    if not with_metadata:
        return collected
    return {
        "items": collected,
        "truncated": hit_page_cap,
        "pages": page_number,
        "max_pages": max_pages,
    }
|
|
|
|
|
|
def search_issue_numbers(queries, limit):
    """Search GitHub for candidate issue numbers, most recently updated first.

    Each query is paged with ``sort=updated`` / ``order=desc`` until it runs
    out of results, has yielded ``limit`` issues, or reaches the search API's
    result ceiling. Results from all queries are merged by issue number, and
    the merged set is re-sorted by ``updated_at`` and cut to ``limit``.

    Args:
        queries: Iterable of search query strings.
        limit: Maximum issues to sample per query and to return overall.

    Returns:
        Up to ``limit`` issue numbers ordered by ``(updated_at, number)``
        descending. Empty when ``limit`` is not positive.

    Raises:
        GhCommandError: If ``gh`` fails or a search payload has an unexpected
            shape.
    """
    # A non-positive cap selects nothing; returning early also stops a
    # negative value from being read as a "drop the tail" slice below.
    if limit <= 0:
        return []

    per_page = 100
    # GitHub search serves only the first 1,000 results per query, so asking
    # for a page beyond this would fail the whole collection run.
    max_search_pages = 10

    updated_by_number = {}
    for query in queries:
        seen_for_query = 0
        for page in range(1, max_search_pages + 1):
            payload = gh_json(
                [
                    "api",
                    "search/issues",
                    "-X",
                    "GET",
                    "-f",
                    f"q={query}",
                    "-f",
                    "sort=updated",
                    "-f",
                    "order=desc",
                    "-f",
                    f"per_page={per_page}",
                    "-f",
                    f"page={page}",
                ]
            )
            if not isinstance(payload, dict):
                raise GhCommandError("Unexpected payload from GitHub issue search")
            items = payload.get("items") or []
            if not isinstance(items, list):
                raise GhCommandError("Expected search `items` to be a list")
            for item in items:
                if not isinstance(item, dict):
                    continue
                number = item.get("number")
                if isinstance(number, int):
                    updated_by_number[number] = str(item.get("updated_at") or "")
                    seen_for_query += 1
            # The cap is applied per query so later queries get a fair sample.
            if len(items) < per_page or seen_for_query >= limit:
                break

    ordered = sorted(
        updated_by_number,
        key=lambda number: (updated_by_number[number], number),
        reverse=True,
    )
    return ordered[:limit]
|
|
|
|
|
|
def fetch_issue(repo, number):
    """Fetch one issue's JSON object via ``gh api``.

    Args:
        repo: ``OWNER/REPO``.
        number: Issue number.

    Returns:
        The issue payload as a dict.

    Raises:
        GhCommandError: If the payload is not a dict, or ``gh`` fails.
    """
    issue = gh_json(["api", f"repos/{repo}/issues/{number}"])
    if isinstance(issue, dict):
        return issue
    raise GhCommandError(f"Unexpected issue payload for #{number}")
|
|
|
|
|
|
def fetch_comments(repo, number, since=None, max_pages=None):
    """Fetch an issue's comments, optionally limited by ``since`` and a page cap.

    Args:
        repo: ``OWNER/REPO``.
        number: Issue number.
        since: Optional aware datetime passed to the API as the ``since``
            query parameter.
        max_pages: Optional cap on pages to read.

    Returns:
        The metadata dict produced by ``gh_api_list_paginated`` (items plus
        paging details).
    """
    base = f"repos/{repo}/issues/{number}/comments"
    if since is None:
        endpoint = base
    else:
        # Percent-encode the whole timestamp, including ":" characters.
        since_param = quote(format_timestamp(since), safe="")
        endpoint = f"{base}?since={since_param}"
    return gh_api_list_paginated(endpoint, max_pages=max_pages, with_metadata=True)
|
|
|
|
|
|
def fetch_reactions_for_item(endpoint, item):
    """Fetch reaction events for an item, skipping the call when it has none.

    Args:
        endpoint: Reactions API path for the item.
        item: Issue or comment payload carrying a ``reactions`` summary.

    Returns:
        The list of reaction events, or an empty list when the item's summary
        reports no reactions.
    """
    has_reactions = reaction_summary(item)["total"] > 0
    return gh_api_list_paginated(endpoint) if has_reactions else []
|
|
|
|
|
|
def fetch_comment_reactions(repo, comments):
    """Fetch reaction events for each comment that has an id.

    Args:
        repo: ``OWNER/REPO``.
        comments: Iterable of comment payload dicts.

    Returns:
        A dict mapping comment id to its list of reaction events. Comments
        whose id is None or "" are skipped.
    """
    identified = ((comment.get("id"), comment) for comment in comments)
    return {
        comment_id: fetch_reactions_for_item(
            f"repos/{repo}/issues/comments/{comment_id}/reactions", comment
        )
        for comment_id, comment in identified
        if comment_id not in (None, "")
    }
|
|
|
|
|
|
def extract_login(user_obj):
    """Return the ``login`` of a user payload as a string.

    Args:
        user_obj: A user dict, or any other value.

    Returns:
        The login text, or "" when ``user_obj`` is not a dict or has no
        truthy ``login``.
    """
    if not isinstance(user_obj, dict):
        return ""
    login = user_obj.get("login")
    return str(login or "")
|
|
|
|
|
|
def is_bot_login(login):
    """Report whether a login ends with the ``[bot]`` suffix, ignoring case.

    Args:
        login: Login text; may be empty or None.

    Returns:
        True for a non-empty login ending in ``[bot]``, otherwise False.
    """
    if not login:
        return False
    return login.lower().endswith("[bot]")
|
|
|
|
|
|
def is_human_user(user_obj):
    """Report whether a user payload has a login that is not a bot login.

    Args:
        user_obj: A user dict, or any other value.

    Returns:
        True when a login can be extracted and it is not a bot login.
    """
    login = extract_login(user_obj)
    if not login:
        return False
    return not is_bot_login(login)
|
|
|
|
|
|
def label_names(issue):
    """List an issue's label names, sorted case-insensitively.

    Args:
        issue: Issue payload dict.

    Returns:
        Names of the dict-shaped entries under ``labels`` that have a truthy
        ``name``, as strings.
    """
    names = [
        str(entry["name"])
        for entry in issue.get("labels") or []
        if isinstance(entry, dict) and entry.get("name")
    ]
    names.sort(key=str.casefold)
    return names
|
|
|
|
|
|
def matching_labels(labels, requested):
    """Select the requested labels that appear among an issue's labels.

    Matching is case-insensitive; results keep the requested spelling and
    order.

    Args:
        labels: Label names present on the issue.
        requested: Label names the caller asked for.

    Returns:
        The subset of ``requested`` found in ``labels``.
    """
    present = {label.casefold() for label in labels}
    return [wanted for wanted in requested if wanted.casefold() in present]
|
|
|
|
|
|
def area_labels(labels):
    """Return the labels that are not qualifying kind labels.

    Args:
        labels: Label names on an issue.

    Returns:
        The labels, in order, with any QUALIFYING_KIND_LABELS entry removed
        (compared case-insensitively).
    """
    excluded = {kind.casefold() for kind in QUALIFYING_KIND_LABELS}
    areas = []
    for label in labels:
        if label.casefold() in excluded:
            continue
        areas.append(label)
    return areas
|
|
|
|
|
|
def attention_thresholds_for_window(window_hours):
    """Scale the attention thresholds to the length of the digest window.

    The base thresholds apply to a BASE_ATTENTION_WINDOW_HOURS window and grow
    proportionally with the window, rounded up. ``elevated`` is at least 1 and
    ``very_high`` is always strictly greater than ``elevated``.

    Args:
        window_hours: Window length in hours.

    Returns:
        A dict with ``base_window_hours``, ``window_hours``, ``scale``,
        ``elevated``, and ``very_high``.

    Raises:
        ValueError: If ``window_hours`` is not positive.
    """
    if window_hours <= 0:
        raise ValueError("window_hours must be > 0")

    # Round first so float noise in the window length cannot bump a ceil().
    hours = round(window_hours, 6)
    scale = hours / BASE_ATTENTION_WINDOW_HOURS

    elevated = math.ceil(ONE_ATTENTION_INTERACTION_THRESHOLD * scale)
    if elevated < 1:
        elevated = 1
    very_high = math.ceil(TWO_ATTENTION_INTERACTION_THRESHOLD * scale)
    if very_high <= elevated:
        very_high = elevated + 1

    return {
        "base_window_hours": BASE_ATTENTION_WINDOW_HOURS,
        "window_hours": round(hours, 3),
        "scale": round(scale, 3),
        "elevated": elevated,
        "very_high": very_high,
    }
|
|
|
|
|
|
def attention_level_for(user_interactions, attention_thresholds=None):
    """Map an interaction count to an attention level of 0, 1, or 2.

    Args:
        user_interactions: Number of user interactions in the window.
        attention_thresholds: Dict with ``elevated`` and ``very_high`` cutoffs;
            when falsy, the thresholds for the base window are used.

    Returns:
        2 at or above ``very_high``, 1 at or above ``elevated``, otherwise 0.
    """
    thresholds = attention_thresholds or attention_thresholds_for_window(
        BASE_ATTENTION_WINDOW_HOURS
    )
    # Check the higher level first so it takes precedence.
    for level, cutoff_key in ((2, "very_high"), (1, "elevated")):
        if user_interactions >= thresholds[cutoff_key]:
            return level
    return 0
|
|
|
|
|
|
def attention_marker_for(user_interactions, attention_thresholds=None):
    """Return one fire emoji per attention level (empty string at level 0).

    Args:
        user_interactions: Number of user interactions in the window.
        attention_thresholds: Optional thresholds passed through to
            ``attention_level_for``.

    Returns:
        "", "🔥", or "🔥🔥".
    """
    level = attention_level_for(user_interactions, attention_thresholds)
    return "🔥" * level
|
|
|
|
|
|
def reaction_summary(item):
    """Summarize the standing reaction counts on an issue or comment payload.

    Args:
        item: Payload dict that may carry a ``reactions`` object.

    Returns:
        A dict with ``total`` and ``counts``. ``counts`` holds the non-zero
        integer values for each key in REACTION_KEYS; ``total`` is the
        payload's ``total_count`` when that is an integer, otherwise the sum
        of ``counts``. Both are empty/zero when ``reactions`` is not a dict.
    """
    raw = item.get("reactions")
    if not isinstance(raw, dict):
        return {"total": 0, "counts": {}}

    observed = ((key, raw.get(key, 0)) for key in REACTION_KEYS)
    counts = {
        key: value for key, value in observed if isinstance(value, int) and value
    }
    total = raw.get("total_count")
    if not isinstance(total, int):
        total = sum(counts.values())
    return {"total": total, "counts": counts}
|
|
|
|
|
|
def reaction_event_summary(reactions, since, until):
    """Count reaction events created by human users inside the window.

    Args:
        reactions: Iterable of reaction event dicts, or None.
        since: Inclusive window start.
        until: Exclusive window end.

    Returns:
        A dict with ``total`` (events counted), ``counts`` (per reaction
        content), and ``upvotes`` (the "+1" count).
    """
    tally = {}
    for event in reactions or []:
        if not isinstance(event, dict):
            continue
        created_at = str(event.get("created_at") or "")
        if not is_in_window(created_at, since, until):
            continue
        if not is_human_user(event.get("user")):
            continue
        content = str(event.get("content") or "")
        if not content:
            continue
        tally[content] = tally.get(content, 0) + 1
    return {
        "total": sum(tally.values()),
        "counts": tally,
        "upvotes": tally.get("+1", 0),
    }
|
|
|
|
|
|
def compact_text(value, limit):
    """Collapse whitespace and truncate text to at most ``limit`` characters.

    Runs of whitespace become single spaces and the ends are trimmed. Text
    longer than ``limit`` is cut and suffixed with "..." so that the result,
    marker included, never exceeds ``limit`` characters.

    Args:
        value: Any value; None and other falsy values are treated as "".
        limit: Maximum length of the returned text.

    Returns:
        The compacted text, or "" when ``limit`` is not positive.
    """
    text = re.sub(r"\s+", " ", str(value or "")).strip()
    if limit <= 0:
        return ""
    if len(text) <= limit:
        return text
    marker = "..."
    if limit <= len(marker):
        # No room for any text plus the marker; a hard cut says more.
        return text[:limit]
    # Reserve room for the marker so the excerpt honours the caller's limit.
    return f"{text[: limit - len(marker)].rstrip()}{marker}"
|
|
|
|
|
|
def clean_title_for_description(title):
    """Turn an issue title into a short description for digest rows.

    Whitespace is collapsed, a leading product prefix (for example
    "Codex CLI:") is removed, a leading "On Windows," becomes "Windows: ",
    stray edge punctuation is stripped, and the result is compacted with a
    limit of 80.

    Args:
        title: The issue title; may be None.

    Returns:
        The cleaned description, or "Issue needs owner review" when nothing
        is left.
    """
    text = re.sub(r"\s+", " ", str(title or "")).strip()
    text = re.sub(
        r"^(codex(?: desktop| app|\.app| cli)?|desktop|windows codex app)\s*[:,-]\s*",
        "",
        text,
        flags=re.IGNORECASE,
    )
    text = re.sub(r"^on windows,\s*", "Windows: ", text, flags=re.IGNORECASE)
    text = text.strip(" -:;")
    description = compact_text(text, 80)
    return description if description else "Issue needs owner review"
|
|
|
|
|
|
def issue_description(issue):
    """Return the cleaned-title description for an issue payload.

    Args:
        issue: Issue payload dict.

    Returns:
        The result of ``clean_title_for_description`` for the issue's title.
    """
    title = issue.get("title")
    return clean_title_for_description(title)
|
|
|
|
|
|
def is_in_window(timestamp, since, until):
    """Report whether a timestamp falls inside the half-open window.

    Args:
        timestamp: ISO timestamp text; blank or None never matches.
        since: Inclusive window start.
        until: Exclusive window end.

    Returns:
        True when ``since <= timestamp < until``.

    Raises:
        ValueError: If the timestamp text is not a valid ISO timestamp.
    """
    moment = parse_timestamp(timestamp, "timestamp")
    return moment is not None and since <= moment < until
|
|
|
|
|
|
def summarize_comment(
    comment, comment_chars, reaction_events=None, since=None, until=None
):
    """Build the digest record for one issue comment.

    Args:
        comment: Comment payload dict.
        comment_chars: Maximum length of the body excerpt.
        reaction_events: Optional reaction events for this comment.
        since: Window start; windowed reaction counts need both bounds.
        until: Window end.

    Returns:
        A dict with the comment's identity, author, timestamps, URL, standing
        reaction counts, windowed reaction counts (zero when either window
        bound is missing), and a compacted body excerpt.
    """
    standing = reaction_summary(comment)
    if since is None or until is None:
        windowed = {"total": 0, "counts": {}, "upvotes": 0}
    else:
        windowed = reaction_event_summary(reaction_events, since, until)
    written_by_human = is_human_user(comment.get("user"))

    return {
        "id": comment.get("id"),
        "author": extract_login(comment.get("user")),
        "author_association": str(comment.get("author_association") or ""),
        "created_at": str(comment.get("created_at") or ""),
        "updated_at": str(comment.get("updated_at") or ""),
        "url": str(comment.get("html_url") or ""),
        "human_user_interaction": written_by_human,
        "reactions": standing["counts"],
        "reaction_total": standing["total"],
        "new_reactions": windowed["total"],
        "new_upvotes": windowed["upvotes"],
        "new_reaction_counts": windowed["counts"],
        "body_excerpt": compact_text(comment.get("body"), comment_chars),
    }
|
|
|
|
|
|
def summarize_issue(
    issue,
    comments,
    requested_labels,
    since,
    until,
    body_chars,
    comment_chars,
    issue_reaction_events=None,
    comment_reactions_by_id=None,
    all_labels=False,
    comments_hydration=None,
    attention_thresholds=None,
):
    """Build the digest record for one issue, or None if it does not qualify.

    An issue qualifies when it carries a qualifying kind label and at least
    one owner label (any non-kind label, or "unlabeled", in all-labels mode),
    and its ``updated_at`` falls inside the window.

    Args:
        issue: Issue payload dict.
        comments: Fetched comment payload dicts for the issue.
        requested_labels: Owner labels requested by the caller.
        since: Inclusive window start.
        until: Exclusive window end.
        body_chars: Maximum length of the issue body excerpt.
        comment_chars: Maximum length of each comment excerpt.
        issue_reaction_events: Optional reaction events on the issue itself.
        comment_reactions_by_id: Optional mapping of comment id to that
            comment's reaction events.
        all_labels: True when collecting across every feature area.
        comments_hydration: Optional metadata describing how comments were
            fetched; a default derived from ``comments`` is used when falsy.
        attention_thresholds: Optional thresholds for the attention level.

    Returns:
        The issue's digest dict, or None when the issue does not qualify.
    """
    labels = label_names(issue)
    present_keys = {label.casefold() for label in labels}
    kind_labels = [
        kind for kind in QUALIFYING_KIND_LABELS if kind.casefold() in present_keys
    ]
    if all_labels:
        owner_labels = area_labels(labels) or ["unlabeled"]
    else:
        owner_labels = matching_labels(labels, requested_labels)
    if not (kind_labels and owner_labels):
        return None

    updated_at = str(issue.get("updated_at") or "")
    if not is_in_window(updated_at, since, until):
        return None

    new_issue = is_in_window(str(issue.get("created_at") or ""), since, until)
    reactions_by_comment = comment_reactions_by_id or {}

    # Only comments created inside the window are reported as new.
    new_comments = []
    for comment in comments:
        if not is_in_window(str(comment.get("created_at") or ""), since, until):
            continue
        new_comments.append(
            summarize_comment(
                comment,
                comment_chars,
                reaction_events=reactions_by_comment.get(comment.get("id")),
                since=since,
                until=until,
            )
        )
    new_comments.sort(key=lambda entry: (entry["created_at"], str(entry["id"])))

    issue_reactions = reaction_summary(issue)
    issue_events = reaction_event_summary(issue_reaction_events, since, until)
    # Windowed comment reactions cover every fetched comment, not just new ones.
    all_comment_events = []
    for events in reactions_by_comment.values():
        all_comment_events.extend(events)
    comment_events = reaction_event_summary(all_comment_events, since, until)

    new_reactions = issue_events["total"] + comment_events["total"]
    new_upvotes = issue_events["upvotes"] + comment_events["upvotes"]
    all_comment_reaction_total = sum(
        reaction_summary(comment)["total"] for comment in comments
    )
    new_comment_reaction_total = sum(
        entry["reaction_total"] for entry in new_comments
    )

    new_issue_user_interaction = new_issue and is_human_user(issue.get("user"))
    new_human_comments = sum(
        1 for entry in new_comments if entry["human_user_interaction"]
    )
    user_interactions = (
        int(new_issue_user_interaction) + new_human_comments + new_reactions
    )
    attention_level = attention_level_for(user_interactions, attention_thresholds)
    attention_marker = attention_marker_for(user_interactions, attention_thresholds)
    updated_without_visible_new_post = not (
        new_issue or new_comments or new_reactions != 0
    )

    comments_total = int(issue.get("comments") or len(comments) or 0)
    engagement_score = (
        len(new_comments) * 3
        + new_reactions
        + issue_reactions["total"]
        + new_comment_reaction_total
        + min(comments_total, 10)
    )

    hydration = comments_hydration or {
        "fetched": len(comments),
        "since": None,
        "truncated": False,
        "max_pages": None,
    }
    activity = {
        "new_issue": new_issue,
        "new_comments": len(new_comments),
        "new_human_comments": new_human_comments,
        "new_reactions": new_reactions,
        "new_upvotes": new_upvotes,
        "updated_without_visible_new_post": updated_without_visible_new_post,
    }

    return {
        "number": issue.get("number"),
        "title": str(issue.get("title") or ""),
        "description": issue_description(issue),
        "url": str(issue.get("html_url") or ""),
        "state": str(issue.get("state") or ""),
        "author": extract_login(issue.get("user")),
        "author_association": str(issue.get("author_association") or ""),
        "created_at": str(issue.get("created_at") or ""),
        "updated_at": updated_at,
        "labels": labels,
        "kind_labels": kind_labels,
        "owner_labels": owner_labels,
        "comments_total": comments_total,
        "comments_hydration": hydration,
        "issue_reactions": issue_reactions["counts"],
        "issue_reaction_total": issue_reactions["total"],
        "comment_reaction_total": all_comment_reaction_total,
        "new_comment_reaction_total": new_comment_reaction_total,
        "new_issue_reactions": issue_events["total"],
        "new_issue_upvotes": issue_events["upvotes"],
        "new_comment_reactions": comment_events["total"],
        "new_comment_upvotes": comment_events["upvotes"],
        "new_reactions": new_reactions,
        "new_upvotes": new_upvotes,
        "user_interactions": user_interactions,
        "attention": attention_level > 0,
        "attention_level": attention_level,
        "attention_marker": attention_marker,
        "engagement_score": engagement_score,
        "activity": activity,
        "body_excerpt": compact_text(issue.get("body"), body_chars),
        "new_comments": new_comments,
    }
|
|
|
|
|
|
def count_by_label(issues, labels):
    """Tally issues, new issues, and new comments for each owner label.

    Args:
        issues: Issue digest dicts with ``owner_labels`` and ``activity``.
        labels: Owner labels to report on.

    Returns:
        A dict mapping each label to ``issues``, ``new_issues``, and
        ``new_comments`` counts over the issues carrying that label.
    """
    tallies = {}
    for label in labels:
        activities = [
            issue["activity"] for issue in issues if label in issue["owner_labels"]
        ]
        tallies[label] = {
            "issues": len(activities),
            "new_issues": sum(1 for activity in activities if activity["new_issue"]),
            "new_comments": sum(activity["new_comments"] for activity in activities),
        }
    return tallies
|
|
|
|
|
|
def count_by_kind(issues):
    """Tally issues, new issues, and new comments for each kind label.

    Args:
        issues: Issue digest dicts with ``kind_labels`` and ``activity``.

    Returns:
        A dict mapping each QUALIFYING_KIND_LABELS entry to ``issues``,
        ``new_issues``, and ``new_comments`` counts.
    """
    tallies = {}
    for kind in QUALIFYING_KIND_LABELS:
        activities = [
            issue["activity"] for issue in issues if kind in issue["kind_labels"]
        ]
        tallies[kind] = {
            "issues": len(activities),
            "new_issues": sum(1 for activity in activities if activity["new_issue"]),
            "new_comments": sum(activity["new_comments"] for activity in activities),
        }
    return tallies
|
|
|
|
|
|
def hot_items(issues, limit=8):
    """Pick the hottest issues and return a compact record for each.

    Issues are ranked by attention, attention level, user interactions,
    engagement score, new comments, current reactions, and update time (all
    descending). The top ``limit`` are then taken, and of those only issues
    with a positive engagement score are returned.

    Args:
        issues: Issue digest dicts.
        limit: Number of top-ranked issues to consider.

    Returns:
        A list of compact dicts, at most ``limit`` long.
    """

    def heat_key(issue):
        return (
            issue["attention"],
            issue["attention_level"],
            issue["user_interactions"],
            issue["engagement_score"],
            issue["activity"]["new_comments"],
            issue["issue_reaction_total"] + issue["comment_reaction_total"],
            issue["updated_at"],
        )

    ranked = sorted(issues, key=heat_key, reverse=True)
    selected = []
    # The slice is applied before the engagement filter.
    for issue in ranked[:limit]:
        if issue["engagement_score"] > 0:
            selected.append(
                {
                    "number": issue["number"],
                    "title": issue["title"],
                    "url": issue["url"],
                    "owner_labels": issue["owner_labels"],
                    "kind_labels": issue["kind_labels"],
                    "attention": issue["attention"],
                    "attention_level": issue["attention_level"],
                    "attention_marker": issue["attention_marker"],
                    "user_interactions": issue["user_interactions"],
                    "new_reactions": issue["new_reactions"],
                    "new_upvotes": issue["new_upvotes"],
                    "engagement_score": issue["engagement_score"],
                    "new_comments": issue["activity"]["new_comments"],
                    "reaction_total": issue["issue_reaction_total"]
                    + issue["comment_reaction_total"],
                }
            )
    return selected
|
|
|
|
|
|
def ranked_digest_issues(issues):
    """Return the issues ordered for the digest, most notable first.

    Ordering is descending by attention, attention level, user interactions,
    engagement score, new comments, and update time.

    Args:
        issues: Issue digest dicts.

    Returns:
        A new sorted list; the input is not modified.
    """

    def digest_key(issue):
        return (
            issue["attention"],
            issue["attention_level"],
            issue["user_interactions"],
            issue["engagement_score"],
            issue["activity"]["new_comments"],
            issue["updated_at"],
        )

    return sorted(issues, key=digest_key, reverse=True)
|
|
|
|
|
|
def digest_rows(issues, limit=10, ref_map=None):
    """Build the table rows for the top-ranked issues.

    Args:
        issues: Issue digest dicts.
        limit: Maximum number of rows.
        ref_map: Optional mapping of issue number to reference index; when
            None, references are the 1-based positions in the ranking.

    Returns:
        A list of row dicts for the first ``limit`` ranked issues.
    """
    ranked = ranked_digest_issues(issues)
    if ref_map is None:
        ref_map = {
            issue["number"]: position
            for position, issue in enumerate(ranked, start=1)
        }

    def build_row(issue):
        ref = ref_map[issue["number"]]
        current_reactions = (
            issue["issue_reaction_total"] + issue["comment_reaction_total"]
        )
        return {
            "ref": ref,
            "ref_markdown": f"[{ref}]({issue['url']})",
            "marker": issue["attention_marker"],
            "attention_marker": issue["attention_marker"],
            "number": issue["number"],
            "description": issue["description"],
            "title": issue["title"],
            "url": issue["url"],
            "area": ", ".join(issue["owner_labels"]),
            "kind": ", ".join(issue["kind_labels"]),
            "state": issue["state"],
            "interactions": issue["user_interactions"],
            "user_interactions": issue["user_interactions"],
            "new_reactions": issue["new_reactions"],
            "new_upvotes": issue["new_upvotes"],
            "current_reactions": current_reactions,
        }

    return [build_row(issue) for issue in ranked[:limit]]
|
|
|
|
|
|
def issue_ref_markdown(issue, ref_map):
    """Format an issue's reference index as a markdown link to the issue.

    Args:
        issue: Issue digest dict with ``number`` and ``url``.
        ref_map: Mapping of issue number to reference index.

    Returns:
        Markdown of the form ``[REF](URL)``.

    Raises:
        KeyError: If the issue number is not in ``ref_map``.
    """
    ref = ref_map[issue["number"]]
    url = issue["url"]
    return "[{}]({})".format(ref, url)
|
|
|
|
|
|
def summary_inputs(issues, limit=80, ref_map=None):
    """Build the per-issue evidence rows used when writing the summary.

    Args:
        issues: Issue digest dicts.
        limit: Maximum number of rows.
        ref_map: Optional mapping of issue number to reference index; when
            None, references are the 1-based positions in the ranking.

    Returns:
        A list of row dicts for the first ``limit`` ranked issues.
    """
    ranked = ranked_digest_issues(issues)
    if ref_map is None:
        ref_map = {
            issue["number"]: position
            for position, issue in enumerate(ranked, start=1)
        }

    def build_row(issue):
        return {
            "ref": ref_map[issue["number"]],
            "ref_markdown": issue_ref_markdown(issue, ref_map),
            "number": issue["number"],
            "title": issue["title"],
            "description": issue["description"],
            "url": issue["url"],
            "labels": issue["labels"],
            "owner_labels": issue["owner_labels"],
            "kind_labels": issue["kind_labels"],
            "state": issue.get("state", ""),
            "attention_marker": issue.get("attention_marker", ""),
            "interactions": issue["user_interactions"],
            "new_comments": issue["activity"].get("new_comments", 0),
            "new_reactions": issue.get("new_reactions", 0),
            "new_upvotes": issue.get("new_upvotes", 0),
            "current_reactions": issue.get("issue_reaction_total", 0)
            + issue.get("comment_reaction_total", 0),
        }

    return [build_row(issue) for issue in ranked[:limit]]
|
|
|
|
|
|
def collect_digest(args):
    """Collect, hydrate, and summarize issue activity into the digest payload.

    The steps are: resolve the window and attention thresholds, search for
    candidate issues, fetch each candidate's issue, comments, and reactions,
    summarize the qualifying ones, then assemble totals, rankings, and
    metadata.

    Args:
        args: Parsed CLI options (see ``parse_args``).

    Returns:
        The digest as a JSON-serializable dict.

    Raises:
        ValueError: If the window or label options are invalid.
        GhCommandError: If a required ``gh`` call fails.
    """
    since, until = resolve_window(args)
    window_hours = (until - since).total_seconds() / 3600
    attention_thresholds = attention_thresholds_for_window(window_hours)
    requested_labels, all_labels = normalize_requested_labels(
        args.labels, all_labels=args.all_labels
    )
    queries = build_search_queries(
        args.repo, requested_labels, since, all_labels=all_labels
    )
    numbers = search_issue_numbers(queries, args.limit_issues)
    gh_version_output = gh_text(["--version"])

    # A non-positive page cap means "no cap".
    page_cap = args.max_comment_pages if args.max_comment_pages > 0 else None
    comments_since = None if args.fetch_all_comments else since

    issues = []
    for number in numbers:
        issue = fetch_issue(args.repo, number)
        comments_payload = fetch_comments(
            args.repo,
            number,
            since=comments_since,
            max_pages=page_cap,
        )
        comments = comments_payload["items"]
        issue_reaction_events = fetch_reactions_for_item(
            f"repos/{args.repo}/issues/{number}/reactions", issue
        )
        comment_reactions_by_id = fetch_comment_reactions(args.repo, comments)
        hydration = {
            "fetched": len(comments),
            "total": int(issue.get("comments") or len(comments) or 0),
            "since": format_timestamp(comments_since) if comments_since else None,
            "truncated": comments_payload["truncated"],
            "max_pages": comments_payload["max_pages"],
            "fetch_all_comments": args.fetch_all_comments,
        }
        record = summarize_issue(
            issue,
            comments,
            requested_labels,
            since,
            until,
            args.body_chars,
            args.comment_chars,
            issue_reaction_events=issue_reaction_events,
            comment_reactions_by_id=comment_reactions_by_id,
            all_labels=all_labels,
            comments_hydration=hydration,
            attention_thresholds=attention_thresholds,
        )
        if record is None:
            continue
        issues.append(record)

    issues.sort(
        key=lambda entry: (entry["updated_at"], int(entry["number"] or 0)),
        reverse=True,
    )

    def total_of(field):
        return sum(entry[field] for entry in issues)

    def count_where(predicate):
        return sum(1 for entry in issues if predicate(entry))

    totals = {
        "candidate_issues": len(numbers),
        "included_issues": len(issues),
        "new_issues": count_where(lambda entry: entry["activity"]["new_issue"]),
        "issues_with_new_comments": count_where(
            lambda entry: entry["activity"]["new_comments"] > 0
        ),
        "new_comments": sum(entry["activity"]["new_comments"] for entry in issues),
        "comments_fetched": sum(
            entry["comments_hydration"]["fetched"] for entry in issues
        ),
        "issues_with_truncated_comment_hydration": count_where(
            lambda entry: entry["comments_hydration"]["truncated"]
        ),
        "updated_without_visible_new_post": count_where(
            lambda entry: entry["activity"]["updated_without_visible_new_post"]
        ),
        "issue_reactions_current_total": total_of("issue_reaction_total"),
        "comment_reactions_current_total": total_of("comment_reaction_total"),
        "new_reactions": total_of("new_reactions"),
        "new_upvotes": total_of("new_upvotes"),
        "user_interactions": total_of("user_interactions"),
    }

    # One shared reference numbering for summary_inputs and digest_rows.
    ranked = ranked_digest_issues(issues)
    ref_map = {
        entry["number"]: position for position, entry in enumerate(ranked, start=1)
    }
    filter_label = "all" if all_labels else requested_labels

    if all_labels:
        reported_owner_labels = sorted(
            {area for entry in issues for area in entry["owner_labels"]},
            key=str.casefold,
        )
    else:
        reported_owner_labels = requested_labels

    gh_version = gh_version_output.splitlines()[0] if gh_version_output else None

    return {
        "generated_at": format_timestamp(datetime.now(timezone.utc)),
        "source": {
            "repo": args.repo,
            "skill": "codex-issue-digest",
            "collector": skill_relative_path(),
            "script_version": SCRIPT_VERSION,
            "git_head": git_head(),
            "gh_version": gh_version,
        },
        "window": {
            "since": format_timestamp(since),
            "until": format_timestamp(until),
            "hours": round(window_hours, 3),
        },
        "attention_thresholds": attention_thresholds,
        "filters": {
            "owner_labels": filter_label,
            "all_labels": all_labels,
            "kind_labels": list(QUALIFYING_KIND_LABELS),
        },
        "collection_notes": [
            "Issues are selected when they currently have bug or enhancement plus at least one requested owner label and were updated during the window.",
            "By default, issue comments are fetched with since=window_start and a max page cap to avoid long historical threads; use --fetch-all-comments when exhaustive comment history is needed.",
            "New issue comments are filtered by comment creation time within the window from the fetched comment set.",
            "Reaction events are counted by GitHub reaction created_at timestamps for hydrated issues and fetched comments.",
            "Current reaction totals are standing engagement signals; new_reactions and new_upvotes are windowed activity.",
            "The collector does not assign semantic clusters; use summary_inputs as model-ready evidence for report-time clustering.",
            "Pure reaction-only issues may be missed if GitHub issue search does not surface them via updated_at.",
            "Issues updated during the window without a new issue body or new comment are retained because label/status edits can still be useful owner signals.",
        ],
        "totals": totals,
        "by_owner_label": count_by_label(issues, reported_owner_labels),
        "by_kind_label": count_by_kind(issues),
        "hot_items": hot_items(issues),
        "summary_inputs": summary_inputs(issues, ref_map=ref_map),
        "digest_rows": digest_rows(issues, ref_map=ref_map),
        "issues": issues,
    }
|
|
|
|
|
|
def main():
    """Run the collector and print the digest as JSON.

    Returns:
        0 on success. On a collection error, a one-line message is written to
        stderr and 1 is returned.
    """
    args = parse_args()
    try:
        digest = collect_digest(args)
    except (GhCommandError, RuntimeError, ValueError) as err:
        sys.stderr.write(f"collect_issue_digest.py error: {err}\n")
        return 1
    else:
        rendered = json.dumps(digest, indent=2, sort_keys=True)
        sys.stdout.write(rendered + "\n")
        return 0
|
|
|
|
|
|
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())
|