From 76845d716b720ca701b2c91fec75431532e66c74 Mon Sep 17 00:00:00 2001 From: Eric Traut Date: Sun, 10 May 2026 09:55:42 -0700 Subject: [PATCH] Deduplicate issue digest interactions by user (#22039) ## Summary The issue digest uses recent posts, comments, and reactions to decide which issues deserve attention. A single active user could previously raise an issue's apparent importance by commenting or reacting multiple times in the window. This changes `codex-issue-digest` so `user_interactions` counts unique human GitHub users per issue across new issue posts, new comments, and new reactions. Raw reaction/comment counts are still preserved for detail output, and the skill guidance now describes `Interactions` as a unique-human-user count. --- .codex/skills/codex-issue-digest/SKILL.md | 8 +-- .../scripts/collect_issue_digest.py | 33 ++++++++-- .../scripts/test_collect_issue_digest.py | 64 +++++++++++++++++++ 3 files changed, 94 insertions(+), 11 deletions(-) diff --git a/.codex/skills/codex-issue-digest/SKILL.md b/.codex/skills/codex-issue-digest/SKILL.md index e502e892fb..bae4bf9b67 100644 --- a/.codex/skills/codex-issue-digest/SKILL.md +++ b/.codex/skills/codex-issue-digest/SKILL.md @@ -53,7 +53,7 @@ Use `--window "past week"` or `--window-hours 168` when the user asks for a non- ## Summary No major issues reported by users. -Source: collector v4, git `abc123def456`, window `2026-04-27T00:00:00Z` to `2026-04-28T00:00:00Z`. +Source: collector v5, git `abc123def456`, window `2026-04-27T00:00:00Z` to `2026-04-28T00:00:00Z`. Want details? I can expand this into the issue table. ``` @@ -65,7 +65,7 @@ Two issues are being surfaced by users: 🔥🔥 Terminal launch hangs on startup [1](https://github.com/openai/codex/issues/123) 🔥 Resume switches model providers unexpectedly [2](https://github.com/openai/codex/issues/456) -Source: collector v4, git `abc123def456`, window `2026-04-27T00:00:00Z` to `2026-04-28T00:00:00Z`. 
+Source: collector v5, git `abc123def456`, window `2026-04-27T00:00:00Z` to `2026-04-28T00:00:00Z`. Want details? I can expand this into the issue table. ``` 5. In `## Details`, when details are requested, include a compact table only when useful: @@ -76,7 +76,7 @@ Want details? I can expand this into the issue table. - A clear quiet/no-concern sentence when there is no meaningful signal. 6. Use the JSON `attention_marker` exactly. It is empty for normal rows, `🔥` for elevated rows, and `🔥🔥` for very high-attention rows. The actual cutoffs are in `attention_thresholds`. 7. Use inline numbered references where a row or bullet points to issues, for example `Compaction bugs [1](https://github.com/openai/codex/issues/123), [2](https://github.com/openai/codex/issues/456)`. Do not add a separate footnotes section. -8. Label `interactions` as `Interactions`; it counts posts/comments/reactions during the requested window, not unique people. +8. Label `interactions` as `Interactions`; it counts unique human GitHub users who created a new issue, added a new comment, or reacted during the requested window. Multiple posts/reactions from the same user on the same issue count once. 9. Mention the collector `script_version`, repo checkout `git_head`, and time window in one compact source line. In default mode, put this before the details prompt so the final line still asks whether the user wants details. In details-upfront mode, it can be the footer. ## Reaction Handling @@ -89,7 +89,7 @@ GitHub issue search is still seeded by issue `updated_at`, so a purely reaction- ## Attention Markers -The collector scales attention markers by the requested time window. The baseline is 5 human user interactions for `🔥` and 10 for `🔥🔥` over 24 hours; longer or shorter windows scale those cutoffs linearly and round up. For example, a one-week report uses 35 and 70 interactions. 
Human user interactions are human-authored new issue posts, human-authored new comments, and human reactions created during the window, including upvotes. Bot posts and bot reactions are excluded. In prose, explain this as high user interaction rather than naming the emoji. +The collector scales attention markers by the requested time window. The baseline is 5 unique human users for `🔥` and 10 unique human users for `🔥🔥` over 24 hours; longer or shorter windows scale those cutoffs linearly and round up. For example, a one-week report uses cutoffs of 35 and 70 unique human users. Unique human users are users who authored a new issue, authored a new comment, or reacted during the window, including upvotes. Multiple actions from the same user on the same issue count once. Bot posts and bot reactions are excluded. In prose, explain this as high user interaction rather than naming the emoji. ## Freshness diff --git a/.codex/skills/codex-issue-digest/scripts/collect_issue_digest.py b/.codex/skills/codex-issue-digest/scripts/collect_issue_digest.py index a4f3982db2..80b54d6106 100755 --- a/.codex/skills/codex-issue-digest/scripts/collect_issue_digest.py +++ b/.codex/skills/codex-issue-digest/scripts/collect_issue_digest.py @@ -11,7 +11,7 @@ from datetime import datetime, timedelta, timezone from pathlib import Path from urllib.parse import quote -SCRIPT_VERSION = 4 +SCRIPT_VERSION = 5 QUALIFYING_KIND_LABELS = ("bug", "enhancement") REACTION_KEYS = ("+1", "-1", "laugh", "hooray", "confused", "heart", "rocket", "eyes") BASE_ATTENTION_WINDOW_HOURS = 24.0 @@ -393,9 +393,15 @@ def is_bot_login(login): return bool(login) and login.lower().endswith("[bot]") -def is_human_user(user_obj): +def human_login_key(user_obj): login = extract_login(user_obj) - return bool(login) and not is_bot_login(login) + if not login or is_bot_login(login): + return "" + return login.casefold() + + +def is_human_user(user_obj): + return bool(human_login_key(user_obj)) def label_names(issue): @@ -467,22 +473,26 @@ def 
reaction_summary(item): def reaction_event_summary(reactions, since, until): counts = {} total = 0 + users = set() for reaction in reactions or []: if not isinstance(reaction, dict): continue if not is_in_window(str(reaction.get("created_at") or ""), since, until): continue - if not is_human_user(reaction.get("user")): + user_key = human_login_key(reaction.get("user")) + if not user_key: continue content = str(reaction.get("content") or "") if not content: continue counts[content] = counts.get(content, 0) + 1 total += 1 + users.add(user_key) return { "total": total, "counts": counts, "upvotes": counts.get("+1", 0), + "users": sorted(users, key=str.casefold), } @@ -618,13 +628,21 @@ def summarize_issue( new_comment_reaction_total = sum( comment["reaction_total"] for comment in new_comments ) - new_issue_user_interaction = new_issue and is_human_user(issue.get("user")) + new_issue_user_key = human_login_key(issue.get("user")) if new_issue else "" + new_issue_user_interaction = bool(new_issue_user_key) new_comment_user_interactions = sum( 1 for comment in new_comments if comment["human_user_interaction"] ) - user_interactions = ( - int(new_issue_user_interaction) + new_comment_user_interactions + new_reactions + interaction_user_keys = set(issue_reaction_events_summary["users"]) + interaction_user_keys.update(comment_reaction_events_summary["users"]) + if new_issue_user_key: + interaction_user_keys.add(new_issue_user_key) + interaction_user_keys.update( + comment["author"].casefold() + for comment in new_comments + if comment["human_user_interaction"] ) + user_interactions = len(interaction_user_keys) attention_level = attention_level_for(user_interactions, attention_thresholds) attention_marker = attention_marker_for(user_interactions, attention_thresholds) updated_without_visible_new_post = ( @@ -957,6 +975,7 @@ def collect_digest(args): "New issue comments are filtered by comment creation time within the window from the fetched comment set.", "Reaction events are 
counted by GitHub reaction created_at timestamps for hydrated issues and fetched comments.", "Current reaction totals are standing engagement signals; new_reactions and new_upvotes are windowed activity.", + "user_interactions counts unique human users per issue across new issues, new comments, and new reactions; repeated actions by the same user count once.", "The collector does not assign semantic clusters; use summary_inputs as model-ready evidence for report-time clustering.", "Pure reaction-only issues may be missed if GitHub issue search does not surface them via updated_at.", "Issues updated during the window without a new issue body or new comment are retained because label/status edits can still be useful owner signals.", diff --git a/.codex/skills/codex-issue-digest/scripts/test_collect_issue_digest.py b/.codex/skills/codex-issue-digest/scripts/test_collect_issue_digest.py index 8619f867ac..f1ba54e7f0 100644 --- a/.codex/skills/codex-issue-digest/scripts/test_collect_issue_digest.py +++ b/.codex/skills/codex-issue-digest/scripts/test_collect_issue_digest.py @@ -494,6 +494,70 @@ def test_reactions_count_toward_attention_markers(): assert summary["new_comments"][0]["new_upvotes"] == 0 +def test_user_interactions_are_deduped_by_human_login(): + since = collect_issue_digest.parse_timestamp("2026-04-25T00:00:00Z", "--since") + until = collect_issue_digest.parse_timestamp("2026-04-26T00:00:00Z", "--until") + + def comment(comment_id, login): + return { + "id": comment_id, + "created_at": f"2026-04-25T0{comment_id + 1}:00:00Z", + "updated_at": f"2026-04-25T0{comment_id + 1}:00:00Z", + "user": {"login": login}, + "body": "same issue", + } + + def reaction(content, login, created_at="2026-04-25T10:00:00Z"): + return { + "content": content, + "created_at": created_at, + "user": {"login": login}, + } + + issue = { + "number": 790, + "title": "Repeated pings should not boost attention", + "html_url": "https://github.com/openai/codex/issues/790", + "state": "open", + 
"created_at": "2026-04-25T01:00:00Z", + "updated_at": "2026-04-25T12:00:00Z", + "user": {"login": "Alice"}, + "labels": [{"name": "bug"}, {"name": "tui"}], + } + comments = [comment(1, "alice"), comment(2, "ALICE"), comment(3, "bob")] + comments.append(comment(4, "github-actions[bot]")) + issue_reactions = [ + reaction("+1", "alice"), + reaction("rocket", "Alice"), + reaction("+1", "bob"), + reaction("+1", "github-actions[bot]"), + reaction("+1", "carol", created_at="2026-04-24T23:00:00Z"), + ] + comment_reactions_by_id = { + 1: [reaction("heart", "alice")], + 2: [reaction("+1", "bob")], + 3: [reaction("eyes", "carol")], + } + + summary = collect_issue_digest.summarize_issue( + issue, + comments, + ["tui"], + since, + until, + body_chars=100, + comment_chars=100, + issue_reaction_events=issue_reactions, + comment_reactions_by_id=comment_reactions_by_id, + ) + + assert summary["activity"]["new_human_comments"] == 3 + assert summary["new_reactions"] == 6 + assert summary["user_interactions"] == 3 + assert summary["attention"] is False + assert summary["attention_marker"] == "" + + def test_digest_rows_are_table_ready_with_concise_descriptions(): rows = collect_issue_digest.digest_rows( [