Fix issue deduplication workflow for Codex issues (#13215)

Fixes #13203

Summary
- split the duplicate-finding workflow into two jobs so we gather all
issues first
- add an open-issue fallback job that runs only when the full scan finds
nothing
- centralize final selection so `comment-on-issue` always sees the best
dedupe output
This commit is contained in:
Eric Traut
2026-03-01 22:45:50 -07:00
committed by GitHub
parent 0aeb55bf08
commit d94f0b6ce7

View File

@@ -7,15 +7,17 @@ on:
- labeled
jobs:
gather-duplicates:
name: Identify potential duplicates
gather-duplicates-all:
name: Identify potential duplicates (all issues)
# Prevent runs on forks (requires OpenAI API key, wastes Actions minutes)
if: github.repository == 'openai/codex' && (github.event.action == 'opened' || (github.event.action == 'labeled' && github.event.label.name == 'codex-deduplicate'))
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
codex_output: ${{ steps.select-final.outputs.codex_output }}
issues_json: ${{ steps.normalize-all.outputs.issues_json }}
reason: ${{ steps.normalize-all.outputs.reason }}
has_matches: ${{ steps.normalize-all.outputs.has_matches }}
steps:
- uses: actions/checkout@v6
@@ -29,7 +31,6 @@ jobs:
CURRENT_ISSUE_FILE=codex-current-issue.json
EXISTING_ALL_FILE=codex-existing-issues-all.json
EXISTING_OPEN_FILE=codex-existing-issues-open.json
gh issue list --repo "$REPO" \
--json number,title,body,createdAt,updatedAt,state,labels \
@@ -47,22 +48,6 @@ jobs:
}]' \
> "$EXISTING_ALL_FILE"
gh issue list --repo "$REPO" \
--json number,title,body,createdAt,updatedAt,state,labels \
--limit 1000 \
--state open \
--search "sort:created-desc" \
| jq '[.[] | {
number,
title,
body: ((.body // "")[0:4000]),
createdAt,
updatedAt,
state,
labels: ((.labels // []) | map(.name))
}]' \
> "$EXISTING_OPEN_FILE"
gh issue view "$ISSUE_NUMBER" \
--repo "$REPO" \
--json number,title,body \
@@ -71,7 +56,6 @@ jobs:
echo "Prepared duplicate detection input files."
echo "all_issue_count=$(jq 'length' "$EXISTING_ALL_FILE")"
echo "open_issue_count=$(jq 'length' "$EXISTING_OPEN_FILE")"
# Prompt instructions are intentionally inline in this workflow. The old
# .github/prompts/issue-deduplicator.txt file is obsolete and removed.
@@ -158,9 +142,59 @@ jobs:
echo "has_matches=$has_matches"
} >> "$GITHUB_OUTPUT"
gather-duplicates-open:
name: Identify potential duplicates (open issues fallback)
# Pass 1 may drop sudo on the runner, so run the fallback in a fresh job.
needs: gather-duplicates-all
if: ${{ needs.gather-duplicates-all.result == 'success' && needs.gather-duplicates-all.outputs.has_matches != 'true' }}
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
issues_json: ${{ steps.normalize-open.outputs.issues_json }}
reason: ${{ steps.normalize-open.outputs.reason }}
has_matches: ${{ steps.normalize-open.outputs.has_matches }}
steps:
- uses: actions/checkout@v6
- name: Prepare Codex inputs
env:
GH_TOKEN: ${{ github.token }}
REPO: ${{ github.repository }}
ISSUE_NUMBER: ${{ github.event.issue.number }}
run: |
set -eo pipefail
CURRENT_ISSUE_FILE=codex-current-issue.json
EXISTING_OPEN_FILE=codex-existing-issues-open.json
gh issue list --repo "$REPO" \
--json number,title,body,createdAt,updatedAt,state,labels \
--limit 1000 \
--state open \
--search "sort:created-desc" \
| jq '[.[] | {
number,
title,
body: ((.body // "")[0:4000]),
createdAt,
updatedAt,
state,
labels: ((.labels // []) | map(.name))
}]' \
> "$EXISTING_OPEN_FILE"
gh issue view "$ISSUE_NUMBER" \
--repo "$REPO" \
--json number,title,body \
| jq '{number, title, body: ((.body // "")[0:4000])}' \
> "$CURRENT_ISSUE_FILE"
echo "Prepared fallback duplicate detection input files."
echo "open_issue_count=$(jq 'length' "$EXISTING_OPEN_FILE")"
- id: codex-open
name: Find duplicates (pass 2, open issues)
if: ${{ steps.normalize-all.outputs.has_matches != 'true' }}
uses: openai/codex-action@main
with:
openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
@@ -200,7 +234,6 @@ jobs:
- id: normalize-open
name: Normalize pass 2 output
if: ${{ steps.normalize-all.outputs.has_matches != 'true' }}
env:
CODEX_OUTPUT: ${{ steps.codex-open.outputs.final-message }}
CURRENT_ISSUE_NUMBER: ${{ github.event.issue.number }}
@@ -243,15 +276,27 @@ jobs:
echo "has_matches=$has_matches"
} >> "$GITHUB_OUTPUT"
select-final:
name: Select final duplicate set
needs:
- gather-duplicates-all
- gather-duplicates-open
if: ${{ always() && needs.gather-duplicates-all.result == 'success' && (needs.gather-duplicates-open.result == 'success' || needs.gather-duplicates-open.result == 'skipped') }}
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
codex_output: ${{ steps.select-final.outputs.codex_output }}
steps:
- id: select-final
name: Select final duplicate set
env:
PASS1_ISSUES: ${{ steps.normalize-all.outputs.issues_json }}
PASS1_REASON: ${{ steps.normalize-all.outputs.reason }}
PASS2_ISSUES: ${{ steps.normalize-open.outputs.issues_json }}
PASS2_REASON: ${{ steps.normalize-open.outputs.reason }}
PASS1_HAS_MATCHES: ${{ steps.normalize-all.outputs.has_matches }}
PASS2_HAS_MATCHES: ${{ steps.normalize-open.outputs.has_matches }}
PASS1_ISSUES: ${{ needs.gather-duplicates-all.outputs.issues_json }}
PASS1_REASON: ${{ needs.gather-duplicates-all.outputs.reason }}
PASS2_ISSUES: ${{ needs.gather-duplicates-open.outputs.issues_json }}
PASS2_REASON: ${{ needs.gather-duplicates-open.outputs.reason }}
PASS1_HAS_MATCHES: ${{ needs.gather-duplicates-all.outputs.has_matches }}
PASS2_HAS_MATCHES: ${{ needs.gather-duplicates-open.outputs.has_matches }}
run: |
set -eo pipefail
@@ -289,8 +334,8 @@ jobs:
comment-on-issue:
name: Comment with potential duplicates
needs: gather-duplicates
if: ${{ needs.gather-duplicates.result != 'skipped' }}
needs: select-final
if: ${{ needs.select-final.result != 'skipped' }}
runs-on: ubuntu-latest
permissions:
contents: read
@@ -299,7 +344,7 @@ jobs:
- name: Comment on issue
uses: actions/github-script@v8
env:
CODEX_OUTPUT: ${{ needs.gather-duplicates.outputs.codex_output }}
CODEX_OUTPUT: ${{ needs.select-final.outputs.codex_output }}
with:
github-token: ${{ github.token }}
script: |