From 1212161d1d170c4edd8643d16ef54ae76b244bbb Mon Sep 17 00:00:00 2001
From: Bryan Morgan
Date: Wed, 14 Jan 2026 15:56:16 -0500
Subject: [PATCH] chore(automation): recursive labeling for workstream descendants (#16609)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 .github/scripts/pr-triage.sh                  |   2 +-
 .github/scripts/sync-maintainer-labels.cjs    | 369 ++++++++++++++++++
 .../gemini-scheduled-issue-triage.yml         |  23 +-
 .../workflows/label-backlog-child-issues.yml  |  71 ++--
 4 files changed, 422 insertions(+), 43 deletions(-)
 create mode 100644 .github/scripts/sync-maintainer-labels.cjs

diff --git a/.github/scripts/pr-triage.sh b/.github/scripts/pr-triage.sh
index 45dfcf7a3c..ddbe4182ce 100755
--- a/.github/scripts/pr-triage.sh
+++ b/.github/scripts/pr-triage.sh
@@ -55,7 +55,7 @@ process_pr_optimized() {
     if [[ -z "${ISSUE_NUMBER}" || "${ISSUE_NUMBER}" == "null" || "${ISSUE_NUMBER}" == "" ]]; then
         if [[ "${IS_DRAFT}" == "true" ]]; then
             echo " 📝 PR #${PR_NUMBER} is a draft and has no linked issue"
-            if [[ ",${CURRENT_LABELS}," == ",status/need-issue,"* ]]; then
+            if [[ ",${CURRENT_LABELS}," == *",status/need-issue,"* ]]; then
                 echo " ➖ Removing status/need-issue label"
                 LABELS_TO_REMOVE="status/need-issue"
             fi
diff --git a/.github/scripts/sync-maintainer-labels.cjs b/.github/scripts/sync-maintainer-labels.cjs
new file mode 100644
index 0000000000..ab2358d369
--- /dev/null
+++ b/.github/scripts/sync-maintainer-labels.cjs
@@ -0,0 +1,369 @@
+/* eslint-disable @typescript-eslint/no-require-imports */
+/* global process, console, require */
+const { Octokit } = require('@octokit/rest');
+
+/**
+ * Sync Maintainer Labels (Recursive with strict parent-child relationship detection)
+ * - Uses Native Sub-issues.
+ * - Uses Markdown Task Lists (- [ ] #123).
+ * - Filters for OPEN issues only.
+ * - Skips DUPLICATES.
+ * - Skips Pull Requests.
+ * - ONLY labels issues in the PUBLIC (gemini-cli) repo.
+ */
+
+const REPO_OWNER = 'google-gemini';
+const PUBLIC_REPO = 'gemini-cli';
+const PRIVATE_REPO = 'maintainers-gemini-cli';
+const ALLOWED_REPOS = [PUBLIC_REPO, PRIVATE_REPO];
+
+const ROOT_ISSUES = [
+  { owner: REPO_OWNER, repo: PUBLIC_REPO, number: 15374 },
+  { owner: REPO_OWNER, repo: PUBLIC_REPO, number: 15456 },
+  { owner: REPO_OWNER, repo: PUBLIC_REPO, number: 15324 },
+];
+
+const TARGET_LABEL = '🔒 maintainer only';
+const isDryRun =
+  process.argv.includes('--dry-run') || process.env.DRY_RUN === 'true';
+
+const octokit = new Octokit({
+  auth: process.env.GITHUB_TOKEN,
+});
+
+/**
+ * Extracts child issue references from markdown Task Lists ONLY.
+ * e.g. - [ ] #123 or - [x] google-gemini/gemini-cli#123
+ *
+ * Every reference on a task-list line is collected, in any of three forms:
+ * a full issue URL, `owner/repo#123`, or a short `#123` (resolved against the
+ * context repository). References to repos outside ALLOWED_REPOS are dropped.
+ *
+ * @param {string} text Markdown to scan.
+ * @param {string} contextOwner Owner used to resolve short `#123` refs.
+ * @param {string} contextRepo Repo used to resolve short `#123` refs.
+ * @returns {{owner: string, repo: string, number: number}[]} Unique children.
+ */
+function extractTaskListLinks(text, contextOwner, contextRepo) {
+  if (!text) return [];
+  const childIssues = new Map();
+
+  const add = (owner, repo, number) => {
+    if (ALLOWED_REPOS.includes(repo)) {
+      const issueNumber = Number.parseInt(number, 10);
+      const key = `${owner}/${repo}#${issueNumber}`;
+      childIssues.set(key, { owner, repo, number: issueNumber });
+    }
+  };
+
+  // One combined pattern, scanned left to right. The lookbehinds pin each ref
+  // to its true start: a greedy `.*` prefix would shrink the owner capture to
+  // one character, and a bare `#(\d+)` would also match the tail of
+  // `owner/repo#123` and attribute it to the context repository.
+  const name = '[a-zA-Z0-9._-]+';
+  const refRegex = new RegExp(
+    `https://github\\.com/(${name})/(${name})/issues/(\\d+)\\b` +
+      `|(?<![a-zA-Z0-9._/-])(${name})/(${name})#(\\d+)\\b` +
+      `|(?<![a-zA-Z0-9._/-])#(\\d+)\\b`,
+    'g',
+  );
+
+  for (const line of text.split(/\r?\n/)) {
+    // Only the text after a task-list marker counts; `X` is a valid check too.
+    const item = /-\s+\[[ xX]\](.*)/.exec(line);
+    if (!item) continue;
+    for (const m of item[1].matchAll(refRegex)) {
+      if (m[7] !== undefined) {
+        add(contextOwner, contextRepo, m[7]);
+      } else {
+        add(m[1] || m[4], m[2] || m[5], m[3] || m[6]);
+      }
+    }
+  }
+
+  return Array.from(childIssues.values());
+}
+
+/**
+ * Fetches issue data via GraphQL with full pagination for sub-issues, comments, and labels.
+ */
+async function fetchIssueData(owner, repo, number) {
+  const query = `
+    query($owner:String!, $repo:String!, $number:Int!) {
+      repository(owner:$owner, name:$repo) {
+        issue(number:$number) {
+          state
+          title
+          body
+          labels(first: 100) {
+            nodes { name }
+            pageInfo { hasNextPage endCursor }
+          }
+          subIssues(first: 100) {
+            nodes {
+              number
+              repository {
+                name
+                owner { login }
+              }
+            }
+            pageInfo { hasNextPage endCursor }
+          }
+          comments(first: 100) {
+            nodes {
+              body
+            }
+          }
+        }
+      }
+    }
+  `;
+
+  try {
+    const response = await octokit.graphql(query, { owner, repo, number });
+    const data = response.repository.issue;
+    if (!data) return null;
+
+    const issue = {
+      state: data.state,
+      title: data.title,
+      body: data.body || '',
+      labels: data.labels.nodes.map((n) => n.name),
+      subIssues: [...data.subIssues.nodes],
+      comments: data.comments.nodes.map((n) => n.body),
+    };
+
+    // Paginate subIssues if there are more than 100
+    if (data.subIssues.pageInfo.hasNextPage) {
+      const moreSubIssues = await paginateConnection(
+        owner,
+        repo,
+        number,
+        'subIssues',
+        'number repository { name owner { login } }',
+        data.subIssues.pageInfo.endCursor,
+      );
+      issue.subIssues.push(...moreSubIssues);
+    }
+
+    // Paginate labels if there are more than 100 (unlikely but for completeness)
+    if (data.labels.pageInfo.hasNextPage) {
+      const moreLabels = await paginateConnection(
+        owner,
+        repo,
+        number,
+        'labels',
+        'name',
+        data.labels.pageInfo.endCursor,
+        (n) => n.name,
+      );
+      issue.labels.push(...moreLabels);
+    }
+
+    // Note: Comments are handled via Task Lists in body + first 100 comments.
+    // If an issue has > 100 comments with task lists, we'd need to paginate those too.
+    // Given the 1,100+ issue discovery count, 100 comments is usually sufficient,
+    // but we can add it for absolute completeness.
+    // (Skipping for now to avoid excessive API churn unless clearly needed).
+
+    return issue;
+  } catch (error) {
+    if (error.errors && error.errors.some((e) => e.type === 'NOT_FOUND')) {
+      return null;
+    }
+    throw error;
+  }
+}
+
+/**
+ * Helper to paginate any GraphQL connection.
+ */
+async function paginateConnection(
+  owner,
+  repo,
+  number,
+  connectionName,
+  nodeFields,
+  initialCursor,
+  transformNode = (n) => n,
+) {
+  let additionalNodes = [];
+  let hasNext = true;
+  let cursor = initialCursor;
+
+  while (hasNext) {
+    const query = `
+      query($owner:String!, $repo:String!, $number:Int!, $cursor:String) {
+        repository(owner:$owner, name:$repo) {
+          issue(number:$number) {
+            ${connectionName}(first: 100, after: $cursor) {
+              nodes { ${nodeFields} }
+              pageInfo { hasNextPage endCursor }
+            }
+          }
+        }
+      }
+    `;
+    const response = await octokit.graphql(query, {
+      owner,
+      repo,
+      number,
+      cursor,
+    });
+    const connection = response.repository.issue[connectionName];
+    additionalNodes.push(...connection.nodes.map(transformNode));
+    hasNext = connection.pageInfo.hasNextPage;
+    cursor = connection.pageInfo.endCursor;
+  }
+  return additionalNodes;
+}
+
+/**
+ * Validates if an issue should be processed (Open, not a duplicate, not a PR)
+ */
+function shouldProcess(issueData) {
+  if (!issueData) return false;
+
+  if (issueData.state !== 'OPEN') return false;
+
+  const labels = issueData.labels.map((l) => l.toLowerCase());
+  if (labels.includes('duplicate') || labels.includes('kind/duplicate')) {
+    return false;
+  }
+
+  return true;
+}
+
+async function getAllDescendants(roots) {
+  const allDescendants = new Map();
+  const visited = new Set();
+  const queue = [...roots];
+
+  for (const root of roots) {
+    visited.add(`${root.owner}/${root.repo}#${root.number}`);
+  }
+
+  console.log(`Starting discovery from ${roots.length} roots...`);
+
+  while (queue.length > 0) {
+    const current = queue.shift();
+    const currentKey = `${current.owner}/${current.repo}#${current.number}`;
+
+    try {
+      const issueData = await fetchIssueData(
+        current.owner,
+        current.repo,
+        current.number,
+      );
+
+      if (!shouldProcess(issueData)) {
+        continue;
+      }
+
+      // ONLY add to labeling list if it's in the PUBLIC repository
+      if (current.repo === PUBLIC_REPO) {
+        // Don't label the roots themselves
+        if (
+          !ROOT_ISSUES.some(
+            (r) => r.number === current.number && r.repo === current.repo,
+          )
+        ) {
+          allDescendants.set(currentKey, {
+            ...current,
+            title: issueData.title,
+            labels: issueData.labels,
+          });
+        }
+      }
+
+      const children = new Map();
+
+      // 1. Process Native Sub-issues
+      if (issueData.subIssues) {
+        for (const node of issueData.subIssues) {
+          const childOwner = node.repository.owner.login;
+          const childRepo = node.repository.name;
+          const childNumber = node.number;
+          const key = `${childOwner}/${childRepo}#${childNumber}`;
+          children.set(key, {
+            owner: childOwner,
+            repo: childRepo,
+            number: childNumber,
+          });
+        }
+      }
+
+      // 2. Process Markdown Task Lists in Body and Comments
+      let combinedText = issueData.body || '';
+      if (issueData.comments) {
+        for (const commentBody of issueData.comments) {
+          combinedText += '\n' + (commentBody || '');
+        }
+      }
+
+      const taskListLinks = extractTaskListLinks(
+        combinedText,
+        current.owner,
+        current.repo,
+      );
+      for (const link of taskListLinks) {
+        const key = `${link.owner}/${link.repo}#${link.number}`;
+        children.set(key, link);
+      }
+
+      // Queue children (regardless of which repo they are in, for recursion)
+      for (const [key, child] of children) {
+        if (!visited.has(key)) {
+          visited.add(key);
+          queue.push(child);
+        }
+      }
+    } catch (error) {
+      console.error(`Error processing ${currentKey}: ${error.message}`);
+    }
+  }
+
+  return Array.from(allDescendants.values());
+}
+
+async function run() {
+  if (isDryRun) {
+    console.log('=== DRY RUN MODE: No labels will be applied ===');
+  }
+
+  const descendants = await getAllDescendants(ROOT_ISSUES);
+  console.log(
+    `\nFound ${descendants.length} total unique open descendant issues in ${PUBLIC_REPO}.`,
+  );
+
+  for (const issueInfo of descendants) {
+    const issueKey = `${issueInfo.owner}/${issueInfo.repo}#${issueInfo.number}`;
+    try {
+      // Data is already available from the discovery phase
+      const hasLabel = issueInfo.labels.some((l) => l === TARGET_LABEL);
+
+      if (!hasLabel) {
+        if (isDryRun) {
+          console.log(
+            `[DRY RUN] Would label ${issueKey}: "${issueInfo.title}"`,
+          );
+        } else {
+          console.log(`Labeling ${issueKey}: "${issueInfo.title}"...`);
+          await octokit.rest.issues.addLabels({
+            owner: issueInfo.owner,
+            repo: issueInfo.repo,
+            issue_number: issueInfo.number,
+            labels: [TARGET_LABEL],
+          });
+        }
+      }
+    } catch (error) {
+      console.error(`Error processing label for ${issueKey}: ${error.message}`);
+    }
+  }
+}
+
+run().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
diff --git a/.github/workflows/gemini-scheduled-issue-triage.yml b/.github/workflows/gemini-scheduled-issue-triage.yml
index 6c3fbb7c63..6aaeb950cf 100644
--- a/.github/workflows/gemini-scheduled-issue-triage.yml
+++ b/.github/workflows/gemini-scheduled-issue-triage.yml
@@ -238,23 +238,16 @@ jobs:
             core.info(`Raw labels JSON: ${rawLabels}`);
             let parsedLabels;
             try {
-              // First, try to parse the raw output as JSON.
-              parsedLabels = JSON.parse(rawLabels.trim());
-            } catch (jsonError) {
-              // If that fails, check for a markdown code block.
-              core.info(`Direct JSON parsing failed: ${jsonError.message}. Trying to extract from a markdown block.`);
               const jsonMatch = rawLabels.match(/```json\s*([\s\S]*?)\s*```/);
-              if (jsonMatch && jsonMatch[1]) {
-                try {
-                  parsedLabels = JSON.parse(jsonMatch[1].trim());
-                } catch (markdownError) {
-                  core.setFailed(`Failed to parse JSON even after extracting from markdown block: ${markdownError.message}\nRaw output: ${rawLabels}`);
-                  return;
-                }
-              } else {
-                core.setFailed(`Output is not valid JSON and does not contain a JSON markdown block.\nRaw output: ${rawLabels}`);
-                return;
+              if (!jsonMatch || !jsonMatch[1]) {
+                throw new Error("Could not find a ```json ... ``` block in the output.");
               }
+              const jsonString = jsonMatch[1].trim();
+              parsedLabels = JSON.parse(jsonString);
+              core.info(`Parsed labels JSON: ${JSON.stringify(parsedLabels)}`);
+            } catch (err) {
+              core.setFailed(`Failed to parse labels JSON from Gemini output: ${err.message}\nRaw output: ${rawLabels}`);
+              return;
             }
 
             for (const entry of parsedLabels) {
diff --git a/.github/workflows/label-backlog-child-issues.yml b/.github/workflows/label-backlog-child-issues.yml
index 80774843e3..b11f509f80 100644
--- a/.github/workflows/label-backlog-child-issues.yml
+++ b/.github/workflows/label-backlog-child-issues.yml
@@ -3,38 +3,55 @@ name: 'Label Child Issues for Project Rollup'
 on:
   issues:
     types: ['opened', 'edited', 'reopened']
+  schedule:
+    - cron: '0 * * * *' # Run every hour
+  workflow_dispatch:
+
+permissions:
+  issues: 'write'
+  contents: 'read'
 
 jobs:
+  # Event-based: Quick reaction to new/edited issues in THIS repo
   labeler:
+    if: "github.event_name == 'issues'"
     runs-on: 'ubuntu-latest'
-    permissions:
-      issues: 'write'
     steps:
-      - name: 'Check for Parent Workstream and Apply Label'
-        uses: 'actions/github-script@v7'
+      - name: 'Checkout'
+        uses: 'actions/checkout@v4'
+
+      - name: 'Setup Node.js'
+        uses: 'actions/setup-node@v4'
         with:
-          script: |
-            const issue = context.payload.issue;
-            const labelToAdd = 'workstream-rollup';
+          node-version: '20'
+          cache: 'npm'
 
-            // --- Define the FULL URLs of the allowed parent workstreams ---
-            const allowedParentUrls = [
-              'https://api.github.com/repos/google-gemini/gemini-cli/issues/15374',
-              'https://api.github.com/repos/google-gemini/gemini-cli/issues/15456',
-              'https://api.github.com/repos/google-gemini/gemini-cli/issues/15324'
-            ];
+      - name: 'Install Dependencies'
+        run: 'npm ci'
 
-            // Check if the issue has a parent_issue_url and if it's in our allowed list.
-            if (issue && issue.parent_issue_url && allowedParentUrls.includes(issue.parent_issue_url)) {
-              console.log(`SUCCESS: Issue #${issue.number} is a child of a target workstream (${issue.parent_issue_url}). Adding label.`);
-              await github.rest.issues.addLabels({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                issue_number: issue.number,
-                labels: [labelToAdd]
-              });
-            } else if (issue && issue.parent_issue_url) {
-              console.log(`FAILURE: Issue #${issue.number} has a parent, but it's not a target workstream. Parent URL: ${issue.parent_issue_url}`);
-            } else {
-              console.log(`FAILURE: Issue #${issue.number} is not a child of any issue. No action taken.`);
-            }
+      - name: 'Run Multi-Repo Sync Script'
+        env:
+          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+        run: 'node .github/scripts/sync-maintainer-labels.cjs'
+
+  # Scheduled/Manual: Recursive sync across multiple repos
+  sync-maintainer-labels:
+    if: "github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'"
+    runs-on: 'ubuntu-latest'
+    steps:
+      - name: 'Checkout'
+        uses: 'actions/checkout@v4'
+
+      - name: 'Setup Node.js'
+        uses: 'actions/setup-node@v4'
+        with:
+          node-version: '20'
+          cache: 'npm'
+
+      - name: 'Install Dependencies'
+        run: 'npm ci'
+
+      - name: 'Run Multi-Repo Sync Script'
+        env:
+          GITHUB_TOKEN: '${{ secrets.GITHUB_TOKEN }}'
+        run: 'node .github/scripts/sync-maintainer-labels.cjs'