#!/usr/bin/env bash
#
# get-pr-comments - print the review feedback on a GitHub pull request as JSON.
#
# Usage:   get-pr-comments PR_NUMBER [OWNER/REPO]
# Example: get-pr-comments 123
# Example: get-pr-comments 123 EveryInc/cora
#
# When OWNER/REPO is omitted, the repository is detected with `gh repo view`.
# Requires the `gh` and `jq` commands. The JSON result is written to stdout;
# usage and error messages are written to stderr so they never mix with it.
# Exits 1 on bad arguments or when the repository cannot be determined; any
# failing `gh`/`jq` call aborts the script with that command's exit status.

set -euo pipefail

if [[ $# -lt 1 ]]; then
  {
    echo "Usage: get-pr-comments PR_NUMBER [OWNER/REPO]"
    echo "Example: get-pr-comments 123"
    echo "Example: get-pr-comments 123 EveryInc/cora"
  } >&2
  exit 1
fi

PR_NUMBER=$1

# `gh api -F` applies magic type conversion to its value (a leading "@" means
# "read the value from this file"), so only plain digits are let through.
if [[ ! "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
  echo "Error: PR_NUMBER must be a number, got '$PR_NUMBER'." >&2
  exit 1
fi

repo_arg=${2:-}
if [[ -n "$repo_arg" ]]; then
  # Reject a value without a slash instead of silently using it as both the
  # owner and the repository name.
  if [[ "$repo_arg" != */* ]]; then
    echo "Error: Repository must be given as OWNER/REPO, got '$repo_arg'." >&2
    exit 1
  fi
  repo_slug=$repo_arg
else
  # Detection failure must not trip `set -e`; an empty slug falls through to
  # the explicit error message below. stderr is discarded on purpose because
  # that message replaces gh's own diagnostics.
  repo_slug=$(gh repo view --json owner,name \
    -q '.owner.login + "/" + .name' 2>/dev/null) || repo_slug=""
fi

# Split on "/": OWNER is the first segment, REPO the second (any further
# segments are ignored).
OWNER=${repo_slug%%/*}
repo_rest=${repo_slug#*/}
REPO=${repo_rest%%/*}

if [[ -z "$OWNER" || -z "$REPO" ]]; then
  echo "Error: Could not detect repository. Pass OWNER/REPO as second argument." >&2
  exit 1
fi

# Output is a JSON object with four keys:
#   review_threads   - unresolved inline review threads, edge-wrapped as
#                      [{ node: { id, isResolved, isOutdated, path, line, ...,
#                                 comments: { nodes: [...] } } }]
#   pr_comments      - top-level PR conversation comments (excludes PR author
#                      and known CI/status bots)
#   review_bodies    - review submissions with non-empty body text (same
#                      author/bot filtering as pr_comments)
#   cross_invocation - cross-invocation awareness envelope:
#       signal:           true when both resolved and unresolved threads exist
#                         (multi-round review)
#       resolved_threads: last 10 resolved threads by recency, for cluster
#                         analysis input
#
# Pagination (issue #798): each top-level connection -- reviewThreads,
# comments, reviews -- is fetched in its own paginated query because
# `gh api graphql --paginate` only follows the outermost pageInfo per
# response. Combining them into one query (as this script previously did)
# silently dropped everything past page 1 on long-lived PRs and made the
# skill report "0 of 0 resolved" while real findings sat unanswered.
# Per-thread inline `comments` are fetched up to 100 per thread without
# follow-up pagination; threads that exceed 100 comments are rare and out of
# scope for this fix.
#
# Bot filtering: only CI/status bots (codecov, etc.) are filtered at the source.
# Their output is structurally never actionable -- coverage numbers, build
# summaries, deploy status -- and that holds regardless of format changes.
# AI review bots (coderabbitai, codex, gemini, copilot) are NOT filtered here.
# Historically their top-level comments were assumed to always be wrappers, but
# that turned out to be wrong: Codex sometimes posts actionable findings as
# top-level PR comments with no inline thread counterpart. Any source-level
# heuristic to separate wrapper from actionable for these bots is brittle (one
# bot format change away from silently dropping feedback). SKILL.md step 2
# has a content-aware actionability check and Silent Drop rule that handles
# wrappers correctly, so we trust that layer instead. Add new logins to the CI
# list only if their output is structurally non-actionable like codecov's.

# Inline review threads (plus the PR author, used for filtering further down).
threads_pages=$(gh api graphql --paginate --slurp \
  -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
  -f query='
query Threads($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
  repository(owner: $owner, name: $repo) {
    pullRequest(number: $pr) {
      author { login }
      reviewThreads(first: 100, after: $endCursor) {
        nodes {
          id
          isResolved
          isOutdated
          path
          line
          originalLine
          startLine
          originalStartLine
          comments(first: 100) {
            nodes {
              id
              author { login }
              body
              createdAt
              url
            }
          }
        }
        pageInfo { hasNextPage endCursor }
      }
    }
  }
}')

# Top-level PR conversation comments.
comments_pages=$(gh api graphql --paginate --slurp \
  -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
  -f query='
query Comments($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
  repository(owner: $owner, name: $repo) {
    pullRequest(number: $pr) {
      comments(first: 100, after: $endCursor) {
        nodes {
          id
          author { login }
          body
        }
        pageInfo { hasNextPage endCursor }
      }
    }
  }
}')

# Review submissions (the summary body attached to each review).
reviews_pages=$(gh api graphql --paginate --slurp \
  -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" \
  -f query='
query Reviews($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
  repository(owner: $owner, name: $repo) {
    pullRequest(number: $pr) {
      reviews(first: 100, after: $endCursor) {
        nodes {
          id
          author { login }
          body
          state
        }
        pageInfo { hasNextPage endCursor }
      }
    }
  }
}')

# Resolution semantics: `isOutdated` means the diff hunk around the comment
# has shifted since the thread was opened -- not that the reviewer concern
# was addressed. Resolution state is the only authoritative signal; outdated
# threads are still surfaced (with their isOutdated flag intact) so the
# resolver can factor in that the referenced line may have moved.
#
# Each *_pages variable holds an array of page responses (--slurp), so the
# node lists are flattened across pages before any filtering.
jq -n \
  --argjson threads "$threads_pages" \
  --argjson comments "$comments_pages" \
  --argjson reviews "$reviews_pages" '
  ($threads[0].data.repository.pullRequest.author) as $author
  | [$threads[].data.repository.pullRequest.reviewThreads.nodes[]] as $all_threads
  | [$comments[].data.repository.pullRequest.comments.nodes[]] as $all_comments
  | [$reviews[].data.repository.pullRequest.reviews.nodes[]] as $all_reviews
  | ["codecov"] as $ci_bot_logins
  | [$all_threads[] | select(.isResolved == false)] as $unresolved
  # Ten most recently active resolved threads, newest first.
  | ([$all_threads[]
      | select(.isResolved == true)
      | {
          thread_id: .id,
          path: .path,
          line: .line,
          first_comment_body: .comments.nodes[0].body,
          last_comment_at: ([.comments.nodes[].createdAt] | sort | last)
        }]
     | sort_by(.last_comment_at)
     | .[-10:]
     | reverse) as $resolved
  | {
      review_threads: [$unresolved[] | { node: . }],
      pr_comments: [$all_comments[]
        | select(.author.login != $author.login)
        | select(.author.login as $l | $ci_bot_logins | index($l) | not)
        | select(.body | test("^\\s*$") | not)],
      review_bodies: [$all_reviews[]
        | select(.body != null and .body != "")
        | select(.author.login != $author.login)
        | select(.author.login as $l | $ci_bot_logins | index($l) | not)],
      cross_invocation: {
        signal: (($resolved | length) > 0 and ($unresolved | length) > 0),
        resolved_threads: $resolved
      }
    }'