Files
compound-engineering-plugin…/plugins/compound-engineering/skills/ce-resolve-pr-feedback/scripts/get-pr-comments

168 lines
6.3 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# get-pr-comments - collect review feedback for a pull request as JSON.
#
# Usage: get-pr-comments PR_NUMBER [OWNER/REPO]
#
#   PR_NUMBER   pull request number (decimal digits only)
#   OWNER/REPO  optional repository slug; when omitted, the repository of the
#               current directory is detected with `gh repo view`
#
# This section sets PR_NUMBER, OWNER and REPO for the queries that follow.
# It exits 1 with a message on stderr when arguments are missing or malformed,
# or when the repository cannot be determined. Diagnostics go to stderr so
# that stdout only ever carries the JSON result.
set -euo pipefail

if [[ $# -lt 1 ]]; then
  {
    echo "Usage: get-pr-comments PR_NUMBER [OWNER/REPO]"
    echo "Example: get-pr-comments 123"
    echo "Example: get-pr-comments 123 EveryInc/cora"
  } >&2
  exit 1
fi

PR_NUMBER=$1
# The value is later handed to `gh api -F`, which applies magic conversions
# (including reading files for `@path` values), so accept plain digits only.
if [[ ! "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
  echo "Error: PR_NUMBER must be a number, got '$PR_NUMBER'." >&2
  exit 1
fi

repo_slug=${2:-}
if [[ -z "$repo_slug" ]]; then
  # A failing `gh` must not trip `set -e` here: fall through with an empty
  # slug so the explicit error message below is printed instead of a silent
  # exit. stderr is discarded on purpose; the message below replaces it.
  repo_slug=$(gh repo view --json nameWithOwner -q .nameWithOwner 2>/dev/null) \
    || repo_slug=""
fi

# Require exactly OWNER/REPO. (`cut -d/` would echo a slash-less string back
# for both fields, silently producing OWNER == REPO.)
OWNER=""
REPO=""
slug_pattern='^([^/[:space:]]+)/([^/[:space:]]+)$'
if [[ "$repo_slug" =~ $slug_pattern ]]; then
  OWNER=${BASH_REMATCH[1]}
  REPO=${BASH_REMATCH[2]}
fi

if [[ -z "$OWNER" || -z "$REPO" ]]; then
  echo "Error: Could not detect repository. Pass OWNER/REPO as second argument." >&2
  exit 1
fi
# Output is a JSON object with four keys:
# review_threads - unresolved inline review threads, edge-wrapped as
# [{ node: { id, isResolved, isOutdated, path, line, ...,
# comments: { nodes: [...] } } }]
# pr_comments - top-level PR conversation comments (excludes PR author
# and known CI/status bots)
# review_bodies - review submissions with non-empty body text (same
# filtering as pr_comments)
# cross_invocation - cross-invocation awareness envelope:
# signal: true when both resolved and unresolved threads exist (multi-round review)
# resolved_threads: last 10 resolved threads by recency, for cluster analysis input
#
# Pagination (issue #798): each top-level connection -- reviewThreads,
# comments, reviews -- is fetched in its own paginated query because
# `gh api graphql --paginate` only follows the outermost pageInfo per
# response. Combining them into one query (as this script previously did)
# silently dropped everything past page 1 on long-lived PRs and made the
# skill report "0 of 0 resolved" while real findings sat unanswered.
# Per-thread inline `comments` are fetched up to 100 per thread without
# follow-up pagination; threads that exceed 100 comments are rare and out of
# scope for this fix.
#
# Bot filtering: only CI/status bots (codecov, etc.) are filtered at the source.
# Their output is structurally never actionable -- coverage numbers, build
# summaries, deploy status -- and that holds regardless of format changes.
# AI review bots (coderabbitai, codex, gemini, copilot) are NOT filtered here.
# Historically their top-level comments were assumed to always be wrappers, but
# that turned out to be wrong: Codex sometimes posts actionable findings as
# top-level PR comments with no inline thread counterpart. Any source-level
# heuristic to separate wrapper from actionable for these bots is brittle (one
# bot format change away from silently dropping feedback). SKILL.md step 2
# has a content-aware actionability check and Silent Drop rule that handles
# wrappers correctly, so we trust that layer instead. Add new logins to the CI
# list only if their output is structurally non-actionable like codecov's.
# Review threads, fetched page by page. `--paginate` re-issues the query with
# $endCursor taken from reviewThreads.pageInfo until hasNextPage is false, and
# `--slurp` collects the per-page responses into a single JSON array. The PR
# author is requested here as well; the final jq step reads it from the first
# page.
threads_query='
query Threads($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
author { login }
reviewThreads(first: 100, after: $endCursor) {
nodes {
id
isResolved
isOutdated
path
line
originalLine
startLine
originalStartLine
comments(first: 100) {
nodes {
id
author { login }
body
createdAt
url
}
}
}
pageInfo { hasNextPage endCursor }
}
}
}
}'
threads_pages=$(
  gh api graphql --paginate --slurp \
    --raw-field owner="$OWNER" \
    --raw-field repo="$REPO" \
    --field pr="$PR_NUMBER" \
    --raw-field query="$threads_query"
)
# Top-level PR conversation comments, fetched page by page. `--paginate`
# follows comments.pageInfo through $endCursor; `--slurp` collects the
# per-page responses into a single JSON array.
comments_query='
query Comments($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
comments(first: 100, after: $endCursor) {
nodes {
id
author { login }
body
}
pageInfo { hasNextPage endCursor }
}
}
}
}'
comments_pages=$(
  gh api graphql --paginate --slurp \
    --raw-field owner="$OWNER" \
    --raw-field repo="$REPO" \
    --field pr="$PR_NUMBER" \
    --raw-field query="$comments_query"
)
# Review submissions (id, author, body, state), fetched page by page.
# `--paginate` follows reviews.pageInfo through $endCursor; `--slurp` collects
# the per-page responses into a single JSON array.
reviews_query='
query Reviews($owner: String!, $repo: String!, $pr: Int!, $endCursor: String) {
repository(owner: $owner, name: $repo) {
pullRequest(number: $pr) {
reviews(first: 100, after: $endCursor) {
nodes {
id
author { login }
body
state
}
pageInfo { hasNextPage endCursor }
}
}
}
}'
reviews_pages=$(
  gh api graphql --paginate --slurp \
    --raw-field owner="$OWNER" \
    --raw-field repo="$REPO" \
    --field pr="$PR_NUMBER" \
    --raw-field query="$reviews_query"
)
# Assemble the final JSON report from the three paginated responses.
#
# Resolution semantics: `isOutdated` means the diff hunk around the comment
# has shifted since the thread was opened -- not that the reviewer concern
# was addressed. Resolution state is the only authoritative signal; outdated
# threads are still surfaced (with their isOutdated flag intact) so the
# resolver can factor in that the referenced line may have moved.
#
# Input handling: the page arrays are streamed to jq on stdin instead of being
# passed with --argjson. A command-line argument is capped by the OS (128 KiB
# per argument on Linux), which long-lived PRs with many comments can exceed,
# making jq fail with "Argument list too long". `printf` is a shell builtin,
# so the data never goes through exec.
#
# Body filtering: pr_comments and review_bodies both drop entries whose body
# has no non-whitespace character.

#######################################
# Build the report from three slurped `gh api graphql --paginate` results.
# Inputs:  stdin - three JSON documents in this order: review-thread pages,
#          PR-comment pages, review pages (each an array of page responses)
# Outputs: the report object on stdout (keys: review_threads, pr_comments,
#          review_bodies, cross_invocation)
# Returns: exit status of jq
#######################################
build_feedback_json() {
  jq --slurp '
    # True when .body contains at least one non-whitespace character.
    def has_text: (.body // "") | test("\\S");

    # Pass through items not written by the PR author or a CI/status bot.
    def from_reviewer($pr_author; $ci_bots):
      select(.author.login != $pr_author.login)
      | select(.author.login as $login | $ci_bots | index($login) | not);

    . as [$threads, $comments, $reviews]
    | ["codecov"] as $ci_bot_logins
    | $threads[0].data.repository.pullRequest.author as $author
    | [$threads[].data.repository.pullRequest.reviewThreads.nodes[]] as $all_threads
    | [$comments[].data.repository.pullRequest.comments.nodes[]] as $all_comments
    | [$reviews[].data.repository.pullRequest.reviews.nodes[]] as $all_reviews
    | [$all_threads[] | select(.isResolved == false)] as $unresolved
    # Ten most recently active resolved threads, newest first.
    | ([ $all_threads[]
         | select(.isResolved == true)
         | { thread_id: .id, path: .path, line: .line,
             first_comment_body: .comments.nodes[0].body,
             last_comment_at: ([.comments.nodes[].createdAt] | sort | last) } ]
       | sort_by(.last_comment_at) | .[-10:] | reverse) as $resolved
    | {
        review_threads: [$unresolved[] | { node: . }],
        pr_comments: [ $all_comments[]
                       | from_reviewer($author; $ci_bot_logins)
                       | select(has_text) ],
        review_bodies: [ $all_reviews[]
                         | select(has_text)
                         | from_reviewer($author; $ci_bot_logins) ],
        cross_invocation: {
          signal: (($resolved | length) > 0 and ($unresolved | length) > 0),
          resolved_threads: $resolved
        }
      }'
}

printf '%s\n' "$threads_pages" "$comments_pages" "$reviews_pages" \
  | build_feedback_json