1#!/usr/bin/env bash
2
# Print the code owners of all files changed by a PR, one username per line.
#
# Usage: <this script> GIT_REPO OWNERS_FILE BASE_REF HEAD_REF
#
# Usernames go to stdout, lowercased and de-duplicated, with teams expanded
# to their individual members. Progress messages go to stderr.
# Needs the external commands `git`, `codeowners` and `gh`.

# Abort on failing commands, on unset variables and on failures inside pipelines
set -o errexit -o nounset -o pipefail
6
# Print a diagnostic message to stderr, keeping stdout free for the result.
# Arguments: the words of the message; they are joined with single spaces.
# Outputs:   the message followed by a newline, on stderr.
# printf is used instead of echo so that a message starting with -n, -e or -E
# is printed literally rather than being consumed as an echo option.
log() {
  # "$*" joins with the first character of IFS; pin it to a space
  local IFS=' '
  printf '%s\n' "$*" >&2
}
10
# All four positional arguments are mandatory
if [[ $# -lt 4 ]]; then
  log "Usage: $0 GIT_REPO OWNERS_FILE BASE_REF HEAD_REF"
  exit 1
fi

# Repository that every git command below is run against (git -C)
gitRepo="$1"
# Path of the code owners file, as it is named inside the base ref
ownersFile="$2"
# The two refs whose difference determines the touched files
baseRef="$3"
headRef="$4"
20
# Scratch directory for intermediate files, removed again when the script exits
tmp="$(mktemp -d)"
trap 'rm -rf "$tmp"' EXIT
23
# List the files that differ between the merge base of the two refs and the head.
# The list goes through a file so that a failing git aborts the script under
# `set -e`; the exit status of a process substitution would go unnoticed.
git -C "$gitRepo" diff --merge-base --name-only "$baseRef" "$headRef" \
  > "$tmp/touched-files"
mapfile -t touchedFiles < "$tmp/touched-files"
log "This PR touches ${#touchedFiles[@]} files"
27
# Read the owners file as it exists in the base ref rather than in the PR:
# otherwise a PR could drop code owners from the file to avoid pinging them
git -C "$gitRepo" show "${baseRef}:${ownersFile}" > "$tmp/codeowners"
31
# Set of users to print at the end: only the keys of this associative array
# matter, which de-duplicates for free.
# Keys must always be lowercased, so that differently-cased spellings of the
# same user don't end up as separate entries.
declare -A users
users=()
35
# Collect the owners of every touched file as keys of the `users` array.
# Uses `touchedFiles`, `tmp`, `users` and `log` set up earlier in the script,
# plus the external `codeowners` and `gh` commands.
for file in "${touchedFiles[@]}"; do
  result=$(codeowners --file "$tmp"/codeowners "$file")

  # Remove the file prefix and trim the surrounding spaces
  read -r owners <<< "${result#"$file"}"
  if [[ "$owners" == "(unowned)" ]]; then
    log "File $file is unowned"
    continue
  fi
  log "File $file is owned by $owners"

  # Split up multiple owners, separated by arbitrary amounts of spaces
  IFS=" " read -r -a entries <<< "$owners"

  for entry in "${entries[@]}"; do
    # GitHub technically also supports Emails as code owners,
    # but we can't easily support that, so let's not.
    # The pattern is anchored: unanchored, an email like "a@example.com"
    # would match and be treated as the user "example.com".
    # At least one character is required after the "@", because an empty
    # name cannot be used as a key of the users array.
    if [[ ! "$entry" =~ ^@(.+)$ ]]; then
      printf '\e[33m%s\e[0m\n' \
        "Codeowner \"$entry\" for file $file is not valid: Must start with \"@\"" >&2
      # Don't fail, because the PR for which this script runs can't fix it,
      # it has to be fixed in the base branch
      continue
    fi
    # The first regex match is everything after the @
    entry=${BASH_REMATCH[1]}

    if [[ "$entry" =~ (.*)/(.*) ]]; then
      # Teams look like $org/$team
      org=${BASH_REMATCH[1]}
      team=${BASH_REMATCH[2]}

      # Instead of requesting a review from the team itself,
      # we request reviews from the individual users.
      # This is because once somebody from a team reviewed the PR,
      # the API doesn't expose that the team was already requested for a review,
      # so we wouldn't be able to avoid rerequesting reviews
      # without saving some extra state somewhere

      # We could also consider implementing a more advanced heuristic
      # in the future that e.g. only pings one team member,
      # but escalates to somebody else if that member doesn't respond in time.

      # The members endpoint is paginated, so request every page;
      # otherwise only the first page of a large team would be returned.
      gh api \
        --cache=1h \
        --paginate \
        -H "Accept: application/vnd.github+json" \
        -H "X-GitHub-Api-Version: 2022-11-28" \
        "/orgs/$org/teams/$team/members" \
        --jq '.[].login' > "$tmp/team-members"
      readarray -t members < "$tmp/team-members"
      log "Team $entry has these members: ${members[*]}"

      for user in "${members[@]}"; do
        users[${user,,}]=
      done
    else
      # Everything else is a user
      users[${entry,,}]=
    fi
  done

done
96
# Print the collected usernames, one per line.
# When there are no owners at all, print nothing: printf would otherwise still
# apply its format once and emit a single empty line, i.e. an empty username.
if (( ${#users[@]} > 0 )); then
  printf "%s\n" "${!users[@]}"
fi