#! /bin/sh
#
# Count commits per author in the git history of the current directory,
# merging the different spellings (full name, nick, email) of one person.
#
# Inputs:
#   - `git log` output of the repository in the current directory;
#   - ../maintainer-list.nix and vanity-manual-equalities.txt, both resolved
#     relative to the location of this script;
#   - NIXPKGS_GITHUB_NAME_CACHE (optional): path of a TAB-separated
#     "email<TAB>github-name" cache file. When it is set, emails missing from
#     the cache are looked up on github.com and appended to it, and GitHub
#     names are added to the final report.
#
# Output (stdout): the computed name-equivalence list, an empty line, then
# "count<TAB>name" lines (plus "<TAB>github-name" when known) sorted by
# ascending count.
#
# Uses GNU extensions (sed -r, \t in sed, grep -B, mktemp --suffix), curl
# (only with the cache) and nix-build (to obtain apache-jena's sparql).

export LANG=C LC_ALL=C LC_COLLATE=C

script_dir="$(dirname "$0")"
# A literal TAB; POSIX sh has no $'\t'.
tab="$(printf '\t')"

# Temporary files are registered here so the EXIT trap can remove them.
n3=
github_adder_script=
cleanup () {
  [ -n "$n3" ] && rm -f -- "$n3"
  case "$github_adder_script" in
    ''|/dev/null) ;;
    *) rm -f -- "$github_adder_script" ;;
  esac
  return 0
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Load git log
raw_git_log="$(git log)"
# One "name<TAB>email" line per commit: strip the "Author:" label, drop
# backslashes and surrounding blanks, cut " @ ..." suffixes from the name.
git_data="$(printf '%s\n' "$raw_git_log" | grep 'Author:' |
  sed -e 's/^ *Author://; s/\\//g; s/^ *//; s/ *$//;
          s/ @ .*//; s/ *[<]/\t/; s/[>]//')"

# Name - nick - email correspondence from log and from maintainer list
# Also there are a few manual entries
# maintainers: "nick<TAB>name<TAB>email" for every `nick = "name <email>"` line.
maintainers="$(grep '=' "$script_dir/../maintainer-list.nix" |
  sed -re 's/\\"//g;
           s/[ ]*([^ =]*)[ ]*=[ ]*" *(.*[^ ]) *[<](.*)[>] *".*/\1\t\2\t\3/')"
git_lines="$( { printf '%s\n' "$git_data"
                cat "$script_dir/vanity-manual-equalities.txt"; } | sort -u)"

# Every string that looks like an email, from both sources.
emails="$(
  { printf '%s\n' "$maintainers" | cut -f 3
    printf '%s\n' "$git_data" | cut -f 2; } |
  sort -u | grep -E ".+@.+[.].+"
)"

# fetchGithubName EMAIL
# Finds a commit in the loaded log whose Author line carries <EMAIL>, fetches
# that commit's page from github.com and prints the link targets (minus the
# leading "/") found in the 10 lines before "committed", excluding /commit/
# links. Prints an empty line when nothing is found.
fetchGithubName () {
  commitid="$(
    printf '%s\n' "$raw_git_log" | grep -B3 "Author: .*[<]$1[>]" | head -n 3 |
      grep '^commit ' | tail -n 1 | sed -e 's/^commit //'
  )"
  # curl errors are silenced on purpose: a failed lookup yields an empty name.
  userid="$(
    curl https://github.com/NixOS/nixpkgs/commit/"$commitid" 2>/dev/null |
      grep committed -B10 | grep 'href="/' |
      sed -re 's@.* href="/@@; s@".*@@' |
      grep -v "/commit/"
  )"
  printf '%s\n' "$userid"
}

# cache_has_email EMAIL FILE
# Succeeds when FILE contains "EMAIL<TAB>". The email is matched as a fixed
# string, so "." and other regex characters in it are taken literally.
cache_has_email () {
  grep -qF -- "$1$tab" "$2"
}

# Fill the cache with every email that is not in it yet. Entries are
# TAB-separated because the cache is parsed with "(.*)\t(.*)" further down.
if [ -n "$NIXPKGS_GITHUB_NAME_CACHE" ]; then
  printf '%s\n' "$emails" | while IFS= read -r email; do
    [ -n "$email" ] || continue
    if ! cache_has_email "$email" "$NIXPKGS_GITHUB_NAME_CACHE"; then
      printf '%s\t%s\n' "$email" "$(fetchGithubName "$email")" >> \
        "$NIXPKGS_GITHUB_NAME_CACHE"
    fi
  done
fi

# For RDF
# Percent-escape the characters % space ' " ` ^ ("%" first, so that the
# escapes produced by the later rules are not escaped again).
normalize_name () {
  sed -e 's/%/%25/g; s/ /%20/g; s/'\''/%27/g; s/"/%22/g; s/`/%60/g; s/\^/%5e/g; '
}

# Inverse of normalize_name ("%25" last, for the same reason).
denormalize_name () {
  sed -e 's/%20/ /g; s/%27/'\''/g; s/%22/"/g; s/%60/`/g; s/%5e/^/g; s/%25/%/g;'
}

n3="$(mktemp --suffix .n3)"

# «The same person» relation and a sorting hint
# Full name is something with a space
# Triple terms are separated by TABs, not spaces: the whole stream goes
# through normalize_name, which rewrites every space to %20.
(
  printf '%s\n' "$git_lines" |
    sed -re 's@(.*)\t(.*)@<my://name/\1>\t<my://can-be>\t<my://name/\2>.@'
  printf '%s\n' "$git_lines" |
    sed -re 's@(.*)\t(.*)@<my://name/\2>\t<my://can-be>\t<my://name/\1>.@'
  printf '%s\n' "$maintainers" |
    sed -re 's@(.*)\t(.*)\t(.*)@<my://name/\1>\t<my://can-be>\t<my://name/\2>.@'
  printf '%s\n' "$maintainers" |
    sed -re 's@(.*)\t(.*)\t(.*)@<my://name/\2>\t<my://can-be>\t<my://name/\3>.@'
  printf '%s\n' "$maintainers" |
    sed -re 's@(.*)\t(.*)\t(.*)@<my://name/\3>\t<my://can-be>\t<my://name/\1>.@'
  # Sorting hint: 0 for lines containing a space (full names), 1 otherwise.
  printf '%s\n' "$git_lines" | grep ' ' | cut -f 1 |
    sed -e 's@.*@<my://name/&>\t<my://is-name>\t<my://0>.@'
  printf '%s\n' "$git_lines" | grep -v ' ' | cut -f 1 |
    sed -e 's@.*@<my://name/&>\t<my://is-name>\t<my://1>.@'
  printf '%s\n' "$maintainers" | cut -f 2 |
    sed -e 's@.*@<my://name/&>\t<my://is-name>\t<my://0>.@'
  if [ -n "$NIXPKGS_GITHUB_NAME_CACHE" ]; then
    # Skip cache entries whose GitHub name is empty (line ends with the TAB).
    grep -v "${tab}\$" "$NIXPKGS_GITHUB_NAME_CACHE" |
      sed -re 's@(.*)\t(.*)@<my://name/\1>\t<my://at-github>\t<my://github/\2>.@'
  fi
) | normalize_name | grep -E '<my://[-a-z]+>' | sort | uniq > "$n3"

# Get transitive closure
sparql="$(nix-build '<nixpkgs>' -Q -A apache-jena --no-out-link)/bin/sparql"
# Rows are "x<TAB>y<TAB>hint". They are sorted on columns 2-3 so that rows
# sharing the same y are adjacent, lower hint first.
name_list="$(
  "$sparql" --results=TSV --data="$n3" "
    select ?x ?y ?g where {
      ?x <my://can-be>+ ?y.
      ?x <my://is-name> ?g.
    }
  " | tail -n +2 |
    sed -re 's@<my://name/@@g; s@<my://@@g; s@>@@g;' |
    sort -t "$tab" -k 2,3
)"
github_name_list="$(
  "$sparql" --results=TSV --data="$n3" "
    select ?x ?y where {
      ?x (<my://can-be>+ / <my://at-github>) ?y.
    }
  " | tail -n +2 |
    sed -re 's@<my://(name|github)/@@g; s@<my://@@g; s@>@@g;'
)"

# Take first spelling option for every person
name_list_canonical="$(printf '%s\n' "$name_list" | cut -f 1,2 | uniq -f1)"

# A sed script with one "s#^spelling$#canonical#g" rule per known spelling.
cleaner_script="$(printf '%s\n' "$name_list_canonical" | denormalize_name |
  sed -re 's/(.*)\t(.*)/s#^\2$#\1#g/g')"

# Add github usernames
if [ -n "$NIXPKGS_GITHUB_NAME_CACHE" ]; then
  github_adder_script="$(mktemp)"
  # NOTE(review): tr leaves a trailing "|" on the pattern; with GNU grep an
  # empty alternative presumably matches every line, which would make this
  # filter a no-op. Kept as is; confirm the intent before tightening it.
  printf '%s\n' "$github_name_list" |
    grep -E "$(printf '%s\n' "$name_list_canonical" | cut -f 2 |
      tr '\n' '|' )" |
    sort | uniq |
    sed -re 's/(.*)\t(.*)/s| \1$| \1\t\2|g;/' |
    denormalize_name > "$github_adder_script"
else
  # An empty sed script: the report passes through unchanged.
  github_adder_script='/dev/null'
fi

printf '%s\n' "$name_list" | denormalize_name

echo

# Canonicalise every author name, count, sort by count, append GitHub names,
# and turn the uniq -c "  count name" layout into "count<TAB>name".
printf '%s\n' "$git_data" | cut -f 1 |
  sed -e "$cleaner_script" |
  sort | uniq -c | sort -k1n | sed -rf "$github_adder_script" |
  sed -re 's/^ *([0-9]+) /\1\t/'