#! /usr/bin/env nix-shell
#! nix-shell -i bash -p coreutils findutils gawk gnused nix wget

set -efuo pipefail
export LC_COLLATE=C # fix sort order

# parse files and folders from https://download.kde.org/ and https://download.qt.io/
# you can override this function in fetch.sh
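# output: one href per line, either a tarball (*.tar.xz) or a subfolder (trailing slash)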
function PARSE_INDEX() {
  grep -o -E -e '\s+href="[^"]+\.tar\.xz"' -e '\s+href="[-_a-zA-Z0-9]+/"' "$1" | cut -d'"' -f2 | sort -u
}

if [ $# != 1 ]; then
  echo "example use:" >&2
  echo "cd nixpkgs/" >&2
  echo "./maintainers/scripts/fetch-kde-qt.sh pkgs/development/libraries/qt-5/5.12" >&2
  exit 1
fi

if ! echo "$1" | grep -q '^pkgs/'; then
  echo "error: path argument must start with pkgs/" >&2
  exit 1
fi

# need absolute path for the pushd-popd block
if [ -f "$1" ]; then
  echo "ok: using fetchfile $1"
  fetchfilerel="$1"
  fetchfile="$(readlink -f "$fetchfilerel")" # resolve absolute path
  basedir="$(dirname "$fetchfile")"
  basedirrel="$(dirname "$fetchfilerel")"
elif [ -d "$1" ]; then
  echo "ok: using basedir $1"
  basedirrel="$1"
  basedir="$(readlink -f "$basedirrel")" # resolve absolute path
  if ! [ -d "$basedir" ]; then
    basedir="$(dirname "$basedir")"
  fi
  fetchfile="$basedir/fetch.sh"
else
  echo 'error: $1 must be file or dir' >&2
  exit 1
fi

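# the generated hashes are written to srcs.nix next to fetch.sh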
pkgname=$(basename "$basedir")
SRCS="$basedir/srcs.nix"
srcsrel="$basedirrel/srcs.nix"

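# fetch.sh must set BASE_URL (new format) or WGET_ARGS (old format); it may also override PARSE_INDEX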
source "$fetchfile"

if [ -n "${WGET_ARGS:-}" ]; then # old format
  BASE_URL="${WGET_ARGS[0]}" # convert to new format
  # validate
  if ! echo "$BASE_URL" | grep -q -E '^(http|https|ftp)://'; then
    printf 'error: from WGET_ARGS, converted invalid BASE_URL: %q\n' "$BASE_URL" >&2
    exit 1
  fi
  printf 'ok: from WGET_ARGS, converted BASE_URL: %q\n' "$BASE_URL"
elif [ -n "${BASE_URL:-}" ]; then # new format
  :
else
  echo "error: fetch.sh must set either WGET_ARGS or BASE_URL" >&2
  exit 1
fi

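# all downloads go into a throwaway temp dir created in the current directory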
tmptpl=tmp.fetch-kde-qt.$pkgname.XXXXXXXXXX

tmp=$(mktemp -d "$tmptpl")
pushd "$tmp" >/dev/null
echo "tempdir is $tmp"

wgetargs='--quiet --show-progress'
#wgetargs='' # debug

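# crawl state: dirlist holds directory URLs still to visit, filelist collects the tar.xz URLs found so far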
dirlist="$BASE_URL"
filelist=""
base_url_len=${#BASE_URL}

clean_urls() {
  # collapse duplicate slashes (// -> /), then restore the "//" after the protocol
  sed -E 's,//+,/,g' | sed -E 's,^(http|https|ftp):/,&/,'
}

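# crawl the directory tree under BASE_URL: fetch each directory's index.html,
# add its tar.xz links to filelist and queue its subfolders for the next round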
while [ -n "$dirlist" ]
do
  dirlist_next=""
  for dirurl in $dirlist
  do
    echo "fetching index.html from $dirurl"
    relpath=$(echo "./${dirurl:$base_url_len}" | clean_urls)
    mkdir -p "$relpath"
    indexfile=$(echo "$relpath/index.html" | clean_urls)
    wget $wgetargs -O "$indexfile" "$dirurl"
    echo "parsing $indexfile"
    filedirlist="$(PARSE_INDEX "$indexfile")"
    filelist_next="$(echo "$filedirlist" | grep '\.tar\.xz$' | while read -r file; do echo "$dirurl/$file"; done)"
    filelist_next="$(echo "$filelist_next" | clean_urls)"
    if [ -n "$filelist_next" ]; then
      [ -n "$filelist" ] && filelist+=$'\n'
      filelist+="$filelist_next"
    fi
    # collect subfolders without clobbering the list we are still iterating over
    subdirlist="$(echo "$filedirlist" | grep -v '\.tar\.xz$' | while read -r dir; do echo "$dirurl/$dir"; done || true)"
    subdirlist="$(echo "$subdirlist" | clean_urls)"
    [ -n "$subdirlist" ] && dirlist_next+="$subdirlist"$'\n'
  done
  dirlist="$dirlist_next"
done

filecount=$(echo "$filelist" | wc -l)

if [ -z "$filelist" ]
then
  echo "error: no tar.xz files found under $BASE_URL" >&2
  exit 1
fi

echo "parsed $filecount tar.xz files:"; echo "$filelist"

# most time is spent here
echo "fetching $filecount sha256 files ..."
urllist="$(echo "$filelist" | while read -r file; do echo "$file.sha256"; done)"
# wget -r: keep directory structure
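# -nH: no hostname directory prefix, -c: resume partial downloads, --no-parent: never ascend above BASE_URL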
echo "$urllist" | xargs wget $wgetargs -nH -r -c --no-parent && {
  actual=$(find . -type f -name '*.sha256' | wc -l)
  echo "fetching $filecount sha256 files done: got $actual files"
} || {
  # workaround: in rare cases, the server does not provide the sha256 files,
  # for example when the release is just a few hours old
  # and the servers are not yet fully synced
  actual=$(find . -type f -name '*.sha256' | wc -l)
  echo "fetching $filecount sha256 files failed: got only $actual files"

  # TODO fetch only missing tar.xz files
  echo "fetching $filecount tar.xz files ..."
  echo "$filelist" | xargs wget $wgetargs -nH -r -c --no-parent

  echo "generating sha256 files ..."
  find . -type f -name '*.tar.xz' | while read -r src; do
    name=$(basename "$src")
    sha256=$(sha256sum "$src" | cut -d' ' -f1)
    echo "$sha256 $name" >"$src.sha256"
  done
}

csv=$(mktemp "$tmptpl.csv")
echo "writing temporary file $csv ..."
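# one CSV line per tarball: name,version,local path,filename,sha256 (base32)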
find . -type f -name '*.sha256' | while read -r sha256file; do
  src="${sha256file%.*}" # remove extension
  sha256=$(cut -d' ' -f1 "$sha256file") # base16
  sha256=$(nix-hash --type sha256 --to-base32 "$sha256")
  # Sanitize file name
  filename=$(basename "$src" | tr '@' '_')
  nameVersion="${filename%.tar.*}"
  name=$(echo "$nameVersion" | sed -e 's,-[[:digit:]].*,,' | sed -e 's,-opensource-src$,,' | sed -e 's,-everywhere-src$,,')
  version=$(echo "$nameVersion" | sed -e 's,^\([[:alpha:]][[:alnum:]]*-\)\+,,')
  echo "$name,$version,$src,$filename,$sha256" >>"$csv"
done

files_before=$(grep -c 'src = ' "$SRCS")

echo "writing output file $SRCS ..."
cat >"$SRCS" <<EOF
# DO NOT EDIT! This file is generated automatically.
# Command: ./maintainers/scripts/fetch-kde-qt.sh $@
{ fetchurl, mirror }:

{
EOF

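# for each package name, keep only the newest version and emit a fetchurl attribute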
gawk -F , "{ print \$1 }" "$csv" | sort -u | while read -r name; do
  versions=$(gawk -F , "/^$name,/ { print \$2 }" "$csv")
  latestVersion=$(echo "$versions" | sort -rV | head -n 1)
  src=$(gawk -F , "/^$name,$latestVersion,/ { print \$3 }" "$csv")
  filename=$(gawk -F , "/^$name,$latestVersion,/ { print \$4 }" "$csv")
  sha256=$(gawk -F , "/^$name,$latestVersion,/ { print \$5 }" "$csv")
  url="${src:2}" # strip the leading "./" so the server-relative path can follow the mirror prefix
  cat >>"$SRCS" <<EOF
  $name = {
    version = "$latestVersion";
    src = fetchurl {
      url = "\${mirror}/$url";
      sha256 = "$sha256";
      name = "$filename";
    };
  };
EOF
done

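# sanity check: compare the package count before and after the rewrite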
files_after=$(grep -c 'src = ' "$SRCS")
echo "files before: $files_before"
echo "files after: $files_after"

echo "compare:"
echo "git diff $srcsrel"

popd >/dev/null
rm -fr "$tmp" >/dev/null

rm -f "$csv" >/dev/null