Assorted shell and Python scripts
#!/usr/bin/env bash

# Use rclone to sync the last two good Wikimedia XML data dumps.
#
# Usage: <script> <rclone-remote>
#   <rclone-remote>  name of an rclone remote whose config contains the
#                    mirror's base "url" entry.

set -euxo pipefail

# "${1:-}" guards against set -u aborting with an opaque "unbound
# variable" error before we can print the usage message.
if [ -z "${1:-}" ]; then
  echo "Please supply an rclone remote" >&2
  exit 1
fi

readonly RCLONE_REMOTE="$1"

# Base URL of the mirror, read from the remote's rclone config
# ("url = https://…" → third whitespace-separated field).
# readonly is split from the assignment so a failing pipeline is not
# masked and still trips set -e.
MIRROR_URL=$(rclone config show "$RCLONE_REMOTE" | grep "url" | awk '{print $3}')
readonly MIRROR_URL

# Tailscale IP of the "dietpi" host that serves the ntfy notifications.
NTFY_IP=$(sudo tailscale status | grep "dietpi" | awk '{print $1}')
readonly NTFY_IP
# Remove the downloaded file lists. Invoked via the EXIT trap, so it
# runs on normal exit, on set -e failure, and (via the signal traps
# below) on HUP/INT/QUIT/ABRT — exactly once in every case.
cleanup() {
  echo "Cleaning up"
  rm -fv "${HOME}/rsync-filelist-last-2-good.txt"
  rm -fv "${HOME}/rsync-filelist-last-2-good-en.txt"
}

# Cleanup hangs off EXIT only. The signal traps just exit, which fires
# the EXIT trap; trapping the signals directly with cleanup (as with
# the old "trap cleanup 0 1 2 3 6") would run it twice, because the
# handler's exit re-triggers the EXIT trap.
trap cleanup EXIT
trap 'exit 1' HUP INT QUIT ABRT

# Fetch the mirror's manifest of files belonging to the last two good dumps.
wget -O "${HOME}/rsync-filelist-last-2-good.txt" \
  "${MIRROR_URL}/rsync-filelist-last-2-good.txt"

# Keep only the English-Wikipedia entries ("enwiki" but not "tenwiki"),
# echoing the filtered list while saving it for rclone. An empty result
# fails the pipeline and, via pipefail, aborts the script.
grep -F "enwiki" "${HOME}/rsync-filelist-last-2-good.txt" \
  | grep -F -v "tenwiki" \
  | tee "${HOME}/rsync-filelist-last-2-good-en.txt"

# Mirror the selected files from the remote into the local archive.
rclone sync \
  --http-no-head \
  --progress \
  --transfers 16 \
  --include-from "${HOME}/rsync-filelist-last-2-good-en.txt" \
  "${RCLONE_REMOTE}:" \
  /naspool/archives/wikimedia-xmldatadumps-en

# Publish a success notification to the ntfy server on the tailnet.
curl \
  --header prio:default \
  --header tags:incoming_envelope \
  --data "Syncing of wikimedia xml datadumps succeeded" \
  "http://${NTFY_IP}:8080/wikimedia_xmldatadumps_en"

exit 0