Assorted shell and Python scripts
at main 1.1 kB view raw
#!/usr/bin/env bash
#
# Sync the last two good Wikimedia "enwiki" XML data dumps via rclone,
# then post a success notification to a ntfy server reached over Tailscale.
#
# Usage: sync-wikimedia.sh RCLONE_REMOTE [DEST_DIR]
#   RCLONE_REMOTE  name of a configured rclone HTTP remote for a WMF mirror
#   DEST_DIR       optional destination (defaults to the original path)

set -euxo pipefail

# Test $# rather than "$1": under `set -u`, expanding $1 with no arguments
# aborts with "unbound variable" before the usage message could print.
if [ "$#" -lt 1 ]; then
  echo "Please supply an rclone remote" >&2
  exit 1
fi

RCLONE_REMOTE="$1"
# Optional second argument keeps existing callers working unchanged.
DEST_DIR="${2:-/naspool/archives/wikimedia-xmldatadumps-en}"

FILELIST="${HOME}/rsync-filelist-last-2-good.txt"
FILELIST_EN="${HOME}/rsync-filelist-last-2-good-en.txt"

# Base URL of the mirror backing the remote ("url = <value>" -> field 3).
MIRROR_URL=$(rclone config show "$RCLONE_REMOTE" | grep "url" | awk '{print $3}')
# Tailscale IP of the ntfy host; NR==1 guards against multiple matches
# producing a multi-word value.
NTFY_IP=$(sudo tailscale status | grep "dietpi" | awk 'NR==1 {print $1}')

cleanup() {
  echo "Cleaning up"
  rm -fv "$FILELIST" "$FILELIST_EN"
}

# EXIT fires on normal exit, on `set -e` failures, and when bash dies from
# a trapped signal. Trapping EXIT alone avoids the double invocation the
# old `trap cleanup 0 1 2 3 6` allowed (signal handler -> bare `exit` ->
# EXIT trap), and omitting `exit` in the handler preserves the script's
# real exit status instead of masking it.
trap cleanup EXIT

wget "${MIRROR_URL}/rsync-filelist-last-2-good.txt" \
  -O "$FILELIST"

# Keep only English-Wikipedia entries; "tenwiki" contains "enwiki" as a
# substring, so it must be filtered back out. tee echoes the selection
# into the sync log while writing the include file.
grep "enwiki" "$FILELIST" |
  grep -v "tenwiki" |
  tee "$FILELIST_EN"

rclone sync \
  --http-no-head \
  -P \
  --transfers 16 \
  --include-from "$FILELIST_EN" \
  "${RCLONE_REMOTE}:" \
  "$DEST_DIR"

# Only reached when every step above succeeded (set -e): notify the
# ntfy topic on the Tailscale host.
curl \
  -H prio:default \
  -H tags:incoming_envelope \
  -d "Syncing of wikimedia xml datadumps succeeded" \
  "http://${NTFY_IP}:8080/wikimedia_xmldatadumps_en"

exit 0