the home site for me: also iteration 3 or 4 of my site
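A minimal invocation sketch for the image-rehosting script below, assuming it is saved as rehost-cdn.sh (the filename is a placeholder) and run from the repo root with the CDN token in the environment:

    HACKCLUB_CDN_TOKEN=... ./rehost-cdn.sh --dry-run
    HACKCLUB_CDN_TOKEN=... ./rehost-cdn.sh content

The dry run only lists what would be uploaded; a real run uploads in batches of 50, records source -> deployed URL pairs in .crush/rehost-cdn/map.tsv, and rewrites the references in place, keeping per-file backups under .crush/rehost-cdn/backup.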
#!/usr/bin/env bash
# Rehost external images referenced in the Zola content tree on the Hack Club CDN,
# then rewrite the markdown/shortcode references in place.
# Requires: bash 4+, ripgrep (rg), GNU awk, GNU sed, jq, curl.
set -euo pipefail

API_URL="https://cdn.hackclub.com/api/v3/new"
TOKEN="${HACKCLUB_CDN_TOKEN:-}"
if [[ -z "${TOKEN}" ]]; then
  TOKEN="${1:-}"
fi
if [[ -z "${TOKEN}" ]]; then
  echo "Usage: HACKCLUB_CDN_TOKEN=... $0 [token] [--dry-run] [--skip-check] [paths...]" >&2
  exit 1
fi

DRY_RUN=false
SKIP_CHECK=false
CACHED_URLS=()
ARGS=()
for a in "$@"; do
  case "$a" in
    --dry-run) DRY_RUN=true ;;
    --skip-check) SKIP_CHECK=true ;;
    *) ARGS+=("$a") ;;
  esac
done
# Drop the token from the path list only if it actually came in as the first
# positional argument (i.e. it was not read from the environment).
if [[ -z "${HACKCLUB_CDN_TOKEN:-}" && ${#ARGS[@]} -gt 0 && "${ARGS[0]}" == "$TOKEN" ]]; then
  ARGS=("${ARGS[@]:1}")
fi

PATHS=("content")
if [[ ${#ARGS[@]} -gt 0 ]]; then PATHS=("${ARGS[@]}"); fi

TMP_DIR=".crush/rehost-cdn"
MAP_FILE="$TMP_DIR/map.tsv"
mkdir -p "$TMP_DIR"
touch "$MAP_FILE"

# Print "file<TAB>url" for every external image URL referenced under PATHS.
collect_urls() {
  {
    # Markdown images: ![alt](URL). "|| true" keeps a no-match exit status
    # from rg/grep from tripping set -e / pipefail.
    rg -n --no-heading -e '!\[[^\]]*\]\((https?://[^)[:space:]]+)\)' \
      -g '!**/*.map' -g '!**/*.lock' "${PATHS[@]}" 2>/dev/null |
      awk -F: '{file=$1; sub(/^[^:]*:/, "", $0);
                if (match($0, /!\[[^\]]*\]\((https?:\/\/[^)[:space:]]+)\)/, m) && m[1] != "")
                  print file "\t" m[1]}' || true
    # Zola shortcode variants:
    #   {% img(id="URL", ...) %}
    #   {{ img(id="URL", ...) }}
    rg -n --no-heading -e '\{[%{]\s*img[^}%]*[}%]\}' "${PATHS[@]}" 2>/dev/null |
      awk -F: '{file=$1; sub(/^[^:]*:/, "", $0);
                if (match($0, /(id|src)[[:space:]]*=[[:space:]]*"(https?:\/\/[^"[:space:]]+)"/, m))
                  print file "\t" m[2]}' || true
  } |
    grep -iE '\.(png|jpe?g|gif|webp|svg|bmp|tiff?|avif)(\?.*)?$' |
    grep -vE 'hc-cdn\.|cdn\.hackclub\.com' || true
}

# Escape literals for use in a sed "s#pat#rep#g" expression (# is the delimiter).
sed_escape_pattern() { printf '%s' "$1" | sed -e 's/[.[\*^$#]/\\&/g'; }
sed_escape_replacement() { printf '%s' "$1" | sed -e 's/[&#]/\\&/g'; }

# rewrite_url SRC DST [LABEL]: back up every file that mentions SRC, then
# replace SRC with DST in place.
rewrite_url() {
  local src="$1" dst="$2" label="${3:-Rewrote}" pat rep f
  pat=$(sed_escape_pattern "$src")
  rep=$(sed_escape_replacement "$dst")
  rg -l --fixed-strings -- "$src" "${PATHS[@]}" 2>/dev/null | while read -r f; do
    if [[ ! -e "$TMP_DIR/backup/$f" ]]; then
      mkdir -p "$TMP_DIR/backup/$(dirname "$f")"
      cp "$f" "$TMP_DIR/backup/$f"
    fi
    sed -i "s#${pat}#${rep}#g" "$f"
    echo "$label: $f"
  done || true
}

# Upload a batch of URLs; print one "deployedUrl<TAB>sourceUrl" line per file.
batch_upload() {
  payload=$(jq -sR 'split("\n")|map(select(length>0))' <(printf "%s\n" "$@"))
  for attempt in 1 2 3; do
    resp=$(curl -sS -w "\n%{http_code}" -X POST "$API_URL" \
      -H "Authorization: Bearer $TOKEN" \
      -H 'Content-Type: application/json' \
      --data-raw "$payload" 2>&1) || true
    body=$(printf "%s" "$resp" | sed '$d')
    code=$(printf "%s" "$resp" | tail -n1)
    if [[ "$code" == "200" ]]; then
      printf "%s" "$body" | jq -r '.files[] | .sourceUrl? as $s | .deployedUrl + "\t" + ($s // "")'
      return 0
    fi
    echo "Upload attempt $attempt failed with $code" >&2
    echo "Response body:" >&2
    printf "%s\n" "$body" >&2
    echo "Payload:" >&2
    printf "%s\n" "$payload" >&2
    sleep $((attempt * 2))
  done
  echo "Upload failed after retries" >&2
  return 1
}

mapfile -t LINES < <(collect_urls | sort -u)

URLS_TO_SEND=()
FILES=()
total=${#LINES[@]}
idx=0
for line in "${LINES[@]}"; do
  idx=$((idx + 1))
  file="${line%%$'\t'*}"
  url="${line#*$'\t'}"
  if grep -Fq "${url}" "$MAP_FILE" 2>/dev/null; then
    echo "[$idx/$total] cached: $url -> will rewrite only"
    CACHED_URLS+=("$url")
    continue
  fi
  if $DRY_RUN; then
    echo "[$idx/$total] queued: $url"
    URLS_TO_SEND+=("$url")
    FILES+=("$file")
  elif $SKIP_CHECK; then
    echo "[$idx/$total] no-check: $url"
    URLS_TO_SEND+=("$url")
    FILES+=("$file")
  else
    echo -n "[$idx/$total] checking: $url ... "
    code=$(curl -sS -o /dev/null -w '%{http_code}' -L "$url" || true)
    code="${code:-000}"
    if [[ "$code" =~ ^[23] ]]; then
      echo "ok ($code)"
      URLS_TO_SEND+=("$url")
      FILES+=("$file")
    else
      echo "fail ($code)"
      echo "Skipping: $url" >&2
    fi
  fi
done

if [[ ${#URLS_TO_SEND[@]} -eq 0 && ${#CACHED_URLS[@]} -eq 0 ]]; then
  echo "No image URLs to process"
  exit 0
fi

# Rewrite already-uploaded URLs straight from the map, without re-uploading.
if ! $DRY_RUN && [[ ${#CACHED_URLS[@]} -gt 0 ]]; then
  echo "Rewriting cached URLs from map without upload..."
  for src in "${CACHED_URLS[@]}"; do
    dst=$(awk -F'\t' -v s="$src" '$1==s{print $2}' "$MAP_FILE" | head -n1)
    [[ -z "$dst" ]] && continue
    rewrite_url "$src" "$dst" "Rewrote (cached)"
  done
fi

BATCH=50
start=0
while [[ $start -lt ${#URLS_TO_SEND[@]} ]]; do
  end=$((start + BATCH))
  if [[ $end -gt ${#URLS_TO_SEND[@]} ]]; then end=${#URLS_TO_SEND[@]}; fi
  chunk=("${URLS_TO_SEND[@]:start:end-start}")
  if $DRY_RUN; then
    for u in "${chunk[@]}"; do echo "DRY: would upload $u"; done
  else
    echo "Uploading ${#chunk[@]} URLs..."
    resp=$(batch_upload "${chunk[@]}") || { echo "Upload failed" >&2; exit 1; }
    echo "Upload response:"; printf "%s\n" "$resp"
    # Keep only the deployed URL from each "deployedUrl<TAB>sourceUrl" line and
    # assume the API returns files in request order.
    mapfile -t deployed_arr < <(printf "%s\n" "$resp" | cut -f1)
    for i in "${!chunk[@]}"; do
      src="${chunk[$i]}"
      dst="${deployed_arr[$i]:-}"
      if [[ -n "$dst" ]]; then
        printf "%s\t%s\n" "$src" "$dst" | tee -a "$MAP_FILE"
      fi
    done
  fi
  start=$end
done

if $DRY_RUN; then echo "DRY: skipping replacements"; exit 0; fi

# Replace every mapped source URL in place across the content tree.
if [[ -s "$MAP_FILE" ]]; then
  cp "$MAP_FILE" "$TMP_DIR/map-$(date +%s).tsv"
  while IFS=$'\t' read -r src dst _; do
    [[ -z "$src" || -z "$dst" ]] && continue
    rewrite_url "$src" "$dst"
  done < "$MAP_FILE"
  echo "Backups in $TMP_DIR/backup"
fi

echo "Done"