the home site for me: also iteration 3 or 4 of my site
1#!/usr/bin/env bash
2set -euo pipefail
3
4API_URL="https://cdn.hackclub.com/api/v3/new"
5TOKEN="${HACKCLUB_CDN_TOKEN:-}"
6if [[ -z "${TOKEN}" ]]; then
7 TOKEN="${1:-}"
8fi
9if [[ -z "${TOKEN}" ]]; then
10 echo "Usage: HACKCLUB_CDN_TOKEN=... $0 [token] [--dry-run] [paths...]" >&2
11 exit 1
12fi
13
14DRY_RUN=false
15SKIP_CHECK=false
16CACHED_URLS=()
17ARGS=()
18for a in "$@"; do
19 case "$a" in
20 --dry-run) DRY_RUN=true ;;
21 --skip-check) SKIP_CHECK=true ;;
22 *) ARGS+=("$a") ;;
23 esac
24done
25# remove token if passed as first arg
26if [[ ${#ARGS[@]} -gt 0 && "${ARGS[0]}" != "--dry-run" ]]; then
27 ARGS=("${ARGS[@]:1}")
28fi
29
30PATHS=("content")
31if [[ ${#ARGS[@]} -gt 0 ]]; then PATHS=("${ARGS[@]}"); fi
32
33TMP_DIR=".crush/rehost-cdn"
34MAP_FILE="$TMP_DIR/map.tsv"
35mkdir -p "$TMP_DIR"
36touch "$MAP_FILE"
37
38collect_urls() {
39 # Markdown images: 
40 rg -n --no-heading -e '!\[[^\]]*\]\((https?://[^)\s]+)\)' -g '!**/*.map' -g '!**/*.lock' "${PATHS[@]}" 2>/dev/null |
41 awk -F: '{file=$1; sub(/^[^:]*:/, "", $0); match($0, /!\[[^\]]*\]\((https?:\/\/[^)\s]+)\)/, m); if(m[1]!="") print file"\t"m[1]}' |
42 # Zola shortcode variants:
43 # - {% img(id="URL", ...) %}
44 # - {{ img(id="URL", ...) }}
45 cat <( rg -n --no-heading -e '\{[%{]\s*img[^}%]*[}%]\}' "${PATHS[@]}" 2>/dev/null | \
46 awk -F: '{file=$1; sub(/^[^:]*:/, "", $0); if (match($0, /(id|src)[[:space:]]*=[[:space:]]*"(https?:\/\/[^"[:space:]]+)"/, m)) print file"\t"m[2]}' ) |
47 awk -F'\t' '{print $1"\t"$2}' |
48 grep -E '\.(png|jpe?g|gif|webp|svg|bmp|tiff?|avif)(\?.*)?$' -i |
49 grep -vE 'hc-cdn\.|cdn\.hackclub\.com'
50}
51
52batch_upload() {
53 payload=$(jq -sR 'split("\n")|map(select(length>0))' <(printf "%s\n" "$@"))
54 for attempt in 1 2 3; do
55 resp=$(curl -sS -w "\n%{http_code}" -X POST "$API_URL" \
56 -H "Authorization: Bearer $TOKEN" \
57 -H 'Content-Type: application/json' \
58 --data-raw "$payload" 2>&1) || true
59 body=$(printf "%s" "$resp" | sed '$d')
60 code=$(printf "%s" "$resp" | tail -n1)
61 if [[ "$code" == "200" ]]; then
62 printf "%s" "$body" | jq -r '.files[] | .sourceUrl? as $s | .deployedUrl + "\t" + ( $s // "" )'
63 return 0
64 fi
65 echo "Upload attempt $attempt failed with $code" >&2
66 echo "Response body:" >&2
67 printf "%s\n" "$body" >&2
68 echo "Payload:" >&2
69 printf "%s\n" "$payload" >&2
70 sleep $((attempt*2))
71 done
72 echo "Upload failed after retries" >&2
73 return 1
74}
75
76mapfile -t LINES < <(collect_urls | sort -u)
77
78URLS_TO_SEND=()
79FILES=()
80total=${#LINES[@]}
81idx=0
82for line in "${LINES[@]}"; do
83 idx=$((idx+1))
84 file="${line%%$'\t'*}"
85 url="${line#*$'\t'}"
86 if grep -Fq "${url}" "$MAP_FILE" 2>/dev/null; then
87 echo "[$idx/$total] cached: $url -> will rewrite only"
88 CACHED_URLS+=("$url")
89 continue
90 fi
91 if $DRY_RUN; then
92 echo "[$idx/$total] queued: $url"
93 URLS_TO_SEND+=("$url")
94 FILES+=("$file")
95 else
96 if $SKIP_CHECK; then
97 echo "[$idx/$total] no-check: $url"
98 URLS_TO_SEND+=("$url")
99 FILES+=("$file")
100 else
101 echo -n "[$idx/$total] checking: $url ... "
102 code=$(curl -sS -o /dev/null -w '%{http_code}' -L "$url" || echo 000)
103 if [[ "$code" =~ ^2|3 ]]; then
104 echo "ok ($code)"
105 URLS_TO_SEND+=("$url")
106 FILES+=("$file")
107 else
108 echo "fail ($code)"; echo "Skipping: $url" >&2
109 fi
110 fi
111 fi
112done
113
114if [[ ${#URLS_TO_SEND[@]} -eq 0 ]]; then
115 echo "No new image URLs to process"; exit 0
116fi
117
118BATCH=50
119start=0
120# Rewrites for cached URLs from map without uploading
121if [ "${#CACHED_URLS[@]}" -gt 0 ] 2>/dev/null; then
122 echo "Rewriting cached URLs from map without upload..."
123 for src in "${CACHED_URLS[@]}"; do
124 dst=$(awk -F'\t' -v s="$src" '$1==s{print $2}' "$MAP_FILE" | head -n1)
125 [[ -z "$dst" ]] && continue
126 rg -l --fixed-strings -- "$src" "${PATHS[@]}" 2>/dev/null | while read -r f; do
127 mkdir -p "$TMP_DIR/backup"
128 if [[ ! -e "$TMP_DIR/backup/$f" ]]; then
129 mkdir -p "$TMP_DIR/backup/$(dirname "$f")"
130 cp "$f" "$TMP_DIR/backup/$f"
131 fi
132 sed -i "s#$(printf '%s' "$src" | sed -e 's/[.[\*^$]/\\&/g' -e 's#/#\\/#g')#$(printf '%s' "$dst" | sed -e 's/[&]/\\&/g' -e 's#/#\\/#g')#g" "$f"
133 echo "Rewrote (cached): $f"
134 done
135 done
136fi
137
138while [[ $start -lt ${#URLS_TO_SEND[@]} ]]; do
139 end=$(( start + BATCH ))
140 if [[ $end -gt ${#URLS_TO_SEND[@]} ]]; then end=${#URLS_TO_SEND[@]}; fi
141 chunk=("${URLS_TO_SEND[@]:start:end-start}")
142 if $DRY_RUN; then
143 for u in "${chunk[@]}"; do echo "DRY: would upload $u"; done
144 else
145 echo "Uploading ${#chunk[@]} URLs..."
146 resp=$(batch_upload "${chunk[@]}") || { echo "Upload failed" >&2; exit 1; }
147 echo "Upload response:"; printf "%s\n" "$resp"
148 mapfile -t deployed_arr < <(printf "%s\n" "$resp" | awk '{print $0}')
149 for i in "${!chunk[@]}"; do
150 src="${chunk[$i]}"
151 dst="${deployed_arr[$i]:-}"
152 if [[ -n "$dst" ]]; then
153 printf "%s\t%s\n" "$src" "$dst" | tee -a "$MAP_FILE"
154 fi
155 done
156 fi
157 start=$end
158done
159
160if $DRY_RUN; then echo "DRY: skipping replacements"; exit 0; fi
161
162# Replace in-place using map
163if [[ -s "$MAP_FILE" ]]; then
164 cp "$MAP_FILE" "$TMP_DIR/map-$(date +%s).tsv"
165 while IFS=$'\t' read -r src dst; do
166 [[ -z "$src" || -z "$dst" ]] && continue
167 rg -l --fixed-strings -- "$src" "${PATHS[@]}" 2>/dev/null | while read -r f; do
168 mkdir -p "$TMP_DIR/backup"
169 if [[ ! -e "$TMP_DIR/backup/$f" ]]; then
170 mkdir -p "$TMP_DIR/backup/$(dirname "$f")"
171 cp "$f" "$TMP_DIR/backup/$f"
172 fi
173 sed -i "s#$(printf '%s' "$src" | sed -e 's/[.[\*^$]/\\&/g' -e 's#/#\\/#g')#$(printf '%s' "$dst" | sed -e 's/[&]/\\&/g' -e 's#/#\\/#g')#g" "$f"
174 echo "Rewrote: $f"
175 done
176 done < "$MAP_FILE"
177 echo "Backups in $TMP_DIR/backup"
178fi
179
180echo "Done"