#!/usr/bin/env node

// ok look this code was definitely never meant to be published
// but i haven't made it into something better and it's been a
// while so
//
// hi
//
// this has my weird changes and half-baked bits, runs much
// slower than needed, and basically works.
//
// (initially based on futur's [script](https://gist.github.com/futurGH/2ee18d385eff3ba98f5b35b9dcac0aed#file-requestcrawl-ts))

// Fail fast: every relay request below needs both of these.
for (const envVar of ["RELAY_ADDRESS", "RELAY_ADMIN_KEY"]) {
  if (!process.env[envVar]) throw new Error(`Missing env var ${envVar}`);
}

// Entries with a list index below this are skipped (handy for resuming a run).
const start_at = 0;

// Pause after each request that actually reached out to the relay.
const REQUEST_DELAY_MS = 300;

// Upstream list of known PDS hosts.
const PDS_STATE_URL =
  "https://raw.githubusercontent.com/mary-ext/atproto-scraping/refs/heads/trunk/state.json";

/**
 * Resolve after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Turn the body of a failed relay response into something printable.
 *
 * The body is read as text first so that a non-JSON error page (or an empty
 * body) is still reported together with the HTTP status, instead of blowing
 * up in the JSON parser.
 *
 * @param {Response} res - A response whose `ok` flag is false.
 * @returns {Promise<string>} Re-serialised JSON when the body parses to a
 *   truthy value, the raw text when it is not JSON, or "unknown error" when
 *   the body is empty or parses to a falsy value.
 */
async function readErrorBody(res) {
  const text = await res.text();
  if (!text) return "unknown error";
  try {
    const parsed = JSON.parse(text);
    return parsed ? JSON.stringify(parsed) : "unknown error";
  } catch {
    return text;
  }
}

/**
 * Ask the relay to crawl a single PDS.
 *
 * Failures (HTTP errors and thrown fetch errors alike) are logged and never
 * propagate, so one bad host does not stop the run.
 *
 * @param {URL} url - Address of the PDS; only its hostname is sent to the relay.
 * @param {number} i - Index of this PDS in the list; used as a log prefix and
 *   compared against `start_at`.
 * @returns {Promise<true | undefined>} `true` when the entry was skipped
 *   without contacting the relay (the caller does not need to pause),
 *   `undefined` otherwise.
 */
async function bluh(url, i) {
  if (i < start_at) {
    console.log(`skipping ${i} (before start)`);
    return true;
  }

  try {
    const res = await fetch(`${process.env.RELAY_ADDRESS}/admin/pds/requestCrawl`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // NOTE(review): the key is sent verbatim after "Basic ", so it
        // presumably has to be the already-encoded credential — confirm.
        Authorization: `Basic ${process.env.RELAY_ADMIN_KEY}`,
      },
      body: JSON.stringify({
        hostname: "https://" + url.hostname,
        // Limits passed along with the request. The hourly/daily values are
        // written as <number> x <seconds in that window>.
        per_second: 200,
        per_hour: 150 * 60 * 60,
        per_day: 120 * 60 * 60 * 24,
        crawl_rate: 50,
        repo_limit: 1_000_000,
      }),
    });

    if (res.ok) {
      console.log(`${i} got ${url.hostname}`);
      return;
    }

    const detail = await readErrorBody(res);
    console.error(
      `${i} Error requesting crawl for ${url.hostname}: ${res.status} ${res.statusText} — ${detail}`,
    );
  } catch (err) {
    console.error(`${i} Network error requesting crawl for ${url.hostname}: ${err}`);
  }
}

/**
 * Ping the optional healthcheck endpoint named by `HEALTHCHECK_URL`.
 *
 * Does nothing when the variable is unset. A non-ok response is only logged;
 * a thrown fetch error is logged and re-thrown so the run ends as a failure.
 *
 * @returns {Promise<void>}
 * @throws Whatever `fetch` throws when the endpoint cannot be reached.
 */
async function pingHealthcheck() {
  const healthcheckUrl = process.env["HEALTHCHECK_URL"];
  if (!healthcheckUrl) return;

  console.log('trying to ping healtcheck...');
  try {
    const res = await fetch(healthcheckUrl);
    console.log(`Pinged healthcheck endpoint! ok? ${res.ok}`);
  } catch (e) {
    console.error(`Failed to ping healtcheck: ${e}`);
    throw e;
  }
}

/**
 * Fetch the PDS list, request a crawl for each entry one at a time, then ping
 * the healthcheck.
 *
 * Requests are strictly sequential with a short pause after each real
 * request; skipped entries do not pause.
 *
 * @returns {Promise<void>}
 * @throws If the PDS list cannot be fetched or the healthcheck ping throws.
 */
async function main() {
  const pdses = await fetchPdses();
  console.log("Requesting crawls...");

  for (const [i, rawUrl] of pdses.entries()) {
    let url;
    try {
      url = new URL(rawUrl);
    } catch (err) {
      // One malformed upstream entry should not abort the whole run; the
      // index still advances so `start_at` keeps meaning "position in list".
      console.error(`${i} skipping invalid PDS url ${rawUrl}: ${err}`);
      continue;
    }

    const gofast = await bluh(url, i);
    if (!gofast) await sleep(REQUEST_DELAY_MS);
  }

  console.log("Done crawling!");
  await pingHealthcheck();
}

/**
 * Download the upstream state file and return the PDS URLs it lists.
 *
 * @returns {Promise<string[]>} Keys of `pdses` that start with "https://".
 * @throws {Error} When the response is not ok or has no `pdses` property.
 */
async function fetchPdses() {
  const res = await fetch(PDS_STATE_URL);
  const data = res.ok ? await res.json() : null;
  if (!data?.pdses) throw new Error("Failed to fetch PDSes");

  return Object.keys(data.pdses).filter((pds) => pds.startsWith("https://"));
}

// Report any failure and exit non-zero instead of leaving a floating promise.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});