Monorepo for wisp.place. A static site hosting service built on top of the AT Protocol.
wisp.place
1import { getAllSites } from './db';
2import { fetchSiteRecord, getPdsForDid, downloadAndCacheSite, isCached } from './utils';
3import { logger } from './observability';
4import { markSiteAsBeingCached, unmarkSiteAsBeingCached } from './cache';
5import { clearRedirectRulesCache } from '../server';
6
/**
 * Tuning options accepted by `backfillCache`. Every field is optional.
 */
export interface BackfillOptions {
  /** Skip sites already in cache. `backfillCache` defaults this to `true`. */
  skipExisting?: boolean;
  /** Number of sites to cache concurrently. `backfillCache` defaults this to `10`. */
  concurrency?: number;
  /**
   * Maximum number of sites to backfill (for testing). The limit is only
   * applied when the value is greater than 0; otherwise every site is used.
   */
  maxSites?: number;
}
12
/**
 * Counters returned by `backfillCache` describing one backfill run.
 */
export interface BackfillStats {
  /** Number of sites returned by the database, counted before any `maxSites` limit. */
  total: number;
  /** Sites that were downloaded and cached successfully. */
  cached: number;
  /** Sites skipped because they were already cached. */
  skipped: number;
  /** Sites whose record or PDS could not be found, or whose caching threw. */
  failed: number;
  /** Wall-clock time of the run in milliseconds (difference of two `Date.now()` readings). */
  duration: number;
}
20
/**
 * Backfill all sites from the database into the local cache.
 *
 * Sites are loaded with `getAllSites()`, optionally truncated to `maxSites`,
 * and processed in consecutive batches of `concurrency` sites. The sites in a
 * batch are handled in parallel; the next batch starts only after every site
 * of the current batch has finished. A failure for a single site is logged
 * and counted in `failed` but never aborts the run.
 *
 * @param options - Optional tuning knobs. `skipExisting` defaults to `true`
 *   and `concurrency` to `10`. A `concurrency` that is not at least 1 (zero,
 *   negative, `NaN`) is clamped to 1 so the batching loop always advances;
 *   fractional values are rounded down.
 * @returns Counters for the run. `total` is the number of sites returned by
 *   the database, before any `maxSites` limit. If the run aborts as a whole
 *   (for example the site listing throws), the error is logged rather than
 *   rethrown and the stats gathered so far are returned with `duration`
 *   still at 0.
 */
export async function backfillCache(options: BackfillOptions = {}): Promise<BackfillStats> {
  const {
    skipExisting = true,
    concurrency = 10, // Increased from 3 to 10 for better parallelization
    maxSites,
  } = options;

  // A step of 0 or a negative step would keep the batching loop from ever
  // reaching the end of the list, and NaN would end it before any site was
  // processed. `NaN >= 1` is false, so NaN also takes the fallback branch;
  // Infinity passes through unchanged and simply yields one single batch.
  const batchSize = concurrency >= 1 ? Math.floor(concurrency) : 1;
  if (batchSize !== concurrency) {
    console.log(`⚠️ Invalid concurrency ${concurrency}; using ${batchSize}`);
  }

  const startTime = Date.now();
  const stats: BackfillStats = {
    total: 0,
    cached: 0,
    skipped: 0,
    failed: 0,
    duration: 0,
  };

  logger.info('Starting cache backfill', { skipExisting, concurrency: batchSize, maxSites });
  console.log(`
╔══════════════════════════════════════════╗
║ CACHE BACKFILL STARTING ║
╚══════════════════════════════════════════╝
  `);

  try {
    // Get all sites from database
    let sites = await getAllSites();
    stats.total = sites.length;

    logger.info(`Found ${sites.length} sites in database`);
    console.log(`📊 Found ${sites.length} sites in database`);

    // Limit if specified
    if (maxSites && maxSites > 0) {
      sites = sites.slice(0, maxSites);
      console.log(`⚙️ Limited to ${maxSites} sites for backfill`);
    }

    // Shared progress counter; each site bumps it exactly once, right before
    // its own progress line is printed.
    let processed = 0;

    // Process sites batch by batch; sites inside one batch run in parallel.
    for (let start = 0; start < sites.length; start += batchSize) {
      const batch = sites.slice(start, start + batchSize);

      await Promise.all(
        batch.map(async (site) => {
          // Empty display names fall back to the record key on purpose.
          const label = site.display_name || site.rkey;

          try {
            // Check if already cached
            if (skipExisting && isCached(site.did, site.rkey)) {
              stats.skipped++;
              processed++;
              logger.debug(`Skipping already cached site`, { did: site.did, rkey: site.rkey });
              console.log(`⏭️ [${processed}/${sites.length}] Skipped (cached): ${label}`);
              return;
            }

            // Fetch site record
            const siteData = await fetchSiteRecord(site.did, site.rkey);
            if (!siteData) {
              stats.failed++;
              processed++;
              logger.error('Site record not found during backfill', null, { did: site.did, rkey: site.rkey });
              console.log(`❌ [${processed}/${sites.length}] Failed (not found): ${label}`);
              return;
            }

            // Get PDS endpoint
            const pdsEndpoint = await getPdsForDid(site.did);
            if (!pdsEndpoint) {
              stats.failed++;
              processed++;
              logger.error('PDS not found during backfill', null, { did: site.did });
              console.log(`❌ [${processed}/${sites.length}] Failed (no PDS): ${label}`);
              return;
            }

            // Mark site as being cached to prevent serving stale content during update
            markSiteAsBeingCached(site.did, site.rkey);

            try {
              // Download and cache site
              await downloadAndCacheSite(site.did, site.rkey, siteData.record, pdsEndpoint, siteData.cid);
              // Clear redirect rules cache since the site was updated
              clearRedirectRulesCache(site.did, site.rkey);
              stats.cached++;
              processed++;
              logger.info('Successfully cached site during backfill', { did: site.did, rkey: site.rkey });
              console.log(`✅ [${processed}/${sites.length}] Cached: ${label}`);
            } finally {
              // Always unmark, even if caching fails
              unmarkSiteAsBeingCached(site.did, site.rkey);
            }
          } catch (err) {
            // Per-site failures are contained here so that Promise.all never
            // rejects and the remaining sites of the batch still complete.
            stats.failed++;
            processed++;
            logger.error('Failed to cache site during backfill', err, { did: site.did, rkey: site.rkey });
            console.log(`❌ [${processed}/${sites.length}] Failed: ${label}`);
          }
        })
      );
    }

    stats.duration = Date.now() - startTime;

    console.log(`
╔══════════════════════════════════════════╗
║ CACHE BACKFILL COMPLETED ║
╚══════════════════════════════════════════╝

📊 Total Sites: ${stats.total}
✅ Cached: ${stats.cached}
⏭️ Skipped: ${stats.skipped}
❌ Failed: ${stats.failed}
⏱️ Duration: ${(stats.duration / 1000).toFixed(2)}s
  `);

    logger.info('Cache backfill completed', stats);
  } catch (err) {
    // Run-level failure (e.g. the database listing threw): log it and fall
    // through so the caller still receives the partial stats.
    logger.error('Cache backfill failed', err);
    console.error('❌ Cache backfill failed:', err);
  }

  return stats;
}