wisp.place

Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.
import { AtpAgent } from '@atproto/api';
import type { Record as WispFsRecord, Directory, Entry, File } from '../lexicon/types/place/wisp/fs';
import type { Record as SubfsRecord } from '../lexicon/types/place/wisp/subfs';
import { existsSync, mkdirSync, rmSync } from 'fs';
import { writeFile, readFile, rename } from 'fs/promises';
import { safeFetchJson, safeFetchBlob } from './safe-fetch';
import { CID } from 'multiformats';

const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL

interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
  // Map of file path to blob CID for incremental updates
  fileCids?: Record<string, string>;
}
/**
 * Determines whether a MIME type benefits from gzip compression.
 * Returns true for text-based web assets (HTML, CSS, JS, JSON, XML, SVG)
 * and uncompressed audio (WAV, AIFF).
 * Returns false for already-compressed formats (images, video, audio, PDFs)
 * and for unknown types.
 */
export function shouldCompressMimeType(mimeType: string | undefined): boolean {
  if (!mimeType) return false;

  const mime = mimeType.toLowerCase();

  // Text-based web assets and uncompressed audio that benefit from compression
  const compressibleTypes = [
    'text/html',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/x-javascript',
    'text/xml',
    'application/xml',
    'application/json',
    'text/plain',
    'image/svg+xml',
    // Uncompressed audio formats
    'audio/wav',
    'audio/wave',
    'audio/x-wav',
    'audio/aiff',
    'audio/x-aiff',
  ];

  if (compressibleTypes.some(type => mime === type || mime.startsWith(type))) {
    return true;
  }

  // Already-compressed formats that should NOT be double-compressed
  const alreadyCompressedPrefixes = [
    'video/',
    'audio/',
    'image/',
    'application/pdf',
    'application/zip',
    'application/gzip',
  ];

  if (alreadyCompressedPrefixes.some(prefix => mime.startsWith(prefix))) {
    return false;
  }

  // Default to not compressing for unknown types
  return false;
}
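// Illustrative usage (not from the original source; results follow from the checks above):
//   shouldCompressMimeType('text/html')                        // true  - text asset
//   shouldCompressMimeType('application/json; charset=utf-8')  // true  - prefix match on 'application/json'
//   shouldCompressMimeType('image/png')                        // false - already-compressed format
//   shouldCompressMimeType(undefined)                          // false - unknown/missing type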

interface IpldLink {
  $link: string;
}

interface TypedBlobRef {
  ref: CID | IpldLink;
}

interface UntypedBlobRef {
  cid: string;
}

function isIpldLink(obj: unknown): obj is IpldLink {
  return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string';
}

function isTypedBlobRef(obj: unknown): obj is TypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'ref' in obj;
}

function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string';
}
export async function resolveDid(identifier: string): Promise<string | null> {
  try {
    // If it's already a DID, return it
    if (identifier.startsWith('did:')) {
      return identifier;
    }

    // Otherwise, resolve the handle using agent's built-in method
    const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
    const response = await agent.resolveHandle({ handle: identifier });
    return response.data.did;
  } catch (err) {
    console.error('Failed to resolve identifier', identifier, err);
    return null;
  }
}
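// Illustrative usage (hypothetical values; handle lookups go through public.api.bsky.app):
//   await resolveDid('alice.example.com')  // e.g. 'did:plc:abc123' if the handle resolves
//   await resolveDid('did:plc:abc123')     // returned unchanged, no network call
//   await resolveDid('bad handle')         // null (the resolution error is logged)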

export async function getPdsForDid(did: string): Promise<string | null> {
  try {
    let doc;

    if (did.startsWith('did:plc:')) {
      doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
    } else if (did.startsWith('did:web:')) {
      const didUrl = didWebToHttps(did);
      doc = await safeFetchJson(didUrl);
    } else {
      console.error('Unsupported DID method', did);
      return null;
    }

    const services = doc.service || [];
    const pdsService = services.find((s: any) => s.id === '#atproto_pds');

    return pdsService?.serviceEndpoint || null;
  } catch (err) {
    console.error('Failed to get PDS for DID', did, err);
    return null;
  }
}

function didWebToHttps(did: string): string {
  const didParts = did.split(':');
  if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
    throw new Error('Invalid did:web format');
  }

  const domain = didParts[2];
  const pathParts = didParts.slice(3);

  if (pathParts.length === 0) {
    return `https://${domain}/.well-known/did.json`;
  } else {
    const path = pathParts.join('/');
    return `https://${domain}/${path}/did.json`;
  }
}
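// Illustrative mappings produced by didWebToHttps (per the did:web rules implemented above):
//   'did:web:example.com'             -> 'https://example.com/.well-known/did.json'
//   'did:web:example.com:users:alice' -> 'https://example.com/users/alice/did.json'
//   'did:web'                         -> throws Error('Invalid did:web format')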

export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return {
      record: data.value as WispFsRecord,
      cid: data.cid || ''
    };
  } catch (err) {
    console.error('Failed to fetch site record', did, rkey, err);
    return null;
  }
}

export function extractBlobCid(blobRef: unknown): string | null {
  if (isIpldLink(blobRef)) {
    return blobRef.$link;
  }

  if (isTypedBlobRef(blobRef)) {
    const ref = blobRef.ref;

    const cid = CID.asCID(ref);
    if (cid) {
      return cid.toString();
    }

    if (isIpldLink(ref)) {
      return ref.$link;
    }
  }

  if (isUntypedBlobRef(blobRef)) {
    return blobRef.cid;
  }

  return null;
}
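// Illustrative inputs (placeholder CIDs) covering the three blob-ref shapes handled above:
//   extractBlobCid({ $link: 'bafkreiexample' })  // 'bafkreiexample'            (IPLD link)
//   extractBlobCid({ ref: someCidInstance })     // someCidInstance.toString()  (typed BlobRef)
//   extractBlobCid({ cid: 'bafkreiexample' })    // 'bafkreiexample'            (legacy untyped ref)
//   extractBlobCid('not a blob ref')             // null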

/**
 * Extract all subfs URIs from a directory tree with their mount paths
 */
function extractSubfsUris(directory: Directory, currentPath: string = ''): Array<{ uri: string; path: string }> {
  const uris: Array<{ uri: string; path: string }> = [];

  for (const entry of directory.entries) {
    const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;

    if ('type' in entry.node) {
      if (entry.node.type === 'subfs') {
        // Subfs node with subject URI
        const subfsNode = entry.node as any;
        if (subfsNode.subject) {
          uris.push({ uri: subfsNode.subject, path: fullPath });
        }
      } else if (entry.node.type === 'directory') {
        // Recursively search subdirectories
        const subUris = extractSubfsUris(entry.node as Directory, fullPath);
        uris.push(...subUris);
      }
    }
  }

  return uris;
}
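// Illustrative example (hypothetical tree): given a root whose entries include
//   { name: 'blog', node: { type: 'subfs', subject: 'at://did:plc:xyz/place.wisp.subfs/blog' } }
// extractSubfsUris would return [{ uri: 'at://did:plc:xyz/place.wisp.subfs/blog', path: 'blog' }],
// plus any subfs entries discovered while recursing into subdirectories.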

/**
 * Fetch a subfs record from the PDS
 */
async function fetchSubfsRecord(uri: string, pdsEndpoint: string): Promise<SubfsRecord | null> {
  try {
    // Parse URI: at://did/collection/rkey
    const parts = uri.replace('at://', '').split('/');
    if (parts.length < 3) {
      console.error('Invalid subfs URI:', uri);
      return null;
    }

    const did = parts[0];
    const collection = parts[1];
    const rkey = parts[2];

    // Fetch the record from PDS
    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=${encodeURIComponent(collection)}&rkey=${encodeURIComponent(rkey)}`;
    const response = await safeFetchJson(url);

    if (!response || !response.value) {
      console.error('Subfs record not found:', uri);
      return null;
    }

    return response.value as SubfsRecord;
  } catch (err) {
    console.error('Failed to fetch subfs record:', uri, err);
    return null;
  }
}

/**
 * Replace subfs nodes in a directory tree with their actual content.
 * Subfs entries are "merged" - their root entries are hoisted into the parent directory.
 */
async function expandSubfsNodes(directory: Directory, pdsEndpoint: string): Promise<Directory> {
  // Extract all subfs URIs
  const subfsUris = extractSubfsUris(directory);

  if (subfsUris.length === 0) {
    // No subfs nodes, return as-is
    return directory;
  }

  console.log(`Found ${subfsUris.length} subfs records, fetching...`);

  // Fetch all subfs records in parallel
  const subfsRecords = await Promise.all(
    subfsUris.map(async ({ uri, path }) => {
      const record = await fetchSubfsRecord(uri, pdsEndpoint);
      return { record, path };
    })
  );

  // Build a map of path -> root entries to merge
  const subfsMap = new Map<string, Entry[]>();
  for (const { record, path } of subfsRecords) {
    if (record && record.root && record.root.entries) {
      subfsMap.set(path, record.root.entries);
    }
  }

  // Replace subfs nodes by merging their root entries into the parent directory
  function replaceSubfsInEntries(entries: Entry[], currentPath: string = ''): Entry[] {
    const result: Entry[] = [];

    for (const entry of entries) {
      const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'subfs') {
        // Merge subfs entries into parent directory
        const subfsEntries = subfsMap.get(fullPath);
        if (subfsEntries) {
          console.log(`Merging subfs node at ${fullPath} (${subfsEntries.length} entries)`);
          // Recursively process the merged entries in case they contain nested subfs
          const processedEntries = replaceSubfsInEntries(subfsEntries, currentPath);
          result.push(...processedEntries);
        } else {
          // If fetch failed, skip this entry
          console.warn(`Failed to fetch subfs at ${fullPath}, skipping`);
        }
      } else if ('type' in node && node.type === 'directory' && 'entries' in node) {
        // Recursively process subdirectories
        result.push({
          ...entry,
          node: {
            ...node,
            entries: replaceSubfsInEntries(node.entries, fullPath)
          }
        });
      } else {
        // Regular file entry
        result.push(entry);
      }
    }

    return result;
  }

  return {
    ...directory,
    entries: replaceSubfsInEntries(directory.entries)
  };
}
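// Illustrative merge semantics (hypothetical data): if the root has an entry
//   { name: 'docs', node: { type: 'subfs', subject: 'at://did:plc:xyz/place.wisp.subfs/docs' } }
// and that record's root.entries are files named 'index.html' and 'guide.html', the expanded
// root contains 'index.html' and 'guide.html' directly: the subfs root is hoisted into the
// parent directory rather than mounted under 'docs/'.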

export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Expand subfs nodes before caching
  const expandedRoot = await expandSubfsNodes(record.root, pdsEndpoint);

  // Get existing cache metadata to check for incremental updates
  const existingMetadata = await getCacheMetadata(did, rkey);
  const existingFileCids = existingMetadata?.fileCids || {};

  // Use a temporary directory with timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Collect file CIDs from the new record (using expanded root)
    const newFileCids: Record<string, string> = {};
    collectFileCidsFromEntries(expandedRoot.entries, '', newFileCids);

    // Download/copy files to temporary directory (with incremental logic, using expanded root)
    await cacheFiles(did, rkey, expandedRoot.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids);

    // Atomically replace old cache with new cache
    // On POSIX systems (Linux/macOS), rename is atomic
    if (existsSync(finalDir)) {
      // Rename old directory to backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to final location
        await rename(tempDir, finalDir);

        // Clean up old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If rename failed, restore backup
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}

/**
 * Recursively collect file CIDs from entries for incremental update tracking
 */
function collectFileCidsFromEntries(entries: Entry[], pathPrefix: string, fileCids: Record<string, string>): void {
  for (const entry of entries) {
    const currentPath = pathPrefix ? `${pathPrefix}/${entry.name}` : entry.name;
    const node = entry.node;

    if ('type' in node && node.type === 'directory' && 'entries' in node) {
      collectFileCidsFromEntries(node.entries, currentPath, fileCids);
    } else if ('type' in node && node.type === 'file' && 'blob' in node) {
      const fileNode = node as File;
      const cid = extractBlobCid(fileNode.blob);
      if (cid) {
        fileCids[currentPath] = cid;
      }
    }
  }
}

async function cacheFiles(
  did: string,
  site: string,
  entries: Entry[],
  pdsEndpoint: string,
  pathPrefix: string,
  dirSuffix: string = '',
  existingFileCids: Record<string, string> = {},
  existingCacheDir?: string
): Promise<void> {
  // Collect file tasks, separating unchanged files from new/changed files
  const downloadTasks: Array<() => Promise<void>> = [];
  const copyTasks: Array<() => Promise<void>> = [];

  function collectFileTasks(
    entries: Entry[],
    currentPathPrefix: string
  ) {
    for (const entry of entries) {
      const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        collectFileTasks(node.entries, currentPath);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        const fileNode = node as File;
        const cid = extractBlobCid(fileNode.blob);

        // Check if file is unchanged (same CID as existing cache)
        if (cid && existingFileCids[currentPath] === cid && existingCacheDir) {
          // File unchanged - copy from existing cache instead of downloading
          copyTasks.push(() => copyExistingFile(
            did,
            site,
            currentPath,
            dirSuffix,
            existingCacheDir
          ));
        } else {
          // File new or changed - download it
          downloadTasks.push(() => cacheFileBlob(
            did,
            site,
            currentPath,
            fileNode.blob,
            pdsEndpoint,
            fileNode.encoding,
            fileNode.mimeType,
            fileNode.base64,
            dirSuffix
          ));
        }
      }
    }
  }

  collectFileTasks(entries, pathPrefix);

  console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`);
  // Copy unchanged files in parallel (fast local operations)
  const copyLimit = 50;
  for (let i = 0; i < copyTasks.length; i += copyLimit) {
    const batch = copyTasks.slice(i, i + copyLimit);
    await Promise.all(batch.map(task => task()));
    if (copyTasks.length > copyLimit) {
      console.log(`[Cache Progress] Copied ${Math.min(i + copyLimit, copyTasks.length)}/${copyTasks.length} unchanged files`);
    }
  }

  // Download new/changed files with bounded concurrency to avoid overwhelming the PDS
  const downloadLimit = 20;
  for (let i = 0; i < downloadTasks.length; i += downloadLimit) {
    const batch = downloadTasks.slice(i, i + downloadLimit);
    await Promise.all(batch.map(task => task()));
    if (downloadTasks.length > downloadLimit) {
      console.log(`[Cache Progress] Downloaded ${Math.min(i + downloadLimit, downloadTasks.length)}/${downloadTasks.length} files`);
    }
  }
}

/**
 * Copy an unchanged file from existing cache to new cache location
 */
async function copyExistingFile(
  did: string,
  site: string,
  filePath: string,
  dirSuffix: string,
  existingCacheDir: string
): Promise<void> {
  const { copyFile } = await import('fs/promises');

  const sourceFile = `${existingCacheDir}/${filePath}`;
  const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const destDir = destFile.substring(0, destFile.lastIndexOf('/'));

  // Create destination directory if needed
  if (destDir && !existsSync(destDir)) {
    mkdirSync(destDir, { recursive: true });
  }

  try {
    // Copy the file
    await copyFile(sourceFile, destFile);

    // Copy metadata file if it exists
    const sourceMetaFile = `${sourceFile}.meta`;
    const destMetaFile = `${destFile}.meta`;
    if (existsSync(sourceMetaFile)) {
      await copyFile(sourceMetaFile, destMetaFile);
    }
  } catch (err) {
    // Rethrow so the caller's cache rebuild fails and the temp directory is cleaned up
    console.error(`Failed to copy cached file ${filePath}:`, err);
    throw err;
  }
}

async function cacheFileBlob(
  did: string,
  site: string,
  filePath: string,
  blobRef: any,
  pdsEndpoint: string,
  encoding?: 'gzip',
  mimeType?: string,
  base64?: boolean,
  dirSuffix: string = ''
): Promise<void> {
  const cid = extractBlobCid(blobRef);
  if (!cid) {
    console.error('Could not extract CID from blob', blobRef);
    return;
  }

  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;

  // Allow up to 500MB per file blob, with 5 minute timeout
  let content = await safeFetchBlob(blobUrl, { maxSize: 500 * 1024 * 1024, timeout: 300000 });
  // If the content is base64-encoded, decode it back to raw binary (gzipped or not)
  if (base64) {
    // The blob body is base64 text stored as raw bytes: decode it to a string,
    // then convert that string back into the original binary content
    const textDecoder = new TextDecoder();
    const base64String = textDecoder.decode(content);
    content = Buffer.from(base64String, 'base64');
  }

  const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));

  if (fileDir && !existsSync(fileDir)) {
    mkdirSync(fileDir, { recursive: true });
  }

  // Use the shared function to determine if this should remain compressed
  const shouldStayCompressed = shouldCompressMimeType(mimeType);

  // Decompress files that shouldn't be stored compressed
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    try {
      const { gunzipSync } = await import('zlib');
      const decompressed = gunzipSync(content);
      content = decompressed;
      // Clear the encoding flag since we're storing decompressed
      encoding = undefined;
    } catch (error) {
      console.error(`Failed to decompress ${filePath}, storing original gzipped content:`, error);
    }
  }

  await writeFile(cacheFile, content);

  // Store metadata only if file is still compressed
  if (encoding === 'gzip' && mimeType) {
    const metaFile = `${cacheFile}.meta`;
    await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
    console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
  } else {
    console.log('Cached file', filePath, content.length, 'bytes');
  }
}

/**
 * Sanitize a file path to prevent directory traversal attacks.
 * Removes any path segments that attempt to go up directories.
 */
export function sanitizePath(filePath: string): string {
  // Remove leading slashes
  const cleaned = filePath.replace(/^\/+/, '');

  // Split into segments and filter out dangerous ones
  const segments = cleaned.split('/').filter(segment => {
    // Remove empty segments and current-directory references
    if (!segment || segment === '.') return false;
    // Remove parent directory references
    if (segment === '..') return false;
    // Remove segments with null bytes
    if (segment.includes('\0')) return false;
    return true;
  });

  // Rejoin the safe segments
  return segments.join('/');
}
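// Illustrative behaviour (results follow from the filtering above):
//   sanitizePath('/assets/app.js')    // 'assets/app.js'  - leading slashes stripped
//   sanitizePath('../../etc/passwd')  // 'etc/passwd'     - '..' segments dropped
//   sanitizePath('a/./b//c')          // 'a/b/c'          - '.' and empty segments dropped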

export function getCachedFilePath(did: string, site: string, filePath: string): string {
  const sanitizedPath = sanitizePath(filePath);
  return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
}

export function isCached(did: string, site: string): boolean {
  return existsSync(`${CACHE_DIR}/${did}/${site}`);
}

async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>): Promise<void> {
  const metadata: CacheMetadata = {
    recordCid,
    cachedAt: Date.now(),
    did,
    rkey,
    fileCids
  };

  const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
  const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));

  if (!existsSync(metadataDir)) {
    mkdirSync(metadataDir, { recursive: true });
  }

  await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
}
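// Example of the .metadata.json written above (placeholder values):
//   {
//     "recordCid": "bafyreiexamplerecordcid",
//     "cachedAt": 1719876543210,
//     "did": "did:plc:abc123",
//     "rkey": "my-site",
//     "fileCids": { "index.html": "bafkreiexample1", "css/style.css": "bafkreiexample2" }
//   }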

async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
  try {
    const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
    if (!existsSync(metadataPath)) return null;

    const content = await readFile(metadataPath, 'utf-8');
    return JSON.parse(content) as CacheMetadata;
  } catch (err) {
    console.error('Failed to read cache metadata', err);
    return null;
  }
}

export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  const metadata = await getCacheMetadata(did, rkey);
  if (!metadata) return false;

  // Check if cache has expired (14 days TTL)
  const cacheAge = Date.now() - metadata.cachedAt;
  if (cacheAge > CACHE_TTL) {
    console.log('[Cache] Cache expired for', did, rkey);
    return false;
  }

  // If current CID is provided, verify it matches
  if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
    console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
    return false;
  }

  return true;
}
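// Sketch of how these pieces fit together in a request handler (assumed wiring; the actual
// HTTP layer lives elsewhere in the monorepo, and `identifier`, `rkey`, and `requestPath`
// are hypothetical inputs):
//
//   const did = await resolveDid(identifier);
//   const site = did && await fetchSiteRecord(did, rkey);
//   if (did && site && !(await isCacheValid(did, rkey, site.cid))) {
//     const pds = await getPdsForDid(did);
//     if (pds) await downloadAndCacheSite(did, rkey, site.record, pds, site.cid);
//   }
//   // ...then serve getCachedFilePath(did, rkey, requestPath) when isCached(did, rkey)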