wisp.place

Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.
import { AtpAgent } from '@atproto/api';
import type { Record as WispFsRecord, Directory, Entry, File } from '../lexicon/types/place/wisp/fs';
import type { Record as SubfsRecord } from '../lexicon/types/place/wisp/subfs';
import { existsSync, mkdirSync, readFileSync, rmSync } from 'fs';
import { writeFile, readFile, rename } from 'fs/promises';
import { safeFetchJson, safeFetchBlob } from './safe-fetch';
import { CID } from 'multiformats';

const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL

interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
  // Map of file path to blob CID for incremental updates
  fileCids?: Record<string, string>;
}

/**
 * Determines whether a MIME type benefits from gzip compression.
 * Returns true for text-based web assets (HTML, CSS, JS, JSON, XML, SVG).
 * Returns false for already-compressed formats (images, video, audio, PDFs).
 */
export function shouldCompressMimeType(mimeType: string | undefined): boolean {
  if (!mimeType) return false;

  const mime = mimeType.toLowerCase();

  // Text-based web assets that benefit from compression
  const compressibleTypes = [
    'text/html',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/x-javascript',
    'text/xml',
    'application/xml',
    'application/json',
    'text/plain',
    'image/svg+xml',
  ];

  if (compressibleTypes.some(type => mime === type || mime.startsWith(type))) {
    return true;
  }

  // Already-compressed formats that should NOT be double-compressed
  const alreadyCompressedPrefixes = [
    'video/',
    'audio/',
    'image/',
    'application/pdf',
    'application/zip',
    'application/gzip',
  ];

  if (alreadyCompressedPrefixes.some(prefix => mime.startsWith(prefix))) {
    return false;
  }

  // Default to not compressing for unknown types
  return false;
}
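
// Illustrative sketch (not part of the module): expected results of
// shouldCompressMimeType for a few common MIME types. The charset-suffixed
// example passes because of the startsWith() check above.
//
//   shouldCompressMimeType('text/html');                // true
//   shouldCompressMimeType('text/html; charset=utf-8'); // true  (prefix match)
//   shouldCompressMimeType('image/svg+xml');            // true  (text-based despite the image/ prefix)
//   shouldCompressMimeType('image/png');                // false (already compressed)
//   shouldCompressMimeType(undefined);                  // false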

interface IpldLink {
  $link: string;
}

interface TypedBlobRef {
  ref: CID | IpldLink;
}

interface UntypedBlobRef {
  cid: string;
}

function isIpldLink(obj: unknown): obj is IpldLink {
  return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string';
}

function isTypedBlobRef(obj: unknown): obj is TypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'ref' in obj;
}

function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string';
}

export async function resolveDid(identifier: string): Promise<string | null> {
  try {
    // If it's already a DID, return it
    if (identifier.startsWith('did:')) {
      return identifier;
    }

    // Otherwise, resolve the handle using the agent's built-in method
    const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
    const response = await agent.resolveHandle({ handle: identifier });
    return response.data.did;
  } catch (err) {
    console.error('Failed to resolve identifier', identifier, err);
    return null;
  }
}

export async function getPdsForDid(did: string): Promise<string | null> {
  try {
    let doc;

    if (did.startsWith('did:plc:')) {
      doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
    } else if (did.startsWith('did:web:')) {
      const didUrl = didWebToHttps(did);
      doc = await safeFetchJson(didUrl);
    } else {
      console.error('Unsupported DID method', did);
      return null;
    }

    const services = doc.service || [];
    const pdsService = services.find((s: any) => s.id === '#atproto_pds');

    return pdsService?.serviceEndpoint || null;
  } catch (err) {
    console.error('Failed to get PDS for DID', did, err);
    return null;
  }
}
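
// Illustrative sketch (not part of the module): typical identity resolution,
// going from a handle to a DID and then to its PDS endpoint. The handle
// 'example.bsky.social' is a placeholder.
//
//   const did = await resolveDid('example.bsky.social');  // e.g. 'did:plc:...'
//   if (did) {
//     const pds = await getPdsForDid(did);                 // e.g. 'https://some.pds.host'
//   }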

function didWebToHttps(did: string): string {
  const didParts = did.split(':');
  if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
    throw new Error('Invalid did:web format');
  }

  const domain = didParts[2];
  const pathParts = didParts.slice(3);

  if (pathParts.length === 0) {
    return `https://${domain}/.well-known/did.json`;
  } else {
    const path = pathParts.join('/');
    return `https://${domain}/${path}/did.json`;
  }
}
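
// Illustrative sketch (not part of the module): how didWebToHttps maps
// did:web identifiers to DID document URLs, with colon-separated segments
// becoming path components.
//
//   didWebToHttps('did:web:example.com');
//     // -> 'https://example.com/.well-known/did.json'
//   didWebToHttps('did:web:example.com:users:alice');
//     // -> 'https://example.com/users/alice/did.json'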

export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return {
      record: data.value as WispFsRecord,
      cid: data.cid || ''
    };
  } catch (err) {
    console.error('Failed to fetch site record', did, rkey, err);
    return null;
  }
}

export function extractBlobCid(blobRef: unknown): string | null {
  if (isIpldLink(blobRef)) {
    return blobRef.$link;
  }

  if (isTypedBlobRef(blobRef)) {
    const ref = blobRef.ref;

    const cid = CID.asCID(ref);
    if (cid) {
      return cid.toString();
    }

    if (isIpldLink(ref)) {
      return ref.$link;
    }
  }

  if (isUntypedBlobRef(blobRef)) {
    return blobRef.cid;
  }

  return null;
}
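
// Illustrative sketch (not part of the module): the three blob-ref shapes
// extractBlobCid accepts. The 'bafkrei...' CID is a placeholder.
//
//   extractBlobCid({ $link: 'bafkrei...' });           // IPLD link form
//   extractBlobCid({ ref: { $link: 'bafkrei...' } });  // typed blob ref (JSON form)
//   extractBlobCid({ cid: 'bafkrei...' });             // legacy untyped blob ref
//   // Each returns the CID string; anything unrecognized returns null.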

/**
 * Extract all subfs URIs from a directory tree with their mount paths
 */
function extractSubfsUris(directory: Directory, currentPath: string = ''): Array<{ uri: string; path: string }> {
  const uris: Array<{ uri: string; path: string }> = [];

  for (const entry of directory.entries) {
    const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;

    if ('type' in entry.node) {
      if (entry.node.type === 'subfs') {
        // Subfs node with subject URI
        const subfsNode = entry.node as any;
        if (subfsNode.subject) {
          uris.push({ uri: subfsNode.subject, path: fullPath });
        }
      } else if (entry.node.type === 'directory') {
        // Recursively search subdirectories
        const subUris = extractSubfsUris(entry.node as Directory, fullPath);
        uris.push(...subUris);
      }
    }
  }

  return uris;
}
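
// Illustrative sketch (not part of the module): the kind of tree this walks.
// Field names follow how the node objects are read in this file; the exact
// place.wisp.fs lexicon schema may differ, and the values are placeholders.
//
//   const root = {
//     entries: [
//       { name: 'index.html', node: { type: 'file', blob: { $link: 'bafkrei...' }, mimeType: 'text/html' } },
//       { name: 'blog', node: { type: 'subfs', subject: 'at://did:plc:abc123/place.wisp.subfs/3k...' } },
//     ],
//   };
//   extractSubfsUris(root as any);
//     // -> [{ uri: 'at://did:plc:abc123/place.wisp.subfs/3k...', path: 'blog' }]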

/**
 * Fetch a subfs record from the PDS
 */
async function fetchSubfsRecord(uri: string, pdsEndpoint: string): Promise<SubfsRecord | null> {
  try {
    // Parse URI: at://did/collection/rkey
    const parts = uri.replace('at://', '').split('/');
    if (parts.length < 3) {
      console.error('Invalid subfs URI:', uri);
      return null;
    }

    const did = parts[0];
    const collection = parts[1];
    const rkey = parts[2];

    // Fetch the record from the PDS
    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=${encodeURIComponent(collection)}&rkey=${encodeURIComponent(rkey)}`;
    const response = await safeFetchJson(url);

    if (!response || !response.value) {
      console.error('Subfs record not found:', uri);
      return null;
    }

    return response.value as SubfsRecord;
  } catch (err) {
    console.error('Failed to fetch subfs record:', uri, err);
    return null;
  }
}

/**
 * Replace subfs nodes in a directory tree with their actual content
 */
async function expandSubfsNodes(directory: Directory, pdsEndpoint: string): Promise<Directory> {
  // Extract all subfs URIs
  const subfsUris = extractSubfsUris(directory);

  if (subfsUris.length === 0) {
    // No subfs nodes, return as-is
    return directory;
  }

  console.log(`Found ${subfsUris.length} subfs records, fetching...`);

  // Fetch all subfs records in parallel
  const subfsRecords = await Promise.all(
    subfsUris.map(async ({ uri, path }) => {
      const record = await fetchSubfsRecord(uri, pdsEndpoint);
      return { record, path };
    })
  );

  // Build a map of path -> directory content
  const subfsMap = new Map<string, Directory>();
  for (const { record, path } of subfsRecords) {
    if (record && record.root) {
      subfsMap.set(path, record.root);
    }
  }

  // Replace subfs nodes with their actual content
  function replaceSubfsInEntries(entries: Entry[], currentPath: string = ''): Entry[] {
    return entries.map(entry => {
      const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'subfs') {
        // Replace with actual directory content
        const subfsDir = subfsMap.get(fullPath);
        if (subfsDir) {
          console.log(`Expanding subfs node at ${fullPath}`);
          return {
            ...entry,
            node: subfsDir
          };
        }
        // If fetch failed, keep the subfs node (will be skipped later)
        return entry;
      } else if ('type' in node && node.type === 'directory' && 'entries' in node) {
        // Recursively process subdirectories
        return {
          ...entry,
          node: {
            ...node,
            entries: replaceSubfsInEntries(node.entries, fullPath)
          }
        };
      }

      return entry;
    });
  }

  return {
    ...directory,
    entries: replaceSubfsInEntries(directory.entries)
  };
}

export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Expand subfs nodes before caching
  const expandedRoot = await expandSubfsNodes(record.root, pdsEndpoint);

  // Get existing cache metadata to check for incremental updates
  const existingMetadata = await getCacheMetadata(did, rkey);
  const existingFileCids = existingMetadata?.fileCids || {};

  // Use a temporary directory with timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Collect file CIDs from the new record (using expanded root)
    const newFileCids: Record<string, string> = {};
    collectFileCidsFromEntries(expandedRoot.entries, '', newFileCids);

    // Download/copy files to temporary directory (with incremental logic, using expanded root)
    await cacheFiles(did, rkey, expandedRoot.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids);

    // Atomically replace old cache with new cache
    // On POSIX systems (Linux/macOS), rename is atomic
    if (existsSync(finalDir)) {
      // Rename old directory to backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to final location
        await rename(tempDir, finalDir);

        // Clean up old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If rename failed, restore backup
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}
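
// Illustrative sketch (not part of the module): one way the exported helpers
// above could be composed to cache a site. 'example.com' and the rkey 'self'
// are placeholders; error handling is omitted.
//
//   const did = await resolveDid('example.com');
//   const pds = did && await getPdsForDid(did);
//   const site = did && pds ? await fetchSiteRecord(did, 'self') : null;
//   if (did && pds && site && !(await isCacheValid(did, 'self', site.cid))) {
//     await downloadAndCacheSite(did, 'self', site.record, pds, site.cid);
//   }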

/**
 * Recursively collect file CIDs from entries for incremental update tracking
 */
function collectFileCidsFromEntries(entries: Entry[], pathPrefix: string, fileCids: Record<string, string>): void {
  for (const entry of entries) {
    const currentPath = pathPrefix ? `${pathPrefix}/${entry.name}` : entry.name;
    const node = entry.node;

    if ('type' in node && node.type === 'directory' && 'entries' in node) {
      collectFileCidsFromEntries(node.entries, currentPath, fileCids);
    } else if ('type' in node && node.type === 'file' && 'blob' in node) {
      const fileNode = node as File;
      const cid = extractBlobCid(fileNode.blob);
      if (cid) {
        fileCids[currentPath] = cid;
      }
    }
  }
}

async function cacheFiles(
  did: string,
  site: string,
  entries: Entry[],
  pdsEndpoint: string,
  pathPrefix: string,
  dirSuffix: string = '',
  existingFileCids: Record<string, string> = {},
  existingCacheDir?: string
): Promise<void> {
  // Collect file tasks, separating unchanged files from new/changed files
  const downloadTasks: Array<() => Promise<void>> = [];
  const copyTasks: Array<() => Promise<void>> = [];

  function collectFileTasks(
    entries: Entry[],
    currentPathPrefix: string
  ) {
    for (const entry of entries) {
      const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        collectFileTasks(node.entries, currentPath);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        const fileNode = node as File;
        const cid = extractBlobCid(fileNode.blob);

        // Check if file is unchanged (same CID as existing cache)
        if (cid && existingFileCids[currentPath] === cid && existingCacheDir) {
          // File unchanged - copy from existing cache instead of downloading
          copyTasks.push(() => copyExistingFile(
            did,
            site,
            currentPath,
            dirSuffix,
            existingCacheDir
          ));
        } else {
          // File new or changed - download it
          downloadTasks.push(() => cacheFileBlob(
            did,
            site,
            currentPath,
            fileNode.blob,
            pdsEndpoint,
            fileNode.encoding,
            fileNode.mimeType,
            fileNode.base64,
            dirSuffix
          ));
        }
      }
    }
  }

  collectFileTasks(entries, pathPrefix);

  console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`);

  // Copy unchanged files in parallel (fast local operations)
  const copyLimit = 10;
  for (let i = 0; i < copyTasks.length; i += copyLimit) {
    const batch = copyTasks.slice(i, i + copyLimit);
    await Promise.all(batch.map(task => task()));
  }

  // Download new/changed files concurrently with a limit of 3 at a time
  const downloadLimit = 3;
  for (let i = 0; i < downloadTasks.length; i += downloadLimit) {
    const batch = downloadTasks.slice(i, i + downloadLimit);
    await Promise.all(batch.map(task => task()));
  }
}

/**
 * Copy an unchanged file from existing cache to new cache location
 */
async function copyExistingFile(
  did: string,
  site: string,
  filePath: string,
  dirSuffix: string,
  existingCacheDir: string
): Promise<void> {
  const { copyFile } = await import('fs/promises');

  const sourceFile = `${existingCacheDir}/${filePath}`;
  const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const destDir = destFile.substring(0, destFile.lastIndexOf('/'));

  // Create destination directory if needed
  if (destDir && !existsSync(destDir)) {
    mkdirSync(destDir, { recursive: true });
  }

  try {
    // Copy the file
    await copyFile(sourceFile, destFile);

    // Copy metadata file if it exists
    const sourceMetaFile = `${sourceFile}.meta`;
    const destMetaFile = `${destFile}.meta`;
    if (existsSync(sourceMetaFile)) {
      await copyFile(sourceMetaFile, destMetaFile);
    }

    console.log(`[Incremental] Copied unchanged file: ${filePath}`);
  } catch (err) {
    console.error(`[Incremental] Failed to copy file ${filePath}, will attempt download:`, err);
    throw err;
  }
}

async function cacheFileBlob(
  did: string,
  site: string,
  filePath: string,
  blobRef: any,
  pdsEndpoint: string,
  encoding?: 'gzip',
  mimeType?: string,
  base64?: boolean,
  dirSuffix: string = ''
): Promise<void> {
  const cid = extractBlobCid(blobRef);
  if (!cid) {
    console.error('Could not extract CID from blob', blobRef);
    return;
  }

  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;

  // Allow up to 500MB per file blob, with a 5 minute timeout
  let content = await safeFetchBlob(blobUrl, { maxSize: 500 * 1024 * 1024, timeout: 300000 });

  console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`);

  // If the content is base64-encoded, decode it back to raw binary (gzipped or not)
  if (base64) {
    const originalSize = content.length;
    // The blob contains base64 text stored as raw bytes: decode those bytes to a
    // string, then decode the base64 string back into binary
    const textDecoder = new TextDecoder();
    const base64String = textDecoder.decode(content);
    content = Buffer.from(base64String, 'base64');
    console.log(`[DEBUG] ${filePath}: decoded base64 from ${originalSize} bytes to ${content.length} bytes`);

    // Check whether it's actually gzipped by looking at the magic bytes
    if (content.length >= 2) {
      const hasGzipMagic = content[0] === 0x1f && content[1] === 0x8b;
      console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${hasGzipMagic}`);
    }
  }

  const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));

  if (fileDir && !existsSync(fileDir)) {
    mkdirSync(fileDir, { recursive: true });
  }

  // Use the shared function to determine if this should remain compressed
  const shouldStayCompressed = shouldCompressMimeType(mimeType);

  // Decompress files that shouldn't be stored compressed
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    console.log(`[DEBUG] ${filePath}: decompressing non-compressible type (${mimeType}) before caching`);
    try {
      const { gunzipSync } = await import('zlib');
      const decompressed = gunzipSync(content);
      console.log(`[DEBUG] ${filePath}: decompressed from ${content.length} to ${decompressed.length} bytes`);
      content = decompressed;
      // Clear the encoding flag since we're storing decompressed content
      encoding = undefined;
    } catch (error) {
      console.log(`[DEBUG] ${filePath}: failed to decompress, storing original gzipped content. Error:`, error);
    }
  }

  await writeFile(cacheFile, content);

  // Store metadata only if the file is still compressed
  if (encoding === 'gzip' && mimeType) {
    const metaFile = `${cacheFile}.meta`;
    await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
    console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
  } else {
    console.log('Cached file', filePath, content.length, 'bytes');
  }
}

/**
 * Sanitize a file path to prevent directory traversal attacks.
 * Removes any path segments that attempt to go up directories.
 */
export function sanitizePath(filePath: string): string {
  // Remove leading slashes
  const cleaned = filePath.replace(/^\/+/, '');

  // Split into segments and filter out dangerous ones
  const segments = cleaned.split('/').filter(segment => {
    // Remove empty segments
    if (!segment || segment === '.') return false;
    // Remove parent directory references
    if (segment === '..') return false;
    // Remove segments with null bytes
    if (segment.includes('\0')) return false;
    return true;
  });

  // Rejoin the safe segments
  return segments.join('/');
}
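
// Illustrative sketch (not part of the module): inputs and outputs of
// sanitizePath. Traversal segments, '.' segments, empty segments, and leading
// slashes are stripped; everything else is preserved.
//
//   sanitizePath('../../etc/passwd');   // 'etc/passwd'
//   sanitizePath('/blog/./post.html');  // 'blog/post.html'
//   sanitizePath('assets//style.css');  // 'assets/style.css'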

export function getCachedFilePath(did: string, site: string, filePath: string): string {
  const sanitizedPath = sanitizePath(filePath);
  return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
}

export function isCached(did: string, site: string): boolean {
  return existsSync(`${CACHE_DIR}/${did}/${site}`);
}

async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>): Promise<void> {
  const metadata: CacheMetadata = {
    recordCid,
    cachedAt: Date.now(),
    did,
    rkey,
    fileCids
  };

  const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
  const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));

  if (!existsSync(metadataDir)) {
    mkdirSync(metadataDir, { recursive: true });
  }

  await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
}

async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
  try {
    const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
    if (!existsSync(metadataPath)) return null;

    const content = await readFile(metadataPath, 'utf-8');
    return JSON.parse(content) as CacheMetadata;
  } catch (err) {
    console.error('Failed to read cache metadata', err);
    return null;
  }
}

export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  const metadata = await getCacheMetadata(did, rkey);
  if (!metadata) return false;

  // Check if cache has expired (14 days TTL)
  const cacheAge = Date.now() - metadata.cachedAt;
  if (cacheAge > CACHE_TTL) {
    console.log('[Cache] Cache expired for', did, rkey);
    return false;
  }

  // If current CID is provided, verify it matches
  if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
    console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
    return false;
  }

  return true;
}
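
// Illustrative sketch (not part of the module): a request handler could use
// isCached / isCacheValid / getCachedFilePath to serve files, re-validating
// against the current record CID before trusting the on-disk copy. The names
// below are placeholders, not an API this module exports.
//
//   if (isCached(did, rkey) && await isCacheValid(did, rkey, currentCid)) {
//     const path = getCachedFilePath(did, rkey, requestedPath);
//     // serve `path`, honoring a sibling `${path}.meta` file if present
//   }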