Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.

import { AtpAgent } from '@atproto/api';
import type { Record as WispFsRecord, Directory, Entry, File } from '../lexicon/types/place/wisp/fs';
import type { Record as SubfsRecord } from '../lexicon/types/place/wisp/subfs';
import { existsSync, mkdirSync, rmSync } from 'fs';
import { writeFile, readFile, rename } from 'fs/promises';
import { safeFetchJson, safeFetchBlob } from './safe-fetch';
import { CID } from 'multiformats';

const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL

interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
  // Map of file path to blob CID for incremental updates
  fileCids?: Record<string, string>;
}

/**
 * Determines if a MIME type should benefit from gzip compression.
 * Returns true for text-based web assets (HTML, CSS, JS, JSON, XML, SVG)
 * and uncompressed audio (WAV, AIFF).
 * Returns false for already-compressed formats (images, video, audio, PDFs).
 */
export function shouldCompressMimeType(mimeType: string | undefined): boolean {
  if (!mimeType) return false;

  const mime = mimeType.toLowerCase();

  // Text-based web assets and uncompressed audio that benefit from compression
  const compressibleTypes = [
    'text/html',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/x-javascript',
    'text/xml',
    'application/xml',
    'application/json',
    'text/plain',
    'image/svg+xml',
    // Uncompressed audio formats
    'audio/wav',
    'audio/wave',
    'audio/x-wav',
    'audio/aiff',
    'audio/x-aiff',
  ];

  if (compressibleTypes.some(type => mime === type || mime.startsWith(type))) {
    return true;
  }

  // Already-compressed formats that should NOT be double-compressed
  const alreadyCompressedPrefixes = [
    'video/',
    'audio/',
    'image/',
    'application/pdf',
    'application/zip',
    'application/gzip',
  ];

  if (alreadyCompressedPrefixes.some(prefix => mime.startsWith(prefix))) {
    return false;
  }

  // Default to not compressing for unknown types
  return false;
}

interface IpldLink {
  $link: string;
}

interface TypedBlobRef {
  ref: CID | IpldLink;
}

interface UntypedBlobRef {
  cid: string;
}

function isIpldLink(obj: unknown): obj is IpldLink {
  return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string';
}

function isTypedBlobRef(obj: unknown): obj is TypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'ref' in obj;
}

function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string';
}

export async function resolveDid(identifier: string): Promise<string | null> {
  try {
    // If it's already a DID, return it
    if (identifier.startsWith('did:')) {
      return identifier;
    }

    // Otherwise, resolve the handle using the agent's built-in method
    const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
    const response = await agent.resolveHandle({ handle: identifier });
    return response.data.did;
  } catch (err) {
    console.error('Failed to resolve identifier', identifier, err);
    return null;
  }
}
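
// Illustrative usage of resolveDid (not part of the module): it accepts either a
// handle or a DID. The handle and DID below are invented examples, and real output
// depends on the live directory, so treat this as a sketch:
//
//   await resolveDid('did:plc:abc123');        // -> 'did:plc:abc123' (already a DID, returned as-is)
//   await resolveDid('example.bsky.social');   // -> whatever DID the handle resolves to
//   await resolveDid('nonexistent.invalid');   // -> null (resolution failed, error logged)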

export async function getPdsForDid(did: string): Promise<string | null> {
  try {
    let doc;

    if (did.startsWith('did:plc:')) {
      doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
    } else if (did.startsWith('did:web:')) {
      const didUrl = didWebToHttps(did);
      doc = await safeFetchJson(didUrl);
    } else {
      console.error('Unsupported DID method', did);
      return null;
    }

    const services = doc.service || [];
    const pdsService = services.find((s: any) => s.id === '#atproto_pds');

    return pdsService?.serviceEndpoint || null;
  } catch (err) {
    console.error('Failed to get PDS for DID', did, err);
    return null;
  }
}

function didWebToHttps(did: string): string {
  const didParts = did.split(':');
  if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
    throw new Error('Invalid did:web format');
  }

  const domain = didParts[2];
  const pathParts = didParts.slice(3);

  if (pathParts.length === 0) {
    return `https://${domain}/.well-known/did.json`;
  } else {
    const path = pathParts.join('/');
    return `https://${domain}/${path}/did.json`;
  }
}

export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return {
      record: data.value as WispFsRecord,
      cid: data.cid || ''
    };
  } catch (err) {
    console.error('Failed to fetch site record', did, rkey, err);
    return null;
  }
}

export function extractBlobCid(blobRef: unknown): string | null {
  if (isIpldLink(blobRef)) {
    return blobRef.$link;
  }

  if (isTypedBlobRef(blobRef)) {
    const ref = blobRef.ref;

    const cid = CID.asCID(ref);
    if (cid) {
      return cid.toString();
    }

    if (isIpldLink(ref)) {
      return ref.$link;
    }
  }

  if (isUntypedBlobRef(blobRef)) {
    return blobRef.cid;
  }

  return null;
}
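
// Blob-reference shapes that extractBlobCid understands, shown as hedged examples
// (the CID strings are placeholders, not real blobs):
//
//   { ref: CID.parse('bafkrei...') }     // typed BlobRef holding a CID instance
//   { ref: { $link: 'bafkrei...' } }     // typed BlobRef after a JSON round-trip
//   { $link: 'bafkrei...' }              // bare IPLD link
//   { cid: 'bafkrei...' }                // legacy untyped blob ref
//
// Anything else yields null, and callers such as cacheFileBlob log the value and
// skip the file.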

/**
 * Extract all subfs URIs from a directory tree with their mount paths
 */
function extractSubfsUris(directory: Directory, currentPath: string = ''): Array<{ uri: string; path: string }> {
  const uris: Array<{ uri: string; path: string }> = [];

  for (const entry of directory.entries) {
    const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;

    if ('type' in entry.node) {
      if (entry.node.type === 'subfs') {
        // Subfs node with subject URI
        const subfsNode = entry.node as any;
        if (subfsNode.subject) {
          uris.push({ uri: subfsNode.subject, path: fullPath });
        }
      } else if (entry.node.type === 'directory') {
        // Recursively search subdirectories
        const subUris = extractSubfsUris(entry.node as Directory, fullPath);
        uris.push(...subUris);
      }
    }
  }

  return uris;
}

/**
 * Fetch a subfs record from the PDS
 */
async function fetchSubfsRecord(uri: string, pdsEndpoint: string): Promise<SubfsRecord | null> {
  try {
    // Parse URI: at://did/collection/rkey
    const parts = uri.replace('at://', '').split('/');
    if (parts.length < 3) {
      console.error('Invalid subfs URI:', uri);
      return null;
    }

    const did = parts[0];
    const collection = parts[1];
    const rkey = parts[2];

    // Fetch the record from PDS
    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=${encodeURIComponent(collection)}&rkey=${encodeURIComponent(rkey)}`;
    const response = await safeFetchJson(url);

    if (!response || !response.value) {
      console.error('Subfs record not found:', uri);
      return null;
    }

    return response.value as SubfsRecord;
  } catch (err) {
    console.error('Failed to fetch subfs record:', uri, err);
    return null;
  }
}
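
// A subfs subject is an AT URI that fetchSubfsRecord splits into repo, collection,
// and rkey. Hypothetical example (the DID and rkey are placeholders):
//
//   at://did:plc:abc123/place.wisp.subfs/3kexample
//     repo       -> did:plc:abc123
//     collection -> place.wisp.subfs
//     rkey       -> 3kexample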

/**
 * Replace subfs nodes in a directory tree with their actual content
 * Subfs entries are "merged" - their root entries are hoisted into the parent directory
 */
async function expandSubfsNodes(directory: Directory, pdsEndpoint: string): Promise<Directory> {
  // Extract all subfs URIs
  const subfsUris = extractSubfsUris(directory);

  if (subfsUris.length === 0) {
    // No subfs nodes, return as-is
    return directory;
  }

  console.log(`Found ${subfsUris.length} subfs records, fetching...`);

  // Fetch all subfs records in parallel
  const subfsRecords = await Promise.all(
    subfsUris.map(async ({ uri, path }) => {
      const record = await fetchSubfsRecord(uri, pdsEndpoint);
      return { record, path };
    })
  );

  // Build a map of path -> root entries to merge
  const subfsMap = new Map<string, Entry[]>();
  for (const { record, path } of subfsRecords) {
    if (record && record.root && record.root.entries) {
      subfsMap.set(path, record.root.entries);
    }
  }

  // Replace subfs nodes by merging their root entries into the parent directory
  function replaceSubfsInEntries(entries: Entry[], currentPath: string = ''): Entry[] {
    const result: Entry[] = [];

    for (const entry of entries) {
      const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'subfs') {
        // Merge subfs entries into parent directory
        const subfsEntries = subfsMap.get(fullPath);
        if (subfsEntries) {
          console.log(`Merging subfs node at ${fullPath} (${subfsEntries.length} entries)`);
          // Recursively process the merged entries in case they contain nested subfs
          const processedEntries = replaceSubfsInEntries(subfsEntries, currentPath);
          result.push(...processedEntries);
        } else {
          // If fetch failed, skip this entry
          console.warn(`Failed to fetch subfs at ${fullPath}, skipping`);
        }
      } else if ('type' in node && node.type === 'directory' && 'entries' in node) {
        // Recursively process subdirectories
        result.push({
          ...entry,
          node: {
            ...node,
            entries: replaceSubfsInEntries(node.entries, fullPath)
          }
        });
      } else {
        // Regular file entry
        result.push(entry);
      }
    }

    return result;
  }

  return {
    ...directory,
    entries: replaceSubfsInEntries(directory.entries)
  };
}
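
// Sketch of the merge behaviour, assuming a site root with one subfs entry whose
// record holds "a.css" and "img/logo.png" (all names here are invented):
//
//   before expansion                          after expansion
//   root/                                     root/
//     index.html                                index.html
//     assets (subfs -> at://.../3kexample)      a.css
//                                               img/
//                                                 logo.png
//
// The subfs entry's own name ("assets") is not kept as a directory: per the doc
// comment above, the fetched root entries are hoisted directly into the parent.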

export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Expand subfs nodes before caching
  const expandedRoot = await expandSubfsNodes(record.root, pdsEndpoint);

  // Get existing cache metadata to check for incremental updates
  const existingMetadata = await getCacheMetadata(did, rkey);
  const existingFileCids = existingMetadata?.fileCids || {};

  // Use a temporary directory with timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Collect file CIDs from the new record (using expanded root)
    const newFileCids: Record<string, string> = {};
    collectFileCidsFromEntries(expandedRoot.entries, '', newFileCids);

    // Download/copy files to temporary directory (with incremental logic, using expanded root)
    await cacheFiles(did, rkey, expandedRoot.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids);

    // Atomically replace old cache with new cache
    // On POSIX systems (Linux/macOS), rename is atomic
    if (existsSync(finalDir)) {
      // Rename old directory to backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to final location
        await rename(tempDir, finalDir);

        // Clean up old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If rename failed, restore backup
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}

/**
 * Recursively collect file CIDs from entries for incremental update tracking
 */
function collectFileCidsFromEntries(entries: Entry[], pathPrefix: string, fileCids: Record<string, string>): void {
  for (const entry of entries) {
    const currentPath = pathPrefix ? `${pathPrefix}/${entry.name}` : entry.name;
    const node = entry.node;

    if ('type' in node && node.type === 'directory' && 'entries' in node) {
      collectFileCidsFromEntries(node.entries, currentPath, fileCids);
    } else if ('type' in node && node.type === 'file' && 'blob' in node) {
      const fileNode = node as File;
      const cid = extractBlobCid(fileNode.blob);
      if (cid) {
        fileCids[currentPath] = cid;
      }
    }
  }
}
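
// Hedged example of the incremental bookkeeping: collectFileCidsFromEntries
// flattens the expanded tree into a path -> blob CID map (placeholder CIDs):
//
//   { 'index.html': 'bafkreiaaa...', 'css/site.css': 'bafkreibbb...' }
//
// On the next publish, cacheFiles compares this map against the fileCids stored
// in .metadata.json: paths whose CID is unchanged are copied from the previous
// cache directory, and only new or changed blobs are downloaded from the PDS.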

async function cacheFiles(
  did: string,
  site: string,
  entries: Entry[],
  pdsEndpoint: string,
  pathPrefix: string,
  dirSuffix: string = '',
  existingFileCids: Record<string, string> = {},
  existingCacheDir?: string
): Promise<void> {
  // Collect file tasks, separating unchanged files from new/changed files
  const downloadTasks: Array<() => Promise<void>> = [];
  const copyTasks: Array<() => Promise<void>> = [];

  function collectFileTasks(
    entries: Entry[],
    currentPathPrefix: string
  ) {
    for (const entry of entries) {
      const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        collectFileTasks(node.entries, currentPath);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        const fileNode = node as File;
        const cid = extractBlobCid(fileNode.blob);

        // Check if file is unchanged (same CID as existing cache)
        if (cid && existingFileCids[currentPath] === cid && existingCacheDir) {
          // File unchanged - copy from existing cache instead of downloading
          copyTasks.push(() => copyExistingFile(
            did,
            site,
            currentPath,
            dirSuffix,
            existingCacheDir
          ));
        } else {
          // File new or changed - download it
          downloadTasks.push(() => cacheFileBlob(
            did,
            site,
            currentPath,
            fileNode.blob,
            pdsEndpoint,
            fileNode.encoding,
            fileNode.mimeType,
            fileNode.base64,
            dirSuffix
          ));
        }
      }
    }
  }

  collectFileTasks(entries, pathPrefix);

  console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`);

  // Copy unchanged files in parallel batches (fast local operations)
  const copyLimit = 50;
  for (let i = 0; i < copyTasks.length; i += copyLimit) {
    const batch = copyTasks.slice(i, i + copyLimit);
    await Promise.all(batch.map(task => task()));
    if (copyTasks.length > copyLimit) {
      console.log(`[Cache Progress] Copied ${Math.min(i + copyLimit, copyTasks.length)}/${copyTasks.length} unchanged files`);
    }
  }

  // Download new/changed files concurrently, up to 20 at a time
  const downloadLimit = 20;
  for (let i = 0; i < downloadTasks.length; i += downloadLimit) {
    const batch = downloadTasks.slice(i, i + downloadLimit);
    await Promise.all(batch.map(task => task()));
    if (downloadTasks.length > downloadLimit) {
      console.log(`[Cache Progress] Downloaded ${Math.min(i + downloadLimit, downloadTasks.length)}/${downloadTasks.length} files`);
    }
  }
}

/**
 * Copy an unchanged file from existing cache to new cache location
 */
async function copyExistingFile(
  did: string,
  site: string,
  filePath: string,
  dirSuffix: string,
  existingCacheDir: string
): Promise<void> {
  const { copyFile } = await import('fs/promises');

  const sourceFile = `${existingCacheDir}/${filePath}`;
  const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const destDir = destFile.substring(0, destFile.lastIndexOf('/'));

  // Create destination directory if needed
  if (destDir && !existsSync(destDir)) {
    mkdirSync(destDir, { recursive: true });
  }

  try {
    // Copy the file
    await copyFile(sourceFile, destFile);

    // Copy metadata file if it exists
    const sourceMetaFile = `${sourceFile}.meta`;
    const destMetaFile = `${destFile}.meta`;
    if (existsSync(sourceMetaFile)) {
      await copyFile(sourceMetaFile, destMetaFile);
    }
  } catch (err) {
    console.error(`Failed to copy cached file ${filePath} from existing cache:`, err);
    throw err;
  }
}
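
// Rough on-disk layout produced by the caching functions above, assuming the
// default CACHE_DIR of ./cache/sites and an invented DID/rkey:
//
//   ./cache/sites/did:plc:abc123/my-site/
//     .metadata.json        <- CacheMetadata: record CID, timestamp, fileCids map
//     index.html
//     css/site.css
//     css/site.css.meta     <- { "encoding": "gzip", "mimeType": "text/css" } when stored compressed
//
// During an update the same tree is first built under
// ./cache/sites/did:plc:abc123/my-site.tmp-<timestamp>-<random>/ and then swapped
// into place with rename(), as downloadAndCacheSite does above.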

async function cacheFileBlob(
  did: string,
  site: string,
  filePath: string,
  blobRef: any,
  pdsEndpoint: string,
  encoding?: 'gzip',
  mimeType?: string,
  base64?: boolean,
  dirSuffix: string = ''
): Promise<void> {
  const cid = extractBlobCid(blobRef);
  if (!cid) {
    console.error('Could not extract CID from blob', blobRef);
    return;
  }

  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;

  // Allow up to 500MB per file blob, with 5 minute timeout
  let content = await safeFetchBlob(blobUrl, { maxSize: 500 * 1024 * 1024, timeout: 300000 });

  // If content is base64-encoded, decode it back to raw binary (gzipped or not)
  if (base64) {
    // The blob stores base64 text as raw bytes: decode the bytes to a string,
    // then decode the base64 string back to binary
    const textDecoder = new TextDecoder();
    const base64String = textDecoder.decode(content);
    content = Buffer.from(base64String, 'base64');
  }

  const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));

  if (fileDir && !existsSync(fileDir)) {
    mkdirSync(fileDir, { recursive: true });
  }

  // Use the shared function to determine if this should remain compressed
  const shouldStayCompressed = shouldCompressMimeType(mimeType);

  // Decompress files that shouldn't be stored compressed (check the gzip magic bytes first)
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    try {
      const { gunzipSync } = await import('zlib');
      content = gunzipSync(content);
      // Clear the encoding flag since we're storing decompressed
      encoding = undefined;
    } catch (error) {
      console.error(`Failed to decompress ${filePath}, storing original gzipped content:`, error);
    }
  }

  await writeFile(cacheFile, content);

  // Store metadata only if file is still compressed
  if (encoding === 'gzip' && mimeType) {
    const metaFile = `${cacheFile}.meta`;
    await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
    console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
  } else {
    console.log('Cached file', filePath, content.length, 'bytes');
  }
}

/**
 * Sanitize a file path to prevent directory traversal attacks
 * Removes any path segments that attempt to go up directories
 */
export function sanitizePath(filePath: string): string {
  // Remove leading slashes
  const cleaned = filePath.replace(/^\/+/, '');

  // Split into segments and filter out dangerous ones
  const segments = cleaned.split('/').filter(segment => {
    // Remove empty segments and current-directory references
    if (!segment || segment === '.') return false;
    // Remove parent directory references
    if (segment === '..') return false;
    // Remove segments with null bytes
    if (segment.includes('\0')) return false;
    return true;
  });

  // Rejoin the safe segments
  return segments.join('/');
}

export function getCachedFilePath(did: string, site: string, filePath: string): string {
  const sanitizedPath = sanitizePath(filePath);
  return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
}

export function isCached(did: string, site: string): boolean {
  return existsSync(`${CACHE_DIR}/${did}/${site}`);
}

async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>): Promise<void> {
  const metadata: CacheMetadata = {
    recordCid,
    cachedAt: Date.now(),
    did,
    rkey,
    fileCids
  };

  const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
  const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));

  if (!existsSync(metadataDir)) {
    mkdirSync(metadataDir, { recursive: true });
  }

  await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
}

async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
  try {
    const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
    if (!existsSync(metadataPath)) return null;

    const content = await readFile(metadataPath, 'utf-8');
    return JSON.parse(content) as CacheMetadata;
  } catch (err) {
    console.error('Failed to read cache metadata', err);
    return null;
  }
}

export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  const metadata = await getCacheMetadata(did, rkey);
  if (!metadata) return false;

  // Check if cache has expired (14 days TTL)
  const cacheAge = Date.now() - metadata.cachedAt;
  if (cacheAge > CACHE_TTL) {
    console.log('[Cache] Cache expired for', did, rkey);
    return false;
  }

  // If current CID is provided, verify it matches
  if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
    console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
    return false;
  }

  return true;
}
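
// Putting the exported pieces together. This is an illustrative sketch, not part
// of the module's public surface: the name serveOrRefreshSite and the surrounding
// orchestration are assumptions about how a caller might compose these helpers,
// not the actual server code.
async function serveOrRefreshSite(identifier: string, rkey: string): Promise<string | null> {
  // Resolve a handle or DID, then locate the owning PDS
  const did = await resolveDid(identifier);
  if (!did) return null;

  const pdsEndpoint = await getPdsForDid(did);
  if (!pdsEndpoint) return null;

  // Fetch the current place.wisp.fs record so the cache can be validated by CID
  const site = await fetchSiteRecord(did, rkey);
  if (!site) return null;

  // Re-download only when the cache is missing, expired, or points at an older record
  if (!isCached(did, rkey) || !(await isCacheValid(did, rkey, site.cid))) {
    await downloadAndCacheSite(did, rkey, site.record, pdsEndpoint, site.cid);
  }

  // Hand back the sanitized on-disk path of, e.g., the site's index document
  return getCachedFilePath(did, rkey, 'index.html');
}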