Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.
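The caching code below walks place.wisp.fs records whose root is a tree of named entries. A rough sketch of the record shape it expects is shown here; the example values are hypothetical and only the fields the code actually reads (name, node.type, blob, mimeType, encoding, base64, subject) are inferred from the imported lexicon types, not from the lexicon definition itself.

// Hypothetical place.wisp.fs record, sketched from how the cache code reads it.
// Real records come from the PDS via com.atproto.repo.getRecord.
const exampleRecord = {
  $type: 'place.wisp.fs',
  root: {
    type: 'directory',
    entries: [
      {
        name: 'index.html',
        node: {
          type: 'file',
          blob: { ref: { $link: 'bafkrei...' } }, // blob ref; $link / CID / cid forms are all handled
          mimeType: 'text/html',
          encoding: 'gzip', // present when the uploaded blob was gzipped
          base64: false,    // true when the blob bytes are base64 text
        },
      },
      {
        name: 'assets',
        node: {
          type: 'subfs',
          // points at a place.wisp.subfs record that gets expanded before caching
          subject: 'at://did:plc:example/place.wisp.subfs/rkey123',
        },
      },
    ],
  },
};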
import { AtpAgent } from '@atproto/api';
import type { Record as WispFsRecord, Directory, Entry, File } from '../lexicon/types/place/wisp/fs';
import type { Record as SubfsRecord } from '../lexicon/types/place/wisp/subfs';
import { existsSync, mkdirSync, readFileSync, rmSync } from 'fs';
import { writeFile, readFile, rename } from 'fs/promises';
import { safeFetchJson, safeFetchBlob } from './safe-fetch';
import { CID } from 'multiformats';

const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL

interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
  // Map of file path to blob CID for incremental updates
  fileCids?: Record<string, string>;
}

/**
 * Determines if a MIME type should benefit from gzip compression.
 * Returns true for text-based web assets (HTML, CSS, JS, JSON, XML, SVG).
 * Returns false for already-compressed formats (images, video, audio, PDFs).
 */
export function shouldCompressMimeType(mimeType: string | undefined): boolean {
  if (!mimeType) return false;

  const mime = mimeType.toLowerCase();

  // Text-based web assets that benefit from compression
  const compressibleTypes = [
    'text/html',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/x-javascript',
    'text/xml',
    'application/xml',
    'application/json',
    'text/plain',
    'image/svg+xml',
  ];

  if (compressibleTypes.some(type => mime === type || mime.startsWith(type))) {
    return true;
  }

  // Already-compressed formats that should NOT be double-compressed
  const alreadyCompressedPrefixes = [
    'video/',
    'audio/',
    'image/',
    'application/pdf',
    'application/zip',
    'application/gzip',
  ];

  if (alreadyCompressedPrefixes.some(prefix => mime.startsWith(prefix))) {
    return false;
  }

  // Default to not compressing for unknown types
  return false;
}

interface IpldLink {
  $link: string;
}

interface TypedBlobRef {
  ref: CID | IpldLink;
}

interface UntypedBlobRef {
  cid: string;
}

function isIpldLink(obj: unknown): obj is IpldLink {
  return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string';
}

function isTypedBlobRef(obj: unknown): obj is TypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'ref' in obj;
}

function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string';
}

export async function resolveDid(identifier: string): Promise<string | null> {
  try {
    // If it's already a DID, return it
    if (identifier.startsWith('did:')) {
      return identifier;
    }

    // Otherwise, resolve the handle using agent's built-in method
    const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
    const response = await agent.resolveHandle({ handle: identifier });
    return response.data.did;
  } catch (err) {
    console.error('Failed to resolve identifier', identifier, err);
    return null;
  }
}

export async function getPdsForDid(did: string): Promise<string | null> {
  try {
    let doc;

    if (did.startsWith('did:plc:')) {
      doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
    } else if (did.startsWith('did:web:')) {
      const didUrl = didWebToHttps(did);
      doc = await safeFetchJson(didUrl);
    } else {
      console.error('Unsupported DID method', did);
      return null;
    }

    const services = doc.service || [];
    const pdsService = services.find((s: any) => s.id === '#atproto_pds');

    return pdsService?.serviceEndpoint || null;
  } catch (err) {
    console.error('Failed to get PDS for DID', did, err);
    return null;
  }
}

function didWebToHttps(did: string): string {
  const didParts = did.split(':');
  if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
    throw new Error('Invalid did:web format');
  }

  const domain = didParts[2];
  const pathParts = didParts.slice(3);

  if (pathParts.length === 0) {
    return `https://${domain}/.well-known/did.json`;
  } else {
    const path = pathParts.join('/');
    return `https://${domain}/${path}/did.json`;
  }
}

export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return {
      record: data.value as WispFsRecord,
      cid: data.cid || ''
    };
  } catch (err) {
    console.error('Failed to fetch site record', did, rkey, err);
    return null;
  }
}

export function extractBlobCid(blobRef: unknown): string | null {
  if (isIpldLink(blobRef)) {
    return blobRef.$link;
  }

  if (isTypedBlobRef(blobRef)) {
    const ref = blobRef.ref;

    const cid = CID.asCID(ref);
    if (cid) {
      return cid.toString();
    }

    if (isIpldLink(ref)) {
      return ref.$link;
    }
  }

  if (isUntypedBlobRef(blobRef)) {
    return blobRef.cid;
  }

  return null;
}

/**
 * Extract all subfs URIs from a directory tree with their mount paths
 */
function extractSubfsUris(directory: Directory, currentPath: string = ''): Array<{ uri: string; path: string }> {
  const uris: Array<{ uri: string; path: string }> = [];

  for (const entry of directory.entries) {
    const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;

    if ('type' in entry.node) {
      if (entry.node.type === 'subfs') {
        // Subfs node with subject URI
        const subfsNode = entry.node as any;
        if (subfsNode.subject) {
          uris.push({ uri: subfsNode.subject, path: fullPath });
        }
      } else if (entry.node.type === 'directory') {
        // Recursively search subdirectories
        const subUris = extractSubfsUris(entry.node as Directory, fullPath);
        uris.push(...subUris);
      }
    }
  }

  return uris;
}

/**
 * Fetch a subfs record from the PDS
 */
async function fetchSubfsRecord(uri: string, pdsEndpoint: string): Promise<SubfsRecord | null> {
  try {
    // Parse URI: at://did/collection/rkey
    const parts = uri.replace('at://', '').split('/');
    if (parts.length < 3) {
      console.error('Invalid subfs URI:', uri);
      return null;
    }

    const did = parts[0];
    const collection = parts[1];
    const rkey = parts[2];

    // Fetch the record from PDS
    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=${encodeURIComponent(collection)}&rkey=${encodeURIComponent(rkey)}`;
    const response = await safeFetchJson(url);

    if (!response || !response.value) {
      console.error('Subfs record not found:', uri);
      return null;
    }

    return response.value as SubfsRecord;
  } catch (err) {
    console.error('Failed to fetch subfs record:', uri, err);
    return null;
  }
}

/**
 * Replace subfs nodes in a directory tree with their actual content
 */
async function expandSubfsNodes(directory: Directory, pdsEndpoint: string): Promise<Directory> {
  // Extract all subfs URIs
  const subfsUris = extractSubfsUris(directory);

  if (subfsUris.length === 0) {
    // No subfs nodes, return as-is
    return directory;
  }

  console.log(`Found ${subfsUris.length} subfs records, fetching...`);

  // Fetch all subfs records in parallel
  const subfsRecords = await Promise.all(
    subfsUris.map(async ({ uri, path }) => {
      const record = await fetchSubfsRecord(uri, pdsEndpoint);
      return { record, path };
    })
  );

  // Build a map of path -> directory content
  const subfsMap = new Map<string, Directory>();
  for (const { record, path } of subfsRecords) {
    if (record && record.root) {
      subfsMap.set(path, record.root);
    }
  }

  // Replace subfs nodes with their actual content
  function replaceSubfsInEntries(entries: Entry[], currentPath: string = ''): Entry[] {
    return entries.map(entry => {
      const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'subfs') {
        // Replace with actual directory content
        const subfsDir = subfsMap.get(fullPath);
        if (subfsDir) {
          console.log(`Expanding subfs node at ${fullPath}`);
          return {
            ...entry,
            node: subfsDir
          };
        }
        // If fetch failed, keep the subfs node (will be skipped later)
        return entry;
      } else if ('type' in node && node.type === 'directory' && 'entries' in node) {
        // Recursively process subdirectories
        return {
          ...entry,
          node: {
            ...node,
            entries: replaceSubfsInEntries(node.entries, fullPath)
          }
        };
      }

      return entry;
    });
  }

  return {
    ...directory,
    entries: replaceSubfsInEntries(directory.entries)
  };
}

export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Expand subfs nodes before caching
  const expandedRoot = await expandSubfsNodes(record.root, pdsEndpoint);

  // Get existing cache metadata to check for incremental updates
  const existingMetadata = await getCacheMetadata(did, rkey);
  const existingFileCids = existingMetadata?.fileCids || {};

  // Use a temporary directory with timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Collect file CIDs from the new record (using expanded root)
    const newFileCids: Record<string, string> = {};
    collectFileCidsFromEntries(expandedRoot.entries, '', newFileCids);

    // Download/copy files to temporary directory (with incremental logic, using expanded root)
    await cacheFiles(did, rkey, expandedRoot.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids);

    // Atomically replace old cache with new cache
    // On POSIX systems (Linux/macOS), rename is atomic
    if (existsSync(finalDir)) {
      // Rename old directory to backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to final location
        await rename(tempDir, finalDir);

        // Clean up old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If rename failed, restore backup
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}

/**
 * Recursively collect file CIDs from entries for incremental update tracking
 */
function collectFileCidsFromEntries(entries: Entry[], pathPrefix: string, fileCids: Record<string, string>): void {
  for (const entry of entries) {
    const currentPath = pathPrefix ? `${pathPrefix}/${entry.name}` : entry.name;
    const node = entry.node;

    if ('type' in node && node.type === 'directory' && 'entries' in node) {
      collectFileCidsFromEntries(node.entries, currentPath, fileCids);
    } else if ('type' in node && node.type === 'file' && 'blob' in node) {
      const fileNode = node as File;
      const cid = extractBlobCid(fileNode.blob);
      if (cid) {
        fileCids[currentPath] = cid;
      }
    }
  }
}

async function cacheFiles(
  did: string,
  site: string,
  entries: Entry[],
  pdsEndpoint: string,
  pathPrefix: string,
  dirSuffix: string = '',
  existingFileCids: Record<string, string> = {},
  existingCacheDir?: string
): Promise<void> {
  // Collect file tasks, separating unchanged files from new/changed files
  const downloadTasks: Array<() => Promise<void>> = [];
  const copyTasks: Array<() => Promise<void>> = [];

  function collectFileTasks(
    entries: Entry[],
    currentPathPrefix: string
  ) {
    for (const entry of entries) {
      const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        collectFileTasks(node.entries, currentPath);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        const fileNode = node as File;
        const cid = extractBlobCid(fileNode.blob);

        // Check if file is unchanged (same CID as existing cache)
        if (cid && existingFileCids[currentPath] === cid && existingCacheDir) {
          // File unchanged - copy from existing cache instead of downloading
          copyTasks.push(() => copyExistingFile(
            did,
            site,
            currentPath,
            dirSuffix,
            existingCacheDir
          ));
        } else {
          // File new or changed - download it
          downloadTasks.push(() => cacheFileBlob(
            did,
            site,
            currentPath,
            fileNode.blob,
            pdsEndpoint,
            fileNode.encoding,
            fileNode.mimeType,
            fileNode.base64,
            dirSuffix
          ));
        }
      }
    }
  }

  collectFileTasks(entries, pathPrefix);

  console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`);

  // Copy unchanged files in parallel (fast local operations)
  const copyLimit = 10;
  for (let i = 0; i < copyTasks.length; i += copyLimit) {
    const batch = copyTasks.slice(i, i + copyLimit);
    await Promise.all(batch.map(task => task()));
  }

  // Download new/changed files concurrently with a limit of 3 at a time
  const downloadLimit = 3;
  for (let i = 0; i < downloadTasks.length; i += downloadLimit) {
    const batch = downloadTasks.slice(i, i + downloadLimit);
    await Promise.all(batch.map(task => task()));
  }
}
/**
 * Copy an unchanged file from existing cache to new cache location
 */
async function copyExistingFile(
  did: string,
  site: string,
  filePath: string,
  dirSuffix: string,
  existingCacheDir: string
): Promise<void> {
  const { copyFile } = await import('fs/promises');

  const sourceFile = `${existingCacheDir}/${filePath}`;
  const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const destDir = destFile.substring(0, destFile.lastIndexOf('/'));

  // Create destination directory if needed
  if (destDir && !existsSync(destDir)) {
    mkdirSync(destDir, { recursive: true });
  }

  try {
    // Copy the file
    await copyFile(sourceFile, destFile);

    // Copy metadata file if it exists
    const sourceMetaFile = `${sourceFile}.meta`;
    const destMetaFile = `${destFile}.meta`;
    if (existsSync(sourceMetaFile)) {
      await copyFile(sourceMetaFile, destMetaFile);
    }

    console.log(`[Incremental] Copied unchanged file: ${filePath}`);
  } catch (err) {
    console.error(`[Incremental] Failed to copy file ${filePath}, will attempt download:`, err);
    throw err;
  }
}

async function cacheFileBlob(
  did: string,
  site: string,
  filePath: string,
  blobRef: any,
  pdsEndpoint: string,
  encoding?: 'gzip',
  mimeType?: string,
  base64?: boolean,
  dirSuffix: string = ''
): Promise<void> {
  const cid = extractBlobCid(blobRef);
  if (!cid) {
    console.error('Could not extract CID from blob', blobRef);
    return;
  }

  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;

  // Allow up to 500MB per file blob, with 5 minute timeout
  let content = await safeFetchBlob(blobUrl, { maxSize: 500 * 1024 * 1024, timeout: 300000 });

  console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`);

  // If content is base64-encoded, decode it back to raw binary (gzipped or not)
  if (base64) {
    const originalSize = content.length;
    // The blob contains base64 text stored as raw bytes: decode the bytes to a
    // string, then decode that base64 string back to binary
    const textDecoder = new TextDecoder();
    const base64String = textDecoder.decode(content);
    content = Buffer.from(base64String, 'base64');
    console.log(`[DEBUG] ${filePath}: decoded base64 from ${originalSize} bytes to ${content.length} bytes`);

    // Check if it's actually gzipped by looking at magic bytes
    if (content.length >= 2) {
      const hasGzipMagic = content[0] === 0x1f && content[1] === 0x8b;
      console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${hasGzipMagic}`);
    }
  }

  const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));

  if (fileDir && !existsSync(fileDir)) {
    mkdirSync(fileDir, { recursive: true });
  }

  // Use the shared function to determine if this should remain compressed
  const shouldStayCompressed = shouldCompressMimeType(mimeType);

  // Decompress files that shouldn't be stored compressed
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    console.log(`[DEBUG] ${filePath}: decompressing non-compressible type (${mimeType}) before caching`);
    try {
      const { gunzipSync } = await import('zlib');
      const decompressed = gunzipSync(content);
      console.log(`[DEBUG] ${filePath}: decompressed from ${content.length} to ${decompressed.length} bytes`);
      content = decompressed;
      // Clear the encoding flag since we're storing decompressed
      encoding = undefined;
    } catch (error) {
      console.log(`[DEBUG] ${filePath}: failed to decompress, storing original gzipped content. Error:`, error);
    }
  }

  await writeFile(cacheFile, content);

  // Store metadata only if file is still compressed
  if (encoding === 'gzip' && mimeType) {
    const metaFile = `${cacheFile}.meta`;
    await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
    console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
  } else {
    console.log('Cached file', filePath, content.length, 'bytes');
  }
}
/**
 * Sanitize a file path to prevent directory traversal attacks
 * Removes any path segments that attempt to go up directories
 */
export function sanitizePath(filePath: string): string {
  // Remove leading slashes
  let cleaned = filePath.replace(/^\/+/, '');

  // Split into segments and filter out dangerous ones
  const segments = cleaned.split('/').filter(segment => {
    // Remove empty segments
    if (!segment || segment === '.') return false;
    // Remove parent directory references
    if (segment === '..') return false;
    // Remove segments with null bytes
    if (segment.includes('\0')) return false;
    return true;
  });

  // Rejoin the safe segments
  return segments.join('/');
}

export function getCachedFilePath(did: string, site: string, filePath: string): string {
  const sanitizedPath = sanitizePath(filePath);
  return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
}

export function isCached(did: string, site: string): boolean {
  return existsSync(`${CACHE_DIR}/${did}/${site}`);
}

async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>): Promise<void> {
  const metadata: CacheMetadata = {
    recordCid,
    cachedAt: Date.now(),
    did,
    rkey,
    fileCids
  };

  const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
  const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));

  if (!existsSync(metadataDir)) {
    mkdirSync(metadataDir, { recursive: true });
  }

  await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
}

async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
  try {
    const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
    if (!existsSync(metadataPath)) return null;

    const content = await readFile(metadataPath, 'utf-8');
    return JSON.parse(content) as CacheMetadata;
  } catch (err) {
    console.error('Failed to read cache metadata', err);
    return null;
  }
}

export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  const metadata = await getCacheMetadata(did, rkey);
  if (!metadata) return false;

  // Check if cache has expired (14 days TTL)
  const cacheAge = Date.now() - metadata.cachedAt;
  if (cacheAge > CACHE_TTL) {
    console.log('[Cache] Cache expired for', did, rkey);
    return false;
  }

  // If current CID is provided, verify it matches
  if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
    console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
    return false;
  }

  return true;
}
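Taken together, a caller would typically resolve the identifier, find the PDS, fetch the place.wisp.fs record, and re-download only when isCacheValid reports the stored copy is stale. A minimal sketch of that flow follows; the module path './site-cache' and the driver function name are assumptions for illustration, while the imported functions and their signatures come from the file above.

import { resolveDid, getPdsForDid, fetchSiteRecord, isCacheValid, downloadAndCacheSite, getCachedFilePath } from './site-cache';

// Hypothetical driver showing the intended call order; error handling is minimal.
async function ensureSiteCached(identifier: string, rkey: string): Promise<string | null> {
  const did = await resolveDid(identifier);        // handle or DID -> DID
  if (!did) return null;

  const pdsEndpoint = await getPdsForDid(did);     // DID document -> #atproto_pds endpoint
  if (!pdsEndpoint) return null;

  const site = await fetchSiteRecord(did, rkey);   // place.wisp.fs record + record CID
  if (!site) return null;

  // Re-download only when the cache is missing, expired, or the record CID changed
  if (!(await isCacheValid(did, rkey, site.cid))) {
    await downloadAndCacheSite(did, rkey, site.record, pdsEndpoint, site.cid);
  }

  return getCachedFilePath(did, rkey, 'index.html'); // where the served file would live
}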