Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.
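The cache module below walks place.wisp.fs records whose root is a directory tree of named entries. Purely as an illustration (the field names are inferred from how the code reads the record, not taken from the lexicon, and every value is made up), a minimal record might look like this:

// Illustrative only: the shape of a place.wisp.fs record as the cache layer
// reads it. Field names are inferred from the code below, not from the lexicon,
// and the paths, sizes, and CIDs are invented.
const exampleRecord = {
  root: {
    type: 'directory',
    entries: [
      {
        name: 'index.html',
        node: {
          type: 'file',
          mimeType: 'text/html',
          encoding: 'gzip',   // set when the uploader stored the file gzipped
          base64: false,      // true when the blob bytes are base64-encoded text
          blob: { ref: { $link: 'bafkreiexampleindexcid' }, mimeType: 'text/html', size: 1234 },
        },
      },
      {
        name: 'assets',
        node: {
          type: 'directory',
          entries: [
            {
              name: 'logo.png',
              node: {
                type: 'file',
                mimeType: 'image/png',
                blob: { ref: { $link: 'bafkreiexamplelogocid' }, mimeType: 'image/png', size: 4567 },
              },
            },
          ],
        },
      },
    ],
  },
};

Each file node carries a blob reference; its CID is what the cache layer downloads via com.atproto.sync.getBlob.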
import { AtpAgent } from '@atproto/api';
import type { Record as WispFsRecord, Directory, Entry, File } from '../lexicon/types/place/wisp/fs';
import { existsSync, mkdirSync, readFileSync, rmSync } from 'fs';
import { writeFile, readFile, rename } from 'fs/promises';
import { safeFetchJson, safeFetchBlob } from './safe-fetch';
import { CID } from 'multiformats';

const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL

interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
  // Map of file path to blob CID for incremental updates
  fileCids?: Record<string, string>;
}

/**
 * Determines if a MIME type should benefit from gzip compression.
 * Returns true for text-based web assets (HTML, CSS, JS, JSON, XML, SVG).
 * Returns false for already-compressed formats (images, video, audio, PDFs).
 */
export function shouldCompressMimeType(mimeType: string | undefined): boolean {
  if (!mimeType) return false;

  const mime = mimeType.toLowerCase();

  // Text-based web assets that benefit from compression
  const compressibleTypes = [
    'text/html',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/x-javascript',
    'text/xml',
    'application/xml',
    'application/json',
    'text/plain',
    'image/svg+xml',
  ];

  if (compressibleTypes.some(type => mime === type || mime.startsWith(type))) {
    return true;
  }

  // Already-compressed formats that should NOT be double-compressed
  const alreadyCompressedPrefixes = [
    'video/',
    'audio/',
    'image/',
    'application/pdf',
    'application/zip',
    'application/gzip',
  ];

  if (alreadyCompressedPrefixes.some(prefix => mime.startsWith(prefix))) {
    return false;
  }

  // Default to not compressing for unknown types
  return false;
}

interface IpldLink {
  $link: string;
}

interface TypedBlobRef {
  ref: CID | IpldLink;
}

interface UntypedBlobRef {
  cid: string;
}

function isIpldLink(obj: unknown): obj is IpldLink {
  return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string';
}

function isTypedBlobRef(obj: unknown): obj is TypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'ref' in obj;
}

function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string';
}

export async function resolveDid(identifier: string): Promise<string | null> {
  try {
    // If it's already a DID, return it
    if (identifier.startsWith('did:')) {
      return identifier;
    }

    // Otherwise, resolve the handle using the agent's built-in method
    const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
    const response = await agent.resolveHandle({ handle: identifier });
    return response.data.did;
  } catch (err) {
    console.error('Failed to resolve identifier', identifier, err);
    return null;
  }
}

export async function getPdsForDid(did: string): Promise<string | null> {
  try {
    let doc;

    if (did.startsWith('did:plc:')) {
      doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
    } else if (did.startsWith('did:web:')) {
      const didUrl = didWebToHttps(did);
      doc = await safeFetchJson(didUrl);
    } else {
      console.error('Unsupported DID method', did);
      return null;
    }

    const services = doc.service || [];
    const pdsService = services.find((s: any) => s.id === '#atproto_pds');

    return pdsService?.serviceEndpoint || null;
  } catch (err) {
    console.error('Failed to get PDS for DID', did, err);
    return null;
  }
}

function didWebToHttps(did: string): string {
  const didParts = did.split(':');
  if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
    throw new Error('Invalid did:web format');
  }

  const domain = didParts[2];
  const pathParts = didParts.slice(3);

  if (pathParts.length === 0) {
    return `https://${domain}/.well-known/did.json`;
  } else {
    const path = pathParts.join('/');
    return `https://${domain}/${path}/did.json`;
  }
}

export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return {
      record: data.value as WispFsRecord,
      cid: data.cid || ''
    };
  } catch (err) {
    console.error('Failed to fetch site record', did, rkey, err);
    return null;
  }
}

export function extractBlobCid(blobRef: unknown): string | null {
  if (isIpldLink(blobRef)) {
    return blobRef.$link;
  }

  if (isTypedBlobRef(blobRef)) {
    const ref = blobRef.ref;

    const cid = CID.asCID(ref);
    if (cid) {
      return cid.toString();
    }

    if (isIpldLink(ref)) {
      return ref.$link;
    }
  }

  if (isUntypedBlobRef(blobRef)) {
    return blobRef.cid;
  }

  return null;
}

export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Get existing cache metadata to check for incremental updates
  const existingMetadata = await getCacheMetadata(did, rkey);
  const existingFileCids = existingMetadata?.fileCids || {};

  // Use a temporary directory with timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Collect file CIDs from the new record
    const newFileCids: Record<string, string> = {};
    collectFileCidsFromEntries(record.root.entries, '', newFileCids);

    // Download/copy files to temporary directory (with incremental logic)
    await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids);

    // Atomically replace old cache with new cache
    // On POSIX systems (Linux/macOS), rename is atomic
    if (existsSync(finalDir)) {
      // Rename old directory to backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to final location
        await rename(tempDir, finalDir);

        // Clean up old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If rename failed, restore backup
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}

/**
 * Recursively collect file CIDs from entries for incremental update tracking
 */
function collectFileCidsFromEntries(entries: Entry[], pathPrefix: string, fileCids: Record<string, string>): void {
  for (const entry of entries) {
    const currentPath = pathPrefix ? `${pathPrefix}/${entry.name}` : entry.name;
    const node = entry.node;

    if ('type' in node && node.type === 'directory' && 'entries' in node) {
      collectFileCidsFromEntries(node.entries, currentPath, fileCids);
    } else if ('type' in node && node.type === 'file' && 'blob' in node) {
      const fileNode = node as File;
      const cid = extractBlobCid(fileNode.blob);
      if (cid) {
        fileCids[currentPath] = cid;
      }
    }
  }
}

async function cacheFiles(
  did: string,
  site: string,
  entries: Entry[],
  pdsEndpoint: string,
  pathPrefix: string,
  dirSuffix: string = '',
  existingFileCids: Record<string, string> = {},
  existingCacheDir?: string
): Promise<void> {
  // Collect file tasks, separating unchanged files from new/changed files
  const downloadTasks: Array<() => Promise<void>> = [];
  const copyTasks: Array<() => Promise<void>> = [];

  function collectFileTasks(
    entries: Entry[],
    currentPathPrefix: string
  ) {
    for (const entry of entries) {
      const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        collectFileTasks(node.entries, currentPath);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        const fileNode = node as File;
        const cid = extractBlobCid(fileNode.blob);

        // Check if file is unchanged (same CID as existing cache)
        if (cid && existingFileCids[currentPath] === cid && existingCacheDir) {
          // File unchanged - copy from existing cache instead of downloading
          copyTasks.push(() => copyExistingFile(
            did,
            site,
            currentPath,
            dirSuffix,
            existingCacheDir
          ));
        } else {
          // File new or changed - download it
          downloadTasks.push(() => cacheFileBlob(
            did,
            site,
            currentPath,
            fileNode.blob,
            pdsEndpoint,
            fileNode.encoding,
            fileNode.mimeType,
            fileNode.base64,
            dirSuffix
          ));
        }
      }
    }
  }

  collectFileTasks(entries, pathPrefix);

  console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`);

  // Copy unchanged files in parallel (fast local operations)
  const copyLimit = 10;
  for (let i = 0; i < copyTasks.length; i += copyLimit) {
    const batch = copyTasks.slice(i, i + copyLimit);
    await Promise.all(batch.map(task => task()));
  }

  // Download new/changed files concurrently with a limit of 3 at a time
  const downloadLimit = 3;
  for (let i = 0; i < downloadTasks.length; i += downloadLimit) {
    const batch = downloadTasks.slice(i, i + downloadLimit);
    await Promise.all(batch.map(task => task()));
  }
}

/**
 * Copy an unchanged file from existing cache to new cache location
 */
async function copyExistingFile(
  did: string,
  site: string,
  filePath: string,
  dirSuffix: string,
  existingCacheDir: string
): Promise<void> {
  const { copyFile } = await import('fs/promises');

  const sourceFile = `${existingCacheDir}/${filePath}`;
  const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const destDir = destFile.substring(0, destFile.lastIndexOf('/'));

  // Create destination directory if needed
  if (destDir && !existsSync(destDir)) {
    mkdirSync(destDir, { recursive: true });
  }

  try {
    // Copy the file
    await copyFile(sourceFile, destFile);

    // Copy metadata file if it exists
    const sourceMetaFile = `${sourceFile}.meta`;
    const destMetaFile = `${destFile}.meta`;
    if (existsSync(sourceMetaFile)) {
      await copyFile(sourceMetaFile, destMetaFile);
    }

    console.log(`[Incremental] Copied unchanged file: ${filePath}`);
  } catch (err) {
    console.error(`[Incremental] Failed to copy file ${filePath}, will attempt download:`, err);
    throw err;
  }
}

async function cacheFileBlob(
  did: string,
  site: string,
  filePath: string,
  blobRef: any,
  pdsEndpoint: string,
  encoding?: 'gzip',
  mimeType?: string,
  base64?: boolean,
  dirSuffix: string = ''
): Promise<void> {
  const cid = extractBlobCid(blobRef);
  if (!cid) {
    console.error('Could not extract CID from blob', blobRef);
    return;
  }

  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;

  // Allow up to 500MB per file blob, with 5 minute timeout
  let content = await safeFetchBlob(blobUrl, { maxSize: 500 * 1024 * 1024, timeout: 300000 });
  console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`);

  // If content is base64-encoded, decode it back to raw binary (gzipped or not)
  if (base64) {
    const originalSize = content.length;
    // The blob stores base64 text as raw bytes: decode it to a string, then back to binary
    const textDecoder = new TextDecoder();
    const base64String = textDecoder.decode(content);
    content = Buffer.from(base64String, 'base64');
    console.log(`[DEBUG] ${filePath}: decoded base64 from ${originalSize} bytes to ${content.length} bytes`);

    // Check if it's actually gzipped by looking at magic bytes
    if (content.length >= 2) {
      const hasGzipMagic = content[0] === 0x1f && content[1] === 0x8b;
      console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${hasGzipMagic}`);
    }
  }

  const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));

  if (fileDir && !existsSync(fileDir)) {
    mkdirSync(fileDir, { recursive: true });
  }

  // Use the shared function to determine if this should remain compressed
  const shouldStayCompressed = shouldCompressMimeType(mimeType);

  // Decompress files that shouldn't be stored compressed
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    console.log(`[DEBUG] ${filePath}: decompressing non-compressible type (${mimeType}) before caching`);
    try {
      const { gunzipSync } = await import('zlib');
      const decompressed = gunzipSync(content);
      console.log(`[DEBUG] ${filePath}: decompressed from ${content.length} to ${decompressed.length} bytes`);
      content = decompressed;
      // Clear the encoding flag since we're storing decompressed
      encoding = undefined;
    } catch (error) {
      console.log(`[DEBUG] ${filePath}: failed to decompress, storing original gzipped content. Error:`, error);
    }
  }

  await writeFile(cacheFile, content);

  // Store metadata only if file is still compressed
  if (encoding === 'gzip' && mimeType) {
    const metaFile = `${cacheFile}.meta`;
    await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
    console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
  } else {
    console.log('Cached file', filePath, content.length, 'bytes');
  }
}

/**
 * Sanitize a file path to prevent directory traversal attacks.
 * Removes any path segments that attempt to go up directories.
 */
export function sanitizePath(filePath: string): string {
  // Remove leading slashes
  let cleaned = filePath.replace(/^\/+/, '');

  // Split into segments and filter out dangerous ones
  const segments = cleaned.split('/').filter(segment => {
    // Remove empty segments
    if (!segment || segment === '.') return false;
    // Remove parent directory references
    if (segment === '..') return false;
    // Remove segments with null bytes
    if (segment.includes('\0')) return false;
    return true;
  });

  // Rejoin the safe segments
  return segments.join('/');
}

export function getCachedFilePath(did: string, site: string, filePath: string): string {
  const sanitizedPath = sanitizePath(filePath);
  return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
}

export function isCached(did: string, site: string): boolean {
  return existsSync(`${CACHE_DIR}/${did}/${site}`);
}

async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>): Promise<void> {
  const metadata: CacheMetadata = {
    recordCid,
    cachedAt: Date.now(),
    did,
    rkey,
    fileCids
  };

  const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
  const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));

  if (!existsSync(metadataDir)) {
    mkdirSync(metadataDir, { recursive: true });
  }

  await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
}

async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
  try {
    const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
    if (!existsSync(metadataPath)) return null;

    const content = await readFile(metadataPath, 'utf-8');
    return JSON.parse(content) as CacheMetadata;
  } catch (err) {
    console.error('Failed to read cache metadata', err);
    return null;
  }
}

export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  const metadata = await getCacheMetadata(did, rkey);
  if (!metadata) return false;

  // Check if cache has expired (14 days TTL)
  const cacheAge = Date.now() - metadata.cachedAt;
  if (cacheAge > CACHE_TTL) {
    console.log('[Cache] Cache expired for', did, rkey);
    return false;
  }

  // If current CID is provided, verify it matches
  if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
    console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
    return false;
  }

  return true;
}
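For context, here is a rough sketch of how these exports might be wired together by a serving layer. It is not code from the repo: the import path, handle, and rkey are placeholders, and error handling is kept minimal.

// Sketch only: resolve an identity, validate the cache, and (re)download a site.
// The './cache' import path, handle, and rkey below are placeholders.
import {
  resolveDid,
  getPdsForDid,
  fetchSiteRecord,
  isCacheValid,
  downloadAndCacheSite,
  getCachedFilePath,
} from './cache';

async function ensureSiteCached(identifier: string, rkey: string): Promise<string | null> {
  const did = await resolveDid(identifier);
  if (!did) return null;

  const pdsEndpoint = await getPdsForDid(did);
  if (!pdsEndpoint) return null;

  // Fetch the current record so its CID can be compared against the cached one
  const site = await fetchSiteRecord(did, rkey);
  if (!site) return null;

  if (!(await isCacheValid(did, rkey, site.cid))) {
    await downloadAndCacheSite(did, rkey, site.record, pdsEndpoint, site.cid);
  }

  // Callers can then map request paths onto the on-disk cache
  return getCachedFilePath(did, rkey, 'index.html');
}

// Example invocation (placeholder identifier and rkey):
// const indexPath = await ensureSiteCached('example.bsky.social', 'my-site');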