Monorepo for Wisp.place, a static site hosting service built on top of the AT Protocol.
import { AtpAgent } from '@atproto/api';
import type { Record as WispFsRecord, Entry, File } from '../lexicon/types/place/wisp/fs';
import { existsSync, mkdirSync, rmSync } from 'fs';
import { writeFile, readFile, rename } from 'fs/promises';
import { safeFetchJson, safeFetchBlob } from './safe-fetch';
import { CID } from 'multiformats';

const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14-day cache TTL

interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
}

/**
 * Determines whether a MIME type benefits from gzip compression.
 * Returns true for text-based web assets (HTML, CSS, JS, JSON, XML, SVG).
 * Returns false for already-compressed formats (images, video, audio, PDFs).
 */
export function shouldCompressMimeType(mimeType: string | undefined): boolean {
  if (!mimeType) return false;

  const mime = mimeType.toLowerCase();

  // Text-based web assets that benefit from compression.
  // startsWith also matches parameterized types like 'text/html; charset=utf-8'.
  const compressibleTypes = [
    'text/html',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/x-javascript',
    'text/xml',
    'application/xml',
    'application/json',
    'text/plain',
    'image/svg+xml',
  ];

  if (compressibleTypes.some(type => mime === type || mime.startsWith(type))) {
    return true;
  }

  // Already-compressed formats that should NOT be double-compressed
  const alreadyCompressedPrefixes = [
    'video/',
    'audio/',
    'image/',
    'application/pdf',
    'application/zip',
    'application/gzip',
  ];

  if (alreadyCompressedPrefixes.some(prefix => mime.startsWith(prefix))) {
    return false;
  }

  // Default to not compressing for unknown types
  return false;
}
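// Usage sketch for shouldCompressMimeType (illustrative values only):
//
//   shouldCompressMimeType('text/html')                 // => true
//   shouldCompressMimeType('text/html; charset=utf-8')  // => true  (prefix match)
//   shouldCompressMimeType('image/svg+xml')             // => true  (exact match wins before the 'image/' prefix check)
//   shouldCompressMimeType('image/png')                 // => false
//   shouldCompressMimeType(undefined)                   // => false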
interface IpldLink {
  $link: string;
}

interface TypedBlobRef {
  ref: CID | IpldLink;
}

interface UntypedBlobRef {
  cid: string;
}

function isIpldLink(obj: unknown): obj is IpldLink {
  return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string';
}

function isTypedBlobRef(obj: unknown): obj is TypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'ref' in obj;
}

function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string';
}

export async function resolveDid(identifier: string): Promise<string | null> {
  try {
    // If it's already a DID, return it as-is
    if (identifier.startsWith('did:')) {
      return identifier;
    }

    // Otherwise, resolve the handle using the agent's built-in method
    const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
    const response = await agent.resolveHandle({ handle: identifier });
    return response.data.did;
  } catch (err) {
    console.error('Failed to resolve identifier', identifier, err);
    return null;
  }
}

export async function getPdsForDid(did: string): Promise<string | null> {
  try {
    let doc;

    if (did.startsWith('did:plc:')) {
      doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
    } else if (did.startsWith('did:web:')) {
      const didUrl = didWebToHttps(did);
      doc = await safeFetchJson(didUrl);
    } else {
      console.error('Unsupported DID method', did);
      return null;
    }

    // Service ids may be relative ('#atproto_pds') or absolute ('did:...#atproto_pds')
    const services = doc.service || [];
    const pdsService = services.find((s: any) => s.id === '#atproto_pds' || String(s.id ?? '').endsWith('#atproto_pds'));

    return pdsService?.serviceEndpoint || null;
  } catch (err) {
    console.error('Failed to get PDS for DID', did, err);
    return null;
  }
}

// Note: assumes the did:web identifier contains no percent-encoded characters
// (e.g. a port encoded as %3A), which the did:web spec permits.
function didWebToHttps(did: string): string {
  const didParts = did.split(':');
  if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
    throw new Error('Invalid did:web format');
  }

  const domain = didParts[2];
  const pathParts = didParts.slice(3);

  if (pathParts.length === 0) {
    return `https://${domain}/.well-known/did.json`;
  } else {
    const path = pathParts.join('/');
    return `https://${domain}/${path}/did.json`;
  }
}

export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return {
      record: data.value as WispFsRecord,
      cid: data.cid || ''
    };
  } catch (err) {
    console.error('Failed to fetch site record', did, rkey, err);
    return null;
  }
}

export function extractBlobCid(blobRef: unknown): string | null {
  if (isIpldLink(blobRef)) {
    return blobRef.$link;
  }

  if (isTypedBlobRef(blobRef)) {
    const ref = blobRef.ref;

    // A decoded CID instance (e.g. from dag-cbor decoding)
    const cid = CID.asCID(ref);
    if (cid) {
      return cid.toString();
    }

    // A JSON-form IPLD link: { $link: '...' }
    if (isIpldLink(ref)) {
      return ref.$link;
    }
  }

  if (isUntypedBlobRef(blobRef)) {
    return blobRef.cid;
  }

  return null;
}
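// Shape reference for extractBlobCid (hypothetical placeholder CIDs):
//
//   extractBlobCid({ $link: 'bafkrei...' })            // bare IPLD link
//   extractBlobCid({ ref: { $link: 'bafkrei...' } })   // typed blob ref, JSON form
//   extractBlobCid({ ref: someDecodedCid })            // typed blob ref holding a CID instance
//   extractBlobCid({ cid: 'bafkrei...' })              // legacy untyped blob ref
//   extractBlobCid('not-a-blob')                       // => null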
export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Use a temporary directory with a timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Download to the temporary directory
    await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix);

    // Atomically replace the old cache with the new cache.
    // On POSIX systems (Linux/macOS), rename is atomic.
    if (existsSync(finalDir)) {
      // Rename old directory to a backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to the final location
        await rename(tempDir, finalDir);

        // Clean up the old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If the rename failed, restore the backup
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up the temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}

async function cacheFiles(
  did: string,
  site: string,
  entries: Entry[],
  pdsEndpoint: string,
  pathPrefix: string,
  dirSuffix: string = ''
): Promise<void> {
  // Collect all file blob download tasks first
  const downloadTasks: Array<() => Promise<void>> = [];

  function collectFileTasks(
    entries: Entry[],
    currentPathPrefix: string
  ) {
    for (const entry of entries) {
      const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        collectFileTasks(node.entries, currentPath);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        const fileNode = node as File;
        downloadTasks.push(() => cacheFileBlob(
          did,
          site,
          currentPath,
          fileNode.blob,
          pdsEndpoint,
          fileNode.encoding,
          fileNode.mimeType,
          fileNode.base64,
          dirSuffix
        ));
      }
    }
  }

  collectFileTasks(entries, pathPrefix);

  // Execute downloads concurrently, 3 at a time
  const concurrencyLimit = 3;
  for (let i = 0; i < downloadTasks.length; i += concurrencyLimit) {
    const batch = downloadTasks.slice(i, i + concurrencyLimit);
    await Promise.all(batch.map(task => task()));
  }
}
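// The batch loop above waits for an entire batch of 3 to finish before
// starting the next, so one slow blob stalls its whole batch. A sliding-window
// worker pool keeps the limit saturated instead. This is a hypothetical
// alternative sketch, not what cacheFiles currently uses:
async function runWithConcurrency(tasks: Array<() => Promise<void>>, limit: number): Promise<void> {
  let next = 0;
  // Each worker repeatedly claims the next task index; the single-threaded
  // event loop makes the shared counter increment safe.
  const workers = Array.from({ length: Math.min(limit, tasks.length) }, async () => {
    while (next < tasks.length) {
      const task = tasks[next++];
      await task();
    }
  });
  await Promise.all(workers);
}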
async function cacheFileBlob(
  did: string,
  site: string,
  filePath: string,
  blobRef: unknown,
  pdsEndpoint: string,
  encoding?: 'gzip',
  mimeType?: string,
  base64?: boolean,
  dirSuffix: string = ''
): Promise<void> {
  const cid = extractBlobCid(blobRef);
  if (!cid) {
    console.error('Could not extract CID from blob', blobRef);
    return;
  }

  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;

  // Allow up to 100MB per file blob, with a 2-minute timeout
  let content = await safeFetchBlob(blobUrl, { maxSize: 100 * 1024 * 1024, timeout: 120000 });

  console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`);

  // If the blob is base64 text stored as raw bytes, decode it back to binary (gzipped or not)
  if (base64) {
    const originalSize = content.length;
    const base64String = new TextDecoder().decode(content);
    content = Buffer.from(base64String, 'base64');
    console.log(`[DEBUG] ${filePath}: decoded base64 from ${originalSize} bytes to ${content.length} bytes`);

    // Check whether the decoded payload is actually gzipped by looking at the magic bytes
    if (content.length >= 2) {
      const hasGzipMagic = content[0] === 0x1f && content[1] === 0x8b;
      console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${hasGzipMagic}`);
    }
  }

  // Sanitize the record-supplied path so a malicious entry name can't escape the cache directory
  const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${sanitizePath(filePath)}`;
  const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));

  if (fileDir && !existsSync(fileDir)) {
    mkdirSync(fileDir, { recursive: true });
  }

  // Use the shared function to determine whether this file should remain compressed
  const shouldStayCompressed = shouldCompressMimeType(mimeType);

  // Decompress files that shouldn't be stored compressed
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    console.log(`[DEBUG] ${filePath}: decompressing non-compressible type (${mimeType}) before caching`);
    try {
      const { gunzipSync } = await import('zlib');
      const decompressed = gunzipSync(content);
      console.log(`[DEBUG] ${filePath}: decompressed from ${content.length} to ${decompressed.length} bytes`);
      content = decompressed;
      // Clear the encoding flag since we're storing the file decompressed
      encoding = undefined;
    } catch (error) {
      console.log(`[DEBUG] ${filePath}: failed to decompress, storing original gzipped content. Error:`, error);
    }
  }

  await writeFile(cacheFile, content);

  // Store sidecar metadata only if the file is still compressed
  if (encoding === 'gzip' && mimeType) {
    const metaFile = `${cacheFile}.meta`;
    await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
    console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
  } else {
    console.log('Cached file', filePath, content.length, 'bytes');
  }
}

/**
 * Sanitize a file path to prevent directory traversal attacks.
 * Removes any path segments that attempt to go up directories.
 */
export function sanitizePath(filePath: string): string {
  // Remove leading slashes
  const cleaned = filePath.replace(/^\/+/, '');

  // Split into segments and filter out dangerous ones
  const segments = cleaned.split('/').filter(segment => {
    // Remove empty segments and current-directory references
    if (!segment || segment === '.') return false;
    // Remove parent-directory references
    if (segment === '..') return false;
    // Remove segments containing null bytes
    if (segment.includes('\0')) return false;
    return true;
  });

  // Rejoin the safe segments
  return segments.join('/');
}
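// Usage sketch for sanitizePath (hypothetical inputs):
//
//   sanitizePath('../../etc/passwd')       // => 'etc/passwd'
//   sanitizePath('/abs/./path//file.txt')  // => 'abs/path/file.txt'
//   sanitizePath('a/..\0/b')               // => 'a/b'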
export function getCachedFilePath(did: string, site: string, filePath: string): string {
  const sanitizedPath = sanitizePath(filePath);
  return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
}

export function isCached(did: string, site: string): boolean {
  return existsSync(`${CACHE_DIR}/${did}/${site}`);
}

async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = ''): Promise<void> {
  const metadata: CacheMetadata = {
    recordCid,
    cachedAt: Date.now(),
    did,
    rkey
  };

  const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
  const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));

  if (!existsSync(metadataDir)) {
    mkdirSync(metadataDir, { recursive: true });
  }

  await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
}

async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
  try {
    const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
    if (!existsSync(metadataPath)) return null;

    const content = await readFile(metadataPath, 'utf-8');
    return JSON.parse(content) as CacheMetadata;
  } catch (err) {
    console.error('Failed to read cache metadata', err);
    return null;
  }
}

export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  const metadata = await getCacheMetadata(did, rkey);
  if (!metadata) return false;

  // Check whether the cache has expired (14-day TTL)
  const cacheAge = Date.now() - metadata.cachedAt;
  if (cacheAge > CACHE_TTL) {
    console.log('[Cache] Cache expired for', did, rkey);
    return false;
  }

  // If a current CID is provided, verify it matches the cached one
  if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
    console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
    return false;
  }

  return true;
}
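// End-to-end usage sketch tying the pieces together. Hypothetical helper, not
// exported by the module; identifier and rkey are caller-supplied placeholders.
async function ensureSiteCached(identifier: string, rkey: string): Promise<boolean> {
  const did = await resolveDid(identifier);
  if (!did) return false;

  const site = await fetchSiteRecord(did, rkey);
  if (!site) return false;

  // Serve from cache when it is fresh and matches the live record CID
  if (isCached(did, rkey) && await isCacheValid(did, rkey, site.cid)) {
    return true;
  }

  const pdsEndpoint = await getPdsForDid(did);
  if (!pdsEndpoint) return false;

  await downloadAndCacheSite(did, rkey, site.record, pdsEndpoint, site.cid);
  return true;
}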