Monorepo for Wisp.place, a static site hosting service built on top of the AT Protocol.
1import { AtpAgent } from '@atproto/api'; 2import type { Record as WispFsRecord, Directory, Entry, File } from '../lexicon/types/place/wisp/fs'; 3import { existsSync, mkdirSync, readFileSync, rmSync } from 'fs'; 4import { writeFile, readFile, rename } from 'fs/promises'; 5import { safeFetchJson, safeFetchBlob } from './safe-fetch'; 6import { CID } from 'multiformats'; 7 8const CACHE_DIR = './cache/sites'; 9const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL 10 11interface CacheMetadata { 12 recordCid: string; 13 cachedAt: number; 14 did: string; 15 rkey: string; 16} 17 18interface IpldLink { 19 $link: string; 20} 21 22interface TypedBlobRef { 23 ref: CID | IpldLink; 24} 25 26interface UntypedBlobRef { 27 cid: string; 28} 29 30function isIpldLink(obj: unknown): obj is IpldLink { 31 return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string'; 32} 33 34function isTypedBlobRef(obj: unknown): obj is TypedBlobRef { 35 return typeof obj === 'object' && obj !== null && 'ref' in obj; 36} 37 38function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef { 39 return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string'; 40} 41 42export async function resolveDid(identifier: string): Promise<string | null> { 43 try { 44 // If it's already a DID, return it 45 if (identifier.startsWith('did:')) { 46 return identifier; 47 } 48 49 // Otherwise, resolve the handle using agent's built-in method 50 const agent = new AtpAgent({ service: 'https://public.api.bsky.app' }); 51 const response = await agent.resolveHandle({ handle: identifier }); 52 return response.data.did; 53 } catch (err) { 54 console.error('Failed to resolve identifier', identifier, err); 55 return null; 56 } 57} 58 59export async function getPdsForDid(did: string): Promise<string | null> { 60 try { 61 let doc; 62 63 if (did.startsWith('did:plc:')) { 64 doc = await 
safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`); 65 } else if (did.startsWith('did:web:')) { 66 const didUrl = didWebToHttps(did); 67 doc = await safeFetchJson(didUrl); 68 } else { 69 console.error('Unsupported DID method', did); 70 return null; 71 } 72 73 const services = doc.service || []; 74 const pdsService = services.find((s: any) => s.id === '#atproto_pds'); 75 76 return pdsService?.serviceEndpoint || null; 77 } catch (err) { 78 console.error('Failed to get PDS for DID', did, err); 79 return null; 80 } 81} 82 83function didWebToHttps(did: string): string { 84 const didParts = did.split(':'); 85 if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') { 86 throw new Error('Invalid did:web format'); 87 } 88 89 const domain = didParts[2]; 90 const pathParts = didParts.slice(3); 91 92 if (pathParts.length === 0) { 93 return `https://${domain}/.well-known/did.json`; 94 } else { 95 const path = pathParts.join('/'); 96 return `https://${domain}/${path}/did.json`; 97 } 98} 99 100export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> { 101 try { 102 const pdsEndpoint = await getPdsForDid(did); 103 if (!pdsEndpoint) return null; 104 105 const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`; 106 const data = await safeFetchJson(url); 107 108 return { 109 record: data.value as WispFsRecord, 110 cid: data.cid || '' 111 }; 112 } catch (err) { 113 console.error('Failed to fetch site record', did, rkey, err); 114 return null; 115 } 116} 117 118export function extractBlobCid(blobRef: unknown): string | null { 119 if (isIpldLink(blobRef)) { 120 return blobRef.$link; 121 } 122 123 if (isTypedBlobRef(blobRef)) { 124 const ref = blobRef.ref; 125 126 const cid = CID.asCID(ref); 127 if (cid) { 128 return cid.toString(); 129 } 130 131 if (isIpldLink(ref)) { 132 return ref.$link; 133 } 134 
} 135 136 if (isUntypedBlobRef(blobRef)) { 137 return blobRef.cid; 138 } 139 140 return null; 141} 142 143export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> { 144 console.log('Caching site', did, rkey); 145 146 if (!record.root) { 147 console.error('Record missing root directory:', JSON.stringify(record, null, 2)); 148 throw new Error('Invalid record structure: missing root directory'); 149 } 150 151 if (!record.root.entries || !Array.isArray(record.root.entries)) { 152 console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2)); 153 throw new Error('Invalid record structure: root missing entries array'); 154 } 155 156 // Use a temporary directory with timestamp to avoid collisions 157 const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`; 158 const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`; 159 const finalDir = `${CACHE_DIR}/${did}/${rkey}`; 160 161 try { 162 // Download to temporary directory 163 await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix); 164 await saveCacheMetadata(did, rkey, recordCid, tempSuffix); 165 166 // Atomically replace old cache with new cache 167 // On POSIX systems (Linux/macOS), rename is atomic 168 if (existsSync(finalDir)) { 169 // Rename old directory to backup 170 const backupDir = `${finalDir}.old-${Date.now()}`; 171 await rename(finalDir, backupDir); 172 173 try { 174 // Rename new directory to final location 175 await rename(tempDir, finalDir); 176 177 // Clean up old backup 178 rmSync(backupDir, { recursive: true, force: true }); 179 } catch (err) { 180 // If rename failed, restore backup 181 if (existsSync(backupDir) && !existsSync(finalDir)) { 182 await rename(backupDir, finalDir); 183 } 184 throw err; 185 } 186 } else { 187 // No existing cache, just rename temp to final 188 await rename(tempDir, finalDir); 189 } 190 191 
console.log('Successfully cached site atomically', did, rkey); 192 } catch (err) { 193 // Clean up temp directory on failure 194 if (existsSync(tempDir)) { 195 rmSync(tempDir, { recursive: true, force: true }); 196 } 197 throw err; 198 } 199} 200 201async function cacheFiles( 202 did: string, 203 site: string, 204 entries: Entry[], 205 pdsEndpoint: string, 206 pathPrefix: string, 207 dirSuffix: string = '' 208): Promise<void> { 209 // Collect all file blob download tasks first 210 const downloadTasks: Array<() => Promise<void>> = []; 211 212 function collectFileTasks( 213 entries: Entry[], 214 currentPathPrefix: string 215 ) { 216 for (const entry of entries) { 217 const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name; 218 const node = entry.node; 219 220 if ('type' in node && node.type === 'directory' && 'entries' in node) { 221 collectFileTasks(node.entries, currentPath); 222 } else if ('type' in node && node.type === 'file' && 'blob' in node) { 223 const fileNode = node as File; 224 downloadTasks.push(() => cacheFileBlob( 225 did, 226 site, 227 currentPath, 228 fileNode.blob, 229 pdsEndpoint, 230 fileNode.encoding, 231 fileNode.mimeType, 232 fileNode.base64, 233 dirSuffix 234 )); 235 } 236 } 237 } 238 239 collectFileTasks(entries, pathPrefix); 240 241 // Execute downloads concurrently with a limit of 3 at a time 242 const concurrencyLimit = 3; 243 for (let i = 0; i < downloadTasks.length; i += concurrencyLimit) { 244 const batch = downloadTasks.slice(i, i + concurrencyLimit); 245 await Promise.all(batch.map(task => task())); 246 } 247} 248 249async function cacheFileBlob( 250 did: string, 251 site: string, 252 filePath: string, 253 blobRef: any, 254 pdsEndpoint: string, 255 encoding?: 'gzip', 256 mimeType?: string, 257 base64?: boolean, 258 dirSuffix: string = '' 259): Promise<void> { 260 const cid = extractBlobCid(blobRef); 261 if (!cid) { 262 console.error('Could not extract CID from blob', blobRef); 263 return; 264 } 265 
266 const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`; 267 268 // Allow up to 100MB per file blob, with 2 minute timeout 269 let content = await safeFetchBlob(blobUrl, { maxSize: 100 * 1024 * 1024, timeout: 120000 }); 270 271 console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`); 272 273 // If content is base64-encoded, decode it back to binary (gzipped or not) 274 if (base64) { 275 const originalSize = content.length; 276 // The content from the blob is base64 text, decode it directly to binary 277 const buffer = Buffer.from(content); 278 const base64String = buffer.toString('ascii'); // Use ascii for base64 text, not utf-8 279 console.log(`[DEBUG] ${filePath}: base64 string first 100 chars: ${base64String.substring(0, 100)}`); 280 content = Buffer.from(base64String, 'base64'); 281 console.log(`[DEBUG] ${filePath}: decoded from ${originalSize} bytes to ${content.length} bytes`); 282 283 // Check if it's actually gzipped by looking at magic bytes 284 if (content.length >= 2) { 285 const magic = content[0] === 0x1f && content[1] === 0x8b; 286 const byte0 = content[0]; 287 const byte1 = content[1]; 288 console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${magic} (0x${byte0?.toString(16)}, 0x${byte1?.toString(16)})`); 289 } 290 } 291 292 const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`; 293 const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/')); 294 295 if (fileDir && !existsSync(fileDir)) { 296 mkdirSync(fileDir, { recursive: true }); 297 } 298 299 await writeFile(cacheFile, content); 300 301 // Store metadata if file is compressed 302 if (encoding === 'gzip' && mimeType) { 303 const metaFile = `${cacheFile}.meta`; 304 await writeFile(metaFile, JSON.stringify({ encoding, mimeType })); 305 console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')'); 306 
} else { 307 console.log('Cached file', filePath, content.length, 'bytes'); 308 } 309} 310 311/** 312 * Sanitize a file path to prevent directory traversal attacks 313 * Removes any path segments that attempt to go up directories 314 */ 315export function sanitizePath(filePath: string): string { 316 // Remove leading slashes 317 let cleaned = filePath.replace(/^\/+/, ''); 318 319 // Split into segments and filter out dangerous ones 320 const segments = cleaned.split('/').filter(segment => { 321 // Remove empty segments 322 if (!segment || segment === '.') return false; 323 // Remove parent directory references 324 if (segment === '..') return false; 325 // Remove segments with null bytes 326 if (segment.includes('\0')) return false; 327 return true; 328 }); 329 330 // Rejoin the safe segments 331 return segments.join('/'); 332} 333 334export function getCachedFilePath(did: string, site: string, filePath: string): string { 335 const sanitizedPath = sanitizePath(filePath); 336 return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`; 337} 338 339export function isCached(did: string, site: string): boolean { 340 return existsSync(`${CACHE_DIR}/${did}/${site}`); 341} 342 343async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = ''): Promise<void> { 344 const metadata: CacheMetadata = { 345 recordCid, 346 cachedAt: Date.now(), 347 did, 348 rkey 349 }; 350 351 const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`; 352 const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/')); 353 354 if (!existsSync(metadataDir)) { 355 mkdirSync(metadataDir, { recursive: true }); 356 } 357 358 await writeFile(metadataPath, JSON.stringify(metadata, null, 2)); 359} 360 361async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> { 362 try { 363 const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`; 364 if (!existsSync(metadataPath)) return null; 365 366 const 
content = await readFile(metadataPath, 'utf-8'); 367 return JSON.parse(content) as CacheMetadata; 368 } catch (err) { 369 console.error('Failed to read cache metadata', err); 370 return null; 371 } 372} 373 374export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> { 375 const metadata = await getCacheMetadata(did, rkey); 376 if (!metadata) return false; 377 378 // Check if cache has expired (14 days TTL) 379 const cacheAge = Date.now() - metadata.cachedAt; 380 if (cacheAge > CACHE_TTL) { 381 console.log('[Cache] Cache expired for', did, rkey); 382 return false; 383 } 384 385 // If current CID is provided, verify it matches 386 if (currentRecordCid && metadata.recordCid !== currentRecordCid) { 387 console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid); 388 return false; 389 } 390 391 return true; 392}