wisp.place

Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.
import { AtpAgent } from '@atproto/api';
import type { Record as WispFsRecord, Directory, Entry, File } from '../lexicon/types/place/wisp/fs';
import { existsSync, mkdirSync, readFileSync, rmSync } from 'fs';
import { writeFile, readFile, rename } from 'fs/promises';
import { safeFetchJson, safeFetchBlob } from './safe-fetch';
import { CID } from 'multiformats';

const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL

interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
  // Map of file path to blob CID for incremental updates
  fileCids?: Record<string, string>;
}

/**
 * Determines if a MIME type should benefit from gzip compression.
 * Returns true for text-based web assets (HTML, CSS, JS, JSON, XML, SVG).
 * Returns false for already-compressed formats (images, video, audio, PDFs).
 */
export function shouldCompressMimeType(mimeType: string | undefined): boolean {
  if (!mimeType) return false;

  const mime = mimeType.toLowerCase();

  // Text-based web assets that benefit from compression
  const compressibleTypes = [
    'text/html',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/x-javascript',
    'text/xml',
    'application/xml',
    'application/json',
    'text/plain',
    'image/svg+xml',
  ];

  if (compressibleTypes.some(type => mime === type || mime.startsWith(type))) {
    return true;
  }

  // Already-compressed formats that should NOT be double-compressed
  const alreadyCompressedPrefixes = [
    'video/',
    'audio/',
    'image/',
    'application/pdf',
    'application/zip',
    'application/gzip',
  ];

  if (alreadyCompressedPrefixes.some(prefix => mime.startsWith(prefix))) {
    return false;
  }

  // Default to not compressing for unknown types
  return false;
}
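
// Illustrative examples of the decision above (not part of the runtime logic):
//   shouldCompressMimeType('text/html')      // true  - exact match in compressibleTypes
//   shouldCompressMimeType('image/svg+xml')  // true  - matched before the broader 'image/' prefix check
//   shouldCompressMimeType('image/png')      // false - already-compressed format
//   shouldCompressMimeType(undefined)        // false - unknown or missing types default to no compression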

interface IpldLink {
  $link: string;
}

interface TypedBlobRef {
  ref: CID | IpldLink;
}

interface UntypedBlobRef {
  cid: string;
}

function isIpldLink(obj: unknown): obj is IpldLink {
  return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string';
}

function isTypedBlobRef(obj: unknown): obj is TypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'ref' in obj;
}

function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string';
}

export async function resolveDid(identifier: string): Promise<string | null> {
  try {
    // If it's already a DID, return it
    if (identifier.startsWith('did:')) {
      return identifier;
    }

    // Otherwise, resolve the handle using the agent's built-in method
    const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
    const response = await agent.resolveHandle({ handle: identifier });
    return response.data.did;
  } catch (err) {
    console.error('Failed to resolve identifier', identifier, err);
    return null;
  }
}
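
// Illustrative usage (handle and DID values are placeholders):
//   await resolveDid('did:plc:abc123')      // returned unchanged
//   await resolveDid('alice.example.com')   // resolved to the account's DID via public.api.bsky.app, or null on failure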

export async function getPdsForDid(did: string): Promise<string | null> {
  try {
    let doc;

    if (did.startsWith('did:plc:')) {
      doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
    } else if (did.startsWith('did:web:')) {
      const didUrl = didWebToHttps(did);
      doc = await safeFetchJson(didUrl);
    } else {
      console.error('Unsupported DID method', did);
      return null;
    }

    const services = doc.service || [];
    const pdsService = services.find((s: any) => s.id === '#atproto_pds');

    return pdsService?.serviceEndpoint || null;
  } catch (err) {
    console.error('Failed to get PDS for DID', did, err);
    return null;
  }
}

function didWebToHttps(did: string): string {
  const didParts = did.split(':');
  if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
    throw new Error('Invalid did:web format');
  }

  const domain = didParts[2];
  const pathParts = didParts.slice(3);

  if (pathParts.length === 0) {
    return `https://${domain}/.well-known/did.json`;
  } else {
    const path = pathParts.join('/');
    return `https://${domain}/${path}/did.json`;
  }
}
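
// Expected mappings (per the did:web method; domains here are examples):
//   did:web:example.com             -> https://example.com/.well-known/did.json
//   did:web:example.com:users:alice -> https://example.com/users/alice/did.json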

export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return {
      record: data.value as WispFsRecord,
      cid: data.cid || ''
    };
  } catch (err) {
    console.error('Failed to fetch site record', did, rkey, err);
    return null;
  }
}

export function extractBlobCid(blobRef: unknown): string | null {
  if (isIpldLink(blobRef)) {
    return blobRef.$link;
  }

  if (isTypedBlobRef(blobRef)) {
    const ref = blobRef.ref;

    const cid = CID.asCID(ref);
    if (cid) {
      return cid.toString();
    }

    if (isIpldLink(ref)) {
      return ref.$link;
    }
  }

  if (isUntypedBlobRef(blobRef)) {
    return blobRef.cid;
  }

  return null;
}
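
// The three blob-ref shapes handled above (CID strings are placeholders):
//   { $link: 'bafkrei...' }    // bare IPLD link, as seen in raw record JSON
//   { ref: <CID instance> }    // typed blob ref whose ref is a parsed CID (or a nested { $link })
//   { cid: 'bafkrei...' }      // legacy/untyped blob ref
// Each resolves to its CID string; anything else returns null.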

export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Get existing cache metadata to check for incremental updates
  const existingMetadata = await getCacheMetadata(did, rkey);
  const existingFileCids = existingMetadata?.fileCids || {};

  // Use a temporary directory with timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Collect file CIDs from the new record
    const newFileCids: Record<string, string> = {};
    collectFileCidsFromEntries(record.root.entries, '', newFileCids);

    // Download/copy files to temporary directory (with incremental logic)
    await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids);

    // Atomically replace old cache with new cache
    // On POSIX systems (Linux/macOS), rename is atomic
    if (existsSync(finalDir)) {
      // Rename old directory to backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to final location
        await rename(tempDir, finalDir);

        // Clean up old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If rename failed, restore backup
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}

/**
 * Recursively collect file CIDs from entries for incremental update tracking
 */
function collectFileCidsFromEntries(entries: Entry[], pathPrefix: string, fileCids: Record<string, string>): void {
  for (const entry of entries) {
    const currentPath = pathPrefix ? `${pathPrefix}/${entry.name}` : entry.name;
    const node = entry.node;

    if ('type' in node && node.type === 'directory' && 'entries' in node) {
      collectFileCidsFromEntries(node.entries, currentPath, fileCids);
    } else if ('type' in node && node.type === 'file' && 'blob' in node) {
      const fileNode = node as File;
      const cid = extractBlobCid(fileNode.blob);
      if (cid) {
        fileCids[currentPath] = cid;
      }
    }
  }
}
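
// Example of the map this produces for a small site (paths and CIDs are placeholders):
//   { 'index.html': 'bafkrei...', 'css/style.css': 'bafkrei...', 'img/logo.png': 'bafkrei...' }
// Comparing these per-path CIDs against the previous cache's metadata is what lets
// cacheFiles() below copy unchanged files locally instead of re-downloading them.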

async function cacheFiles(
  did: string,
  site: string,
  entries: Entry[],
  pdsEndpoint: string,
  pathPrefix: string,
  dirSuffix: string = '',
  existingFileCids: Record<string, string> = {},
  existingCacheDir?: string
): Promise<void> {
  // Collect file tasks, separating unchanged files from new/changed files
  const downloadTasks: Array<() => Promise<void>> = [];
  const copyTasks: Array<() => Promise<void>> = [];

  function collectFileTasks(
    entries: Entry[],
    currentPathPrefix: string
  ) {
    for (const entry of entries) {
      const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        collectFileTasks(node.entries, currentPath);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        const fileNode = node as File;
        const cid = extractBlobCid(fileNode.blob);

        // Check if file is unchanged (same CID as existing cache)
        if (cid && existingFileCids[currentPath] === cid && existingCacheDir) {
          // File unchanged - copy from existing cache instead of downloading
          copyTasks.push(() => copyExistingFile(
            did,
            site,
            currentPath,
            dirSuffix,
            existingCacheDir
          ));
        } else {
          // File new or changed - download it
          downloadTasks.push(() => cacheFileBlob(
            did,
            site,
            currentPath,
            fileNode.blob,
            pdsEndpoint,
            fileNode.encoding,
            fileNode.mimeType,
            fileNode.base64,
            dirSuffix
          ));
        }
      }
    }
  }

  collectFileTasks(entries, pathPrefix);

  console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`);

  // Copy unchanged files in parallel (fast local operations)
  const copyLimit = 10;
  for (let i = 0; i < copyTasks.length; i += copyLimit) {
    const batch = copyTasks.slice(i, i + copyLimit);
    await Promise.all(batch.map(task => task()));
  }

  // Download new/changed files concurrently with a limit of 3 at a time
  const downloadLimit = 3;
  for (let i = 0; i < downloadTasks.length; i += downloadLimit) {
    const batch = downloadTasks.slice(i, i + downloadLimit);
    await Promise.all(batch.map(task => task()));
  }
}

/**
 * Copy an unchanged file from existing cache to new cache location
 */
async function copyExistingFile(
  did: string,
  site: string,
  filePath: string,
  dirSuffix: string,
  existingCacheDir: string
): Promise<void> {
  const { copyFile } = await import('fs/promises');

  const sourceFile = `${existingCacheDir}/${filePath}`;
  const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const destDir = destFile.substring(0, destFile.lastIndexOf('/'));

  // Create destination directory if needed
  if (destDir && !existsSync(destDir)) {
    mkdirSync(destDir, { recursive: true });
  }

  try {
    // Copy the file
    await copyFile(sourceFile, destFile);

    // Copy metadata file if it exists
    const sourceMetaFile = `${sourceFile}.meta`;
    const destMetaFile = `${destFile}.meta`;
    if (existsSync(sourceMetaFile)) {
      await copyFile(sourceMetaFile, destMetaFile);
    }

    console.log(`[Incremental] Copied unchanged file: ${filePath}`);
  } catch (err) {
    // Copy failures propagate up so the caching attempt fails rather than producing a partial cache
    console.error(`[Incremental] Failed to copy file ${filePath} from existing cache:`, err);
    throw err;
  }
}

async function cacheFileBlob(
  did: string,
  site: string,
  filePath: string,
  blobRef: any,
  pdsEndpoint: string,
  encoding?: 'gzip',
  mimeType?: string,
  base64?: boolean,
  dirSuffix: string = ''
): Promise<void> {
  const cid = extractBlobCid(blobRef);
  if (!cid) {
    console.error('Could not extract CID from blob', blobRef);
    return;
  }

  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;

  // Allow up to 500MB per file blob, with 5 minute timeout
  let content = await safeFetchBlob(blobUrl, { maxSize: 500 * 1024 * 1024, timeout: 300000 });

  console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`);

  // If content is base64-encoded, decode it back to raw binary (gzipped or not)
  if (base64) {
    const originalSize = content.length;
    // The blob bytes are base64 text; decode that text back into the original binary content
    const textDecoder = new TextDecoder();
    const base64String = textDecoder.decode(content);
    content = Buffer.from(base64String, 'base64');
    console.log(`[DEBUG] ${filePath}: decoded base64 from ${originalSize} bytes to ${content.length} bytes`);

    // Check if it's actually gzipped by looking at magic bytes
    if (content.length >= 2) {
      const hasGzipMagic = content[0] === 0x1f && content[1] === 0x8b;
      console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${hasGzipMagic}`);
    }
  }

  const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));

  if (fileDir && !existsSync(fileDir)) {
    mkdirSync(fileDir, { recursive: true });
  }

  // Use the shared function to determine if this should remain compressed
  const shouldStayCompressed = shouldCompressMimeType(mimeType);

  // Decompress files that shouldn't be stored compressed
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    console.log(`[DEBUG] ${filePath}: decompressing non-compressible type (${mimeType}) before caching`);
    try {
      const { gunzipSync } = await import('zlib');
      const decompressed = gunzipSync(content);
      console.log(`[DEBUG] ${filePath}: decompressed from ${content.length} to ${decompressed.length} bytes`);
      content = decompressed;
      // Clear the encoding flag since we're storing decompressed
      encoding = undefined;
    } catch (error) {
      console.log(`[DEBUG] ${filePath}: failed to decompress, storing original gzipped content. Error:`, error);
    }
  }

  await writeFile(cacheFile, content);

  // Store metadata only if file is still compressed
  if (encoding === 'gzip' && mimeType) {
    const metaFile = `${cacheFile}.meta`;
    await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
    console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
  } else {
    console.log('Cached file', filePath, content.length, 'bytes');
  }
}

/**
 * Sanitize a file path to prevent directory traversal attacks.
 * Removes any path segments that attempt to go up directories.
 */
export function sanitizePath(filePath: string): string {
  // Remove leading slashes
  let cleaned = filePath.replace(/^\/+/, '');

  // Split into segments and filter out dangerous ones
  const segments = cleaned.split('/').filter(segment => {
    // Remove empty segments
    if (!segment || segment === '.') return false;
    // Remove parent directory references
    if (segment === '..') return false;
    // Remove segments with null bytes
    if (segment.includes('\0')) return false;
    return true;
  });

  // Rejoin the safe segments
  return segments.join('/');
}
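
// Illustrative inputs and outputs:
//   sanitizePath('/index.html')        // 'index.html'
//   sanitizePath('../../etc/passwd')   // 'etc/passwd'  (parent-directory segments dropped)
//   sanitizePath('a/./b//c')           // 'a/b/c'       (empty and '.' segments dropped)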

export function getCachedFilePath(did: string, site: string, filePath: string): string {
  const sanitizedPath = sanitizePath(filePath);
  return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
}

export function isCached(did: string, site: string): boolean {
  return existsSync(`${CACHE_DIR}/${did}/${site}`);
}

async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>): Promise<void> {
  const metadata: CacheMetadata = {
    recordCid,
    cachedAt: Date.now(),
    did,
    rkey,
    fileCids
  };

  const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
  const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));

  if (!existsSync(metadataDir)) {
    mkdirSync(metadataDir, { recursive: true });
  }

  await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
}

async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
  try {
    const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
    if (!existsSync(metadataPath)) return null;

    const content = await readFile(metadataPath, 'utf-8');
    return JSON.parse(content) as CacheMetadata;
  } catch (err) {
    console.error('Failed to read cache metadata', err);
    return null;
  }
}

export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  const metadata = await getCacheMetadata(did, rkey);
  if (!metadata) return false;

  // Check if cache has expired (14 days TTL)
  const cacheAge = Date.now() - metadata.cachedAt;
  if (cacheAge > CACHE_TTL) {
    console.log('[Cache] Cache expired for', did, rkey);
    return false;
  }

  // If current CID is provided, verify it matches
  if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
    console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
    return false;
  }

  return true;
}
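
// Minimal sketch of how the exports above fit together when serving a site.
// Handle and rkey values are placeholders; error handling is omitted:
//
//   const did = await resolveDid('alice.example.com');
//   if (did) {
//     const pds = await getPdsForDid(did);
//     const site = await fetchSiteRecord(did, 'my-site');
//     if (pds && site && !(await isCacheValid(did, 'my-site', site.cid))) {
//       await downloadAndCacheSite(did, 'my-site', site.record, pds, site.cid);
//     }
//     // Serve files from getCachedFilePath(did, 'my-site', requestPath)
//   }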