Monorepo for Wisp.place, a static site hosting service built on top of the AT Protocol.
import { AtpAgent } from '@atproto/api';
import type { Record as WispFsRecord, Directory, Entry, File } from '../lexicon/types/place/wisp/fs';
import { existsSync, mkdirSync, readFileSync, rmSync } from 'fs';
import { writeFile, readFile, rename } from 'fs/promises';
import { safeFetchJson, safeFetchBlob } from './safe-fetch';
import { CID } from 'multiformats';

const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL

interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
}

/**
 * Determines if a MIME type should benefit from gzip compression.
 * Returns true for text-based web assets (HTML, CSS, JS, JSON, XML, SVG).
 * Returns false for already-compressed formats (images, video, audio, PDFs).
 */
export function shouldCompressMimeType(mimeType: string | undefined): boolean {
  if (!mimeType) return false;

  const mime = mimeType.toLowerCase();

  // Text-based web assets that benefit from compression
  const compressibleTypes = [
    'text/html',
    'text/css',
    'text/javascript',
    'application/javascript',
    'application/x-javascript',
    'text/xml',
    'application/xml',
    'application/json',
    'text/plain',
    'image/svg+xml',
  ];

  if (compressibleTypes.some(type => mime === type || mime.startsWith(type))) {
    return true;
  }

  // Already-compressed formats that should NOT be double-compressed
  const alreadyCompressedPrefixes = [
    'video/',
    'audio/',
    'image/',
    'application/pdf',
    'application/zip',
    'application/gzip',
  ];

  if (alreadyCompressedPrefixes.some(prefix => mime.startsWith(prefix))) {
    return false;
  }

  // Default to not compressing for unknown types
  return false;
}
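
// Illustrative sketch (not part of the original module): expected results of
// shouldCompressMimeType for a few common MIME types, shown as (input, result) pairs.
export function exampleCompressionDecisions(): Array<[string | undefined, boolean]> {
  return [
    ['text/html', shouldCompressMimeType('text/html')],               // true: text asset
    ['application/json', shouldCompressMimeType('application/json')], // true: text asset
    ['image/svg+xml', shouldCompressMimeType('image/svg+xml')],       // true: SVG is XML text
    ['image/png', shouldCompressMimeType('image/png')],               // false: already compressed
    ['font/woff2', shouldCompressMimeType('font/woff2')],             // false: unknown types default to false
    [undefined, shouldCompressMimeType(undefined)],                   // false: missing MIME type
  ];
}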

interface IpldLink {
  $link: string;
}

interface TypedBlobRef {
  ref: CID | IpldLink;
}

interface UntypedBlobRef {
  cid: string;
}

function isIpldLink(obj: unknown): obj is IpldLink {
  return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string';
}

function isTypedBlobRef(obj: unknown): obj is TypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'ref' in obj;
}

function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef {
  return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string';
}

export async function resolveDid(identifier: string): Promise<string | null> {
  try {
    // If it's already a DID, return it
    if (identifier.startsWith('did:')) {
      return identifier;
    }

    // Otherwise, resolve the handle using the agent's built-in method
    const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
    const response = await agent.resolveHandle({ handle: identifier });
    return response.data.did;
  } catch (err) {
    console.error('Failed to resolve identifier', identifier, err);
    return null;
  }
}

export async function getPdsForDid(did: string): Promise<string | null> {
  try {
    let doc;

    if (did.startsWith('did:plc:')) {
      doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
    } else if (did.startsWith('did:web:')) {
      const didUrl = didWebToHttps(did);
      doc = await safeFetchJson(didUrl);
    } else {
      console.error('Unsupported DID method', did);
      return null;
    }

    const services = doc.service || [];
    const pdsService = services.find((s: any) => s.id === '#atproto_pds');

    return pdsService?.serviceEndpoint || null;
  } catch (err) {
    console.error('Failed to get PDS for DID', did, err);
    return null;
  }
}
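
// Illustrative sketch (not part of the original module): resolving a handle or DID down
// to its PDS base URL by chaining resolveDid and getPdsForDid. The identifier passed by a
// caller (e.g. 'example.bsky.social') is a placeholder.
export async function examplePdsLookup(identifier: string): Promise<string | null> {
  const did = await resolveDid(identifier); // handle -> DID, or returned as-is if already a DID
  if (!did) return null;
  return getPdsForDid(did); // DID document -> '#atproto_pds' serviceEndpoint, or null
}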

function didWebToHttps(did: string): string {
  const didParts = did.split(':');
  if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
    throw new Error('Invalid did:web format');
  }

  const domain = didParts[2];
  const pathParts = didParts.slice(3);

  if (pathParts.length === 0) {
    return `https://${domain}/.well-known/did.json`;
  } else {
    const path = pathParts.join('/');
    return `https://${domain}/${path}/did.json`;
  }
}
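
// Illustrative sketch (not part of the original module): the two URL shapes didWebToHttps
// produces. The domain and path segments are placeholders.
export function exampleDidWebUrls(): string[] {
  return [
    didWebToHttps('did:web:example.com'),             // 'https://example.com/.well-known/did.json'
    didWebToHttps('did:web:example.com:users:alice'), // 'https://example.com/users/alice/did.json'
  ];
}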

export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return {
      record: data.value as WispFsRecord,
      cid: data.cid || ''
    };
  } catch (err) {
    console.error('Failed to fetch site record', did, rkey, err);
    return null;
  }
}

export function extractBlobCid(blobRef: unknown): string | null {
  if (isIpldLink(blobRef)) {
    return blobRef.$link;
  }

  if (isTypedBlobRef(blobRef)) {
    const ref = blobRef.ref;

    const cid = CID.asCID(ref);
    if (cid) {
      return cid.toString();
    }

    if (isIpldLink(ref)) {
      return ref.$link;
    }
  }

  if (isUntypedBlobRef(blobRef)) {
    return blobRef.cid;
  }

  return null;
}
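
// Illustrative sketch (not part of the original module): the blob-ref shapes extractBlobCid
// understands. The CID string is a placeholder example value.
export function exampleBlobRefShapes(): Array<string | null> {
  const placeholderCid = 'bafkreigh2akiscaildcqabsyg3dfr6chu3fgpregiymsck7e7aqa4s52zy';
  return [
    extractBlobCid({ $link: placeholderCid }),          // bare IPLD link object
    extractBlobCid({ ref: { $link: placeholderCid } }), // typed blob ref with a JSON-form ref
    extractBlobCid({ ref: CID.parse(placeholderCid) }), // typed blob ref with a parsed CID instance
    extractBlobCid({ cid: placeholderCid }),            // legacy untyped blob ref
    extractBlobCid('not a blob ref'),                   // null: unrecognised shape
  ];
}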

export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Use a temporary directory with timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Download to temporary directory
    await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix);

    // Atomically replace old cache with new cache
    // On POSIX systems (Linux/macOS), rename is atomic
    if (existsSync(finalDir)) {
      // Rename old directory to backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to final location
        await rename(tempDir, finalDir);

        // Clean up old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If rename failed, restore backup
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}
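
// Illustrative sketch (not part of the original module): one way the functions above can be
// combined to (re)build the on-disk cache for a single site. Error handling is deliberately minimal.
export async function exampleCacheSite(did: string, rkey: string): Promise<boolean> {
  const pdsEndpoint = await getPdsForDid(did);
  if (!pdsEndpoint) return false;

  const fetched = await fetchSiteRecord(did, rkey);
  if (!fetched) return false;

  // Downloads every file blob into a temp directory, then swaps it into place atomically.
  await downloadAndCacheSite(did, rkey, fetched.record, pdsEndpoint, fetched.cid);
  return true;
}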

async function cacheFiles(
  did: string,
  site: string,
  entries: Entry[],
  pdsEndpoint: string,
  pathPrefix: string,
  dirSuffix: string = ''
): Promise<void> {
  // Collect all file blob download tasks first
  const downloadTasks: Array<() => Promise<void>> = [];

  function collectFileTasks(
    entries: Entry[],
    currentPathPrefix: string
  ) {
    for (const entry of entries) {
      const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        collectFileTasks(node.entries, currentPath);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        const fileNode = node as File;
        downloadTasks.push(() => cacheFileBlob(
          did,
          site,
          currentPath,
          fileNode.blob,
          pdsEndpoint,
          fileNode.encoding,
          fileNode.mimeType,
          fileNode.base64,
          dirSuffix
        ));
      }
    }
  }

  collectFileTasks(entries, pathPrefix);

  // Execute downloads concurrently with a limit of 3 at a time
  const concurrencyLimit = 3;
  for (let i = 0; i < downloadTasks.length; i += concurrencyLimit) {
    const batch = downloadTasks.slice(i, i + concurrencyLimit);
    await Promise.all(batch.map(task => task()));
  }
}

async function cacheFileBlob(
  did: string,
  site: string,
  filePath: string,
  blobRef: any,
  pdsEndpoint: string,
  encoding?: 'gzip',
  mimeType?: string,
  base64?: boolean,
  dirSuffix: string = ''
): Promise<void> {
  const cid = extractBlobCid(blobRef);
  if (!cid) {
    console.error('Could not extract CID from blob', blobRef);
    return;
  }

  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;

  // Allow up to 100MB per file blob, with 2 minute timeout
  let content = await safeFetchBlob(blobUrl, { maxSize: 100 * 1024 * 1024, timeout: 120000 });

  console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`);

  // If content is base64-encoded, decode it back to raw binary (gzipped or not)
  if (base64) {
    const originalSize = content.length;
    // The blob stores base64 text as raw bytes: decode the bytes to a string,
    // then decode that base64 string back to binary (which may still be gzipped)
    const textDecoder = new TextDecoder();
    const base64String = textDecoder.decode(content);
    content = Buffer.from(base64String, 'base64');
    console.log(`[DEBUG] ${filePath}: decoded base64 from ${originalSize} bytes to ${content.length} bytes`);

    // Check if it's actually gzipped by looking at magic bytes
    if (content.length >= 2) {
      const hasGzipMagic = content[0] === 0x1f && content[1] === 0x8b;
      console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${hasGzipMagic}`);
    }
  }

  const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));

  if (fileDir && !existsSync(fileDir)) {
    mkdirSync(fileDir, { recursive: true });
  }

  // Use the shared function to determine if this should remain compressed
  const shouldStayCompressed = shouldCompressMimeType(mimeType);

  // Decompress files that shouldn't be stored compressed
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    console.log(`[DEBUG] ${filePath}: decompressing non-compressible type (${mimeType}) before caching`);
    try {
      const { gunzipSync } = await import('zlib');
      const decompressed = gunzipSync(content);
      console.log(`[DEBUG] ${filePath}: decompressed from ${content.length} to ${decompressed.length} bytes`);
      content = decompressed;
      // Clear the encoding flag since we're storing decompressed
      encoding = undefined;
    } catch (error) {
      console.log(`[DEBUG] ${filePath}: failed to decompress, storing original gzipped content. Error:`, error);
    }
  }

  await writeFile(cacheFile, content);

  // Store metadata only if file is still compressed
  if (encoding === 'gzip' && mimeType) {
    const metaFile = `${cacheFile}.meta`;
    await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
    console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
  } else {
    console.log('Cached file', filePath, content.length, 'bytes');
  }
}
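
// Illustrative sketch (not part of the original module): how a server might read a cached file
// back together with the '.meta' sidecar written by cacheFileBlob, so it can set Content-Type
// and Content-Encoding headers. The return shape here is an assumption, not part of this module.
export async function exampleReadCachedFile(
  did: string,
  site: string,
  filePath: string
): Promise<{ body: Buffer; encoding?: string; mimeType?: string } | null> {
  const cachedPath = getCachedFilePath(did, site, filePath);
  if (!existsSync(cachedPath)) return null;

  const body = await readFile(cachedPath);

  // The sidecar only exists for files that were stored gzipped (see cacheFileBlob above).
  const metaPath = `${cachedPath}.meta`;
  if (existsSync(metaPath)) {
    const meta = JSON.parse(await readFile(metaPath, 'utf-8')) as { encoding?: string; mimeType?: string };
    return { body, encoding: meta.encoding, mimeType: meta.mimeType };
  }
  return { body };
}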

/**
 * Sanitize a file path to prevent directory traversal attacks.
 * Removes any path segments that attempt to go up directories.
 */
export function sanitizePath(filePath: string): string {
  // Remove leading slashes
  let cleaned = filePath.replace(/^\/+/, '');

  // Split into segments and filter out dangerous ones
  const segments = cleaned.split('/').filter(segment => {
    // Remove empty segments and current-directory references
    if (!segment || segment === '.') return false;
    // Remove parent directory references
    if (segment === '..') return false;
    // Remove segments with null bytes
    if (segment.includes('\0')) return false;
    return true;
  });

  // Rejoin the safe segments
  return segments.join('/');
}
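
// Illustrative sketch (not part of the original module): what sanitizePath does to a few
// hostile or messy inputs. The paths are placeholders.
export function exampleSanitizedPaths(): string[] {
  return [
    sanitizePath('assets/style.css'), // 'assets/style.css' (unchanged)
    sanitizePath('/etc/passwd'),      // 'etc/passwd' (leading slash stripped)
    sanitizePath('../../secret.txt'), // 'secret.txt' ('..' segments dropped)
    sanitizePath('a/./b//c'),         // 'a/b/c' ('.' and empty segments dropped)
  ];
}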

export function getCachedFilePath(did: string, site: string, filePath: string): string {
  const sanitizedPath = sanitizePath(filePath);
  return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
}

export function isCached(did: string, site: string): boolean {
  return existsSync(`${CACHE_DIR}/${did}/${site}`);
}

async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = ''): Promise<void> {
  const metadata: CacheMetadata = {
    recordCid,
    cachedAt: Date.now(),
    did,
    rkey
  };

  const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
  const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));

  if (!existsSync(metadataDir)) {
    mkdirSync(metadataDir, { recursive: true });
  }

  await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
}

async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
  try {
    const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
    if (!existsSync(metadataPath)) return null;

    const content = await readFile(metadataPath, 'utf-8');
    return JSON.parse(content) as CacheMetadata;
  } catch (err) {
    console.error('Failed to read cache metadata', err);
    return null;
  }
}

export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  const metadata = await getCacheMetadata(did, rkey);
  if (!metadata) return false;

  // Check if cache has expired (14 days TTL)
  const cacheAge = Date.now() - metadata.cachedAt;
  if (cacheAge > CACHE_TTL) {
    console.log('[Cache] Cache expired for', did, rkey);
    return false;
  }

  // If current CID is provided, verify it matches
  if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
    console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
    return false;
  }

  return true;
}
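
// Illustrative sketch (not part of the original module): deciding whether a cached copy can be
// served as-is or needs to be rebuilt. Passing a freshly fetched record CID also catches sites
// that were republished within the TTL window.
export async function exampleNeedsRefresh(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  if (!isCached(did, rkey)) return true;                              // nothing on disk yet
  const stillValid = await isCacheValid(did, rkey, currentRecordCid); // TTL + record CID check
  return !stillValid;                                                 // refresh when expired or CID changed
}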