forked from
nekomimi.pet/wisp.place-monorepo
Monorepo for Wisp.place, a static site hosting service built on top of the AT Protocol.
1import { AtpAgent } from '@atproto/api';
2import type { Record as WispFsRecord, Directory, Entry, File } from '../lexicon/types/place/wisp/fs';
3import { existsSync, mkdirSync, readFileSync, rmSync } from 'fs';
4import { writeFile, readFile, rename } from 'fs/promises';
5import { safeFetchJson, safeFetchBlob } from './safe-fetch';
6import { CID } from 'multiformats';
7
// Root of the on-disk site cache; files live at <CACHE_DIR>/<did>/<rkey>/...
const CACHE_DIR = './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL
10
// Sidecar metadata written to the cached site directory (.metadata.json),
// used to decide whether a cache entry is still valid.
interface CacheMetadata {
  recordCid: string; // CID of the place.wisp.fs record the cache was built from
  cachedAt: number; // epoch millis when the cache was written
  did: string;
  rkey: string;
}

// DAG-JSON style link object: { "$link": "<cid>" }.
interface IpldLink {
  $link: string;
}

// Blob reference whose `ref` may be a parsed CID instance or a raw $link object.
interface TypedBlobRef {
  ref: CID | IpldLink;
}

// Legacy/untyped blob reference carrying the CID as a plain string.
interface UntypedBlobRef {
  cid: string;
}
29
30function isIpldLink(obj: unknown): obj is IpldLink {
31 return typeof obj === 'object' && obj !== null && '$link' in obj && typeof (obj as IpldLink).$link === 'string';
32}
33
34function isTypedBlobRef(obj: unknown): obj is TypedBlobRef {
35 return typeof obj === 'object' && obj !== null && 'ref' in obj;
36}
37
38function isUntypedBlobRef(obj: unknown): obj is UntypedBlobRef {
39 return typeof obj === 'object' && obj !== null && 'cid' in obj && typeof (obj as UntypedBlobRef).cid === 'string';
40}
41
42export async function resolveDid(identifier: string): Promise<string | null> {
43 try {
44 // If it's already a DID, return it
45 if (identifier.startsWith('did:')) {
46 return identifier;
47 }
48
49 // Otherwise, resolve the handle using agent's built-in method
50 const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
51 const response = await agent.resolveHandle({ handle: identifier });
52 return response.data.did;
53 } catch (err) {
54 console.error('Failed to resolve identifier', identifier, err);
55 return null;
56 }
57}
58
59export async function getPdsForDid(did: string): Promise<string | null> {
60 try {
61 let doc;
62
63 if (did.startsWith('did:plc:')) {
64 doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
65 } else if (did.startsWith('did:web:')) {
66 const didUrl = didWebToHttps(did);
67 doc = await safeFetchJson(didUrl);
68 } else {
69 console.error('Unsupported DID method', did);
70 return null;
71 }
72
73 const services = doc.service || [];
74 const pdsService = services.find((s: any) => s.id === '#atproto_pds');
75
76 return pdsService?.serviceEndpoint || null;
77 } catch (err) {
78 console.error('Failed to get PDS for DID', did, err);
79 return null;
80 }
81}
82
83function didWebToHttps(did: string): string {
84 const didParts = did.split(':');
85 if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
86 throw new Error('Invalid did:web format');
87 }
88
89 const domain = didParts[2];
90 const pathParts = didParts.slice(3);
91
92 if (pathParts.length === 0) {
93 return `https://${domain}/.well-known/did.json`;
94 } else {
95 const path = pathParts.join('/');
96 return `https://${domain}/${path}/did.json`;
97 }
98}
99
100export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
101 try {
102 const pdsEndpoint = await getPdsForDid(did);
103 if (!pdsEndpoint) return null;
104
105 const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
106 const data = await safeFetchJson(url);
107
108 return {
109 record: data.value as WispFsRecord,
110 cid: data.cid || ''
111 };
112 } catch (err) {
113 console.error('Failed to fetch site record', did, rkey, err);
114 return null;
115 }
116}
117
118export function extractBlobCid(blobRef: unknown): string | null {
119 if (isIpldLink(blobRef)) {
120 return blobRef.$link;
121 }
122
123 if (isTypedBlobRef(blobRef)) {
124 const ref = blobRef.ref;
125
126 const cid = CID.asCID(ref);
127 if (cid) {
128 return cid.toString();
129 }
130
131 if (isIpldLink(ref)) {
132 return ref.$link;
133 }
134 }
135
136 if (isUntypedBlobRef(blobRef)) {
137 return blobRef.cid;
138 }
139
140 return null;
141}
142
/**
 * Download every file referenced by a site record into a temp directory,
 * then atomically swap it into place at <CACHE_DIR>/<did>/<rkey>.
 *
 * The swap uses rename (atomic on POSIX): the old cache is first renamed to
 * a backup, the temp dir is renamed in, and the backup is removed; on
 * failure the backup is restored. The temp dir is cleaned up on any error.
 *
 * @param did - repo DID that owns the site
 * @param rkey - record key of the site
 * @param record - the place.wisp.fs record describing the file tree
 * @param pdsEndpoint - base URL of the PDS to download blobs from
 * @param recordCid - CID of the record, stored in the cache metadata
 * @throws Error when the record structure is invalid or caching fails
 */
export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Use a temporary directory with timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Download to temporary directory
    await cacheFiles(did, rkey, record.root.entries, pdsEndpoint, '', tempSuffix);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix);

    // Atomically replace old cache with new cache
    // On POSIX systems (Linux/macOS), rename is atomic
    if (existsSync(finalDir)) {
      // Rename old directory to backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to final location
        await rename(tempDir, finalDir);

        // Clean up old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If rename failed, restore backup
        // (only when the final dir is actually absent, to avoid clobbering)
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}
200
201async function cacheFiles(
202 did: string,
203 site: string,
204 entries: Entry[],
205 pdsEndpoint: string,
206 pathPrefix: string,
207 dirSuffix: string = ''
208): Promise<void> {
209 // Collect all file blob download tasks first
210 const downloadTasks: Array<() => Promise<void>> = [];
211
212 function collectFileTasks(
213 entries: Entry[],
214 currentPathPrefix: string
215 ) {
216 for (const entry of entries) {
217 const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
218 const node = entry.node;
219
220 if ('type' in node && node.type === 'directory' && 'entries' in node) {
221 collectFileTasks(node.entries, currentPath);
222 } else if ('type' in node && node.type === 'file' && 'blob' in node) {
223 const fileNode = node as File;
224 downloadTasks.push(() => cacheFileBlob(
225 did,
226 site,
227 currentPath,
228 fileNode.blob,
229 pdsEndpoint,
230 fileNode.encoding,
231 fileNode.mimeType,
232 fileNode.base64,
233 dirSuffix
234 ));
235 }
236 }
237 }
238
239 collectFileTasks(entries, pathPrefix);
240
241 // Execute downloads concurrently with a limit of 3 at a time
242 const concurrencyLimit = 3;
243 for (let i = 0; i < downloadTasks.length; i += concurrencyLimit) {
244 const batch = downloadTasks.slice(i, i + concurrencyLimit);
245 await Promise.all(batch.map(task => task()));
246 }
247}
248
249async function cacheFileBlob(
250 did: string,
251 site: string,
252 filePath: string,
253 blobRef: any,
254 pdsEndpoint: string,
255 encoding?: 'gzip',
256 mimeType?: string,
257 base64?: boolean,
258 dirSuffix: string = ''
259): Promise<void> {
260 const cid = extractBlobCid(blobRef);
261 if (!cid) {
262 console.error('Could not extract CID from blob', blobRef);
263 return;
264 }
265
266 const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;
267
268 // Allow up to 100MB per file blob, with 2 minute timeout
269 let content = await safeFetchBlob(blobUrl, { maxSize: 100 * 1024 * 1024, timeout: 120000 });
270
271 console.log(`[DEBUG] ${filePath}: fetched ${content.length} bytes, base64=${base64}, encoding=${encoding}, mimeType=${mimeType}`);
272
273 // If content is base64-encoded, decode it back to binary (gzipped or not)
274 if (base64) {
275 const originalSize = content.length;
276 // The content from the blob is base64 text, decode it directly to binary
277 const buffer = Buffer.from(content);
278 const base64String = buffer.toString('ascii'); // Use ascii for base64 text, not utf-8
279 console.log(`[DEBUG] ${filePath}: base64 string first 100 chars: ${base64String.substring(0, 100)}`);
280 content = Buffer.from(base64String, 'base64');
281 console.log(`[DEBUG] ${filePath}: decoded from ${originalSize} bytes to ${content.length} bytes`);
282
283 // Check if it's actually gzipped by looking at magic bytes
284 if (content.length >= 2) {
285 const magic = content[0] === 0x1f && content[1] === 0x8b;
286 const byte0 = content[0];
287 const byte1 = content[1];
288 console.log(`[DEBUG] ${filePath}: has gzip magic bytes: ${magic} (0x${byte0?.toString(16)}, 0x${byte1?.toString(16)})`);
289 }
290 }
291
292 const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
293 const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));
294
295 if (fileDir && !existsSync(fileDir)) {
296 mkdirSync(fileDir, { recursive: true });
297 }
298
299 await writeFile(cacheFile, content);
300
301 // Store metadata if file is compressed
302 if (encoding === 'gzip' && mimeType) {
303 const metaFile = `${cacheFile}.meta`;
304 await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
305 console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
306 } else {
307 console.log('Cached file', filePath, content.length, 'bytes');
308 }
309}
310
311/**
312 * Sanitize a file path to prevent directory traversal attacks
313 * Removes any path segments that attempt to go up directories
314 */
315export function sanitizePath(filePath: string): string {
316 // Remove leading slashes
317 let cleaned = filePath.replace(/^\/+/, '');
318
319 // Split into segments and filter out dangerous ones
320 const segments = cleaned.split('/').filter(segment => {
321 // Remove empty segments
322 if (!segment || segment === '.') return false;
323 // Remove parent directory references
324 if (segment === '..') return false;
325 // Remove segments with null bytes
326 if (segment.includes('\0')) return false;
327 return true;
328 });
329
330 // Rejoin the safe segments
331 return segments.join('/');
332}
333
334export function getCachedFilePath(did: string, site: string, filePath: string): string {
335 const sanitizedPath = sanitizePath(filePath);
336 return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
337}
338
339export function isCached(did: string, site: string): boolean {
340 return existsSync(`${CACHE_DIR}/${did}/${site}`);
341}
342
343async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = ''): Promise<void> {
344 const metadata: CacheMetadata = {
345 recordCid,
346 cachedAt: Date.now(),
347 did,
348 rkey
349 };
350
351 const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
352 const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));
353
354 if (!existsSync(metadataDir)) {
355 mkdirSync(metadataDir, { recursive: true });
356 }
357
358 await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
359}
360
361async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
362 try {
363 const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
364 if (!existsSync(metadataPath)) return null;
365
366 const content = await readFile(metadataPath, 'utf-8');
367 return JSON.parse(content) as CacheMetadata;
368 } catch (err) {
369 console.error('Failed to read cache metadata', err);
370 return null;
371 }
372}
373
374export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
375 const metadata = await getCacheMetadata(did, rkey);
376 if (!metadata) return false;
377
378 // Check if cache has expired (14 days TTL)
379 const cacheAge = Date.now() - metadata.cachedAt;
380 if (cacheAge > CACHE_TTL) {
381 console.log('[Cache] Cache expired for', did, rkey);
382 return false;
383 }
384
385 // If current CID is provided, verify it matches
386 if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
387 console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
388 return false;
389 }
390
391 return true;
392}