wisp.place

Monorepo for wisp.place, a static site hosting service built on top of the AT Protocol.

import { AtpAgent } from '@atproto/api';
import type { Record as WispFsRecord, Directory, Entry, File } from '@wisp/lexicons/types/place/wisp/fs';
import type { Record as SubfsRecord } from '@wisp/lexicons/types/place/wisp/subfs';
import type { Record as WispSettings } from '@wisp/lexicons/types/place/wisp/settings';
import { existsSync, mkdirSync, readFileSync, rmSync } from 'fs';
import { writeFile, readFile, rename } from 'fs/promises';
import { safeFetchJson, safeFetchBlob } from '@wisp/safe-fetch';
import { CID } from 'multiformats';
import { extractBlobCid } from '@wisp/atproto-utils';
import { sanitizePath, collectFileCidsFromEntries, countFilesInDirectory } from '@wisp/fs-utils';
import { shouldCompressMimeType } from '@wisp/atproto-utils/compression';
import { MAX_BLOB_SIZE, MAX_FILE_COUNT, MAX_SITE_SIZE } from '@wisp/constants';

// Re-export shared utilities for local usage and tests
export { extractBlobCid, sanitizePath };

const CACHE_DIR = process.env.CACHE_DIR || './cache/sites';
const CACHE_TTL = 14 * 24 * 60 * 60 * 1000; // 14 days cache TTL

interface CacheMetadata {
  recordCid: string;
  cachedAt: number;
  did: string;
  rkey: string;
  // Map of file path to blob CID for incremental updates
  fileCids?: Record<string, string>;
  // Site settings (null = explicitly no settings, undefined = not yet checked)
  settings?: WispSettings | null;
}
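
// Illustrative .metadata.json written alongside each cached site (values below are made up;
// see saveCacheMetadata further down for the real write path):
// {
//   "recordCid": "bafyrei...",
//   "cachedAt": 1700000000000,
//   "did": "did:plc:example",
//   "rkey": "my-site",
//   "fileCids": { "index.html": "bafkrei..." },
//   "settings": null
// }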


export async function resolveDid(identifier: string): Promise<string | null> {
  try {
    // If it's already a DID, return it
    if (identifier.startsWith('did:')) {
      return identifier;
    }

    // Otherwise, resolve the handle using agent's built-in method
    const agent = new AtpAgent({ service: 'https://public.api.bsky.app' });
    const response = await agent.resolveHandle({ handle: identifier });
    return response.data.did;
  } catch (err) {
    console.error('Failed to resolve identifier', identifier, err);
    return null;
  }
}
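
// Example (illustrative identifiers): handles are resolved via agent.resolveHandle against the
// public AppView, while DIDs pass straight through.
// const did = await resolveDid('alice.example.com'); // -> 'did:plc:…' or null on failure
// const same = await resolveDid('did:plc:abc123');   // -> returned unchanged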

export async function getPdsForDid(did: string): Promise<string | null> {
  try {
    let doc;

    if (did.startsWith('did:plc:')) {
      doc = await safeFetchJson(`https://plc.directory/${encodeURIComponent(did)}`);
    } else if (did.startsWith('did:web:')) {
      const didUrl = didWebToHttps(did);
      doc = await safeFetchJson(didUrl);
    } else {
      console.error('Unsupported DID method', did);
      return null;
    }

    const services = doc.service || [];
    const pdsService = services.find((s: any) => s.id === '#atproto_pds');

    return pdsService?.serviceEndpoint || null;
  } catch (err) {
    console.error('Failed to get PDS for DID', did, err);
    return null;
  }
}

function didWebToHttps(did: string): string {
  const didParts = did.split(':');
  if (didParts.length < 3 || didParts[0] !== 'did' || didParts[1] !== 'web') {
    throw new Error('Invalid did:web format');
  }

  const domain = didParts[2];
  const pathParts = didParts.slice(3);

  if (pathParts.length === 0) {
    return `https://${domain}/.well-known/did.json`;
  } else {
    const path = pathParts.join('/');
    return `https://${domain}/${path}/did.json`;
  }
}
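
// Examples of the did:web -> URL mapping implemented above (illustrative inputs):
// didWebToHttps('did:web:example.com')           -> 'https://example.com/.well-known/did.json'
// didWebToHttps('did:web:example.com:users:bob') -> 'https://example.com/users/bob/did.json'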

export async function fetchSiteRecord(did: string, rkey: string): Promise<{ record: WispFsRecord; cid: string } | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.fs&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return {
      record: data.value as WispFsRecord,
      cid: data.cid || ''
    };
  } catch (err) {
    console.error('Failed to fetch site record', did, rkey, err);
    return null;
  }
}

export async function fetchSiteSettings(did: string, rkey: string): Promise<WispSettings | null> {
  try {
    const pdsEndpoint = await getPdsForDid(did);
    if (!pdsEndpoint) return null;

    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=place.wisp.settings&rkey=${encodeURIComponent(rkey)}`;
    const data = await safeFetchJson(url);

    return data.value as WispSettings;
  } catch (err) {
    // Settings are optional, so return null if not found
    return null;
  }
}

/**
 * Calculate total size of all blobs in a directory tree from manifest metadata
 */
function calculateTotalBlobSize(directory: Directory): number {
  let totalSize = 0;

  function sumBlobSizes(entries: Entry[]) {
    for (const entry of entries) {
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        // Recursively sum subdirectories
        sumBlobSizes(node.entries);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        // Add blob size from manifest
        const fileNode = node as File;
        const blobSize = (fileNode.blob as any)?.size || 0;
        totalSize += blobSize;
      }
    }
  }

  sumBlobSizes(directory.entries);
  return totalSize;
}

/**
 * Extract all subfs URIs from a directory tree with their mount paths
 */
export function extractSubfsUris(directory: Directory, currentPath: string = ''): Array<{ uri: string; path: string }> {
  const uris: Array<{ uri: string; path: string }> = [];

  for (const entry of directory.entries) {
    const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;

    if ('type' in entry.node) {
      if (entry.node.type === 'subfs') {
        // Subfs node with subject URI
        const subfsNode = entry.node as any;
        if (subfsNode.subject) {
          uris.push({ uri: subfsNode.subject, path: fullPath });
        }
      } else if (entry.node.type === 'directory') {
        // Recursively search subdirectories
        const subUris = extractSubfsUris(entry.node as Directory, fullPath);
        uris.push(...subUris);
      }
    }
  }

  return uris;
}
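
// Illustrative result (names and rkeys are made up): for a tree with a subfs entry named "blog"
// at the root and another at "docs/api", extractSubfsUris(root) returns something like:
// [
//   { uri: 'at://did:plc:example/place.wisp.subfs/blog-rkey', path: 'blog' },
//   { uri: 'at://did:plc:example/place.wisp.subfs/api-rkey', path: 'docs/api' }
// ]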

/**
 * Fetch a subfs record from the PDS
 */
async function fetchSubfsRecord(uri: string, pdsEndpoint: string): Promise<SubfsRecord | null> {
  try {
    // Parse URI: at://did/collection/rkey
    const parts = uri.replace('at://', '').split('/');
    if (parts.length < 3) {
      console.error('Invalid subfs URI:', uri);
      return null;
    }

    const did = parts[0] || '';
    const collection = parts[1] || '';
    const rkey = parts[2] || '';

    // Fetch the record from PDS
    const url = `${pdsEndpoint}/xrpc/com.atproto.repo.getRecord?repo=${encodeURIComponent(did)}&collection=${encodeURIComponent(collection)}&rkey=${encodeURIComponent(rkey)}`;
    const response = await safeFetchJson(url);

    if (!response || !response.value) {
      console.error('Subfs record not found:', uri);
      return null;
    }

    return response.value as SubfsRecord;
  } catch (err) {
    console.error('Failed to fetch subfs record:', uri, err);
    return null;
  }
}

/**
 * Replace subfs nodes in a directory tree with their actual content
 * Subfs entries are "merged" - their root entries are hoisted into the parent directory
 * This function is recursive - it will keep expanding until no subfs nodes remain
 * Uses a cache to avoid re-fetching the same subfs records across recursion depths
 */
export async function expandSubfsNodes(
  directory: Directory,
  pdsEndpoint: string,
  depth: number = 0,
  subfsCache: Map<string, SubfsRecord | null> = new Map()
): Promise<Directory> {
  const MAX_DEPTH = 10; // Prevent infinite loops

  if (depth >= MAX_DEPTH) {
    console.error('Max subfs expansion depth reached, stopping to prevent infinite loop');
    return directory;
  }

  // Extract all subfs URIs
  const subfsUris = extractSubfsUris(directory);

  if (subfsUris.length === 0) {
    // No subfs nodes, return as-is
    return directory;
  }

  // Filter to only URIs we haven't fetched yet
  const uncachedUris = subfsUris.filter(({ uri }) => !subfsCache.has(uri));

  if (uncachedUris.length > 0) {
    console.log(`[Depth ${depth}] Found ${subfsUris.length} subfs references, fetching ${uncachedUris.length} new records (${subfsUris.length - uncachedUris.length} cached)...`);

    // Fetch only uncached subfs records in parallel
    const fetchedRecords = await Promise.all(
      uncachedUris.map(async ({ uri }) => {
        const record = await fetchSubfsRecord(uri, pdsEndpoint);
        return { uri, record };
      })
    );

    // Add fetched records to cache
    for (const { uri, record } of fetchedRecords) {
      subfsCache.set(uri, record);
    }
  } else {
    console.log(`[Depth ${depth}] Found ${subfsUris.length} subfs references, all cached`);
  }

  // Build a map of path -> root entries to merge using the cache
  // Note: SubFS entries are compatible with FS entries at runtime
  const subfsMap = new Map<string, Entry[]>();
  for (const { uri, path } of subfsUris) {
    const record = subfsCache.get(uri);
    if (record && record.root && record.root.entries) {
      subfsMap.set(path, record.root.entries as unknown as Entry[]);
    }
  }

  // Replace subfs nodes by merging their root entries into the parent directory
  function replaceSubfsInEntries(entries: Entry[], currentPath: string = ''): Entry[] {
    const result: Entry[] = [];

    for (const entry of entries) {
      const fullPath = currentPath ? `${currentPath}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'subfs') {
        // Check if this is a flat merge or subdirectory merge (default to flat if not specified)
        const subfsNode = node as any;
        const isFlat = subfsNode.flat !== false; // Default to true
        const subfsEntries = subfsMap.get(fullPath);

        if (subfsEntries) {
          console.log(`[Depth ${depth}] Merging subfs node at ${fullPath} (${subfsEntries.length} entries, flat: ${isFlat})`);

          if (isFlat) {
            // Flat merge: hoist entries directly into parent directory
            const processedEntries = replaceSubfsInEntries(subfsEntries, currentPath);
            result.push(...processedEntries);
          } else {
            // Subdirectory merge: create a directory with the subfs node's name
            const processedEntries = replaceSubfsInEntries(subfsEntries, fullPath);
            const directoryNode: Directory = {
              type: 'directory',
              entries: processedEntries
            };
            result.push({
              name: entry.name,
              node: directoryNode as any // Type assertion needed due to lexicon type complexity
            });
          }
        } else {
          // Not in the merge map (not yet fetched, or the fetch failed) - preserve the subfs node for the next recursion depth
          console.log(`[Depth ${depth}] Subfs at ${fullPath} not yet expanded, preserving for next iteration`);
          result.push(entry);
        }
      } else if ('type' in node && node.type === 'directory' && 'entries' in node) {
        // Recursively process subdirectories
        result.push({
          ...entry,
          node: {
            ...node,
            entries: replaceSubfsInEntries(node.entries, fullPath)
          }
        });
      } else {
        // Regular file entry
        result.push(entry);
      }
    }

    return result;
  }

  const partiallyExpanded = {
    ...directory,
    entries: replaceSubfsInEntries(directory.entries)
  };

  // Recursively expand any remaining subfs nodes (e.g., nested subfs inside parent subfs)
  // Pass the cache to avoid re-fetching records
  return expandSubfsNodes(partiallyExpanded, pdsEndpoint, depth + 1, subfsCache);
}
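
// Illustrative merge semantics (names are made up). Given a parent directory with an entry
// { name: 'assets', node: { type: 'subfs', subject: 'at://…/place.wisp.subfs/xyz', flat: true } }
// whose subfs record has root entries [logo.png, style.css]:
//   flat: true  (default) -> logo.png and style.css are hoisted directly into the parent directory
//   flat: false           -> a directory named 'assets' is created containing logo.png and style.css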


export async function downloadAndCacheSite(did: string, rkey: string, record: WispFsRecord, pdsEndpoint: string, recordCid: string): Promise<void> {
  console.log('Caching site', did, rkey);

  if (!record.root) {
    console.error('Record missing root directory:', JSON.stringify(record, null, 2));
    throw new Error('Invalid record structure: missing root directory');
  }

  if (!record.root.entries || !Array.isArray(record.root.entries)) {
    console.error('Record root missing entries array:', JSON.stringify(record.root, null, 2));
    throw new Error('Invalid record structure: root missing entries array');
  }

  // Expand subfs nodes before caching
  const expandedRoot = await expandSubfsNodes(record.root, pdsEndpoint);

  // Verify all subfs nodes were expanded
  const remainingSubfs = extractSubfsUris(expandedRoot);
  if (remainingSubfs.length > 0) {
    console.warn(`[Cache] Warning: ${remainingSubfs.length} subfs nodes remain unexpanded after expansion`, remainingSubfs);
  }

  // Validate file count limit
  const fileCount = countFilesInDirectory(expandedRoot);
  if (fileCount > MAX_FILE_COUNT) {
    throw new Error(`Site exceeds file count limit: ${fileCount} files (max ${MAX_FILE_COUNT})`);
  }
  console.log(`[Cache] File count validation passed: ${fileCount} files (limit: ${MAX_FILE_COUNT})`);

  // Validate total size from blob metadata
  const totalBlobSize = calculateTotalBlobSize(expandedRoot);
  if (totalBlobSize > MAX_SITE_SIZE) {
    throw new Error(`Site exceeds size limit: ${(totalBlobSize / 1024 / 1024).toFixed(2)}MB (max ${(MAX_SITE_SIZE / 1024 / 1024).toFixed(0)}MB)`);
  }
  console.log(`[Cache] Size validation passed: ${(totalBlobSize / 1024 / 1024).toFixed(2)}MB (limit: ${(MAX_SITE_SIZE / 1024 / 1024).toFixed(0)}MB)`);

  // Get existing cache metadata to check for incremental updates
  const existingMetadata = await getCacheMetadata(did, rkey);
  const existingFileCids = existingMetadata?.fileCids || {};

  // Use a temporary directory with timestamp to avoid collisions
  const tempSuffix = `.tmp-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`;
  const tempDir = `${CACHE_DIR}/${did}/${rkey}${tempSuffix}`;
  const finalDir = `${CACHE_DIR}/${did}/${rkey}`;

  try {
    // Collect file CIDs from the new record (using expanded root)
    const newFileCids: Record<string, string> = {};
    collectFileCidsFromEntries(expandedRoot.entries, '', newFileCids);

    // Fetch site settings (optional)
    const settings = await fetchSiteSettings(did, rkey);

    // Download/copy files to temporary directory (with incremental logic, using expanded root)
    await cacheFiles(did, rkey, expandedRoot.entries, pdsEndpoint, '', tempSuffix, existingFileCids, finalDir);
    await saveCacheMetadata(did, rkey, recordCid, tempSuffix, newFileCids, settings);

    // Atomically replace old cache with new cache
    // On POSIX systems (Linux/macOS), rename is atomic
    if (existsSync(finalDir)) {
      // Rename old directory to backup
      const backupDir = `${finalDir}.old-${Date.now()}`;
      await rename(finalDir, backupDir);

      try {
        // Rename new directory to final location
        await rename(tempDir, finalDir);

        // Clean up old backup
        rmSync(backupDir, { recursive: true, force: true });
      } catch (err) {
        // If rename failed, restore backup
        if (existsSync(backupDir) && !existsSync(finalDir)) {
          await rename(backupDir, finalDir);
        }
        throw err;
      }
    } else {
      // No existing cache, just rename temp to final
      await rename(tempDir, finalDir);
    }

    console.log('Successfully cached site atomically', did, rkey);
  } catch (err) {
    // Clean up temp directory on failure
    if (existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
    throw err;
  }
}


async function cacheFiles(
  did: string,
  site: string,
  entries: Entry[],
  pdsEndpoint: string,
  pathPrefix: string,
  dirSuffix: string = '',
  existingFileCids: Record<string, string> = {},
  existingCacheDir?: string
): Promise<void> {
  // Collect file tasks, separating unchanged files from new/changed files
  const downloadTasks: Array<() => Promise<void>> = [];
  const copyTasks: Array<() => Promise<void>> = [];

  function collectFileTasks(
    entries: Entry[],
    currentPathPrefix: string
  ) {
    for (const entry of entries) {
      const currentPath = currentPathPrefix ? `${currentPathPrefix}/${entry.name}` : entry.name;
      const node = entry.node;

      if ('type' in node && node.type === 'directory' && 'entries' in node) {
        collectFileTasks(node.entries, currentPath);
      } else if ('type' in node && node.type === 'file' && 'blob' in node) {
        const fileNode = node as File;
        const cid = extractBlobCid(fileNode.blob);

        // Check if file is unchanged (same CID as existing cache)
        if (cid && existingFileCids[currentPath] === cid && existingCacheDir) {
          // File unchanged - copy from existing cache instead of downloading
          copyTasks.push(() => copyExistingFile(
            did,
            site,
            currentPath,
            dirSuffix,
            existingCacheDir
          ));
        } else {
          // File new or changed - download it
          downloadTasks.push(() => cacheFileBlob(
            did,
            site,
            currentPath,
            fileNode.blob,
            pdsEndpoint,
            fileNode.encoding,
            fileNode.mimeType,
            fileNode.base64,
            dirSuffix
          ));
        }
      }
    }
  }

  collectFileTasks(entries, pathPrefix);

  console.log(`[Incremental Update] Files to copy: ${copyTasks.length}, Files to download: ${downloadTasks.length}`);

  // Copy unchanged files in parallel (fast local operations) - increased limit for better performance
  const copyLimit = 50;
  for (let i = 0; i < copyTasks.length; i += copyLimit) {
    const batch = copyTasks.slice(i, i + copyLimit);
    await Promise.all(batch.map(task => task()));
    if (copyTasks.length > copyLimit) {
      console.log(`[Cache Progress] Copied ${Math.min(i + copyLimit, copyTasks.length)}/${copyTasks.length} unchanged files`);
    }
  }

  // Download new/changed files concurrently - increased from 3 to 20 for much better performance
  const downloadLimit = 20;
  let successCount = 0;
  let failureCount = 0;

  for (let i = 0; i < downloadTasks.length; i += downloadLimit) {
    const batch = downloadTasks.slice(i, i + downloadLimit);
    const results = await Promise.allSettled(batch.map(task => task()));

    // Count successes and failures
    results.forEach((result, index) => {
      if (result.status === 'fulfilled') {
        successCount++;
      } else {
        failureCount++;
        console.error(`[Cache] Failed to download file (continuing with others):`, result.reason);
      }
    });

    if (downloadTasks.length > downloadLimit) {
      console.log(`[Cache Progress] Downloaded ${Math.min(i + downloadLimit, downloadTasks.length)}/${downloadTasks.length} files (${failureCount} failed)`);
    }
  }

  if (failureCount > 0) {
    console.warn(`[Cache] Completed with ${successCount} successful and ${failureCount} failed file downloads`);
  }
}
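
// Illustrative incremental update (paths and CIDs are made up): if the previous cache recorded
//   { "index.html": "bafkreiaaa", "app.js": "bafkreibbb" }
// and the new manifest still lists index.html with CID bafkreiaaa but app.js now has CID bafkreiccc,
// then index.html is copied from the existing cache and only app.js is re-downloaded from the PDS.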

/**
 * Copy an unchanged file from existing cache to new cache location
 */
async function copyExistingFile(
  did: string,
  site: string,
  filePath: string,
  dirSuffix: string,
  existingCacheDir: string
): Promise<void> {
  const { copyFile } = await import('fs/promises');

  const sourceFile = `${existingCacheDir}/${filePath}`;
  const destFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const destDir = destFile.substring(0, destFile.lastIndexOf('/'));

  // Create destination directory if needed
  if (destDir && !existsSync(destDir)) {
    mkdirSync(destDir, { recursive: true });
  }

  try {
    // Copy the file
    await copyFile(sourceFile, destFile);

    // Copy metadata file if it exists
    const sourceMetaFile = `${sourceFile}.meta`;
    const destMetaFile = `${destFile}.meta`;
    if (existsSync(sourceMetaFile)) {
      await copyFile(sourceMetaFile, destMetaFile);
    }
  } catch (err) {
    console.error(`Failed to copy cached file ${filePath} from existing cache:`, err);
    throw err;
  }
}

async function cacheFileBlob(
  did: string,
  site: string,
  filePath: string,
  blobRef: any,
  pdsEndpoint: string,
  encoding?: 'gzip',
  mimeType?: string,
  base64?: boolean,
  dirSuffix: string = ''
): Promise<void> {
  const cid = extractBlobCid(blobRef);
  if (!cid) {
    console.error('Could not extract CID from blob', blobRef);
    return;
  }

  const blobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${encodeURIComponent(cid)}`;

  console.log(`[Cache] Fetching blob for file: ${filePath}, CID: ${cid}`);

  let content = await safeFetchBlob(blobUrl, { maxSize: MAX_BLOB_SIZE, timeout: 300000 });

  // If content is base64-encoded, decode it back to raw binary (gzipped or not)
  if (base64) {
    // The blob stores base64 text as raw bytes: decode the bytes to a string,
    // then decode that base64 string back into the original binary content
    const textDecoder = new TextDecoder();
    const base64String = textDecoder.decode(content);
    content = Buffer.from(base64String, 'base64');
  }

  const cacheFile = `${CACHE_DIR}/${did}/${site}${dirSuffix}/${filePath}`;
  const fileDir = cacheFile.substring(0, cacheFile.lastIndexOf('/'));

  if (fileDir && !existsSync(fileDir)) {
    mkdirSync(fileDir, { recursive: true });
  }

  // Use the shared function to determine if this should remain compressed
  const shouldStayCompressed = shouldCompressMimeType(mimeType);

  // Decompress files that shouldn't be stored compressed
  if (encoding === 'gzip' && !shouldStayCompressed && content.length >= 2 &&
      content[0] === 0x1f && content[1] === 0x8b) {
    try {
      const { gunzipSync } = await import('zlib');
      const decompressed = gunzipSync(content);
      content = decompressed;
      // Clear the encoding flag since we're storing decompressed
      encoding = undefined;
    } catch (error) {
      console.error(`Failed to decompress ${filePath}, storing original gzipped content:`, error);
    }
  }

  await writeFile(cacheFile, content);

  // Store metadata only if file is still compressed
  if (encoding === 'gzip' && mimeType) {
    const metaFile = `${cacheFile}.meta`;
    await writeFile(metaFile, JSON.stringify({ encoding, mimeType }));
    console.log('Cached file', filePath, content.length, 'bytes (gzipped,', mimeType + ')');
  } else {
    console.log('Cached file', filePath, content.length, 'bytes');
  }
}


export function getCachedFilePath(did: string, site: string, filePath: string): string {
  const sanitizedPath = sanitizePath(filePath);
  return `${CACHE_DIR}/${did}/${site}/${sanitizedPath}`;
}

export function isCached(did: string, site: string): boolean {
  return existsSync(`${CACHE_DIR}/${did}/${site}`);
}
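
// Illustrative on-disk layout under CACHE_DIR (DID and rkey are made up):
//   ./cache/sites/did:plc:example/my-site/index.html        <- cached site file
//   ./cache/sites/did:plc:example/my-site/app.js.meta       <- { encoding, mimeType } for still-gzipped files
//   ./cache/sites/did:plc:example/my-site/.metadata.json    <- CacheMetadata written by saveCacheMetadata
// getCachedFilePath('did:plc:example', 'my-site', 'index.html') resolves into this tree after sanitizePath.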

async function saveCacheMetadata(did: string, rkey: string, recordCid: string, dirSuffix: string = '', fileCids?: Record<string, string>, settings?: WispSettings | null): Promise<void> {
  const metadata: CacheMetadata = {
    recordCid,
    cachedAt: Date.now(),
    did,
    rkey,
    fileCids,
    settings: settings || undefined
  };

  const metadataPath = `${CACHE_DIR}/${did}/${rkey}${dirSuffix}/.metadata.json`;
  const metadataDir = metadataPath.substring(0, metadataPath.lastIndexOf('/'));

  if (!existsSync(metadataDir)) {
    mkdirSync(metadataDir, { recursive: true });
  }

  await writeFile(metadataPath, JSON.stringify(metadata, null, 2));
}

async function getCacheMetadata(did: string, rkey: string): Promise<CacheMetadata | null> {
  try {
    const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;
    if (!existsSync(metadataPath)) return null;

    const content = await readFile(metadataPath, 'utf-8');
    return JSON.parse(content) as CacheMetadata;
  } catch (err) {
    console.error('Failed to read cache metadata', err);
    return null;
  }
}

export async function getCachedSettings(did: string, rkey: string): Promise<WispSettings | null> {
  const metadata = await getCacheMetadata(did, rkey);

  // If metadata has settings (including explicit null for "no settings"), return them
  if (metadata && 'settings' in metadata) {
    return metadata.settings ?? null;
  }

  // If metadata exists but has never checked for settings, try to fetch from PDS and update cache
  if (metadata) {
    console.log('[Cache] Metadata missing settings, fetching from PDS', { did, rkey });
    try {
      const settings = await fetchSiteSettings(did, rkey);
      // Update cache with settings (or null if none found)
      // This caches the "no settings" state to avoid repeated PDS fetches
      await updateCacheMetadataSettings(did, rkey, settings);
      console.log('[Cache] Updated metadata with fetched settings', { did, rkey, hasSettings: !!settings });
      return settings;
    } catch (err) {
      console.error('[Cache] Failed to fetch/update settings', { did, rkey, err });
    }
  }

  return null;
}

export async function updateCacheMetadataSettings(did: string, rkey: string, settings: WispSettings | null): Promise<void> {
  const metadataPath = `${CACHE_DIR}/${did}/${rkey}/.metadata.json`;

  if (!existsSync(metadataPath)) {
    console.warn('Metadata file does not exist, cannot update settings', { did, rkey });
    return;
  }

  try {
    // Read existing metadata
    const content = await readFile(metadataPath, 'utf-8');
    const metadata = JSON.parse(content) as CacheMetadata;

    // Update settings field
    // Store null explicitly to cache "no settings" state and avoid repeated fetches
    metadata.settings = settings ?? null;

    // Write back to disk
    await writeFile(metadataPath, JSON.stringify(metadata, null, 2), 'utf-8');
    console.log('Updated metadata settings', { did, rkey, hasSettings: !!settings });
  } catch (err) {
    console.error('Failed to update metadata settings', err);
    throw err;
  }
}

export async function isCacheValid(did: string, rkey: string, currentRecordCid?: string): Promise<boolean> {
  const metadata = await getCacheMetadata(did, rkey);
  if (!metadata) return false;

  // Check if cache has expired (14 days TTL)
  const cacheAge = Date.now() - metadata.cachedAt;
  if (cacheAge > CACHE_TTL) {
    console.log('[Cache] Cache expired for', did, rkey);
    return false;
  }

  // If current CID is provided, verify it matches
  if (currentRecordCid && metadata.recordCid !== currentRecordCid) {
    console.log('[Cache] CID mismatch for', did, rkey, 'cached:', metadata.recordCid, 'current:', currentRecordCid);
    return false;
  }

  return true;
}
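
// Illustrative serving flow tying the pieces together (identifiers are made up):
//   const did = await resolveDid('alice.example.com');
//   const site = await fetchSiteRecord(did!, 'my-site');
//   if (site && !(await isCacheValid(did!, 'my-site', site.cid))) {
//     const pds = await getPdsForDid(did!);
//     await downloadAndCacheSite(did!, 'my-site', site.record, pds!, site.cid);
//   }
//   const filePath = getCachedFilePath(did!, 'my-site', 'index.html'); // file to serve from disk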