WIP library that stores cold objects in S3, warm objects on disk, and hot objects in memory.
nodejs typescript
at main 22 kB view raw
import type {
  TieredStorageConfig,
  SetOptions,
  StorageResult,
  SetResult,
  StorageMetadata,
  StorageTier,
  AllTierStats,
  StorageSnapshot,
  PlacementRule,
} from './types/index.js';
import { compress, decompress } from './utils/compression.js';
import { defaultSerialize, defaultDeserialize } from './utils/serialization.js';
import { calculateChecksum } from './utils/checksum.js';
import { matchGlob } from './utils/glob.js';

/** Name of a storage tier, listed in read order (hot first, cold last). */
type TierName = 'hot' | 'warm' | 'cold';

/** Raw bytes plus metadata as read from a single tier. */
interface TierEntry {
  data: Uint8Array;
  metadata: StorageMetadata;
}

/**
 * Main orchestrator for tiered storage system.
 *
 * @typeParam T - The type of data being stored
 *
 * @remarks
 * Implements a cascading containment model:
 * - **Write Strategy (Cascading Down):** Write to hot → also writes to warm and cold
 * - **Read Strategy (Bubbling Up):** Check hot first → if miss, check warm → if miss, check cold
 * - **Bootstrap Strategy:** Hot can bootstrap from warm, warm can bootstrap from cold
 *
 * The cold tier is the source of truth and is required.
 * Hot and warm tiers are optional performance optimizations.
 *
 * @example
 * ```typescript
 * const storage = new TieredStorage({
 *   tiers: {
 *     hot: new MemoryStorageTier({ maxSizeBytes: 100 * 1024 * 1024 }), // 100MB
 *     warm: new DiskStorageTier({ directory: './cache' }),
 *     cold: new S3StorageTier({ bucket: 'my-bucket', region: 'us-east-1' }),
 *   },
 *   compression: true,
 *   defaultTTL: 14 * 24 * 60 * 60 * 1000, // 14 days
 *   promotionStrategy: 'lazy',
 * });
 *
 * // Store data (cascades to all tiers)
 * await storage.set('user:123', { name: 'Alice' });
 *
 * // Retrieve data (bubbles up from cold → warm → hot)
 * const user = await storage.get('user:123');
 *
 * // Invalidate all keys with prefix
 * await storage.invalidate('user:');
 * ```
 */
export class TieredStorage<T = unknown> {
  private readonly serialize: (data: unknown) => Promise<Uint8Array>;
  private readonly deserialize: (data: Uint8Array) => Promise<unknown>;

  /**
   * @param config - Tier instances plus optional serialization, compression,
   *   TTL, promotion and placement settings
   * @throws Error if no cold tier is configured
   */
  constructor(private readonly config: TieredStorageConfig) {
    if (!config.tiers.cold) {
      throw new Error('Cold tier is required');
    }

    this.serialize = config.serialization?.serialize ?? defaultSerialize;
    this.deserialize = config.serialization?.deserialize ?? defaultDeserialize;
  }

  /**
   * Retrieve data for a key.
   *
   * @param key - The key to retrieve
   * @returns The data, or null if not found or expired
   *
   * @remarks
   * Thin wrapper around {@link TieredStorage.getWithMetadata} that drops the
   * metadata and source tier.
   */
  async get(key: string): Promise<T | null> {
    const result = await this.getWithMetadata(key);
    return result ? result.data : null;
  }

  /**
   * Retrieve data with metadata and source tier information.
   *
   * @param key - The key to retrieve
   * @returns The data, metadata, and source tier, or null if not found
   *
   * @remarks
   * Checks tiers in order: hot → warm → cold. The first tier holding the key
   * decides the outcome:
   * - If that entry has expired, the key is deleted from every tier and
   *   null is returned.
   * - Otherwise, when `promotionStrategy` is `'eager'`, the raw entry is
   *   copied into every faster tier (awaited), and the access statistics of
   *   the serving tier are updated in the background.
   */
  async getWithMetadata(key: string): Promise<StorageResult<T> | null> {
    for (const [name, tier] of this.configuredTiers()) {
      const entry = await this.getFromTier(tier, key);
      if (!entry) continue;

      if (this.isExpired(entry.metadata)) {
        await this.delete(key);
        return null;
      }

      // Eager promotion is awaited so the faster tiers are populated on return.
      if (this.config.promotionStrategy === 'eager') {
        await this.promote(name, key, entry);
      }

      // Fire-and-forget: updateAccessStats never rejects, so `void` is safe.
      void this.updateAccessStats(key, name);

      return {
        data: (await this.deserializeData(entry.data, entry.metadata)) as T,
        metadata: entry.metadata,
        source: name,
      };
    }

    return null;
  }

  /**
   * List the configured tiers in read order (hot → warm → cold).
   * Optional tiers that are not configured are omitted.
   */
  private configuredTiers(): Array<[TierName, StorageTier]> {
    const { hot, warm, cold } = this.config.tiers;
    const tiers: Array<[TierName, StorageTier]> = [];
    if (hot) tiers.push(['hot', hot]);
    if (warm) tiers.push(['warm', warm]);
    tiers.push(['cold', cold]);
    return tiers;
  }

  /**
   * Copy an entry found in `source` into every configured tier that is
   * faster than `source` (cold → warm + hot, warm → hot, hot → nothing).
   */
  private async promote(source: TierName, key: string, entry: TierEntry): Promise<void> {
    const { hot, warm } = this.config.tiers;
    const writes: Promise<void>[] = [];

    if (source === 'cold' && warm) {
      writes.push(warm.set(key, entry.data, entry.metadata));
    }
    if (source !== 'hot' && hot) {
      writes.push(hot.set(key, entry.data, entry.metadata));
    }

    await Promise.all(writes);
  }

  /**
   * Get data and metadata from a tier using the most efficient method.
   *
   * @remarks
   * Uses the tier's getWithMetadata if available, otherwise falls back
   * to separate get() and getMetadata() calls. Returns null when either
   * the data or the metadata is missing.
   */
  private async getFromTier(tier: StorageTier, key: string): Promise<TierEntry | null> {
    // Use optimized combined method if available
    if (tier.getWithMetadata) {
      return tier.getWithMetadata(key);
    }

    // Fallback: separate calls
    const data = await tier.get(key);
    if (!data) {
      return null;
    }
    const metadata = await tier.getMetadata(key);
    if (!metadata) {
      return null;
    }
    return { data, metadata };
  }

  /**
   * Store data with optional configuration.
   *
   * @param key - The key to store under
   * @param data - The data to store
   * @param options - Optional configuration (TTL, metadata, tier skipping)
   * @returns Information about what was stored and where
   *
   * @remarks
   * Data cascades down through tiers:
   * - If written to hot, also written to warm and cold
   * - If written to warm (hot skipped), also written to cold
   * - Cold is always written (source of truth)
   *
   * Use `skipTiers` to control placement. For example:
   * - Large files: `skipTiers: ['hot']` to avoid memory bloat
   * - Critical small files: Write to all tiers for fastest access
   *
   * Automatically handles serialization and optional compression.
   * Tiers are written sequentially in the order hot → warm → cold.
   */
  async set(key: string, data: T, options?: SetOptions): Promise<SetResult> {
    // 1. Serialize, then optionally compress
    const serialized = await this.serialize(data);
    const finalData = this.config.compression ? await compress(serialized) : serialized;

    // 2. Metadata describes the bytes actually stored (post-compression)
    const metadata = this.createMetadata(key, finalData, options);

    // 3. Determine which tiers to write to
    const allowedTiers = this.getTiersForKey(key, options?.skipTiers);
    const { hot, warm, cold } = this.config.tiers;
    const tiersWritten: TierName[] = [];

    if (hot && allowedTiers.includes('hot')) {
      await hot.set(key, finalData, metadata);
      tiersWritten.push('hot');
    }

    if (warm && allowedTiers.includes('warm')) {
      await warm.set(key, finalData, metadata);
      tiersWritten.push('warm');
    }

    // Always write to cold (source of truth)
    await cold.set(key, finalData, metadata);
    tiersWritten.push('cold');

    return { key, metadata, tiersWritten };
  }

  /**
   * Determine which tiers a key should be written to.
   *
   * @param key - The key being stored
   * @param skipTiers - Explicit tiers to skip (overrides placement rules)
   * @returns Array of tiers to write to
   *
   * @remarks
   * Priority: skipTiers option > placementRules > all configured tiers.
   * The first placement rule whose pattern matches the key wins, and
   * 'cold' is appended if the rule omits it.
   */
  private getTiersForKey(key: string, skipTiers?: ('hot' | 'warm')[]): TierName[] {
    const allTiers: TierName[] = ['hot', 'warm', 'cold'];

    // If explicit skipTiers provided, use that
    if (skipTiers && skipTiers.length > 0) {
      const skipped = new Set<TierName>(skipTiers);
      return allTiers.filter((tier) => !skipped.has(tier));
    }

    // Check placement rules (first match wins)
    for (const rule of this.config.placementRules ?? []) {
      if (matchGlob(rule.pattern, key)) {
        // Ensure cold is always included
        return rule.tiers.includes('cold') ? rule.tiers : [...rule.tiers, 'cold'];
      }
    }

    // Default: write to all configured tiers
    return allTiers;
  }

  /**
   * Delete data from all tiers.
   *
   * @param key - The key to delete
   *
   * @remarks
   * Deletes from all configured tiers in parallel.
   */
  async delete(key: string): Promise<void> {
    await Promise.all([
      this.config.tiers.hot?.delete(key),
      this.config.tiers.warm?.delete(key),
      this.config.tiers.cold.delete(key),
    ]);
  }

  /**
   * Check if a key exists in any tier.
   *
   * @param key - The key to check
   * @returns true if the key exists and hasn't expired
   *
   * @remarks
   * Checks tiers in order: hot → warm → cold.
   * A tier whose copy is expired (or has no metadata) does not count;
   * the next tier is consulted instead.
   */
  async exists(key: string): Promise<boolean> {
    for (const [, tier] of this.configuredTiers()) {
      if (!(await tier.exists(key))) continue;

      const metadata = await tier.getMetadata(key);
      if (metadata && !this.isExpired(metadata)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Renew TTL for a key.
   *
   * @param key - The key to touch
   * @param ttlMs - Optional new TTL in milliseconds (uses default if not provided)
   *
   * @remarks
   * Updates the TTL and lastAccessed timestamp in every tier that has
   * metadata for the key.
   * Useful for implementing "keep alive" behavior for actively used keys.
   * Does nothing if no TTL is configured (or the TTL is 0).
   */
  async touch(key: string, ttlMs?: number): Promise<void> {
    const ttl = ttlMs ?? this.config.defaultTTL;
    if (!ttl) return;

    const newTTL = new Date(Date.now() + ttl);

    for (const [, tier] of this.configuredTiers()) {
      const metadata = await tier.getMetadata(key);
      if (metadata) {
        metadata.ttl = newTTL;
        metadata.lastAccessed = new Date();
        await tier.setMetadata(key, metadata);
      }
    }
  }

  /**
   * Invalidate all keys matching a prefix.
   *
   * @param prefix - The prefix to match (e.g., 'user:' matches 'user:123', 'user:456')
   * @returns Number of distinct keys submitted for deletion
   *
   * @remarks
   * Useful for bulk invalidation:
   * - Site invalidation: `invalidate('site:abc:')`
   * - User invalidation: `invalidate('user:123:')`
   * - Global invalidation: `invalidate('')` (deletes everything)
   *
   * Keys are collected from every tier (a key may live in only some of
   * them), then deleted from all tiers in parallel.
   */
  async invalidate(prefix: string): Promise<number> {
    const keysToDelete = new Set<string>();

    // Collect all keys matching prefix from all tiers
    for (const [, tier] of this.configuredTiers()) {
      for await (const key of tier.listKeys(prefix)) {
        keysToDelete.add(key);
      }
    }

    // Delete from all tiers in parallel
    const keys = Array.from(keysToDelete);

    await Promise.all([
      this.config.tiers.hot?.deleteMany(keys),
      this.config.tiers.warm?.deleteMany(keys),
      this.config.tiers.cold.deleteMany(keys),
    ]);

    return keys.length;
  }

  /**
   * List all keys, optionally filtered by prefix.
   *
   * @param prefix - Optional prefix to filter keys
   * @returns Async iterator of keys
   *
   * @remarks
   * Returns keys from the cold tier (source of truth), yielding them one
   * at a time as the cold tier produces them.
   *
   * @example
   * ```typescript
   * for await (const key of storage.listKeys('user:')) {
   *   console.log(key);
   * }
   * ```
   */
  async *listKeys(prefix?: string): AsyncIterableIterator<string> {
    yield* this.config.tiers.cold.listKeys(prefix);
  }

  /**
   * Get aggregated statistics across all tiers.
   *
   * @returns Per-tier statistics plus total hits, total misses and hit rate
   *
   * @remarks
   * Useful for monitoring and capacity planning.
   * Hit rate is calculated as: hits / (hits + misses), or 0 when there
   * have been no lookups at all.
   */
  async getStats(): Promise<AllTierStats> {
    const [hot, warm, cold] = await Promise.all([
      this.config.tiers.hot?.getStats(),
      this.config.tiers.warm?.getStats(),
      this.config.tiers.cold.getStats(),
    ]);

    const totalHits = (hot?.hits ?? 0) + (warm?.hits ?? 0) + (cold?.hits ?? 0);
    const totalMisses = (hot?.misses ?? 0) + (warm?.misses ?? 0) + (cold?.misses ?? 0);
    const lookups = totalHits + totalMisses;
    const hitRate = lookups > 0 ? totalHits / lookups : 0;

    return {
      ...(hot && { hot }),
      ...(warm && { warm }),
      cold,
      totalHits,
      totalMisses,
      hitRate,
    };
  }

  /**
   * Clear all data from all tiers.
   *
   * @remarks
   * Use with extreme caution! This will delete all data in the entire storage system.
   * Cannot be undone.
   */
  async clear(): Promise<void> {
    await Promise.all([
      this.config.tiers.hot?.clear(),
      this.config.tiers.warm?.clear(),
      this.config.tiers.cold.clear(),
    ]);
  }

  /**
   * Clear a specific tier.
   *
   * @param tier - Which tier to clear
   *
   * @remarks
   * Useful for:
   * - Clearing hot tier to test warm/cold performance
   * - Clearing warm tier to force rebuilding from cold
   * - Clearing cold tier to start fresh (⚠️ loses source of truth!)
   *
   * Does nothing if the requested optional tier is not configured.
   */
  async clearTier(tier: 'hot' | 'warm' | 'cold'): Promise<void> {
    await this.config.tiers[tier]?.clear();
  }

  /**
   * Export metadata snapshot for backup or migration.
   *
   * @returns Snapshot containing all keys, metadata, and statistics
   *
   * @remarks
   * The snapshot includes metadata but not the actual data (data remains in tiers).
   * Keys and metadata are read from the cold tier; a key without metadata
   * is still listed in `keys` but has no entry in `metadata`.
   */
  async export(): Promise<StorageSnapshot> {
    const cold = this.config.tiers.cold;
    const keys: string[] = [];
    const metadata: Record<string, StorageMetadata> = {};

    // Export from cold tier (source of truth)
    for await (const key of cold.listKeys()) {
      keys.push(key);
      const meta = await cold.getMetadata(key);
      if (meta) {
        metadata[key] = meta;
      }
    }

    const stats = await this.getStats();

    return {
      version: 1,
      exportedAt: new Date(),
      keys,
      metadata,
      stats,
    };
  }

  /**
   * Import metadata snapshot.
   *
   * @param snapshot - Snapshot to import
   * @throws Error if the snapshot version is not 1
   *
   * @remarks
   * Validates version compatibility before importing.
   * Only imports metadata - assumes data already exists in cold tier.
   * Keys listed in the snapshot without a metadata entry are skipped.
   */
  async import(snapshot: StorageSnapshot): Promise<void> {
    if (snapshot.version !== 1) {
      throw new Error(`Unsupported snapshot version: ${snapshot.version}`);
    }

    const tiers = this.configuredTiers();

    // Import metadata into all configured tiers (hot → warm → cold per key)
    for (const key of snapshot.keys) {
      const metadata = snapshot.metadata[key];
      if (!metadata) continue;

      for (const [, tier] of tiers) {
        await tier.setMetadata(key, metadata);
      }
    }
  }

  /**
   * Bootstrap hot tier from warm tier.
   *
   * @param limit - Optional limit on number of items to load; a limit of 0
   *   (or less) loads nothing
   * @returns Number of items loaded
   *
   * @remarks
   * Loads the most frequently accessed, non-expired items from warm into hot.
   * Useful for warming up the cache after a restart.
   * Items are sorted by: accessCount * lastAccessed timestamp (higher is
   * better), with ties broken by most recent lastAccessed.
   * Returns 0 when either the hot or the warm tier is not configured.
   */
  async bootstrapHot(limit?: number): Promise<number> {
    const { hot, warm } = this.config.tiers;
    if (!hot || !warm) {
      return 0;
    }
    // An explicit limit of 0 means "load nothing", not "no limit".
    if (limit !== undefined && limit <= 0) {
      return 0;
    }

    // Load metadata for all live keys; expired entries are not worth caching.
    const candidates: Array<[string, StorageMetadata]> = [];
    for await (const key of warm.listKeys()) {
      const metadata = await warm.getMetadata(key);
      if (metadata && !this.isExpired(metadata)) {
        candidates.push([key, metadata]);
      }
    }

    // Sort by access count * recency (simple scoring), then by recency alone
    // so never-accessed entries (score 0) are still ordered newest first.
    candidates.sort(([, a], [, b]) => {
      const recencyA = a.lastAccessed.getTime();
      const recencyB = b.lastAccessed.getTime();
      return b.accessCount * recencyB - a.accessCount * recencyA || recencyB - recencyA;
    });

    // Load top N keys into hot tier
    const selected = limit === undefined ? candidates : candidates.slice(0, limit);

    let loaded = 0;
    for (const [key, metadata] of selected) {
      const data = await warm.get(key);
      if (data) {
        await hot.set(key, data, metadata);
        loaded++;
      }
    }

    return loaded;
  }

  /**
   * Bootstrap warm tier from cold tier.
   *
   * @param options - Optional limit and date filter; a limit of 0 (or less)
   *   loads nothing
   * @returns Number of items loaded
   *
   * @remarks
   * Copies non-expired items from cold into warm, in the order the cold
   * tier lists them, skipping items whose lastAccessed is before `sinceDate`.
   * Useful for:
   * - Initial cache population
   * - Recovering from warm tier failure
   * - Migrating to a new warm tier implementation
   */
  async bootstrapWarm(options?: { limit?: number; sinceDate?: Date }): Promise<number> {
    const { warm, cold } = this.config.tiers;
    if (!warm) {
      return 0;
    }

    const limit = options?.limit;
    // An explicit limit of 0 means "load nothing", not "no limit".
    if (limit !== undefined && limit <= 0) {
      return 0;
    }

    let loaded = 0;

    for await (const key of cold.listKeys()) {
      const metadata = await cold.getMetadata(key);
      if (!metadata) continue;

      // Skip entries that would be rejected on read anyway
      if (this.isExpired(metadata)) continue;

      // Skip if too old
      if (options?.sinceDate && metadata.lastAccessed < options.sinceDate) {
        continue;
      }

      const data = await cold.get(key);
      if (data) {
        await warm.set(key, data, metadata);
        loaded++;

        if (limit !== undefined && loaded >= limit) {
          break;
        }
      }
    }

    return loaded;
  }

  /**
   * Check if data has expired based on TTL.
   * Entries without a TTL never expire.
   */
  private isExpired(metadata: StorageMetadata): boolean {
    if (!metadata.ttl) return false;
    return Date.now() > metadata.ttl.getTime();
  }

  /**
   * Update access statistics (lastAccessed, accessCount) for a key in one tier.
   *
   * @remarks
   * Best-effort: callers invoke this without awaiting, so any tier error is
   * swallowed here. Otherwise a failed stat update would surface as an
   * unhandled promise rejection on an otherwise successful read.
   */
  private async updateAccessStats(key: string, tier: 'hot' | 'warm' | 'cold'): Promise<void> {
    const tierObj = this.config.tiers[tier];
    if (!tierObj) return;

    try {
      const metadata = await tierObj.getMetadata(key);
      if (metadata) {
        metadata.lastAccessed = new Date();
        metadata.accessCount++;
        await tierObj.setMetadata(key, metadata);
      }
    } catch {
      // Non-critical bookkeeping; the read that triggered it already succeeded.
    }
  }

  /**
   * Create metadata for new data.
   *
   * @param key - The key being stored
   * @param data - The bytes that will be written (after optional compression)
   * @param options - Per-call TTL and custom metadata overrides
   */
  private createMetadata(key: string, data: Uint8Array, options?: SetOptions): StorageMetadata {
    const now = new Date();
    const ttl = options?.ttl ?? this.config.defaultTTL;

    const metadata: StorageMetadata = {
      key,
      size: data.byteLength,
      createdAt: now,
      lastAccessed: now,
      accessCount: 0,
      compressed: this.config.compression ?? false,
      checksum: calculateChecksum(data),
    };

    if (ttl) {
      metadata.ttl = new Date(now.getTime() + ttl);
    }

    if (options?.metadata) {
      metadata.customMetadata = options.metadata;
    }

    return metadata;
  }

  /**
   * Deserialize data, handling compression automatically.
   *
   * @param data - Raw bytes as stored in a tier
   * @param metadata - Metadata stored with the bytes, if available
   *
   * @remarks
   * Whether to decompress is decided by the entry's own `compressed` flag,
   * so entries stay readable after the `compression` setting changes. The
   * current config is only the fallback when no flag is available. The gzip
   * magic-byte check is kept as a guard against decompressing bytes that
   * are not actually gzip.
   */
  private async deserializeData(data: Uint8Array, metadata?: StorageMetadata): Promise<unknown> {
    const flaggedCompressed = metadata?.compressed ?? this.config.compression ?? false;
    const hasGzipMagic = data.length >= 2 && data[0] === 0x1f && data[1] === 0x8b;

    const finalData = flaggedCompressed && hasGzipMagic ? await decompress(data) : data;

    return this.deserialize(finalData);
  }
}