Work-in-progress library that stores cold objects in S3, warm objects on disk, and hot objects in memory.
Node.js, TypeScript
/**
 * Metadata associated with stored data in a tier.
 *
 * @remarks
 * This metadata is stored alongside the actual data and is used for:
 * - TTL management and expiration
 * - Access tracking for LRU/eviction policies
 * - Data integrity verification via checksum
 * - Content type information for HTTP serving
 */
export interface StorageMetadata {
  /** Original key used to store the data (human-readable) */
  key: string;

  /** Size of the data in bytes (uncompressed size) */
  size: number;

  /** Timestamp when the data was first created */
  createdAt: Date;

  /** Timestamp when the data was last accessed */
  lastAccessed: Date;

  /** Number of times this data has been accessed */
  accessCount: number;

  /** Optional expiration timestamp. Data expires when current time > ttl */
  ttl?: Date;

  /** Whether the data is compressed (e.g., with gzip) */
  compressed: boolean;

  /** SHA256 checksum of the data for integrity verification */
  checksum: string;

  /** Optional MIME type (e.g., 'text/html', 'application/json') */
  mimeType?: string;

  /** Optional encoding (e.g., 'gzip', 'base64') */
  encoding?: string;

  /** User-defined metadata fields */
  customMetadata?: Record<string, string>;
}

/**
 * Statistics for a single storage tier.
 *
 * @remarks
 * Used for monitoring cache performance and capacity planning.
 */
export interface TierStats {
  /** Total bytes stored in this tier */
  bytes: number;

  /** Total number of items stored in this tier */
  items: number;

  /** Number of cache hits (only tracked if tier implements hit tracking) */
  hits?: number;

  /** Number of cache misses (only tracked if tier implements miss tracking) */
  misses?: number;

  /** Number of evictions due to size/count limits (only tracked if tier implements eviction) */
  evictions?: number;
}

/**
 * Aggregated statistics across all configured tiers.
 *
 * @remarks
 * Provides a complete view of cache performance across the entire storage hierarchy.
 */
export interface AllTierStats {
  /** Statistics for hot tier (if configured) */
  hot?: TierStats;

  /** Statistics for warm tier (if configured) */
  warm?: TierStats;

  /** Statistics for cold tier (always present) */
  cold: TierStats;

  /** Total hits across all tiers */
  totalHits: number;

  /** Total misses across all tiers */
  totalMisses: number;

  /** Hit rate as a fraction in the range 0-1 (not a 0-100 percentage) */
  hitRate: number;
}

/**
 * Result from a combined get+metadata operation on a tier.
 */
export interface TierGetResult {
  /** The retrieved data */
  data: Uint8Array;

  /** Metadata associated with the data */
  metadata: StorageMetadata;
}

/**
 * Interface that all storage tier implementations must satisfy.
 *
 * @remarks
 * This is the core abstraction that allows pluggable backends.
 * Implementations can be memory-based (Map, Redis), disk-based (filesystem, SQLite),
 * or cloud-based (S3, R2, etc.).
 *
 * @example
 * ```typescript
 * class RedisStorageTier implements StorageTier {
 *   constructor(private client: RedisClient) {}
 *
 *   async get(key: string): Promise<Uint8Array | null> {
 *     const buffer = await this.client.getBuffer(key);
 *     return buffer ? new Uint8Array(buffer) : null;
 *   }
 *
 *   // ... implement other methods
 * }
 * ```
 */
export interface StorageTier {
  /**
   * Retrieve data for a key.
   *
   * @param key - The key to retrieve
   * @returns The data as a Uint8Array, or null if not found
   */
  get(key: string): Promise<Uint8Array | null>;

  /**
   * Retrieve data and metadata together in a single operation.
   *
   * @param key - The key to retrieve
   * @returns The data and metadata, or null if not found
   *
   * @remarks
   * This is more efficient than calling get() and getMetadata() separately,
   * especially for disk and network-based tiers.
   * The method is optional, so callers must check that it is defined before invoking it.
   */
  getWithMetadata?(key: string): Promise<TierGetResult | null>;

  /**
   * Store data with associated metadata.
   *
   * @param key - The key to store under
   * @param data - The data to store (as Uint8Array)
   * @param metadata - Metadata to store alongside the data
   *
   * @remarks
   * If the key already exists, it should be overwritten.
   */
  set(key: string, data: Uint8Array, metadata: StorageMetadata): Promise<void>;

  /**
   * Delete data for a key.
   *
   * @param key - The key to delete
   *
   * @remarks
   * Should not throw if the key doesn't exist.
   */
  delete(key: string): Promise<void>;

  /**
   * Check if a key exists in this tier.
   *
   * @param key - The key to check
   * @returns true if the key exists, false otherwise
   */
  exists(key: string): Promise<boolean>;

  /**
   * List all keys in this tier, optionally filtered by prefix.
   *
   * @param prefix - Optional prefix to filter keys (e.g., 'user:' matches 'user:123', 'user:456')
   * @returns An async iterator of keys
   *
   * @remarks
   * This should be memory-efficient and stream keys rather than loading all into memory.
   * Useful for prefix-based invalidation and cache warming.
   *
   * @example
   * ```typescript
   * for await (const key of tier.listKeys('site:')) {
   *   console.log(key); // 'site:abc', 'site:xyz', etc.
   * }
   * ```
   */
  listKeys(prefix?: string): AsyncIterableIterator<string>;

  /**
   * Delete multiple keys in a single operation.
   *
   * @param keys - Array of keys to delete
   *
   * @remarks
   * This is more efficient than calling delete() in a loop.
   * Implementations should batch deletions where possible.
   */
  deleteMany(keys: string[]): Promise<void>;

  /**
   * Retrieve metadata for a key without fetching the data.
   *
   * @param key - The key to get metadata for
   * @returns The metadata, or null if not found
   *
   * @remarks
   * This is useful for checking TTL, access counts, etc. without loading large data.
   */
  getMetadata(key: string): Promise<StorageMetadata | null>;

  /**
   * Update metadata for a key without modifying the data.
   *
   * @param key - The key to update metadata for
   * @param metadata - The new metadata
   *
   * @remarks
   * Useful for updating TTL (via touch()) or access counts.
   */
  setMetadata(key: string, metadata: StorageMetadata): Promise<void>;

  /**
   * Get statistics about this tier.
   *
   * @returns Statistics including size, item count, hits, misses, etc.
   */
  getStats(): Promise<TierStats>;

  /**
   * Clear all data from this tier.
   *
   * @remarks
   * Use with caution! This will delete all data in the tier.
   */
  clear(): Promise<void>;
}

/**
 * Rule for automatic tier placement based on key patterns.
 *
 * @remarks
 * Rules are evaluated in order. First matching rule wins.
 * Use this to define which keys go to which tiers without
 * specifying skipTiers on every set() call.
 *
 * @example
 * ```typescript
 * placementRules: [
 *   { pattern: 'index.html', tiers: ['hot', 'warm', 'cold'] },
 *   { pattern: '*.html', tiers: ['warm', 'cold'] },
 *   { pattern: 'assets/**', tiers: ['warm', 'cold'] },
 *   { pattern: '**', tiers: ['warm', 'cold'] }, // default
 * ]
 * ```
 */
export interface PlacementRule {
  /**
   * Glob pattern to match against keys.
   *
   * @remarks
   * Supports basic globs:
   * - `*` matches any characters except `/`
   * - `**` matches any characters including `/`
   * - Exact matches work too: `index.html`
   */
  pattern: string;

  /**
   * Which tiers to write to for matching keys.
   *
   * @remarks
   * Cold is always included (source of truth).
   * Use `['hot', 'warm', 'cold']` for critical files.
   * Use `['warm', 'cold']` for large files.
   * Use `['cold']` for archival only.
   */
  tiers: ('hot' | 'warm' | 'cold')[];
}

/**
 * Configuration for the TieredStorage system.
 *
 * @remarks
 * The tiered storage system uses a cascading containment model:
 * - Hot tier (optional): Fastest, smallest capacity (memory/Redis)
 * - Warm tier (optional): Medium speed, medium capacity (disk/database)
 * - Cold tier (required): Slowest, unlimited capacity (S3/object storage)
 *
 * Data flows down on writes (hot → warm → cold) and bubbles up on reads (cold → warm → hot).
 */
export interface TieredStorageConfig {
  /** Storage tier configuration */
  tiers: {
    /** Optional hot tier - fastest, smallest capacity (e.g., in-memory, Redis) */
    hot?: StorageTier;

    /** Optional warm tier - medium speed, medium capacity (e.g., disk, SQLite, Postgres) */
    warm?: StorageTier;

    /** Required cold tier - slowest, largest capacity (e.g., S3, R2, object storage) */
    cold: StorageTier;
  };

  /** Rules for automatic tier placement based on key patterns. First match wins. */
  placementRules?: PlacementRule[];

  /**
   * Whether to automatically compress data before storing.
   *
   * @defaultValue false
   *
   * @remarks
   * Uses gzip compression. Compression is transparent - data is automatically
   * decompressed on retrieval. The `compressed` flag in metadata indicates compression state.
   */
  compression?: boolean;

  /**
   * Default TTL (time-to-live) in milliseconds.
   *
   * @remarks
   * Data will expire after this duration. Can be overridden per-key via SetOptions.
   * If not set, data never expires.
   */
  defaultTTL?: number;

  /**
   * Strategy for promoting data to upper tiers on cache miss.
   *
   * @defaultValue 'lazy'
   *
   * @remarks
   * - 'eager': Immediately promote data to all upper tiers on read
   * - 'lazy': Don't automatically promote; rely on explicit promotion or next write
   *
   * Eager promotion increases hot tier hit rate but adds write overhead.
   * Lazy promotion reduces writes but may serve from lower tiers more often.
   */
  promotionStrategy?: 'eager' | 'lazy';

  /**
   * Custom serialization/deserialization functions.
   *
   * @remarks
   * By default, JSON serialization is used. Provide custom functions for:
   * - Non-JSON types (e.g., Buffer, custom classes)
   * - Performance optimization (e.g., msgpack, protobuf)
   * - Encryption (serialize includes encryption, deserialize includes decryption)
   */
  serialization?: {
    /** Convert data to Uint8Array for storage */
    serialize: (data: unknown) => Promise<Uint8Array>;

    /** Convert Uint8Array back to original data */
    deserialize: (data: Uint8Array) => Promise<unknown>;
  };
}

/**
 * Options for setting data in the cache.
 *
 * @remarks
 * These options allow fine-grained control over where and how data is stored.
 */
export interface SetOptions {
  /**
   * Custom TTL in milliseconds for this specific key.
   *
   * @remarks
   * Overrides the default TTL from TieredStorageConfig.
   * Data will expire after this duration from the current time.
   */
  ttl?: number;

  /**
   * Custom metadata to attach to this key.
   *
   * @remarks
   * Merged with system-generated metadata (size, checksum, timestamps).
   * Useful for storing application-specific information like content-type, encoding, etc.
   */
  metadata?: Record<string, string>;

  /**
   * Skip writing to specific tiers.
   *
   * @remarks
   * Useful for controlling which tiers receive data. For example:
   * - Large files: `skipTiers: ['hot']` to avoid filling memory
   * - Small critical files: omit `skipTiers` so every tier receives the data
   *
   * Note: Cold tier can never be skipped (it's the source of truth), which is
   * why only `'hot'` and `'warm'` are accepted here.
   *
   * @example
   * ```typescript
   * // Store large file only in warm and cold (skip memory)
   * await storage.set('large-video.mp4', videoData, { skipTiers: ['hot'] });
   *
   * // Store index.html in all tiers for fast access
   * await storage.set('index.html', htmlData); // No skipping
   * ```
   */
  skipTiers?: ('hot' | 'warm')[];
}

/**
 * Result from retrieving data with metadata.
 *
 * @typeParam T - The type of data being retrieved
 *
 * @remarks
 * Includes both the data and information about where it was served from.
 */
export interface StorageResult<T> {
  /** The retrieved data */
  data: T;

  /** Metadata associated with the data */
  metadata: StorageMetadata;

  /** Which tier the data was served from */
  source: 'hot' | 'warm' | 'cold';
}

/**
 * Result from setting data in the cache.
 *
 * @remarks
 * Indicates which tiers successfully received the data.
 */
export interface SetResult {
  /** The key that was set */
  key: string;

  /** Metadata that was stored with the data */
  metadata: StorageMetadata;

  /** Which tiers received the data */
  tiersWritten: ('hot' | 'warm' | 'cold')[];
}

/**
 * Snapshot of the entire storage state.
 *
 * @remarks
 * Used for export/import, backup, and migration scenarios.
 * The snapshot includes metadata but not the actual data (data remains in tiers).
 */
export interface StorageSnapshot {
  /** Snapshot format version (for compatibility) */
  version: number;

  /** When this snapshot was created */
  exportedAt: Date;

  /** All keys present in cold tier (source of truth) */
  keys: string[];

  /** Metadata for each key */
  metadata: Record<string, StorageMetadata>;

  /** Statistics at time of export */
  stats: AllTierStats;
}