WIP library to store cold objects in S3, warm objects on disk, and hot objects in memory.
Node.js, TypeScript.
/**
 * Metadata associated with stored data in a tier.
 *
 * @remarks
 * This metadata is stored alongside the actual data and is used for:
 * - TTL management and expiration
 * - Access tracking for LRU/eviction policies
 * - Data integrity verification via checksum
 * - Content type information for HTTP serving
 */
export interface StorageMetadata {
  /** Original key used to store the data (human-readable) */
  key: string;

  /** Size of the data in bytes (uncompressed size) */
  size: number;

  /** Timestamp when the data was first created */
  createdAt: Date;

  /** Timestamp when the data was last accessed */
  lastAccessed: Date;

  /** Number of times this data has been accessed */
  accessCount: number;

  /** Optional expiration timestamp. Data expires when current time > ttl */
  ttl?: Date;

  /** Whether the data is compressed (e.g., with gzip) */
  compressed: boolean;

  /** SHA256 checksum of the data for integrity verification */
  checksum: string;

  /** Optional MIME type (e.g., 'text/html', 'application/json') */
  mimeType?: string;

  /** Optional encoding (e.g., 'gzip', 'base64') */
  encoding?: string;

  /** User-defined metadata fields */
  customMetadata?: Record<string, string>;
}

/**
 * Statistics for a single storage tier.
 *
 * @remarks
 * Used for monitoring cache performance and capacity planning.
 */
export interface TierStats {
  /** Total bytes stored in this tier */
  bytes: number;

  /** Total number of items stored in this tier */
  items: number;

  /** Number of cache hits (only tracked if tier implements hit tracking) */
  hits?: number;

  /** Number of cache misses (only tracked if tier implements miss tracking) */
  misses?: number;

  /** Number of evictions due to size/count limits (only tracked if tier implements eviction) */
  evictions?: number;
}

/**
 * Aggregated statistics across all configured tiers.
 *
 * @remarks
 * Provides a complete view of cache performance across the entire storage hierarchy.
 */
export interface AllTierStats {
  /** Statistics for hot tier (if configured) */
  hot?: TierStats;

  /** Statistics for warm tier (if configured) */
  warm?: TierStats;

  /** Statistics for cold tier (always present) */
  cold: TierStats;

  /** Total hits across all tiers */
  totalHits: number;

  /** Total misses across all tiers */
  totalMisses: number;

  /** Hit rate as a fraction in the range 0-1 (not 0-100) */
  hitRate: number;
}

/**
 * Interface that all storage tier implementations must satisfy.
 *
 * @remarks
 * This is the core abstraction that allows pluggable backends.
 * Implementations can be memory-based (Map, Redis), disk-based (filesystem, SQLite),
 * or cloud-based (S3, R2, etc.).
 *
 * @example
 * ```typescript
 * class RedisStorageTier implements StorageTier {
 *   constructor(private client: RedisClient) {}
 *
 *   async get(key: string): Promise<Uint8Array | null> {
 *     const buffer = await this.client.getBuffer(key);
 *     return buffer ? new Uint8Array(buffer) : null;
 *   }
 *
 *   // ... implement other methods
 * }
 * ```
 */
export interface StorageTier {
  /**
   * Retrieve data for a key.
   *
   * @param key - The key to retrieve
   * @returns The data as a Uint8Array, or null if not found
   */
  get(key: string): Promise<Uint8Array | null>;

  /**
   * Store data with associated metadata.
   *
   * @param key - The key to store under
   * @param data - The data to store (as Uint8Array)
   * @param metadata - Metadata to store alongside the data
   *
   * @remarks
   * If the key already exists, it should be overwritten.
   */
  set(key: string, data: Uint8Array, metadata: StorageMetadata): Promise<void>;

  /**
   * Delete data for a key.
   *
   * @param key - The key to delete
   *
   * @remarks
   * Should not throw if the key doesn't exist.
   */
  delete(key: string): Promise<void>;

  /**
   * Check if a key exists in this tier.
   *
   * @param key - The key to check
   * @returns true if the key exists, false otherwise
   */
  exists(key: string): Promise<boolean>;

  /**
   * List all keys in this tier, optionally filtered by prefix.
   *
   * @param prefix - Optional prefix to filter keys (e.g., 'user:' matches 'user:123', 'user:456')
   * @returns An async iterator of keys
   *
   * @remarks
   * This should be memory-efficient and stream keys rather than loading all into memory.
   * Useful for prefix-based invalidation and cache warming.
   *
   * @example
   * ```typescript
   * for await (const key of tier.listKeys('site:')) {
   *   console.log(key); // 'site:abc', 'site:xyz', etc.
   * }
   * ```
   */
  listKeys(prefix?: string): AsyncIterableIterator<string>;

  /**
   * Delete multiple keys in a single operation.
   *
   * @param keys - Array of keys to delete
   *
   * @remarks
   * This is more efficient than calling delete() in a loop.
   * Implementations should batch deletions where possible.
   */
  deleteMany(keys: string[]): Promise<void>;

  /**
   * Retrieve metadata for a key without fetching the data.
   *
   * @param key - The key to get metadata for
   * @returns The metadata, or null if not found
   *
   * @remarks
   * This is useful for checking TTL, access counts, etc. without loading large data.
   */
  getMetadata(key: string): Promise<StorageMetadata | null>;

  /**
   * Update metadata for a key without modifying the data.
   *
   * @param key - The key to update metadata for
   * @param metadata - The new metadata
   *
   * @remarks
   * Useful for updating TTL (via touch()) or access counts.
   */
  setMetadata(key: string, metadata: StorageMetadata): Promise<void>;

  /**
   * Get statistics about this tier.
   *
   * @returns Statistics including size, item count, hits, misses, etc.
   */
  getStats(): Promise<TierStats>;

  /**
   * Clear all data from this tier.
   *
   * @remarks
   * Use with caution! This will delete all data in the tier.
   */
  clear(): Promise<void>;
}

/**
 * Configuration for the TieredStorage system.
 *
 * @remarks
 * The tiered storage system uses a cascading containment model:
 * - Hot tier (optional): Fastest, smallest capacity (memory/Redis)
 * - Warm tier (optional): Medium speed, medium capacity (disk/database)
 * - Cold tier (required): Slowest, unlimited capacity (S3/object storage)
 *
 * Data flows down on writes (hot → warm → cold) and bubbles up on reads (cold → warm → hot).
 */
export interface TieredStorageConfig {
  /** Storage tier configuration */
  tiers: {
    /** Optional hot tier - fastest, smallest capacity (e.g., in-memory, Redis) */
    hot?: StorageTier;

    /** Optional warm tier - medium speed, medium capacity (e.g., disk, SQLite, Postgres) */
    warm?: StorageTier;

    /** Required cold tier - slowest, largest capacity (e.g., S3, R2, object storage) */
    cold: StorageTier;
  };

  /**
   * Whether to automatically compress data before storing.
   *
   * @defaultValue false
   *
   * @remarks
   * Uses gzip compression. Compression is transparent - data is automatically
   * decompressed on retrieval. The `compressed` flag in metadata indicates compression state.
   */
  compression?: boolean;

  /**
   * Default TTL (time-to-live) in milliseconds.
   *
   * @remarks
   * Data will expire after this duration. Can be overridden per-key via SetOptions.
   * If not set, data never expires.
   */
  defaultTTL?: number;

  /**
   * Strategy for promoting data to upper tiers on cache miss.
   *
   * @defaultValue 'lazy'
   *
   * @remarks
   * - 'eager': Immediately promote data to all upper tiers on read
   * - 'lazy': Don't automatically promote; rely on explicit promotion or next write
   *
   * Eager promotion increases hot tier hit rate but adds write overhead.
   * Lazy promotion reduces writes but may serve from lower tiers more often.
   */
  promotionStrategy?: 'eager' | 'lazy';

  /**
   * Custom serialization/deserialization functions.
   *
   * @remarks
   * By default, JSON serialization is used. Provide custom functions for:
   * - Non-JSON types (e.g., Buffer, custom classes)
   * - Performance optimization (e.g., msgpack, protobuf)
   * - Encryption (serialize includes encryption, deserialize includes decryption)
   */
  serialization?: {
    /** Convert data to Uint8Array for storage */
    serialize: (data: unknown) => Promise<Uint8Array>;

    /** Convert Uint8Array back to original data */
    deserialize: (data: Uint8Array) => Promise<unknown>;
  };
}

/**
 * Options for setting data in the cache.
 *
 * @remarks
 * These options allow fine-grained control over where and how data is stored.
 */
export interface SetOptions {
  /**
   * Custom TTL in milliseconds for this specific key.
   *
   * @remarks
   * Overrides the default TTL from TieredStorageConfig.
   * Data will expire after this duration from the current time.
   */
  ttl?: number;

  /**
   * Custom metadata to attach to this key.
   *
   * @remarks
   * Merged with system-generated metadata (size, checksum, timestamps).
   * Useful for storing application-specific information like content-type, encoding, etc.
   */
  metadata?: Record<string, string>;

  /**
   * Skip writing to specific tiers.
   *
   * @remarks
   * Useful for controlling which tiers receive data. For example:
   * - Large files: `skipTiers: ['hot']` to avoid filling memory
   * - Small critical files: omit `skipTiers` so every tier (including hot) receives a copy
   *
   * Note: Cold tier can never be skipped (it's the source of truth).
   *
   * @example
   * ```typescript
   * // Store large file only in warm and cold (skip memory)
   * await storage.set('large-video.mp4', videoData, { skipTiers: ['hot'] });
   *
   * // Store index.html in all tiers for fast access
   * await storage.set('index.html', htmlData); // No skipping
   * ```
   */
  skipTiers?: ('hot' | 'warm')[];
}

/**
 * Result from retrieving data with metadata.
 *
 * @typeParam T - The type of data being retrieved
 *
 * @remarks
 * Includes both the data and information about where it was served from.
 */
export interface StorageResult<T> {
  /** The retrieved data */
  data: T;

  /** Metadata associated with the data */
  metadata: StorageMetadata;

  /** Which tier the data was served from */
  source: 'hot' | 'warm' | 'cold';
}

/**
 * Result from setting data in the cache.
 *
 * @remarks
 * Indicates which tiers successfully received the data.
 */
export interface SetResult {
  /** The key that was set */
  key: string;

  /** Metadata that was stored with the data */
  metadata: StorageMetadata;

  /** Which tiers received the data */
  tiersWritten: ('hot' | 'warm' | 'cold')[];
}

/**
 * Snapshot of the entire storage state.
 *
 * @remarks
 * Used for export/import, backup, and migration scenarios.
 * The snapshot includes metadata but not the actual data (data remains in tiers).
 */
export interface StorageSnapshot {
  /** Snapshot format version (for compatibility) */
  version: number;

  /** When this snapshot was created */
  exportedAt: Date;

  /** All keys present in cold tier (source of truth) */
  keys: string[];

  /** Metadata for each key */
  metadata: Record<string, StorageMetadata>;

  /** Statistics at time of export */
  stats: AllTierStats;
}