WIP library that stores cold objects in S3, warm objects on disk, and hot objects in memory.
Node.js
TypeScript
/**
 * Metadata associated with stored data in a tier.
 *
 * @remarks
 * This metadata is stored alongside the actual data and is used for:
 * - TTL management and expiration
 * - Access tracking for LRU/eviction policies
 * - Data integrity verification via checksum
 * - Content type information for HTTP serving
 */
export interface StorageMetadata {
  /** Original, human-readable key the data was stored under. */
  key: string;

  /** Size of the data in bytes, measured before any compression. */
  size: number;

  /** Timestamp when the data was first created. */
  createdAt: Date;

  /** Timestamp when the data was last accessed. */
  lastAccessed: Date;

  /** Number of times this data has been accessed. */
  accessCount: number;

  /**
   * Optional expiration timestamp.
   *
   * @remarks
   * Despite the name, this is an absolute point in time rather than a
   * duration: the data is expired once the current time is later than `ttl`.
   * When omitted, no expiration is recorded for the entry.
   */
  ttl?: Date;

  /** Whether the stored data is compressed (e.g., with gzip). */
  compressed: boolean;

  /** SHA256 checksum of the data, used for integrity verification. */
  checksum: string;

  /** Optional MIME type (e.g., 'text/html', 'application/json'). */
  mimeType?: string;

  /** Optional encoding (e.g., 'gzip', 'base64'). */
  encoding?: string;

  /** User-defined metadata fields (string keys and string values only). */
  customMetadata?: Record<string, string>;
}
45
/**
 * Statistics for a single storage tier.
 *
 * @remarks
 * Used for monitoring cache performance and capacity planning.
 * The counters below are optional because not every tier implementation
 * tracks them.
 */
export interface TierStats {
  /** Total bytes stored in this tier. */
  bytes: number;

  /** Total number of items stored in this tier. */
  items: number;

  /** Number of cache hits (present only if the tier implements hit tracking). */
  hits?: number;

  /** Number of cache misses (present only if the tier implements miss tracking). */
  misses?: number;

  /** Number of evictions due to size/count limits (present only if the tier implements eviction). */
  evictions?: number;
}
68
/**
 * Aggregated statistics across all configured tiers.
 *
 * @remarks
 * Provides a complete view of cache performance across the entire storage hierarchy.
 */
export interface AllTierStats {
  /** Statistics for the hot tier (present only if a hot tier is configured). */
  hot?: TierStats;

  /** Statistics for the warm tier (present only if a warm tier is configured). */
  warm?: TierStats;

  /** Statistics for the cold tier (always present). */
  cold: TierStats;

  /** Total hits across all tiers. */
  totalHits: number;

  /** Total misses across all tiers. */
  totalMisses: number;

  /**
   * Overall hit rate expressed as a ratio in the range 0–1
   * (for example, `0.75` means 75% of lookups were hits).
   */
  hitRate: number;
}
94
/**
 * Result from a combined get+metadata operation on a tier.
 */
export interface TierGetResult {
  /** The retrieved data */
  data: Uint8Array;

  /** Metadata associated with the data */
  metadata: StorageMetadata;
}

/**
 * Interface that all storage tier implementations must satisfy.
 *
 * @remarks
 * This is the core abstraction that allows pluggable backends.
 * Implementations can be memory-based (Map, Redis), disk-based (filesystem, SQLite),
 * or cloud-based (S3, R2, etc.).
 *
 * @example
 * ```typescript
 * class RedisStorageTier implements StorageTier {
 *   constructor(private client: RedisClient) {}
 *
 *   async get(key: string): Promise<Uint8Array | null> {
 *     const buffer = await this.client.getBuffer(key);
 *     return buffer ? new Uint8Array(buffer) : null;
 *   }
 *
 *   // ... implement other methods
 * }
 * ```
 */
export interface StorageTier {
  /**
   * Retrieve data for a key.
   *
   * @param key - The key to retrieve
   * @returns The data as a Uint8Array, or null if not found
   */
  get(key: string): Promise<Uint8Array | null>;

  /**
   * Retrieve data and metadata together in a single operation.
   *
   * @param key - The key to retrieve
   * @returns The data and metadata, or null if not found
   *
   * @remarks
   * This is more efficient than calling get() and getMetadata() separately,
   * especially for disk and network-based tiers.
   *
   * This method is optional: a tier may not provide it, so callers must
   * check that it is defined before invoking it.
   */
  getWithMetadata?(key: string): Promise<TierGetResult | null>;

  /**
   * Store data with associated metadata.
   *
   * @param key - The key to store under
   * @param data - The data to store (as Uint8Array)
   * @param metadata - Metadata to store alongside the data
   *
   * @remarks
   * If the key already exists, it should be overwritten.
   */
  set(key: string, data: Uint8Array, metadata: StorageMetadata): Promise<void>;

  /**
   * Delete data for a key.
   *
   * @param key - The key to delete
   *
   * @remarks
   * Should not throw if the key doesn't exist.
   */
  delete(key: string): Promise<void>;

  /**
   * Check if a key exists in this tier.
   *
   * @param key - The key to check
   * @returns true if the key exists, false otherwise
   */
  exists(key: string): Promise<boolean>;

  /**
   * List all keys in this tier, optionally filtered by prefix.
   *
   * @param prefix - Optional prefix to filter keys (e.g., 'user:' matches 'user:123', 'user:456')
   * @returns An async iterator of keys
   *
   * @remarks
   * This should be memory-efficient and stream keys rather than loading all into memory.
   * Useful for prefix-based invalidation and cache warming.
   *
   * @example
   * ```typescript
   * for await (const key of tier.listKeys('site:')) {
   *   console.log(key); // 'site:abc', 'site:xyz', etc.
   * }
   * ```
   */
  listKeys(prefix?: string): AsyncIterableIterator<string>;

  /**
   * Delete multiple keys in a single operation.
   *
   * @param keys - Array of keys to delete
   *
   * @remarks
   * This is more efficient than calling delete() in a loop.
   * Implementations should batch deletions where possible.
   */
  deleteMany(keys: string[]): Promise<void>;

  /**
   * Retrieve metadata for a key without fetching the data.
   *
   * @param key - The key to get metadata for
   * @returns The metadata, or null if not found
   *
   * @remarks
   * This is useful for checking TTL, access counts, etc. without loading large data.
   */
  getMetadata(key: string): Promise<StorageMetadata | null>;

  /**
   * Update metadata for a key without modifying the data.
   *
   * @param key - The key to update metadata for
   * @param metadata - The new metadata
   *
   * @remarks
   * Useful for updating TTL (via touch()) or access counts.
   */
  setMetadata(key: string, metadata: StorageMetadata): Promise<void>;

  /**
   * Get statistics about this tier.
   *
   * @returns Statistics including size, item count, hits, misses, etc.
   */
  getStats(): Promise<TierStats>;

  /**
   * Clear all data from this tier.
   *
   * @remarks
   * Use with caution! This will delete all data in the tier.
   */
  clear(): Promise<void>;
}
245
/**
 * Rule for automatic tier placement based on key patterns.
 *
 * @remarks
 * Rules are evaluated in order. First matching rule wins.
 * Use this to define which keys go to which tiers without
 * specifying skipTiers on every set() call.
 *
 * @example
 * ```typescript
 * placementRules: [
 *   { pattern: 'index.html', tiers: ['hot', 'warm', 'cold'] },
 *   { pattern: '*.html', tiers: ['warm', 'cold'] },
 *   { pattern: 'assets/**', tiers: ['warm', 'cold'] },
 *   { pattern: '**', tiers: ['warm', 'cold'] }, // default
 * ]
 * ```
 */
export interface PlacementRule {
  /**
   * Glob pattern to match against keys.
   *
   * @remarks
   * Supports basic globs:
   * - `*` matches any characters except `/`
   * - `**` matches any characters including `/`
   * - Exact matches work too: `index.html`
   */
  pattern: string;

  /**
   * Which tiers to write to for matching keys.
   *
   * @remarks
   * Cold is always included (source of truth).
   * Use `['hot', 'warm', 'cold']` for critical files.
   * Use `['warm', 'cold']` for large files.
   * Use `['cold']` for archival only.
   *
   * NOTE(review): the type itself does not require `'cold'` to appear in the
   * array; how a rule that omits it is treated is decided by the consumer of
   * this interface — confirm against the implementation.
   */
  tiers: ('hot' | 'warm' | 'cold')[];
}
287
/**
 * Configuration for the TieredStorage system.
 *
 * @remarks
 * The tiered storage system uses a cascading containment model:
 * - Hot tier (optional): Fastest, smallest capacity (memory/Redis)
 * - Warm tier (optional): Medium speed, medium capacity (disk/database)
 * - Cold tier (required): Slowest, unlimited capacity (S3/object storage)
 *
 * Data flows down on writes (hot → warm → cold) and bubbles up on reads (cold → warm → hot).
 *
 * This interface is not generic; stored values pass through `serialization`
 * as `unknown`.
 */
export interface TieredStorageConfig {
  /** Storage tier configuration */
  tiers: {
    /** Optional hot tier - fastest, smallest capacity (e.g., in-memory, Redis) */
    hot?: StorageTier;

    /** Optional warm tier - medium speed, medium capacity (e.g., disk, SQLite, Postgres) */
    warm?: StorageTier;

    /** Required cold tier - slowest, largest capacity (e.g., S3, R2, object storage) */
    cold: StorageTier;
  };

  /** Rules for automatic tier placement based on key patterns. First match wins. */
  placementRules?: PlacementRule[];

  /**
   * Whether to automatically compress data before storing.
   *
   * @defaultValue false
   *
   * @remarks
   * Uses gzip compression. Compression is transparent - data is automatically
   * decompressed on retrieval. The `compressed` flag in metadata indicates compression state.
   */
  compression?: boolean;

  /**
   * Default TTL (time-to-live) in milliseconds.
   *
   * @remarks
   * Data will expire after this duration. Can be overridden per-key via SetOptions.
   * If not set, data never expires.
   */
  defaultTTL?: number;

  /**
   * Strategy for promoting data to upper tiers on cache miss.
   *
   * @defaultValue 'lazy'
   *
   * @remarks
   * - 'eager': Immediately promote data to all upper tiers on read
   * - 'lazy': Don't automatically promote; rely on explicit promotion or next write
   *
   * Eager promotion increases hot tier hit rate but adds write overhead.
   * Lazy promotion reduces writes but may serve from lower tiers more often.
   */
  promotionStrategy?: 'eager' | 'lazy';

  /**
   * Custom serialization/deserialization functions.
   *
   * @remarks
   * By default, JSON serialization is used. Provide custom functions for:
   * - Non-JSON types (e.g., Buffer, custom classes)
   * - Performance optimization (e.g., msgpack, protobuf)
   * - Encryption (serialize includes encryption, deserialize includes decryption)
   *
   * Both functions must be supplied together when this option is used.
   */
  serialization?: {
    /** Convert data to Uint8Array for storage */
    serialize: (data: unknown) => Promise<Uint8Array>;

    /** Convert Uint8Array back to original data */
    deserialize: (data: Uint8Array) => Promise<unknown>;
  };
}
368
/**
 * Options for setting data in the cache.
 *
 * @remarks
 * These options allow fine-grained control over where and how data is stored.
 */
export interface SetOptions {
  /**
   * Custom TTL in milliseconds for this specific key.
   *
   * @remarks
   * Overrides the default TTL from TieredStorageConfig.
   * Data will expire after this duration from the current time.
   */
  ttl?: number;

  /**
   * Custom metadata to attach to this key.
   *
   * @remarks
   * Merged with system-generated metadata (size, checksum, timestamps).
   * Useful for storing application-specific information like content-type, encoding, etc.
   */
  metadata?: Record<string, string>;

  /**
   * Skip writing to specific tiers.
   *
   * @remarks
   * Useful for controlling which tiers receive data. For example:
   * - Large files: `skipTiers: ['hot']` to avoid filling memory
   * - Small critical files: `skipTiers: ['warm']` to keep them in hot
   *   (and cold) without using the warm tier
   *
   * Note: Cold tier can never be skipped (it's the source of truth), which is
   * why only `'hot'` and `'warm'` are accepted here.
   *
   * @example
   * ```typescript
   * // Store large file only in warm and cold (skip memory)
   * await storage.set('large-video.mp4', videoData, { skipTiers: ['hot'] });
   *
   * // Store index.html in all tiers for fast access
   * await storage.set('index.html', htmlData); // No skipping
   * ```
   */
  skipTiers?: ('hot' | 'warm')[];
}
415
/**
 * Result from retrieving data with metadata.
 *
 * @typeParam T - The type of data being retrieved
 *
 * @remarks
 * Includes both the data and information about where it was served from.
 */
export interface StorageResult<T> {
  /** The retrieved data. */
  data: T;

  /** Metadata associated with the data. */
  metadata: StorageMetadata;

  /** Which tier the data was served from. */
  source: 'hot' | 'warm' | 'cold';
}
434
/**
 * Result from setting data in the cache.
 *
 * @remarks
 * Indicates which tiers successfully received the data.
 */
export interface SetResult {
  /** The key that was set. */
  key: string;

  /** Metadata that was stored with the data. */
  metadata: StorageMetadata;

  /** Which tiers received the data. */
  tiersWritten: ('hot' | 'warm' | 'cold')[];
}
451
/**
 * Snapshot of the entire storage state.
 *
 * @remarks
 * Used for export/import, backup, and migration scenarios.
 * The snapshot includes metadata but not the actual data (data remains in tiers).
 */
export interface StorageSnapshot {
  /** Snapshot format version (for compatibility). */
  version: number;

  /** When this snapshot was created. */
  exportedAt: Date;

  /** All keys present in the cold tier (source of truth). */
  keys: string[];

  /** Metadata for each key, indexed by key. */
  metadata: Record<string, StorageMetadata>;

  /** Statistics at time of export. */
  stats: AllTierStats;
}
472 /** Statistics at time of export */
473 stats: AllTierStats;
474}