// WIP library to store cold objects in S3, warm objects on disk, and hot objects in memory.
// Node.js
// TypeScript
/**
 * Per-entry metadata kept next to the stored bytes in a tier.
 *
 * @remarks
 * The fields on this record support:
 * - TTL management and expiration
 * - Access tracking for LRU/eviction policies
 * - Data integrity verification via checksum
 * - Content-type information for HTTP serving
 */
export interface StorageMetadata {
  /** Original, human-readable key the data was stored under. */
  key: string;

  /** Size of the data in bytes, measured before compression. */
  size: number;

  /** When the data was first created. */
  createdAt: Date;

  /** When the data was most recently accessed. */
  lastAccessed: Date;

  /** How many times the data has been accessed. */
  accessCount: number;

  /**
   * Optional expiration timestamp.
   *
   * @remarks
   * The entry counts as expired once the current time is later than `ttl`.
   * When omitted, no expiration timestamp is recorded for the entry.
   */
  ttl?: Date;

  /** Whether the stored data is compressed (e.g., with gzip). */
  compressed: boolean;

  /** SHA256 checksum of the data, used for integrity verification. */
  checksum: string;

  /** Optional MIME type (e.g., 'text/html', 'application/json'). */
  mimeType?: string;

  /** Optional encoding (e.g., 'gzip', 'base64'). */
  encoding?: string;

  /** Arbitrary user-defined string fields attached to the entry. */
  customMetadata?: Record<string, string>;
}
45
/**
 * Counters describing one storage tier.
 *
 * @remarks
 * Intended for monitoring cache performance and for capacity planning.
 * The optional counters are only present when the tier tracks them.
 */
export interface TierStats {
  /** Total number of bytes held by this tier. */
  bytes: number;

  /** Total number of items held by this tier. */
  items: number;

  /** Cache hits (present only if the tier implements hit tracking). */
  hits?: number;

  /** Cache misses (present only if the tier implements miss tracking). */
  misses?: number;

  /** Evictions caused by size/count limits (present only if the tier implements eviction). */
  evictions?: number;
}
68
/**
 * Statistics aggregated over every configured tier.
 *
 * @remarks
 * Gives a single view of cache performance across the whole storage hierarchy.
 */
export interface AllTierStats {
  /** Hot-tier statistics (present only if a hot tier is configured). */
  hot?: TierStats;

  /** Warm-tier statistics (present only if a warm tier is configured). */
  warm?: TierStats;

  /** Cold-tier statistics (always present). */
  cold: TierStats;

  /** Sum of hits across all tiers. */
  totalHits: number;

  /** Sum of misses across all tiers. */
  totalMisses: number;

  /** Hit rate expressed as a fraction in the range 0–1 (not 0–100). */
  hitRate: number;
}
94
/**
 * Contract every storage tier implementation must fulfil.
 *
 * @remarks
 * This is the core abstraction that makes backends pluggable.
 * Implementations can be memory-based (Map, Redis), disk-based (filesystem, SQLite),
 * or cloud-based (S3, R2, etc.).
 *
 * @example
 * ```typescript
 * class RedisStorageTier implements StorageTier {
 *   constructor(private client: RedisClient) {}
 *
 *   async get(key: string): Promise<Uint8Array | null> {
 *     const buffer = await this.client.getBuffer(key);
 *     return buffer ? new Uint8Array(buffer) : null;
 *   }
 *
 *   // ... implement other methods
 * }
 * ```
 */
export interface StorageTier {
  /**
   * Retrieve the data stored under a key.
   *
   * @param key - The key to retrieve
   * @returns The data as a Uint8Array, or null if the key is not found
   */
  get(key: string): Promise<Uint8Array | null>;

  /**
   * Store data together with its metadata.
   *
   * @param key - The key to store under
   * @param data - The data to store (as Uint8Array)
   * @param metadata - Metadata to store alongside the data
   *
   * @remarks
   * An existing entry under the same key should be overwritten.
   */
  set(key: string, data: Uint8Array, metadata: StorageMetadata): Promise<void>;

  /**
   * Delete the data stored under a key.
   *
   * @param key - The key to delete
   *
   * @remarks
   * Should not throw if the key doesn't exist.
   */
  delete(key: string): Promise<void>;

  /**
   * Check whether a key exists in this tier.
   *
   * @param key - The key to check
   * @returns true if the key exists, false otherwise
   */
  exists(key: string): Promise<boolean>;

  /**
   * List the keys in this tier, optionally restricted to a prefix.
   *
   * @param prefix - Optional prefix to filter keys (e.g., 'user:' matches 'user:123', 'user:456')
   * @returns An async iterator of keys
   *
   * @remarks
   * Implementations should stream keys rather than loading them all into memory.
   * Useful for prefix-based invalidation and cache warming.
   *
   * @example
   * ```typescript
   * for await (const key of tier.listKeys('site:')) {
   *   console.log(key); // 'site:abc', 'site:xyz', etc.
   * }
   * ```
   */
  listKeys(prefix?: string): AsyncIterableIterator<string>;

  /**
   * Delete several keys in a single operation.
   *
   * @param keys - Array of keys to delete
   *
   * @remarks
   * Meant to be more efficient than calling delete() in a loop;
   * implementations should batch deletions where possible.
   */
  deleteMany(keys: string[]): Promise<void>;

  /**
   * Retrieve the metadata for a key without fetching the data itself.
   *
   * @param key - The key to get metadata for
   * @returns The metadata, or null if the key is not found
   *
   * @remarks
   * Useful for checking TTL, access counts, etc. without loading large data.
   */
  getMetadata(key: string): Promise<StorageMetadata | null>;

  /**
   * Replace the metadata for a key without modifying its data.
   *
   * @param key - The key to update metadata for
   * @param metadata - The new metadata
   *
   * @remarks
   * Useful for updating TTL (via touch()) or access counts.
   */
  setMetadata(key: string, metadata: StorageMetadata): Promise<void>;

  /**
   * Report statistics about this tier.
   *
   * @returns Statistics including size, item count, hits, misses, etc.
   */
  getStats(): Promise<TierStats>;

  /**
   * Remove all data from this tier.
   *
   * @remarks
   * Use with caution! This deletes everything stored in the tier.
   */
  clear(): Promise<void>;
}
223
/**
 * Configuration for the TieredStorage system.
 *
 * @remarks
 * The tiered storage system uses a cascading containment model:
 * - Hot tier (optional): Fastest, smallest capacity (memory/Redis)
 * - Warm tier (optional): Medium speed, medium capacity (disk/database)
 * - Cold tier (required): Slowest, unlimited capacity (S3/object storage)
 *
 * Data flows down on writes (hot → warm → cold) and bubbles up on reads (cold → warm → hot).
 */
export interface TieredStorageConfig {
  /** The tiers that make up the storage hierarchy. */
  tiers: {
    /** Optional hot tier - fastest, smallest capacity (e.g., in-memory, Redis). */
    hot?: StorageTier;

    /** Optional warm tier - medium speed, medium capacity (e.g., disk, SQLite, Postgres). */
    warm?: StorageTier;

    /** Required cold tier - slowest, largest capacity (e.g., S3, R2, object storage). */
    cold: StorageTier;
  };

  /**
   * Whether data is automatically compressed before it is stored.
   *
   * @defaultValue false
   *
   * @remarks
   * Uses gzip compression. Compression is transparent: data is automatically
   * decompressed on retrieval. The `compressed` flag in metadata records the
   * compression state.
   */
  compression?: boolean;

  /**
   * Default TTL (time-to-live) in milliseconds.
   *
   * @remarks
   * Data expires after this duration. Can be overridden per key via SetOptions.
   * If not set, data never expires.
   */
  defaultTTL?: number;

  /**
   * Strategy for promoting data to upper tiers on cache miss.
   *
   * @defaultValue 'lazy'
   *
   * @remarks
   * - 'eager': Immediately promote data to all upper tiers on read
   * - 'lazy': Don't automatically promote; rely on explicit promotion or next write
   *
   * Eager promotion increases hot tier hit rate but adds write overhead.
   * Lazy promotion reduces writes but may serve from lower tiers more often.
   */
  promotionStrategy?: 'eager' | 'lazy';

  /**
   * Custom serialization/deserialization functions.
   *
   * @remarks
   * By default, JSON serialization is used. Provide custom functions for:
   * - Non-JSON types (e.g., Buffer, custom classes)
   * - Performance optimization (e.g., msgpack, protobuf)
   * - Encryption (serialize includes encryption, deserialize includes decryption)
   */
  serialization?: {
    /** Convert a value into the Uint8Array that will be stored. */
    serialize: (data: unknown) => Promise<Uint8Array>;

    /** Convert a stored Uint8Array back into the original value. */
    deserialize: (data: Uint8Array) => Promise<unknown>;
  };
}
301
/**
 * Per-call options for storing data in the cache.
 *
 * @remarks
 * These options give fine-grained control over where and how data is stored.
 */
export interface SetOptions {
  /**
   * Custom TTL in milliseconds for this specific key.
   *
   * @remarks
   * Overrides the default TTL from TieredStorageConfig.
   * Data expires after this duration, counted from the current time.
   */
  ttl?: number;

  /**
   * Custom metadata to attach to this key.
   *
   * @remarks
   * Merged with system-generated metadata (size, checksum, timestamps).
   * Useful for application-specific information such as content-type, encoding, etc.
   */
  metadata?: Record<string, string>;

  /**
   * Upper tiers that should not receive this write.
   *
   * @remarks
   * Useful for controlling which tiers receive data. For example:
   * - Large files: `skipTiers: ['hot']` keeps them out of memory
   * - Small critical files: omit `skipTiers` so every tier receives the data
   *
   * Only `'hot'` and `'warm'` can be listed. The cold tier can never be
   * skipped (it's the source of truth).
   *
   * @example
   * ```typescript
   * // Store large file only in warm and cold (skip memory)
   * await storage.set('large-video.mp4', videoData, { skipTiers: ['hot'] });
   *
   * // Store index.html in all tiers for fast access
   * await storage.set('index.html', htmlData); // No skipping
   * ```
   */
  skipTiers?: ('hot' | 'warm')[];
}
348
/**
 * Outcome of a read that returns the data together with its metadata.
 *
 * @typeParam T - The type of data being retrieved
 *
 * @remarks
 * Carries both the data and an indication of which tier served it.
 */
export interface StorageResult<T> {
  /** The data that was retrieved. */
  data: T;

  /** The metadata associated with that data. */
  metadata: StorageMetadata;

  /** The tier the data was served from. */
  source: 'hot' | 'warm' | 'cold';
}
367
/**
 * Outcome of storing data in the cache.
 *
 * @remarks
 * Reports which tiers successfully received the data.
 */
export interface SetResult {
  /** The key that was written. */
  key: string;

  /** The metadata that was stored with the data. */
  metadata: StorageMetadata;

  /** The tiers that received the data. */
  tiersWritten: ('hot' | 'warm' | 'cold')[];
}
384
/**
 * Point-in-time snapshot of the entire storage state.
 *
 * @remarks
 * Used for export/import, backup, and migration scenarios.
 * A snapshot carries metadata only; the actual data remains in the tiers.
 */
export interface StorageSnapshot {
  /** Snapshot format version (for compatibility). */
  version: number;

  /** When this snapshot was created. */
  exportedAt: Date;

  /** Every key present in the cold tier (the source of truth). */
  keys: string[];

  /** Metadata for each key. */
  metadata: Record<string, StorageMetadata>;

  /** Statistics at the time of export. */
  stats: AllTierStats;
}
405 /** Statistics at time of export */
406 stats: AllTierStats;
407}