WIP library to store cold objects in S3, warm objects on disk, and hot objects in memory
nodejs
typescript
1import {
2 S3Client,
3 PutObjectCommand,
4 GetObjectCommand,
5 DeleteObjectCommand,
6 HeadObjectCommand,
7 ListObjectsV2Command,
8 DeleteObjectsCommand,
9 CopyObjectCommand,
10 type S3ClientConfig,
11} from '@aws-sdk/client-s3';
12import type { Readable } from 'node:stream';
13import type { StorageTier, StorageMetadata, TierStats } from '../types/index.js';
14
/**
 * Options accepted by the `S3StorageTier` constructor.
 */
export interface S3StorageTierConfig {
  /** Name of the bucket that holds the object data. */
  bucket: string;

  /** Region handed to the S3 client. */
  region: string;

  /**
   * Custom endpoint URL, for S3-compatible services (R2, MinIO, ...).
   * When omitted, no endpoint is passed to the S3 client.
   *
   * @example 'https://s3.us-east-1.amazonaws.com'
   * @example 'https://account-id.r2.cloudflarestorage.com'
   */
  endpoint?: string;

  /**
   * Static credentials for the S3 client.
   *
   * @remarks
   * When omitted, no credentials are passed to the client, which then falls
   * back to the default AWS credential chain (environment variables,
   * ~/.aws/credentials, IAM roles, etc.).
   */
  credentials?: {
    accessKeyId: string;
    secretAccessKey: string;
  };

  /**
   * String prepended to every key before it is sent to S3.
   *
   * @remarks
   * Handy for namespacing (multi-tenant setups, sharing one bucket between
   * several stores). Defaults to the empty string.
   *
   * @example 'tiered-storage/'
   */
  prefix?: string;

  /**
   * Whether to use path-style addressing.
   *
   * @defaultValue true
   *
   * @remarks
   * Most S3-compatible services (MinIO, R2, etc.) require path-style URLs.
   * AWS S3 uses virtual-hosted-style by default, but path-style also works.
   *
   * - true: `https://endpoint.com/bucket/key` (path-style)
   * - false: `https://bucket.endpoint.com/key` (virtual-hosted-style)
   */
  forcePathStyle?: boolean;

  /**
   * Separate bucket in which metadata is kept as standalone JSON objects.
   *
   * @remarks
   * **RECOMMENDED for production use!**
   *
   * Without this option, metadata lives in the S3 object metadata fields of
   * the data object itself, and every metadata update has to copy the whole
   * object onto itself, which is slow and expensive for large files.
   *
   * With this option, each object's metadata is written to this bucket as a
   * JSON document, so updating it is a single small write.
   *
   * **Benefits:**
   * - Fast metadata updates (no object copying)
   * - Much cheaper for large objects
   * - No impact on data object performance
   *
   * **Trade-offs:**
   * - Two buckets to manage
   * - Data and metadata can drift out of sync if not handled carefully
   * - Extra S3 API calls for metadata operations
   *
   * @example
   * ```typescript
   * const tier = new S3StorageTier({
   *   bucket: 'my-data-bucket',
   *   metadataBucket: 'my-metadata-bucket', // Separate bucket for metadata
   *   region: 'us-east-1',
   * });
   * ```
   */
  metadataBucket?: string;
}
107
/**
 * AWS S3 (or compatible) storage tier.
 *
 * @remarks
 * - Supports AWS S3, Cloudflare R2, MinIO, and other S3-compatible services
 * - Built on `@aws-sdk/client-s3`
 * - Typically used as the cold tier (source of truth)
 *
 * **Metadata Storage:**
 * Two modes, selected by `config.metadataBucket`:
 * - Not set: metadata is stored in the S3 object metadata fields of the data
 *   object (sent by S3 as `x-amz-meta-*` headers). Updating metadata copies
 *   the object onto itself.
 * - Set: metadata is stored as a JSON object in the metadata bucket under the
 *   data object's key with a `.meta` suffix. Data and metadata are written and
 *   deleted with separate requests, so they are not updated atomically.
 *
 * @example
 * ```typescript
 * const tier = new S3StorageTier({
 *   bucket: 'my-bucket',
 *   region: 'us-east-1',
 *   credentials: {
 *     accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
 *     secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
 *   },
 *   prefix: 'cache/',
 * });
 * ```
 *
 * @example Cloudflare R2
 * ```typescript
 * const tier = new S3StorageTier({
 *   bucket: 'my-bucket',
 *   region: 'auto',
 *   endpoint: 'https://account-id.r2.cloudflarestorage.com',
 *   credentials: {
 *     accessKeyId: process.env.R2_ACCESS_KEY_ID!,
 *     secretAccessKey: process.env.R2_SECRET_ACCESS_KEY!,
 *   },
 * });
 * ```
 */
export class S3StorageTier implements StorageTier {
  private readonly client: S3Client;
  private readonly prefix: string;
  private readonly metadataBucket?: string;

  /**
   * Create the tier and its underlying S3 client.
   *
   * @param config - Bucket, region and optional endpoint/credentials/prefix settings.
   */
  constructor(private readonly config: S3StorageTierConfig) {
    const clientConfig: S3ClientConfig = {
      region: config.region,
      // Most S3-compatible services need path-style URLs
      forcePathStyle: config.forcePathStyle ?? true,
      // Only pass optional settings when present so the SDK defaults apply otherwise.
      ...(config.endpoint && { endpoint: config.endpoint }),
      ...(config.credentials && { credentials: config.credentials }),
    };

    this.client = new S3Client(clientConfig);
    this.prefix = config.prefix ?? '';
    if (config.metadataBucket) {
      this.metadataBucket = config.metadataBucket;
    }
  }

  /**
   * Fetch an object's bytes.
   *
   * @param key - Key without the configured prefix.
   * @returns The object body, or `null` if the object does not exist or the
   *   response carries no body.
   * @throws Any S3 error other than a not-found error.
   */
  async get(key: string): Promise<Uint8Array | null> {
    try {
      const command = new GetObjectCommand({
        Bucket: this.config.bucket,
        Key: this.getS3Key(key),
      });

      const response = await this.client.send(command);

      if (!response.Body) {
        return null;
      }

      return await this.streamToUint8Array(response.Body as Readable);
    } catch (error) {
      if (this.isNoSuchKeyError(error)) {
        return null;
      }
      throw error;
    }
  }

  /**
   * Drain a readable stream into one contiguous `Uint8Array`.
   *
   * @throws Error if the stream yields a chunk that is not a `Uint8Array`.
   */
  private async streamToUint8Array(stream: Readable): Promise<Uint8Array> {
    const chunks: Uint8Array[] = [];
    let totalLength = 0;

    for await (const chunk of stream) {
      // Node `Buffer` is a `Uint8Array` subclass, so this check covers both;
      // chunks are copied once, into `result`, below.
      if (!(chunk instanceof Uint8Array)) {
        throw new Error('Unexpected chunk type in S3 stream');
      }
      chunks.push(chunk);
      totalLength += chunk.length;
    }

    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
      result.set(chunk, offset);
      offset += chunk.length;
    }

    return result;
  }

  /**
   * Whether `error` is an object whose `name` is `NoSuchKey` or `NotFound`.
   */
  private isNoSuchKeyError(error: unknown): boolean {
    return (
      typeof error === 'object' &&
      error !== null &&
      'name' in error &&
      (error.name === 'NoSuchKey' || error.name === 'NotFound')
    );
  }

  /**
   * Store an object together with its metadata.
   *
   * @remarks
   * With a metadata bucket, the data object and the `.meta` JSON object are
   * written concurrently with two independent requests; if one fails the
   * other may still have been written.
   *
   * @param key - Key without the configured prefix.
   * @param data - Object body.
   * @param metadata - Metadata to store alongside the object.
   */
  async set(key: string, data: Uint8Array, metadata: StorageMetadata): Promise<void> {
    const s3Key = this.getS3Key(key);

    if (this.metadataBucket) {
      const dataCommand = new PutObjectCommand({
        Bucket: this.config.bucket,
        Key: s3Key,
        Body: data,
        ContentLength: data.byteLength,
      });

      const metadataJson = JSON.stringify(metadata);
      const metadataBuffer = new TextEncoder().encode(metadataJson);
      const metadataCommand = new PutObjectCommand({
        Bucket: this.metadataBucket,
        Key: s3Key + '.meta',
        Body: metadataBuffer,
        ContentType: 'application/json',
      });

      await Promise.all([
        this.client.send(dataCommand),
        this.client.send(metadataCommand),
      ]);
    } else {
      const command = new PutObjectCommand({
        Bucket: this.config.bucket,
        Key: s3Key,
        Body: data,
        ContentLength: data.byteLength,
        Metadata: this.metadataToS3(metadata),
      });

      await this.client.send(command);
    }
  }

  /**
   * Delete an object (and its `.meta` object when a metadata bucket is used).
   *
   * @remarks
   * Not-found errors are ignored, so deleting a missing key resolves normally.
   *
   * @param key - Key without the configured prefix.
   * @throws Any S3 error other than a not-found error.
   */
  async delete(key: string): Promise<void> {
    const s3Key = this.getS3Key(key);

    try {
      const dataCommand = new DeleteObjectCommand({
        Bucket: this.config.bucket,
        Key: s3Key,
      });

      if (this.metadataBucket) {
        const metadataCommand = new DeleteObjectCommand({
          Bucket: this.metadataBucket,
          Key: s3Key + '.meta',
        });

        await Promise.all([
          this.client.send(dataCommand),
          this.client.send(metadataCommand).catch((error) => {
            if (!this.isNoSuchKeyError(error)) throw error;
          }),
        ]);
      } else {
        await this.client.send(dataCommand);
      }
    } catch (error) {
      if (!this.isNoSuchKeyError(error)) {
        throw error;
      }
    }
  }

  /**
   * Check whether an object exists in the data bucket.
   *
   * @param key - Key without the configured prefix.
   * @returns `true` if a HEAD request succeeds, `false` on a not-found error.
   * @throws Any S3 error other than a not-found error.
   */
  async exists(key: string): Promise<boolean> {
    try {
      const command = new HeadObjectCommand({
        Bucket: this.config.bucket,
        Key: this.getS3Key(key),
      });

      await this.client.send(command);
      return true;
    } catch (error) {
      if (this.isNoSuchKeyError(error)) {
        return false;
      }
      throw error;
    }
  }

  /**
   * Iterate over the keys in the data bucket, page by page.
   *
   * @param prefix - Optional key prefix (relative to the configured prefix)
   *   to restrict the listing to.
   * @returns Keys with the configured prefix stripped.
   */
  async *listKeys(prefix?: string): AsyncIterableIterator<string> {
    const s3Prefix = prefix ? this.getS3Key(prefix) : this.prefix;
    let continuationToken: string | undefined;

    do {
      const command = new ListObjectsV2Command({
        Bucket: this.config.bucket,
        Prefix: s3Prefix,
        ContinuationToken: continuationToken,
      });

      const response = await this.client.send(command);

      if (response.Contents) {
        for (const object of response.Contents) {
          if (object.Key) {
            // Remove prefix to get original key
            yield this.removePrefix(object.Key);
          }
        }
      }

      continuationToken = response.NextContinuationToken;
    } while (continuationToken);
  }

  /**
   * Delete many objects using batched DeleteObjects requests.
   *
   * @remarks
   * Keys are sent in batches of 1000. When a metadata bucket is configured,
   * the matching `.meta` objects are deleted in the same step. Not-found
   * errors from the metadata request are ignored (as in {@link delete});
   * every other failure is reported, including per-key failures that S3
   * returns inside an otherwise successful response.
   *
   * @param keys - Keys without the configured prefix.
   * @throws Error if any request fails or any key could not be deleted.
   */
  async deleteMany(keys: string[]): Promise<void> {
    if (keys.length === 0) return;

    const batchSize = 1000;

    for (let i = 0; i < keys.length; i += batchSize) {
      const s3Keys = keys.slice(i, i + batchSize).map((key) => this.getS3Key(key));

      if (this.metadataBucket) {
        const metadataKeys = s3Keys.map((s3Key) => s3Key + '.meta');

        await Promise.all([
          this.deleteBatch(this.config.bucket, s3Keys),
          this.deleteBatch(this.metadataBucket, metadataKeys).catch((error) => {
            if (!this.isNoSuchKeyError(error)) throw error;
          }),
        ]);
      } else {
        await this.deleteBatch(this.config.bucket, s3Keys);
      }
    }
  }

  /**
   * Send one DeleteObjects request and fail if any key was not deleted.
   *
   * @param bucket - Bucket to delete from.
   * @param s3Keys - Full S3 keys (prefix already applied).
   * @throws Error listing the failed keys when the response contains `Errors`.
   */
  private async deleteBatch(bucket: string, s3Keys: string[]): Promise<void> {
    const command = new DeleteObjectsCommand({
      Bucket: bucket,
      Delete: {
        Objects: s3Keys.map((s3Key) => ({ Key: s3Key })),
      },
    });

    const response = await this.client.send(command);

    // The request itself can succeed while individual keys fail; those
    // failures are only visible in the response's `Errors` list.
    const failures = response.Errors ?? [];
    if (failures.length > 0) {
      const details = failures
        .slice(0, 5)
        .map(
          (failure) =>
            `${failure.Key ?? '<unknown key>'}: ${failure.Code ?? 'UnknownError'}` +
            (failure.Message ? ` - ${failure.Message}` : ''),
        )
        .join('; ');
      throw new Error(
        `Failed to delete ${failures.length} object(s) from bucket "${bucket}": ${details}`,
      );
    }
  }

  /**
   * Read an object's metadata.
   *
   * @remarks
   * With a metadata bucket, the `.meta` JSON object is fetched and its date
   * fields are revived into `Date` instances. Otherwise the metadata is read
   * from the data object's S3 metadata fields via a HEAD request.
   *
   * @param key - Key without the configured prefix.
   * @returns The metadata, or `null` if it does not exist.
   * @throws Any S3 error other than a not-found error, or a `SyntaxError` if
   *   the stored JSON is invalid.
   */
  async getMetadata(key: string): Promise<StorageMetadata | null> {
    if (this.metadataBucket) {
      try {
        const command = new GetObjectCommand({
          Bucket: this.metadataBucket,
          Key: this.getS3Key(key) + '.meta',
        });

        const response = await this.client.send(command);

        if (!response.Body) {
          return null;
        }

        const buffer = await this.streamToUint8Array(response.Body as Readable);
        const json = new TextDecoder().decode(buffer);
        const metadata = JSON.parse(json) as StorageMetadata;

        // JSON has no date type: these arrive as ISO strings.
        metadata.createdAt = new Date(metadata.createdAt);
        metadata.lastAccessed = new Date(metadata.lastAccessed);
        if (metadata.ttl) {
          metadata.ttl = new Date(metadata.ttl);
        }

        return metadata;
      } catch (error) {
        if (this.isNoSuchKeyError(error)) {
          return null;
        }
        throw error;
      }
    }

    try {
      const command = new HeadObjectCommand({
        Bucket: this.config.bucket,
        Key: this.getS3Key(key),
      });

      const response = await this.client.send(command);

      if (!response.Metadata) {
        return null;
      }

      return this.s3ToMetadata(response.Metadata);
    } catch (error) {
      if (this.isNoSuchKeyError(error)) {
        return null;
      }
      throw error;
    }
  }

  /**
   * Replace an object's metadata.
   *
   * @remarks
   * With a metadata bucket this overwrites the `.meta` JSON object. Otherwise
   * the data object is copied onto itself with `MetadataDirective: 'REPLACE'`,
   * which requires the data object to exist.
   *
   * @param key - Key without the configured prefix.
   * @param metadata - New metadata.
   */
  async setMetadata(key: string, metadata: StorageMetadata): Promise<void> {
    if (this.metadataBucket) {
      const metadataJson = JSON.stringify(metadata);
      const buffer = new TextEncoder().encode(metadataJson);

      const command = new PutObjectCommand({
        Bucket: this.metadataBucket,
        Key: this.getS3Key(key) + '.meta',
        Body: buffer,
        ContentType: 'application/json',
      });

      await this.client.send(command);
      return;
    }

    const s3Key = this.getS3Key(key);
    const command = new CopyObjectCommand({
      Bucket: this.config.bucket,
      Key: s3Key,
      CopySource: this.buildCopySource(s3Key),
      Metadata: this.metadataToS3(metadata),
      MetadataDirective: 'REPLACE',
    });

    await this.client.send(command);
  }

  /**
   * Build the `CopySource` value (`bucket/key`) for copying an object in the
   * data bucket.
   *
   * @remarks
   * S3 requires the copy source to be URL-encoded. Each path segment of the
   * key is percent-encoded while the `/` separators are kept literal, so keys
   * containing spaces, `+`, `#`, `?` or non-ASCII characters resolve to the
   * intended object.
   */
  private buildCopySource(s3Key: string): string {
    const encodedKey = s3Key
      .split('/')
      .map((segment) => encodeURIComponent(segment))
      .join('/');
    return `${this.config.bucket}/${encodedKey}`;
  }

  /**
   * Count the objects under the configured prefix in the data bucket and sum
   * their sizes.
   *
   * @remarks
   * Walks the full listing, so the cost grows with the number of objects.
   */
  async getStats(): Promise<TierStats> {
    let bytes = 0;
    let items = 0;

    // List all objects and sum up sizes
    let continuationToken: string | undefined;

    do {
      const command = new ListObjectsV2Command({
        Bucket: this.config.bucket,
        Prefix: this.prefix,
        ContinuationToken: continuationToken,
      });

      const response = await this.client.send(command);

      if (response.Contents) {
        for (const object of response.Contents) {
          items++;
          bytes += object.Size ?? 0;
        }
      }

      continuationToken = response.NextContinuationToken;
    } while (continuationToken);

    return { bytes, items };
  }

  /**
   * Delete every object under the configured prefix.
   *
   * @remarks
   * All keys are collected in memory before deletion starts.
   */
  async clear(): Promise<void> {
    // List and delete all objects with the prefix
    const keys: string[] = [];

    for await (const key of this.listKeys()) {
      keys.push(key);
    }

    await this.deleteMany(keys);
  }

  /**
   * Get the full S3 key including prefix.
   */
  private getS3Key(key: string): string {
    return this.prefix + key;
  }

  /**
   * Remove the prefix from an S3 key to get the original key.
   * Keys that do not start with the prefix are returned unchanged.
   */
  private removePrefix(s3Key: string): string {
    if (this.prefix && s3Key.startsWith(this.prefix)) {
      return s3Key.slice(this.prefix.length);
    }
    return s3Key;
  }

  /**
   * Convert StorageMetadata to S3 metadata format.
   *
   * @remarks
   * S3 metadata keys must be lowercase and values must be strings.
   * Dates are written as ISO-8601 strings and `customMetadata` as JSON.
   * Optional fields are omitted when not set.
   */
  private metadataToS3(metadata: StorageMetadata): Record<string, string> {
    return {
      key: metadata.key,
      size: metadata.size.toString(),
      createdat: metadata.createdAt.toISOString(),
      lastaccessed: metadata.lastAccessed.toISOString(),
      accesscount: metadata.accessCount.toString(),
      compressed: metadata.compressed.toString(),
      checksum: metadata.checksum,
      ...(metadata.ttl && { ttl: metadata.ttl.toISOString() }),
      ...(metadata.mimeType && { mimetype: metadata.mimeType }),
      ...(metadata.encoding && { encoding: metadata.encoding }),
      ...(metadata.customMetadata && { custom: JSON.stringify(metadata.customMetadata) }),
    };
  }

  /**
   * Convert S3 metadata to StorageMetadata format.
   *
   * @remarks
   * Missing or unparseable numeric fields become `0`, and missing or
   * unparseable `createdat` / `lastaccessed` values become the current time.
   * An unparseable `custom` JSON value is ignored.
   */
  private s3ToMetadata(s3Metadata: Record<string, string>): StorageMetadata {
    const metadata: StorageMetadata = {
      key: s3Metadata.key ?? '',
      size: this.parseIntOrZero(s3Metadata.size),
      createdAt: this.parseDateOrNow(s3Metadata.createdat),
      lastAccessed: this.parseDateOrNow(s3Metadata.lastaccessed),
      accessCount: this.parseIntOrZero(s3Metadata.accesscount),
      compressed: s3Metadata.compressed === 'true',
      checksum: s3Metadata.checksum ?? '',
    };

    if (s3Metadata.ttl) {
      metadata.ttl = new Date(s3Metadata.ttl);
    }

    if (s3Metadata.mimetype) {
      metadata.mimeType = s3Metadata.mimetype;
    }

    if (s3Metadata.encoding) {
      metadata.encoding = s3Metadata.encoding;
    }

    if (s3Metadata.custom) {
      try {
        metadata.customMetadata = JSON.parse(s3Metadata.custom);
      } catch {
        // Ignore invalid JSON
      }
    }

    return metadata;
  }

  /**
   * Parse a base-10 integer, returning `0` when the value is missing or not a number.
   */
  private parseIntOrZero(value: string | undefined): number {
    const parsed = parseInt(value ?? '0', 10);
    return Number.isFinite(parsed) ? parsed : 0;
  }

  /**
   * Parse a date string, returning the current time when the value is missing or invalid.
   */
  private parseDateOrNow(value: string | undefined): Date {
    const parsed = new Date(value ?? Date.now());
    return Number.isNaN(parsed.getTime()) ? new Date() : parsed;
  }
}