WIP library to store cold objects in S3, warm objects on disk, and hot objects in memory.
Node.js, TypeScript
import {
  S3Client,
  PutObjectCommand,
  GetObjectCommand,
  DeleteObjectCommand,
  HeadObjectCommand,
  ListObjectsV2Command,
  DeleteObjectsCommand,
  CopyObjectCommand,
  type S3ClientConfig,
} from '@aws-sdk/client-s3';
import type { Readable } from 'node:stream';
import type { StorageTier, StorageMetadata, TierStats } from '../types/index.js';

/**
 * Maximum number of keys accepted by a single S3 `DeleteObjects` request.
 */
const MAX_DELETE_BATCH_SIZE = 1000;

/**
 * Suffix appended to a data object's S3 key to form the key of its JSON
 * metadata object in the separate metadata bucket.
 */
const METADATA_SUFFIX = '.meta';

/**
 * Configuration for S3StorageTier.
 */
export interface S3StorageTierConfig {
  /**
   * S3 bucket name.
   */
  bucket: string;

  /**
   * AWS region.
   */
  region: string;

  /**
   * Optional S3-compatible endpoint (for R2, Minio, etc.).
   *
   * @example 'https://s3.us-east-1.amazonaws.com'
   * @example 'https://account-id.r2.cloudflarestorage.com'
   */
  endpoint?: string;

  /**
   * Optional AWS credentials.
   *
   * @remarks
   * If not provided, uses the default AWS credential chain
   * (environment variables, ~/.aws/credentials, IAM roles, etc.)
   */
  credentials?: {
    accessKeyId: string;
    secretAccessKey: string;
  };

  /**
   * Optional key prefix for namespacing.
   *
   * @remarks
   * All keys will be prefixed with this value.
   * Useful for multi-tenant scenarios or organizing data.
   *
   * @example 'tiered-storage/'
   */
  prefix?: string;

  /**
   * Force path-style addressing for S3-compatible services.
   *
   * @defaultValue true
   *
   * @remarks
   * Most S3-compatible services (MinIO, R2, etc.) require path-style URLs.
   * AWS S3 uses virtual-hosted-style by default, but path-style also works.
   *
   * - true: `https://endpoint.com/bucket/key` (path-style)
   * - false: `https://bucket.endpoint.com/key` (virtual-hosted-style)
   */
  forcePathStyle?: boolean;

  /**
   * Optional separate bucket for storing metadata.
   *
   * @remarks
   * **RECOMMENDED for production use!**
   *
   * By default, metadata is stored in S3 object metadata fields. However, updating
   * metadata requires copying the entire object, which is slow and expensive for large files.
   *
   * When `metadataBucket` is specified, metadata is stored as separate JSON objects
   * in this bucket. This allows fast, cheap metadata updates without copying data.
   *
   * **Benefits:**
   * - Fast metadata updates (no object copying)
   * - Much cheaper for large objects
   * - No impact on data object performance
   *
   * **Trade-offs:**
   * - Requires managing two buckets
   * - Metadata and data could become out of sync if not handled carefully
   * - Additional S3 API calls for metadata operations
   *
   * @example
   * ```typescript
   * const tier = new S3StorageTier({
   *   bucket: 'my-data-bucket',
   *   metadataBucket: 'my-metadata-bucket', // Separate bucket for metadata
   *   region: 'us-east-1',
   * });
   * ```
   */
  metadataBucket?: string;
}

/**
 * AWS S3 (or compatible) storage tier.
 *
 * @remarks
 * - Supports AWS S3, Cloudflare R2, MinIO, and other S3-compatible services
 * - Requires `@aws-sdk/client-s3` peer dependency
 * - Typically used as the cold tier (source of truth)
 *
 * **Metadata Storage:**
 * By default, metadata is stored in S3 object metadata fields:
 * - Custom metadata fields are prefixed with `x-amz-meta-`
 * - Built-in fields use standard S3 headers
 *
 * When `metadataBucket` is configured, metadata is instead stored as a JSON
 * object under `<s3Key>.meta` in that bucket.
 *
 * @example
 * ```typescript
 * const tier = new S3StorageTier({
 *   bucket: 'my-bucket',
 *   region: 'us-east-1',
 *   credentials: {
 *     accessKeyId: process.env.AWS_ACCESS_KEY_ID!,
 *     secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY!,
 *   },
 *   prefix: 'cache/',
 * });
 * ```
 *
 * @example Cloudflare R2
 * ```typescript
 * const tier = new S3StorageTier({
 *   bucket: 'my-bucket',
 *   region: 'auto',
 *   endpoint: 'https://account-id.r2.cloudflarestorage.com',
 *   credentials: {
 *     accessKeyId: process.env.R2_ACCESS_KEY_ID!,
 *     secretAccessKey: process.env.R2_SECRET_ACCESS_KEY!,
 *   },
 * });
 * ```
 */
export class S3StorageTier implements StorageTier {
  private readonly client: S3Client;
  private readonly prefix: string;
  private readonly metadataBucket?: string;

  constructor(private readonly config: S3StorageTierConfig) {
    const clientConfig: S3ClientConfig = {
      region: config.region,
      // Most S3-compatible services need path-style URLs
      forcePathStyle: config.forcePathStyle ?? true,
      ...(config.endpoint && { endpoint: config.endpoint }),
      ...(config.credentials && { credentials: config.credentials }),
    };

    this.client = new S3Client(clientConfig);
    this.prefix = config.prefix ?? '';
    if (config.metadataBucket) {
      this.metadataBucket = config.metadataBucket;
    }
  }

  /**
   * Fetch the object stored under `key`.
   *
   * @returns The object bytes, or `null` when the object does not exist or the
   * response carries no body.
   * @throws Any S3 error other than a missing-key error.
   */
  async get(key: string): Promise<Uint8Array | null> {
    try {
      const command = new GetObjectCommand({
        Bucket: this.config.bucket,
        Key: this.getS3Key(key),
      });

      const response = await this.client.send(command);

      if (!response.Body) {
        return null;
      }

      return await this.streamToUint8Array(response.Body as Readable);
    } catch (error) {
      if (this.isNoSuchKeyError(error)) {
        return null;
      }
      throw error;
    }
  }

  /**
   * Drain a readable stream into a single freshly allocated Uint8Array.
   *
   * @throws Error when the stream yields a chunk that is not binary data.
   */
  private async streamToUint8Array(stream: Readable): Promise<Uint8Array> {
    const chunks: Uint8Array[] = [];
    let totalLength = 0;

    for await (const chunk of stream) {
      // Buffer is a Uint8Array subclass, so this single check covers both.
      // No per-chunk copy is needed: every chunk is copied into `result` below.
      if (!(chunk instanceof Uint8Array)) {
        throw new Error('Unexpected chunk type in S3 stream');
      }
      chunks.push(chunk);
      totalLength += chunk.length;
    }

    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
      result.set(chunk, offset);
      offset += chunk.length;
    }

    return result;
  }

  /**
   * Whether `error` is an error object named `NoSuchKey` or `NotFound`.
   */
  private isNoSuchKeyError(error: unknown): boolean {
    return (
      typeof error === 'object' &&
      error !== null &&
      'name' in error &&
      (error.name === 'NoSuchKey' || error.name === 'NotFound')
    );
  }

  /**
   * Store `data` under `key` together with its metadata.
   *
   * @remarks
   * With a metadata bucket configured, the data object and the JSON metadata
   * object are written concurrently; the two writes are not atomic, so a
   * failure of one can leave the buckets out of sync.
   * Otherwise the metadata is attached to the data object as S3 user metadata.
   */
  async set(key: string, data: Uint8Array, metadata: StorageMetadata): Promise<void> {
    const s3Key = this.getS3Key(key);

    if (this.metadataBucket) {
      // Build both commands before sending anything so that a serialization
      // failure cannot leave a data object without its metadata.
      const dataCommand = new PutObjectCommand({
        Bucket: this.config.bucket,
        Key: s3Key,
        Body: data,
        ContentLength: data.byteLength,
      });
      const metadataCommand = this.createMetadataPutCommand(
        this.metadataBucket,
        s3Key,
        metadata,
      );

      await Promise.all([this.client.send(dataCommand), this.client.send(metadataCommand)]);
      return;
    }

    const command = new PutObjectCommand({
      Bucket: this.config.bucket,
      Key: s3Key,
      Body: data,
      ContentLength: data.byteLength,
      Metadata: this.metadataToS3(metadata),
    });

    await this.client.send(command);
  }

  /**
   * Delete the object stored under `key` (and its metadata object, when a
   * metadata bucket is configured). Missing-key errors are ignored.
   *
   * @throws Any S3 error other than a missing-key error.
   */
  async delete(key: string): Promise<void> {
    const s3Key = this.getS3Key(key);

    try {
      const dataCommand = new DeleteObjectCommand({
        Bucket: this.config.bucket,
        Key: s3Key,
      });

      if (this.metadataBucket) {
        const metadataCommand = new DeleteObjectCommand({
          Bucket: this.metadataBucket,
          Key: s3Key + METADATA_SUFFIX,
        });

        await Promise.all([
          this.client.send(dataCommand),
          this.client.send(metadataCommand).catch((error) => {
            if (!this.isNoSuchKeyError(error)) throw error;
          }),
        ]);
      } else {
        await this.client.send(dataCommand);
      }
    } catch (error) {
      if (!this.isNoSuchKeyError(error)) {
        throw error;
      }
    }
  }

  /**
   * Check whether a data object exists under `key` using a HEAD request.
   *
   * @throws Any S3 error other than a missing-key error.
   */
  async exists(key: string): Promise<boolean> {
    try {
      const command = new HeadObjectCommand({
        Bucket: this.config.bucket,
        Key: this.getS3Key(key),
      });

      await this.client.send(command);
      return true;
    } catch (error) {
      if (this.isNoSuchKeyError(error)) {
        return false;
      }
      throw error;
    }
  }

  /**
   * Iterate over all keys in the data bucket, optionally restricted to those
   * starting with `prefix`.
   *
   * @remarks
   * Yielded keys have the configured tier prefix removed.
   */
  async *listKeys(prefix?: string): AsyncIterableIterator<string> {
    const s3Prefix = prefix ? this.getS3Key(prefix) : this.prefix;

    for await (const object of this.listObjects(s3Prefix)) {
      if (object.Key) {
        // Remove prefix to get original key
        yield this.removePrefix(object.Key);
      }
    }
  }

  /**
   * Delete many keys using batched `DeleteObjects` requests.
   *
   * @remarks
   * Deletion of the matching metadata objects (when a metadata bucket is
   * configured) is best-effort: failures there are ignored.
   *
   * @throws Error when S3 reports a per-object failure for a data object, or
   * any error raised by the data-bucket request itself.
   */
  async deleteMany(keys: string[]): Promise<void> {
    if (keys.length === 0) return;

    for (let i = 0; i < keys.length; i += MAX_DELETE_BATCH_SIZE) {
      const s3Keys = keys
        .slice(i, i + MAX_DELETE_BATCH_SIZE)
        .map((key) => this.getS3Key(key));

      const dataDelete = this.deleteBatch(this.config.bucket, s3Keys);

      if (this.metadataBucket) {
        // Best-effort: an orphaned metadata object must not fail the data delete.
        const metadataDelete = this.deleteBatch(
          this.metadataBucket,
          s3Keys.map((s3Key) => s3Key + METADATA_SUFFIX),
        ).catch(() => undefined);

        await Promise.all([dataDelete, metadataDelete]);
      } else {
        await dataDelete;
      }
    }
  }

  /**
   * Read the metadata stored for `key`.
   *
   * @returns The metadata, or `null` when it does not exist.
   * @throws Any S3 error other than a missing-key error.
   */
  async getMetadata(key: string): Promise<StorageMetadata | null> {
    if (this.metadataBucket) {
      try {
        const command = new GetObjectCommand({
          Bucket: this.metadataBucket,
          Key: this.getS3Key(key) + METADATA_SUFFIX,
        });

        const response = await this.client.send(command);

        if (!response.Body) {
          return null;
        }

        const buffer = await this.streamToUint8Array(response.Body as Readable);
        const json = new TextDecoder().decode(buffer);
        const metadata = JSON.parse(json) as StorageMetadata;

        // JSON serializes dates as ISO strings; revive them into Date objects.
        metadata.createdAt = new Date(metadata.createdAt);
        metadata.lastAccessed = new Date(metadata.lastAccessed);
        if (metadata.ttl) {
          metadata.ttl = new Date(metadata.ttl);
        }

        return metadata;
      } catch (error) {
        if (this.isNoSuchKeyError(error)) {
          return null;
        }
        throw error;
      }
    }

    try {
      const command = new HeadObjectCommand({
        Bucket: this.config.bucket,
        Key: this.getS3Key(key),
      });

      const response = await this.client.send(command);

      if (!response.Metadata) {
        return null;
      }

      return this.s3ToMetadata(response.Metadata);
    } catch (error) {
      if (this.isNoSuchKeyError(error)) {
        return null;
      }
      throw error;
    }
  }

  /**
   * Replace the metadata stored for `key`.
   *
   * @remarks
   * With a metadata bucket configured this is a single small PutObject.
   * Otherwise the data object is copied onto itself with
   * `MetadataDirective: 'REPLACE'`.
   */
  async setMetadata(key: string, metadata: StorageMetadata): Promise<void> {
    const s3Key = this.getS3Key(key);

    if (this.metadataBucket) {
      await this.client.send(
        this.createMetadataPutCommand(this.metadataBucket, s3Key, metadata),
      );
      return;
    }

    const command = new CopyObjectCommand({
      Bucket: this.config.bucket,
      Key: s3Key,
      CopySource: this.encodeCopySource(this.config.bucket, s3Key),
      Metadata: this.metadataToS3(metadata),
      MetadataDirective: 'REPLACE',
    });

    await this.client.send(command);
  }

  /**
   * Count the objects under the configured prefix in the data bucket and sum
   * their sizes.
   */
  async getStats(): Promise<TierStats> {
    let bytes = 0;
    let items = 0;

    // List all objects and sum up sizes
    for await (const object of this.listObjects(this.prefix)) {
      items++;
      bytes += object.Size ?? 0;
    }

    return { bytes, items };
  }

  /**
   * Delete every object under the configured prefix.
   */
  async clear(): Promise<void> {
    // List and delete all objects with the prefix
    const keys: string[] = [];

    for await (const key of this.listKeys()) {
      keys.push(key);
    }

    await this.deleteMany(keys);
  }

  /**
   * Page through `ListObjectsV2` for the data bucket, yielding every listed
   * object whose key starts with `s3Prefix`.
   */
  private async *listObjects(
    s3Prefix: string,
  ): AsyncIterableIterator<{ Key?: string | undefined; Size?: number | undefined }> {
    let continuationToken: string | undefined;

    do {
      const command = new ListObjectsV2Command({
        Bucket: this.config.bucket,
        Prefix: s3Prefix,
        ContinuationToken: continuationToken,
      });

      const response = await this.client.send(command);

      for (const object of response.Contents ?? []) {
        yield object;
      }

      continuationToken = response.NextContinuationToken;
    } while (continuationToken);
  }

  /**
   * Send one `DeleteObjects` request and surface per-object failures.
   *
   * @remarks
   * `DeleteObjects` answers with HTTP 200 even when individual deletions fail;
   * such failures are only reported in the `Errors` list of the response.
   *
   * @throws Error when the response lists at least one failed deletion.
   */
  private async deleteBatch(bucket: string, s3Keys: string[]): Promise<void> {
    const command = new DeleteObjectsCommand({
      Bucket: bucket,
      Delete: {
        Objects: s3Keys.map((s3Key) => ({ Key: s3Key })),
      },
    });

    const response = await this.client.send(command);
    const failures = response.Errors ?? [];

    if (failures.length > 0) {
      const first = failures[0];
      const detail = first?.Message ? ` - ${first.Message}` : '';
      throw new Error(
        `Failed to delete ${failures.length} object(s) from bucket "${bucket}" ` +
          `(first failure: ${first?.Key ?? '<unknown key>'}: ${first?.Code ?? 'UnknownError'}${detail})`,
      );
    }
  }

  /**
   * Build the PutObject command that writes `metadata` as JSON next to the
   * data key (`<s3Key>.meta`) in the metadata bucket.
   */
  private createMetadataPutCommand(
    metadataBucket: string,
    s3Key: string,
    metadata: StorageMetadata,
  ): PutObjectCommand {
    const metadataJson = JSON.stringify(metadata);

    return new PutObjectCommand({
      Bucket: metadataBucket,
      Key: s3Key + METADATA_SUFFIX,
      Body: new TextEncoder().encode(metadataJson),
      ContentType: 'application/json',
    });
  }

  /**
   * Build a URL-encoded `CopySource` value (`bucket/key`).
   *
   * @remarks
   * S3 requires the copy source to be URL-encoded. Each key segment is
   * percent-encoded separately so that `/` separators are preserved.
   */
  private encodeCopySource(bucket: string, s3Key: string): string {
    const encodedKey = s3Key
      .split('/')
      .map((segment) => encodeURIComponent(segment))
      .join('/');

    return `${bucket}/${encodedKey}`;
  }

  /**
   * Get the full S3 key including prefix.
   */
  private getS3Key(key: string): string {
    return this.prefix + key;
  }

  /**
   * Remove the prefix from an S3 key to get the original key.
   */
  private removePrefix(s3Key: string): string {
    if (this.prefix && s3Key.startsWith(this.prefix)) {
      return s3Key.slice(this.prefix.length);
    }
    return s3Key;
  }

  /**
   * Convert StorageMetadata to S3 metadata format.
   *
   * @remarks
   * S3 metadata keys must be lowercase and values must be strings.
   * We serialize complex values as JSON.
   */
  private metadataToS3(metadata: StorageMetadata): Record<string, string> {
    return {
      key: metadata.key,
      size: metadata.size.toString(),
      createdat: metadata.createdAt.toISOString(),
      lastaccessed: metadata.lastAccessed.toISOString(),
      accesscount: metadata.accessCount.toString(),
      compressed: metadata.compressed.toString(),
      checksum: metadata.checksum,
      ...(metadata.ttl && { ttl: metadata.ttl.toISOString() }),
      ...(metadata.mimeType && { mimetype: metadata.mimeType }),
      ...(metadata.encoding && { encoding: metadata.encoding }),
      ...(metadata.customMetadata && { custom: JSON.stringify(metadata.customMetadata) }),
    };
  }

  /**
   * Convert S3 metadata to StorageMetadata format.
   *
   * @remarks
   * Missing fields fall back to empty strings, zero, or the current time.
   * An unparsable `custom` field is dropped.
   */
  private s3ToMetadata(s3Metadata: Record<string, string>): StorageMetadata {
    const metadata: StorageMetadata = {
      key: s3Metadata.key ?? '',
      size: parseInt(s3Metadata.size ?? '0', 10),
      createdAt: new Date(s3Metadata.createdat ?? Date.now()),
      lastAccessed: new Date(s3Metadata.lastaccessed ?? Date.now()),
      accessCount: parseInt(s3Metadata.accesscount ?? '0', 10),
      compressed: s3Metadata.compressed === 'true',
      checksum: s3Metadata.checksum ?? '',
    };

    if (s3Metadata.ttl) {
      metadata.ttl = new Date(s3Metadata.ttl);
    }

    if (s3Metadata.mimetype) {
      metadata.mimeType = s3Metadata.mimetype;
    }

    if (s3Metadata.encoding) {
      metadata.encoding = s3Metadata.encoding;
    }

    if (s3Metadata.custom) {
      try {
        metadata.customMetadata = JSON.parse(s3Metadata.custom);
      } catch {
        // Ignore invalid JSON
      }
    }

    return metadata;
  }
}