A tool for backing up ATProto related data to S3

initial commit with a working PDS repo and blob backup

Signed-off-by: Will Andrews <will7989@hotmail.com>

+6
.env.example
···
+
ENDPOINT="S3-endpoint"
+
ACCESS_ID="S3-ID"
+
SECRET_ACCESS_KEY="S3-secret"
+
BUCKET_NAME="my-super-duper-bucket"
+
DID="the-did-to-backup"
+
PDS_HOST="https://your-pds.com"
+1
.gitignore
···
+
.env
+27
go.mod
···
+
module tangled.sh/willdot.net/backatit
+
+
go 1.25.0
+
+
require (
+
github.com/joho/godotenv v1.5.1
+
github.com/minio/minio-go/v7 v7.0.95
+
)
+
+
require (
+
github.com/dustin/go-humanize v1.0.1 // indirect
+
github.com/go-ini/ini v1.67.0 // indirect
+
github.com/goccy/go-json v0.10.5 // indirect
+
github.com/google/uuid v1.6.0 // indirect
+
github.com/klauspost/compress v1.18.0 // indirect
+
github.com/klauspost/cpuid/v2 v2.2.11 // indirect
+
github.com/minio/crc64nvme v1.0.2 // indirect
+
github.com/minio/md5-simd v1.1.2 // indirect
+
github.com/philhofer/fwd v1.2.0 // indirect
+
github.com/rs/xid v1.6.0 // indirect
+
github.com/stretchr/testify v1.10.0 // indirect
+
github.com/tinylib/msgp v1.3.0 // indirect
+
golang.org/x/crypto v0.39.0 // indirect
+
golang.org/x/net v0.41.0 // indirect
+
golang.org/x/sys v0.33.0 // indirect
+
golang.org/x/text v0.26.0 // indirect
+
)
+43
go.sum
···
+
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
+
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
+
github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A=
+
github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
+
github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
+
github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
+
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
+
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
+
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
+
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
+
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
+
github.com/klauspost/cpuid/v2 v2.2.11 h1:0OwqZRYI2rFrjS4kvkDnqJkKHdHaRnCm68/DY4OxRzU=
+
github.com/klauspost/cpuid/v2 v2.2.11/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
+
github.com/minio/crc64nvme v1.0.2 h1:6uO1UxGAD+kwqWWp7mBFsi5gAse66C4NXO8cmcVculg=
+
github.com/minio/crc64nvme v1.0.2/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg=
+
github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
+
github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
+
github.com/minio/minio-go/v7 v7.0.95 h1:ywOUPg+PebTMTzn9VDsoFJy32ZuARN9zhB+K3IYEvYU=
+
github.com/minio/minio-go/v7 v7.0.95/go.mod h1:wOOX3uxS334vImCNRVyIDdXX9OsXDm89ToynKgqUKlo=
+
github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM=
+
github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM=
+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+
github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU=
+
github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
+
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
+
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+
github.com/tinylib/msgp v1.3.0 h1:ULuf7GPooDaIlbyvgAxBV/FI7ynli6LZ1/nVUNu+0ww=
+
github.com/tinylib/msgp v1.3.0/go.mod h1:ykjzy2wzgrlvpDCRc4LA8UXy6D8bzMSuAF3WD57Gok0=
+
golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM=
+
golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U=
+
golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw=
+
golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA=
+
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
+
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+
golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M=
+
golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA=
+
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+214
main.go
···
+
package main
+
+
import (
+
"archive/zip"
+
"context"
+
"encoding/json"
+
"fmt"
+
"io"
+
"log/slog"
+
"net/http"
+
"os"
+
+
"github.com/joho/godotenv"
+
"github.com/minio/minio-go/v7"
+
"github.com/minio/minio-go/v7/pkg/credentials"
+
)
+
+
func main() {
+
ctx := context.Background()
+
+
err := godotenv.Load(".env")
+
if err != nil {
+
if !os.IsNotExist(err) {
+
slog.Error("load env", "error", err)
+
return
+
}
+
}
+
+
minioClient, err := createMinioClient()
+
if err != nil {
+
slog.Error("create minio client", "error", err)
+
return
+
}
+
+
bucketName := os.Getenv("BUCKET_NAME")
+
+
err = minioClient.MakeBucket(ctx, bucketName, minio.MakeBucketOptions{})
+
if err != nil {
+
slog.Error("create bucket", "error", err)
+
return
+
}
+
+
err = backupRepo(ctx, minioClient, bucketName)
+
if err != nil {
+
slog.Error("backup repo", "error", err)
+
return
+
}
+
+
err = backupBlobs(ctx, minioClient, bucketName)
+
if err != nil {
+
slog.Error("backup blobs", "error", err)
+
return
+
}
+
}
+
+
func createMinioClient() (*minio.Client, error) {
+
endpoint := os.Getenv("ENDPOINT")
+
accessKeyID := os.Getenv("ACCESS_ID")
+
secretAccessKey := os.Getenv("SECRET_ACCESS_KEY")
+
useSSL := true
+
+
return minio.New(endpoint, &minio.Options{
+
Creds: credentials.NewStaticV4(accessKeyID, secretAccessKey, ""),
+
Secure: useSSL,
+
})
+
}
+
+
func backupRepo(ctx context.Context, minioClient *minio.Client, bucketName string) error {
+
pdsHost := os.Getenv("PDS_HOST")
+
did := os.Getenv("DID")
+
+
url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo?did=%s", pdsHost, did)
+
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+
if err != nil {
+
return fmt.Errorf("create get repo request: %w", err)
+
}
+
+
req.Header.Add("ACCEPT", "application/vnd.ipld.car")
+
resp, err := http.DefaultClient.Do(req)
+
if err != nil {
+
return fmt.Errorf("get repo: %w", err)
+
}
+
+
defer resp.Body.Close()
+
+
_, err = minioClient.PutObject(ctx, bucketName, "pds-repo", resp.Body, -1, minio.PutObjectOptions{})
+
if err != nil {
+
return fmt.Errorf("stream repo to bucket: %w", err)
+
}
+
+
return nil
+
}
+
+
func backupBlobs(ctx context.Context, minioClient *minio.Client, bucketName string) error {
+
cids, err := getAllBlobCIDs(ctx)
+
if err != nil {
+
return fmt.Errorf("get all blob CIDs: %w", err)
+
}
+
+
reader, writer := io.Pipe()
+
defer reader.Close()
+
+
zipWriter := zip.NewWriter(writer)
+
+
go func() {
+
defer writer.Close()
+
defer zipWriter.Close()
+
+
for _, cid := range cids {
+
slog.Info("processing cid", "cid", cid)
+
blob, err := getBlob(ctx, cid)
+
if err != nil {
+
slog.Error("failed to get blob", "cid", cid, "error", err)
+
continue
+
}
+
+
zipFile, err := zipWriter.Create(cid)
+
if err != nil {
+
slog.Error("create new file in zipwriter", "cid", cid, "error", err)
+
blob.Close()
+
continue
+
}
+
+
io.Copy(zipFile, blob)
+
blob.Close()
+
}
+
}()
+
+
_, err = minioClient.PutObject(ctx, bucketName, "pds-blobs.zip", reader, -1, minio.PutObjectOptions{})
+
if err != nil {
+
return fmt.Errorf("stream blobs to bucket: %w", err)
+
}
+
+
return nil
+
}
+
+
func getAllBlobCIDs(ctx context.Context) ([]string, error) {
+
cursor := ""
+
limit := 100
+
var cids []string
+
for {
+
res, err := listBlobs(ctx, cursor, int64(limit))
+
if err != nil {
+
return nil, fmt.Errorf("list blobs: %w", err)
+
}
+
if len(res.CIDs) == 0 {
+
return cids, nil
+
}
+
+
cids = append(cids, res.CIDs...)
+
+
if len(res.CIDs) < limit {
+
return cids, nil
+
}
+
+
cursor = res.Cursor
+
}
+
}
+
+
// listBlobsResponse is the JSON body decoded from a
// com.atproto.sync.listBlobs response.
type listBlobsResponse struct {
	// CIDs holds the blob CIDs contained in this page.
	CIDs []string `json:"cids"`
	// Cursor is the value sent back as the cursor query parameter when
	// requesting the following page.
	Cursor string `json:"cursor"`
}
+
+
func listBlobs(ctx context.Context, cursor string, limit int64) (listBlobsResponse, error) {
+
pdsHost := os.Getenv("PDS_HOST")
+
did := os.Getenv("DID")
+
+
// TODO: do proper url encoding of query params
+
url := fmt.Sprintf("%s/xrpc/com.atproto.sync.listBlobs?did=%s&cursor=%s&limit=%d", pdsHost, did, cursor, limit)
+
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+
if err != nil {
+
return listBlobsResponse{}, fmt.Errorf("create list blobs request: %w", err)
+
}
+
+
resp, err := http.DefaultClient.Do(req)
+
if err != nil {
+
return listBlobsResponse{}, fmt.Errorf("list blobs: %w", err)
+
}
+
+
defer resp.Body.Close()
+
+
resBody, err := io.ReadAll(resp.Body)
+
if err != nil {
+
return listBlobsResponse{}, fmt.Errorf("failed to read response: %w", err)
+
}
+
+
var result listBlobsResponse
+
err = json.Unmarshal(resBody, &result)
+
if err != nil {
+
return listBlobsResponse{}, fmt.Errorf("failed to unmarshal response: %w", err)
+
}
+
+
return result, nil
+
}
+
+
// getBlob requests the blob identified by cid (com.atproto.sync.getBlob) for
// the account named by the DID environment variable from the PDS at PDS_HOST.
//
// On success it returns the response body; the caller must close it. It
// returns an error when the request cannot be built or sent, or when the PDS
// answers with anything other than 200 OK, in which case the body has already
// been closed.
func getBlob(ctx context.Context, cid string) (io.ReadCloser, error) {
	pdsHost := os.Getenv("PDS_HOST")
	did := os.Getenv("DID")

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, pdsHost+"/xrpc/com.atproto.sync.getBlob", nil)
	if err != nil {
		return nil, fmt.Errorf("create get blob request: %w", err)
	}

	// Build the query string through the request URL so both values are
	// escaped instead of being spliced into the URL verbatim.
	query := req.URL.Query()
	query.Set("did", did)
	query.Set("cid", cid)
	req.URL.RawQuery = query.Encode()

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("get blob: %w", err)
	}

	// Never hand an error payload to the caller as if it were blob content.
	if resp.StatusCode != http.StatusOK {
		resp.Body.Close()
		return nil, fmt.Errorf("get blob: unexpected status %s", resp.Status)
	}

	return resp.Body, nil
}
+9
readme.md
···
+
## Back AT it
+
+
This is a tool I'm actively developing to back up my ATProtocol type things to S3 storage.
+
+
At the moment it's a one-shot style script that backs up the PDS repo and then the blobs, but in the future I plan on being able to back up other things (next is my Tangled Knot data).
+
+
The PDS repo data is pulled straight from the xrpc endpoint and sent straight to S3. The blob data, however, is streamed into a zip file and sent to S3 so that not all the data is held in memory while the backup takes place (the minio library will still keep some in memory as a multipart request).
+
+
It's very hacky right now and needs polishing, so use with caution. Although let's face it, the worst it can do at the moment is back up some bad data, which is better than no data 🤪