A tool for backing up ATProto related data to S3

backup tangled knot stuff

Signed-off-by: Will Andrews <will7989@hotmail.com>

+3
.env.example
···
BUCKET_NAME="my-super-duper-bucket"
DID="the-did-to-backup"
PDS_HOST="https://your-pds.com"
+
TANGLED_KNOT_DATABASE_DIRECTORY="/path/to/database/directory"
+
TANGLED_KNOT_REPOSITORY_DIRECTORY="/path/to/repository/directory"
+
BUGSNAG_API_KEY="enter-api-key-to-enable"
+4
go.mod
···
)
require (
+
github.com/bugsnag/bugsnag-go/v2 v2.6.2 // indirect
+
github.com/bugsnag/panicwrap v1.3.4 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/go-ini/ini v1.67.0 // indirect
github.com/goccy/go-json v0.10.5 // indirect
github.com/google/uuid v1.6.0 // indirect
+
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 // indirect
github.com/klauspost/compress v1.18.0 // indirect
github.com/klauspost/cpuid/v2 v2.2.11 // indirect
github.com/minio/crc64nvme v1.0.2 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/philhofer/fwd v1.2.0 // indirect
+
github.com/pkg/errors v0.9.1 // indirect
github.com/rs/xid v1.6.0 // indirect
github.com/stretchr/testify v1.10.0 // indirect
github.com/tinylib/msgp v1.3.0 // indirect
+9
go.sum
···
+
github.com/bitly/go-simplejson v0.5.1/go.mod h1:YOPVLzCfwK14b4Sff3oP1AmGhI9T9Vsg84etUnlyp+Q=
+
github.com/bugsnag/bugsnag-go/v2 v2.6.2 h1:gGjr8txMtPYWKovEBC+4o6tthYveuE7fjzu6XYVIApg=
+
github.com/bugsnag/bugsnag-go/v2 v2.6.2/go.mod h1:S9njhE7l6XCiKycOZ2zp0x1zoEE5nL3HjROCSsKc/3c=
+
github.com/bugsnag/panicwrap v1.3.4 h1:A6sXFtDGsgU/4BLf5JT0o5uYg3EeKgGx3Sfs+/uk3pU=
+
github.com/bugsnag/panicwrap v1.3.4/go.mod h1:D/8v3kj0zr8ZAKg1AQ6crr+5VwKN5eIywRkfhyM/+dE=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
···
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
+
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0 h1:iQTw/8FWTuc7uiaSepXwyf3o52HaUYcV+Tu66S3F5GA=
+
github.com/kardianos/osext v0.0.0-20190222173326-2bc1f35cddc0/go.mod h1:1NbS8ALrpOvjt0rHPNLyCIeMtbizbir8U//inJ+zuB8=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
···
github.com/minio/minio-go/v7 v7.0.95/go.mod h1:wOOX3uxS334vImCNRVyIDdXX9OsXDm89ToynKgqUKlo=
github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM=
github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM=
+
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU=
+15 -161
main.go
···
package main
import (
-
"archive/zip"
"context"
-
"encoding/json"
-
"fmt"
-
"io"
"log/slog"
-
"net/http"
"os"
+
"github.com/bugsnag/bugsnag-go/v2"
"github.com/joho/godotenv"
"github.com/minio/minio-go/v7"
"github.com/minio/minio-go/v7/pkg/credentials"
···
}
}
+
configureBugsnag()
+
minioClient, err := createMinioClient()
if err != nil {
slog.Error("create minio client", "error", err)
+
bugsnag.Notify(err)
return
}
···
err = minioClient.MakeBucket(ctx, bucketName, minio.MakeBucketOptions{})
if err != nil {
slog.Error("create bucket", "error", err)
-
return
-
}
-
-
err = backupRepo(ctx, minioClient, bucketName)
-
if err != nil {
-
slog.Error("backup repo", "error", err)
+
bugsnag.Notify(err)
return
}
-
err = backupBlobs(ctx, minioClient, bucketName)
-
if err != nil {
-
slog.Error("backup blobs", "error", err)
-
return
-
}
+
backupPDS(ctx, minioClient, bucketName)
}
func createMinioClient() (*minio.Client, error) {
···
})
}
-
func backupRepo(ctx context.Context, minioClient *minio.Client, bucketName string) error {
-
pdsHost := os.Getenv("PDS_HOST")
-
did := os.Getenv("DID")
-
-
url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo?did=%s", pdsHost, did)
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
-
if err != nil {
-
return fmt.Errorf("create get repo request: %w", err)
-
}
-
-
req.Header.Add("ACCEPT", "application/vnd.ipld.car")
-
resp, err := http.DefaultClient.Do(req)
-
if err != nil {
-
return fmt.Errorf("get repo: %w", err)
-
}
-
-
defer resp.Body.Close()
-
-
_, err = minioClient.PutObject(ctx, bucketName, "pds-repo", resp.Body, -1, minio.PutObjectOptions{})
-
if err != nil {
-
return fmt.Errorf("stream repo to bucket: %w", err)
-
}
-
-
return nil
-
}
-
-
func backupBlobs(ctx context.Context, minioClient *minio.Client, bucketName string) error {
-
cids, err := getAllBlobCIDs(ctx)
-
if err != nil {
-
return fmt.Errorf("get all blob CIDs: %w", err)
-
}
-
-
reader, writer := io.Pipe()
-
defer reader.Close()
-
-
zipWriter := zip.NewWriter(writer)
-
-
go func() {
-
defer writer.Close()
-
defer zipWriter.Close()
-
-
for _, cid := range cids {
-
slog.Info("processing cid", "cid", cid)
-
blob, err := getBlob(ctx, cid)
-
if err != nil {
-
slog.Error("failed to get blob", "cid", cid, "error", err)
-
continue
-
}
-
-
zipFile, err := zipWriter.Create(cid)
-
if err != nil {
-
slog.Error("create new file in zipwriter", "cid", cid, "error", err)
-
blob.Close()
-
continue
-
}
-
-
io.Copy(zipFile, blob)
-
blob.Close()
-
}
-
}()
-
-
_, err = minioClient.PutObject(ctx, bucketName, "pds-blobs.zip", reader, -1, minio.PutObjectOptions{})
-
if err != nil {
-
return fmt.Errorf("stream blobs to bucket: %w", err)
-
}
-
-
return nil
-
}
-
-
func getAllBlobCIDs(ctx context.Context) ([]string, error) {
-
cursor := ""
-
limit := 100
-
var cids []string
-
for {
-
res, err := listBlobs(ctx, cursor, int64(limit))
-
if err != nil {
-
return nil, fmt.Errorf("list blobs: %w", err)
-
}
-
if len(res.CIDs) == 0 {
-
return cids, nil
-
}
-
-
cids = append(cids, res.CIDs...)
-
-
if len(res.CIDs) < limit {
-
return cids, nil
-
}
-
-
cursor = res.Cursor
-
}
-
}
-
-
type listBlobsResponse struct {
-
Cursor string `json:"cursor"`
-
CIDs []string `json:"cids"`
-
}
-
-
func listBlobs(ctx context.Context, cursor string, limit int64) (listBlobsResponse, error) {
-
pdsHost := os.Getenv("PDS_HOST")
-
did := os.Getenv("DID")
-
-
// TODO: do proper url encoding of query params
-
url := fmt.Sprintf("%s/xrpc/com.atproto.sync.listBlobs?did=%s&cursor=%s&limit=%d", pdsHost, did, cursor, limit)
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
-
if err != nil {
-
return listBlobsResponse{}, fmt.Errorf("create list blobs request: %w", err)
-
}
-
-
resp, err := http.DefaultClient.Do(req)
-
if err != nil {
-
return listBlobsResponse{}, fmt.Errorf("list blobs: %w", err)
-
}
-
-
defer resp.Body.Close()
-
-
resBody, err := io.ReadAll(resp.Body)
-
if err != nil {
-
return listBlobsResponse{}, fmt.Errorf("failed to read response: %w", err)
-
}
-
-
var result listBlobsResponse
-
err = json.Unmarshal(resBody, &result)
-
if err != nil {
-
return listBlobsResponse{}, fmt.Errorf("failed to unmarshal response: %w", err)
-
}
-
-
return result, nil
-
}
-
-
func getBlob(ctx context.Context, cid string) (io.ReadCloser, error) {
-
pdsHost := os.Getenv("PDS_HOST")
-
did := os.Getenv("DID")
-
-
// TODO: do proper url encoding of query params
-
url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getBlob?did=%s&cid=%s", pdsHost, did, cid)
-
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
-
if err != nil {
-
return nil, fmt.Errorf("create get blob request: %w", err)
-
}
-
-
resp, err := http.DefaultClient.Do(req)
-
if err != nil {
-
return nil, fmt.Errorf("get blob: %w", err)
+
func configureBugsnag() {
+
apiKey := os.Getenv("BUGSNAG_API_KEY")
+
if apiKey == "" {
+
slog.Info("bugsnag not configured")
+
return
}
-
-
return resp.Body, nil
+
bugsnag.Configure(bugsnag.Configuration{
+
APIKey: apiKey,
+
ReleaseStage: "production",
+
})
}
+186
pds.go
···
+
package main
+
+
import (
+
"archive/zip"
+
"context"
+
"encoding/json"
+
"fmt"
+
"io"
+
"log/slog"
+
"net/http"
+
"os"
+
+
"github.com/bugsnag/bugsnag-go/v2"
+
"github.com/minio/minio-go/v7"
+
)
+
+
func backupPDS(ctx context.Context, minioClient *minio.Client, bucketName string) {
+
if os.Getenv("PDS_HOST") == "" || os.Getenv("DID") == "" {
+
slog.Info("PDS_HOST or DID env not set - skipping PDS backup")
+
return
+
}
+
+
err := backupRepo(ctx, minioClient, bucketName)
+
if err != nil {
+
slog.Error("backup repo", "error", err)
+
bugsnag.Notify(err)
+
return
+
}
+
+
err = backupBlobs(ctx, minioClient, bucketName)
+
if err != nil {
+
slog.Error("backup blobs", "error", err)
+
bugsnag.Notify(err)
+
return
+
}
+
}
+
+
func backupRepo(ctx context.Context, minioClient *minio.Client, bucketName string) error {
+
pdsHost := os.Getenv("PDS_HOST")
+
did := os.Getenv("DID")
+
+
url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo?did=%s", pdsHost, did)
+
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+
if err != nil {
+
return fmt.Errorf("create get repo request: %w", err)
+
}
+
+
req.Header.Add("ACCEPT", "application/vnd.ipld.car")
+
resp, err := http.DefaultClient.Do(req)
+
if err != nil {
+
return fmt.Errorf("get repo: %w", err)
+
}
+
+
defer resp.Body.Close()
+
+
_, err = minioClient.PutObject(ctx, bucketName, "pds-repo", resp.Body, -1, minio.PutObjectOptions{})
+
if err != nil {
+
return fmt.Errorf("stream repo to bucket: %w", err)
+
}
+
+
return nil
+
}
+
+
func backupBlobs(ctx context.Context, minioClient *minio.Client, bucketName string) error {
+
cids, err := getAllBlobCIDs(ctx)
+
if err != nil {
+
return fmt.Errorf("get all blob CIDs: %w", err)
+
}
+
+
reader, writer := io.Pipe()
+
defer reader.Close()
+
+
zipWriter := zip.NewWriter(writer)
+
+
go func() {
+
defer writer.Close()
+
defer zipWriter.Close()
+
+
for _, cid := range cids {
+
slog.Info("processing cid", "cid", cid)
+
blob, err := getBlob(ctx, cid)
+
if err != nil {
+
slog.Error("failed to get blob", "cid", cid, "error", err)
+
bugsnag.Notify(err)
+
continue
+
}
+
+
zipFile, err := zipWriter.Create(cid)
+
if err != nil {
+
slog.Error("create new file in zipwriter", "cid", cid, "error", err)
+
bugsnag.Notify(err)
+
blob.Close()
+
continue
+
}
+
+
io.Copy(zipFile, blob)
+
blob.Close()
+
}
+
}()
+
+
_, err = minioClient.PutObject(ctx, bucketName, "pds-blobs.zip", reader, -1, minio.PutObjectOptions{})
+
if err != nil {
+
return fmt.Errorf("stream blobs to bucket: %w", err)
+
}
+
+
return nil
+
}
+
+
func getAllBlobCIDs(ctx context.Context) ([]string, error) {
+
cursor := ""
+
limit := 100
+
var cids []string
+
for {
+
res, err := listBlobs(ctx, cursor, int64(limit))
+
if err != nil {
+
return nil, fmt.Errorf("list blobs: %w", err)
+
}
+
if len(res.CIDs) == 0 {
+
return cids, nil
+
}
+
+
cids = append(cids, res.CIDs...)
+
+
if len(res.CIDs) < limit {
+
return cids, nil
+
}
+
+
cursor = res.Cursor
+
}
+
}
+
+
type listBlobsResponse struct {
+
Cursor string `json:"cursor"`
+
CIDs []string `json:"cids"`
+
}
+
+
func listBlobs(ctx context.Context, cursor string, limit int64) (listBlobsResponse, error) {
+
pdsHost := os.Getenv("PDS_HOST")
+
did := os.Getenv("DID")
+
+
// TODO: do proper url encoding of query params
+
url := fmt.Sprintf("%s/xrpc/com.atproto.sync.listBlobs?did=%s&cursor=%s&limit=%d", pdsHost, did, cursor, limit)
+
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+
if err != nil {
+
return listBlobsResponse{}, fmt.Errorf("create list blobs request: %w", err)
+
}
+
+
resp, err := http.DefaultClient.Do(req)
+
if err != nil {
+
return listBlobsResponse{}, fmt.Errorf("list blobs: %w", err)
+
}
+
+
defer resp.Body.Close()
+
+
resBody, err := io.ReadAll(resp.Body)
+
if err != nil {
+
return listBlobsResponse{}, fmt.Errorf("failed to read response: %w", err)
+
}
+
+
var result listBlobsResponse
+
err = json.Unmarshal(resBody, &result)
+
if err != nil {
+
return listBlobsResponse{}, fmt.Errorf("failed to unmarshal response: %w", err)
+
}
+
+
return result, nil
+
}
+
+
func getBlob(ctx context.Context, cid string) (io.ReadCloser, error) {
+
pdsHost := os.Getenv("PDS_HOST")
+
did := os.Getenv("DID")
+
+
// TODO: do proper url encoding of query params
+
url := fmt.Sprintf("%s/xrpc/com.atproto.sync.getBlob?did=%s&cid=%s", pdsHost, did, cid)
+
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+
if err != nil {
+
return nil, fmt.Errorf("create get blob request: %w", err)
+
}
+
+
resp, err := http.DefaultClient.Do(req)
+
if err != nil {
+
return nil, fmt.Errorf("get blob: %w", err)
+
}
+
+
return resp.Body, nil
+
}
+98
tangled_knot.go
···
+
package main
+
+
import (
+
"archive/tar"
+
"compress/gzip"
+
"context"
+
"io"
+
"log/slog"
+
"os"
+
"path/filepath"
+
+
"github.com/bugsnag/bugsnag-go/v2"
+
"github.com/minio/minio-go/v7"
+
)
+
+
func backupTangledKnot(ctx context.Context, minioClient *minio.Client, bucketName string) {
+
if os.Getenv("BACKUP_TANGLED_KNOT") != "true" {
+
return
+
}
+
+
backupKnotDB(ctx, minioClient, bucketName)
+
}
+
+
func backupKnotDB(ctx context.Context, minioClient *minio.Client, bucketName string) {
+
dir := os.Getenv("TANGLED_KNOT_DATABASE_DIRECTORY")
+
+
pipeReader, pipeWriter := io.Pipe()
+
defer pipeReader.Close()
+
+
go compress(dir, pipeWriter)
+
+
_, err := minioClient.PutObject(ctx, bucketName, "knot-db.zip", pipeReader, -1, minio.PutObjectOptions{})
+
if err != nil {
+
slog.Error("stream knot DB to bucket: %w")
+
bugsnag.Notify(err)
+
}
+
}
+
+
func backupKnotRepos(ctx context.Context, minioClient *minio.Client, bucketName string) {
+
dir := os.Getenv("TANGLED_KNOT_REPOSITORY_DIRECTORY")
+
+
pipeReader, pipeWriter := io.Pipe()
+
defer pipeReader.Close()
+
+
go compress(dir, pipeWriter)
+
+
_, err := minioClient.PutObject(ctx, bucketName, "knot-repos.zip", pipeReader, -1, minio.PutObjectOptions{})
+
if err != nil {
+
slog.Error("stream knot repos to bucket: %w")
+
bugsnag.Notify(err)
+
}
+
}
+
+
func compress(src string, writer io.WriteCloser) error {
+
zipWriter := gzip.NewWriter(writer)
+
tarWriter := tar.NewWriter(zipWriter)
+
+
defer writer.Close()
+
defer zipWriter.Close()
+
defer tarWriter.Close()
+
+
filepath.Walk(src, func(file string, fi os.FileInfo, err error) error {
+
header, err := tar.FileInfoHeader(fi, file)
+
if err != nil {
+
return err
+
}
+
+
// must provide real name
+
// (see https://golang.org/src/archive/tar/common.go?#L626)
+
header.Name = filepath.ToSlash(file)
+
+
if err := tarWriter.WriteHeader(header); err != nil {
+
return err
+
}
+
// if not a dir, write file content
+
if !fi.IsDir() {
+
data, err := os.Open(file)
+
if err != nil {
+
return err
+
}
+
if _, err := io.Copy(tarWriter, data); err != nil {
+
return err
+
}
+
}
+
return nil
+
})
+
+
// produce tar
+
if err := tarWriter.Close(); err != nil {
+
return err
+
}
+
// produce gzip
+
if err := zipWriter.Close(); err != nil {
+
return err
+
}
+
//
+
return nil
+
}