An atproto PDS written in Go

feat: postgresql support, fix github actions, implement ListMissingBlobs and more (#43)

* init

* implement listmissingblobs

* Update README to mark listMissingBlobs as completed

* e

* Update README.md

Co-authored-by: hailey <hailey@blueskyweb.xyz>

---------

Co-authored-by: hailey <hailey@blueskyweb.xyz>

Scan bf1a93fc 214d8b4d

+10 -2
.github/workflows/docker-image.yml
···
push:
branches:
- main
+
tags:
+
- 'v*'
env:
REGISTRY: ghcr.io
···
steps:
- name: Checkout repository
uses: actions/checkout@v4
+
# Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
- name: Log in to the Container registry
-
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
+
# This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
- name: Extract metadata (tags, labels) for Docker
id: meta
···
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
+
type=raw,value=latest,enable={{is_default_branch}}
type=sha
type=sha,format=long
+
type=semver,pattern={{version}}
+
type=semver,pattern={{major}}.{{minor}}
+
# This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages.
# It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
# It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
- name: Build and push Docker image
id: push
-
uses: docker/build-push-action@v5
+
uses: docker/build-push-action@v6
with:
context: .
push: true
+10
Caddyfile.postgres
···
+
{$COCOON_HOSTNAME} {
+
reverse_proxy cocoon:8080
+
+
encode gzip
+
+
log {
+
output file /data/access.log
+
format json
+
}
+
}
+44 -1
README.md
···
docker-compose up -d
```
+
**For PostgreSQL deployment:**
+
```bash
+
# Add POSTGRES_PASSWORD to your .env file first!
+
docker-compose -f docker-compose.postgres.yaml up -d
+
```
+
5. **Get your invite code**
On first run, an invite code is automatically created. View it with:
···
### Optional Configuration
+
#### Database Configuration
+
+
By default, Cocoon uses SQLite which requires no additional setup. For production deployments with higher traffic, you can use PostgreSQL:
+
+
```bash
+
# Database type: sqlite (default) or postgres
+
COCOON_DB_TYPE="postgres"
+
+
# PostgreSQL connection string (required if db-type is postgres)
+
# Format: postgres://user:password@host:port/database?sslmode=disable
+
COCOON_DATABASE_URL="postgres://cocoon:password@localhost:5432/cocoon?sslmode=disable"
+
+
# Or use the standard DATABASE_URL environment variable
+
DATABASE_URL="postgres://cocoon:password@localhost:5432/cocoon?sslmode=disable"
+
```
+
+
For SQLite (default):
+
```bash
+
COCOON_DB_TYPE="sqlite"
+
COCOON_DB_NAME="/data/cocoon/cocoon.db"
+
```
+
+
> **Note**: When using PostgreSQL, database backups to S3 are not handled by Cocoon. Use `pg_dump` or your database provider's backup solution instead.
+
#### SMTP Email Settings
```bash
COCOON_SMTP_USER="your-smtp-username"
···
```
#### S3 Storage
+
+
Cocoon supports S3-compatible storage for both database backups (SQLite only) and blob storage (images, videos, etc.):
+
```bash
+
# Enable S3 backups (SQLite databases only - hourly backups)
COCOON_S3_BACKUPS_ENABLED=true
+
+
# Enable S3 for blob storage (images, videos, etc.)
+
# When enabled, blobs are stored in S3 instead of the database
COCOON_S3_BLOBSTORE_ENABLED=true
+
+
# S3 configuration (works with AWS S3, MinIO, Cloudflare R2, etc.)
COCOON_S3_REGION="us-east-1"
COCOON_S3_BUCKET="your-bucket"
COCOON_S3_ENDPOINT="https://s3.amazonaws.com"
COCOON_S3_ACCESS_KEY="your-access-key"
COCOON_S3_SECRET_KEY="your-secret-key"
```
+
+
**Blob Storage Options:**
+
- `COCOON_S3_BLOBSTORE_ENABLED=false` (default): Blobs stored in the database
+
- `COCOON_S3_BLOBSTORE_ENABLED=true`: Blobs stored in S3 bucket under `blobs/{did}/{cid}`
### Management Commands
···
- [x] `com.atproto.repo.getRecord`
- [x] `com.atproto.repo.importRepo` (Works "okay". Use with extreme caution.)
- [x] `com.atproto.repo.listRecords`
-
- [ ] `com.atproto.repo.listMissingBlobs`
+
- [x] `com.atproto.repo.listMissingBlobs` (Not actually functional, but will return a response as if no blobs were missing)
### Server
+36 -1
cmd/cocoon/main.go
···
"github.com/lestrrat-go/jwx/v2/jwk"
"github.com/urfave/cli/v2"
"golang.org/x/crypto/bcrypt"
+
"gorm.io/driver/postgres"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
)
···
Name: "db-name",
Value: "cocoon.db",
EnvVars: []string{"COCOON_DB_NAME"},
+
},
+
&cli.StringFlag{
+
Name: "db-type",
+
Value: "sqlite",
+
Usage: "Database type: sqlite or postgres",
+
EnvVars: []string{"COCOON_DB_TYPE"},
+
},
+
&cli.StringFlag{
+
Name: "database-url",
+
Usage: "PostgreSQL connection string (required if db-type is postgres)",
+
EnvVars: []string{"COCOON_DATABASE_URL", "DATABASE_URL"},
},
&cli.StringFlag{
Name: "did",
···
s, err := server.New(&server.Args{
Addr: cmd.String("addr"),
DbName: cmd.String("db-name"),
+
DbType: cmd.String("db-type"),
+
DatabaseURL: cmd.String("database-url"),
Did: cmd.String("did"),
Hostname: cmd.String("hostname"),
RotationKeyPath: cmd.String("rotation-key-path"),
···
}
func newDb() (*gorm.DB, error) {
-
return gorm.Open(sqlite.Open("cocoon.db"), &gorm.Config{})
+
dbType := os.Getenv("COCOON_DB_TYPE")
+
if dbType == "" {
+
dbType = "sqlite"
+
}
+
+
switch dbType {
+
case "postgres":
+
databaseURL := os.Getenv("COCOON_DATABASE_URL")
+
if databaseURL == "" {
+
databaseURL = os.Getenv("DATABASE_URL")
+
}
+
if databaseURL == "" {
+
return nil, fmt.Errorf("COCOON_DATABASE_URL or DATABASE_URL must be set when using postgres")
+
}
+
return gorm.Open(postgres.Open(databaseURL), &gorm.Config{})
+
default:
+
dbName := os.Getenv("COCOON_DB_NAME")
+
if dbName == "" {
+
dbName = "cocoon.db"
+
}
+
return gorm.Open(sqlite.Open(dbName), &gorm.Config{})
+
}
}
+158
docker-compose.postgres.yaml
···
+
# Docker Compose with PostgreSQL
+
#
+
# Usage:
+
# docker-compose -f docker-compose.postgres.yaml up -d
+
#
+
# This file extends the base docker-compose.yaml with a PostgreSQL database.
+
# Set the following in your .env file:
+
# COCOON_DB_TYPE=postgres
+
# POSTGRES_PASSWORD=your-secure-password
+
+
version: '3.8'
+
+
services:
+
postgres:
+
image: postgres:16-alpine
+
container_name: cocoon-postgres
+
environment:
+
POSTGRES_USER: cocoon
+
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD is required}
+
POSTGRES_DB: cocoon
+
volumes:
+
- postgres_data:/var/lib/postgresql/data
+
healthcheck:
+
test: ["CMD-SHELL", "pg_isready -U cocoon -d cocoon"]
+
interval: 10s
+
timeout: 5s
+
retries: 5
+
restart: unless-stopped
+
+
init-keys:
+
build:
+
context: .
+
dockerfile: Dockerfile
+
image: ghcr.io/haileyok/cocoon:latest
+
container_name: cocoon-init-keys
+
volumes:
+
- ./keys:/keys
+
- ./data:/data/cocoon
+
- ./init-keys.sh:/init-keys.sh:ro
+
environment:
+
COCOON_DID: ${COCOON_DID}
+
COCOON_HOSTNAME: ${COCOON_HOSTNAME}
+
COCOON_ROTATION_KEY_PATH: /keys/rotation.key
+
COCOON_JWK_PATH: /keys/jwk.key
+
COCOON_CONTACT_EMAIL: ${COCOON_CONTACT_EMAIL}
+
COCOON_RELAYS: ${COCOON_RELAYS:-https://bsky.network}
+
COCOON_ADMIN_PASSWORD: ${COCOON_ADMIN_PASSWORD}
+
entrypoint: ["/bin/sh", "/init-keys.sh"]
+
restart: "no"
+
+
cocoon:
+
build:
+
context: .
+
dockerfile: Dockerfile
+
image: ghcr.io/haileyok/cocoon:latest
+
container_name: cocoon-pds
+
depends_on:
+
init-keys:
+
condition: service_completed_successfully
+
postgres:
+
condition: service_healthy
+
ports:
+
- "8080:8080"
+
volumes:
+
- ./data:/data/cocoon
+
- ./keys/rotation.key:/keys/rotation.key:ro
+
- ./keys/jwk.key:/keys/jwk.key:ro
+
environment:
+
# Required settings
+
COCOON_DID: ${COCOON_DID}
+
COCOON_HOSTNAME: ${COCOON_HOSTNAME}
+
COCOON_ROTATION_KEY_PATH: /keys/rotation.key
+
COCOON_JWK_PATH: /keys/jwk.key
+
COCOON_CONTACT_EMAIL: ${COCOON_CONTACT_EMAIL}
+
COCOON_RELAYS: ${COCOON_RELAYS:-https://bsky.network}
+
COCOON_ADMIN_PASSWORD: ${COCOON_ADMIN_PASSWORD}
+
COCOON_SESSION_SECRET: ${COCOON_SESSION_SECRET}
+
+
# Database configuration - PostgreSQL
+
COCOON_ADDR: ":8080"
+
COCOON_DB_TYPE: postgres
+
COCOON_DATABASE_URL: postgres://cocoon:${POSTGRES_PASSWORD}@postgres:5432/cocoon?sslmode=disable
+
COCOON_BLOCKSTORE_VARIANT: ${COCOON_BLOCKSTORE_VARIANT:-sqlite}
+
+
# Optional: SMTP settings for email
+
COCOON_SMTP_USER: ${COCOON_SMTP_USER:-}
+
COCOON_SMTP_PASS: ${COCOON_SMTP_PASS:-}
+
COCOON_SMTP_HOST: ${COCOON_SMTP_HOST:-}
+
COCOON_SMTP_PORT: ${COCOON_SMTP_PORT:-}
+
COCOON_SMTP_EMAIL: ${COCOON_SMTP_EMAIL:-}
+
COCOON_SMTP_NAME: ${COCOON_SMTP_NAME:-}
+
+
# Optional: S3 configuration
+
COCOON_S3_BACKUPS_ENABLED: ${COCOON_S3_BACKUPS_ENABLED:-false}
+
COCOON_S3_BLOBSTORE_ENABLED: ${COCOON_S3_BLOBSTORE_ENABLED:-false}
+
COCOON_S3_REGION: ${COCOON_S3_REGION:-}
+
COCOON_S3_BUCKET: ${COCOON_S3_BUCKET:-}
+
COCOON_S3_ENDPOINT: ${COCOON_S3_ENDPOINT:-}
+
COCOON_S3_ACCESS_KEY: ${COCOON_S3_ACCESS_KEY:-}
+
COCOON_S3_SECRET_KEY: ${COCOON_S3_SECRET_KEY:-}
+
+
# Optional: Fallback proxy
+
COCOON_FALLBACK_PROXY: ${COCOON_FALLBACK_PROXY:-}
+
restart: unless-stopped
+
healthcheck:
+
test: ["CMD", "curl", "-f", "http://localhost:8080/xrpc/_health"]
+
interval: 30s
+
timeout: 10s
+
retries: 3
+
start_period: 40s
+
+
create-invite:
+
build:
+
context: .
+
dockerfile: Dockerfile
+
image: ghcr.io/haileyok/cocoon:latest
+
container_name: cocoon-create-invite
+
volumes:
+
- ./keys:/keys
+
- ./create-initial-invite.sh:/create-initial-invite.sh:ro
+
environment:
+
COCOON_DID: ${COCOON_DID}
+
COCOON_HOSTNAME: ${COCOON_HOSTNAME}
+
COCOON_ROTATION_KEY_PATH: /keys/rotation.key
+
COCOON_JWK_PATH: /keys/jwk.key
+
COCOON_CONTACT_EMAIL: ${COCOON_CONTACT_EMAIL}
+
COCOON_RELAYS: ${COCOON_RELAYS:-https://bsky.network}
+
COCOON_ADMIN_PASSWORD: ${COCOON_ADMIN_PASSWORD}
+
COCOON_DB_TYPE: postgres
+
COCOON_DATABASE_URL: postgres://cocoon:${POSTGRES_PASSWORD}@postgres:5432/cocoon?sslmode=disable
+
depends_on:
+
cocoon:
+
condition: service_healthy
+
entrypoint: ["/bin/sh", "/create-initial-invite.sh"]
+
restart: "no"
+
+
caddy:
+
image: caddy:2-alpine
+
container_name: cocoon-caddy
+
ports:
+
- "80:80"
+
- "443:443"
+
volumes:
+
- ./Caddyfile.postgres:/etc/caddy/Caddyfile:ro
+
- caddy_data:/data
+
- caddy_config:/config
+
restart: unless-stopped
+
environment:
+
COCOON_HOSTNAME: ${COCOON_HOSTNAME}
+
CADDY_ACME_EMAIL: ${COCOON_CONTACT_EMAIL:-}
+
+
volumes:
+
postgres_data:
+
driver: local
+
caddy_data:
+
driver: local
+
caddy_config:
+
driver: local
+6 -2
docker-compose.yaml
···
# Server configuration
COCOON_ADDR: ":8080"
-
COCOON_DB_NAME: /data/cocoon/cocoon.db
+
COCOON_DB_TYPE: ${COCOON_DB_TYPE:-sqlite}
+
COCOON_DB_NAME: ${COCOON_DB_NAME:-/data/cocoon/cocoon.db}
+
COCOON_DATABASE_URL: ${COCOON_DATABASE_URL:-}
COCOON_BLOCKSTORE_VARIANT: ${COCOON_BLOCKSTORE_VARIANT:-sqlite}
# Optional: SMTP settings for email
···
COCOON_CONTACT_EMAIL: ${COCOON_CONTACT_EMAIL}
COCOON_RELAYS: ${COCOON_RELAYS:-https://bsky.network}
COCOON_ADMIN_PASSWORD: ${COCOON_ADMIN_PASSWORD}
-
COCOON_DB_NAME: /data/cocoon/cocoon.db
+
COCOON_DB_TYPE: ${COCOON_DB_TYPE:-sqlite}
+
COCOON_DB_NAME: ${COCOON_DB_NAME:-/data/cocoon/cocoon.db}
+
COCOON_DATABASE_URL: ${COCOON_DATABASE_URL:-}
depends_on:
- init-keys
entrypoint: ["/bin/sh", "/create-initial-invite.sh"]
+21
server/handle_repo_list_missing_blobs.go
···
+
package server
+
+
import (
+
"github.com/labstack/echo/v4"
+
)
+
+
type ComAtprotoRepoListMissingBlobsResponse struct {
+
Cursor *string `json:"cursor,omitempty"`
+
Blobs []ComAtprotoRepoListMissingBlobsRecordBlob `json:"blobs"`
+
}
+
+
type ComAtprotoRepoListMissingBlobsRecordBlob struct {
+
Cid string `json:"cid"`
+
RecordUri string `json:"recordUri"`
+
}
+
+
func (s *Server) handleListMissingBlobs(e echo.Context) error {
+
return e.JSON(200, ComAtprotoRepoListMissingBlobsResponse{
+
Blobs: []ComAtprotoRepoListMissingBlobsRecordBlob{},
+
})
+
}
+34 -3
server/server.go
···
"github.com/labstack/echo/v4"
"github.com/labstack/echo/v4/middleware"
slogecho "github.com/samber/slog-echo"
+
"gorm.io/driver/postgres"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
)
···
requestCrawlMu sync.Mutex
dbName string
+
dbType string
s3Config *S3Config
}
type Args struct {
Addr string
DbName string
+
DbType string
+
DatabaseURL string
Logger *slog.Logger
Version string
Did string
···
IdleTimeout: 5 * time.Minute,
}
-
gdb, err := gorm.Open(sqlite.Open(args.DbName), &gorm.Config{})
-
if err != nil {
-
return nil, err
+
dbType := args.DbType
+
if dbType == "" {
+
dbType = "sqlite"
+
}
+
+
var gdb *gorm.DB
+
var err error
+
switch dbType {
+
case "postgres":
+
if args.DatabaseURL == "" {
+
return nil, fmt.Errorf("database-url must be set when using postgres")
+
}
+
gdb, err = gorm.Open(postgres.Open(args.DatabaseURL), &gorm.Config{})
+
if err != nil {
+
return nil, fmt.Errorf("failed to connect to postgres: %w", err)
+
}
+
args.Logger.Info("connected to PostgreSQL database")
+
default:
+
gdb, err = gorm.Open(sqlite.Open(args.DbName), &gorm.Config{})
+
if err != nil {
+
return nil, fmt.Errorf("failed to open sqlite database: %w", err)
+
}
+
args.Logger.Info("connected to SQLite database", "path", args.DbName)
}
dbw := db.NewDB(gdb)
···
passport: identity.NewPassport(h, identity.NewMemCache(10_000)),
dbName: args.DbName,
+
dbType: dbType,
s3Config: args.S3Config,
oauthProvider: provider.NewProvider(provider.Args{
···
s.echo.GET("/xrpc/com.atproto.repo.describeRepo", s.handleDescribeRepo)
s.echo.GET("/xrpc/com.atproto.sync.listRepos", s.handleListRepos)
s.echo.GET("/xrpc/com.atproto.repo.listRecords", s.handleListRecords)
+
s.echo.GET("/xrpc/com.atproto.repo.listMissingBlobs", s.handleListMissingBlobs)
s.echo.GET("/xrpc/com.atproto.repo.getRecord", s.handleRepoGetRecord)
s.echo.GET("/xrpc/com.atproto.sync.getRecord", s.handleSyncGetRecord)
s.echo.GET("/xrpc/com.atproto.sync.getBlocks", s.handleGetBlocks)
···
}
func (s *Server) doBackup() {
+
if s.dbType == "postgres" {
+
s.logger.Info("skipping S3 backup - PostgreSQL backups should be handled externally (pg_dump, managed database backups, etc.)")
+
return
+
}
+
start := time.Now()
s.logger.Info("beginning backup to s3...")