this repo has no description

feat: add bsky-users

Changed files
+194 -1
cmd
bsky-users
service
+2 -1
.gitignore
···
db/
-
firehose.db*
+
bin/
+
data/
+170
cmd/bsky-users/main.go
···
+
package main
+
+
import (
+
"context"
+
"database/sql"
+
_ "embed"
+
"encoding/json"
+
"log"
+
"os/signal"
+
"syscall"
+
"time"
+
+
jetstream "github.com/bluesky-social/jetstream/pkg/models"
+
"github.com/gorilla/websocket"
+
_ "github.com/mattn/go-sqlite3"
+
)
+
+
// CheckpointResults receives the single three-column row produced by
// SQLite's "PRAGMA wal_checkpoint". The fields are declared in the same order
// the columns are scanned.
type CheckpointResults struct {
	// Blocked is the first column; handler reports the checkpoint as
	// "blocked" when it equals 1.
	Blocked int
	// Pages is the second column, logged as the WAL page count.
	Pages int
	// Transferred is the third column, logged as the number of pages moved
	// out of the WAL. It equals Pages when the checkpoint was complete.
	Transferred int
}
+
+
// AppBskyAllowlist is the set of record collections (NSIDs) whose commits
// count as user activity. Events for any collection not listed here are
// ignored by handler.
var AppBskyAllowlist = map[string]bool{
	// app.bsky.actor.*
	"app.bsky.actor.profile": true,

	// app.bsky.feed.*
	"app.bsky.feed.generator":  true,
	"app.bsky.feed.like":       true,
	"app.bsky.feed.post":       true,
	"app.bsky.feed.postgate":   true,
	"app.bsky.feed.repost":     true,
	"app.bsky.feed.threadgate": true,

	// app.bsky.graph.*
	"app.bsky.graph.block":       true,
	"app.bsky.graph.follow":      true,
	"app.bsky.graph.list":        true,
	"app.bsky.graph.listblock":   true,
	"app.bsky.graph.listitem":    true,
	"app.bsky.graph.starterpack": true,

	// app.bsky.labeler.*
	"app.bsky.labeler.service": true,

	// chat.bsky.actor.*
	"chat.bsky.actor.declaration": true,
}
+
+
const (
	// JetstreamUrl is the websocket endpoint the service subscribes to.
	// The public instance is kept here for reference:
	//
	//	wss://jetstream1.us-west.bsky.network/subscribe
	JetstreamUrl = `ws://localhost:6008/subscribe` // TODO(ejd): attach a reconnect cursor

	// userTimestampUpdate upserts the timestamp for a DID. It takes three
	// arguments: did, ts, ts (the timestamp is bound once for the insert and
	// once for the conflict update).
	userTimestampUpdate = `insert into users (did, ts) values (?, ?) on conflict (did) do update set ts = ?`
)
+
+
//go:embed schema.sql
+
var ddl string
+
+
func handler(ctx context.Context, events <-chan []byte, dbCnx *sql.DB) {
+
if _, err := dbCnx.ExecContext(ctx, ddl); err != nil {
+
log.Printf("could not create tables: %v\n", err)
+
}
+
if _, err := dbCnx.ExecContext(ctx, "PRAGMA wal_autocheckpoint = 0"); err != nil {
+
log.Printf("could not set PRAGMA wal_autocheckpoint: %v\n", err)
+
}
+
+
var (
+
dbTx *sql.Tx
+
err error
+
eventCount int
+
)
+
+
for evt := range events {
+
if dbTx == nil {
+
dbTx, err = dbCnx.BeginTx(ctx, nil)
+
if err != nil {
+
log.Printf("failed to begin transaction: %v\n", err)
+
}
+
}
+
+
var event jetstream.Event
+
if err := json.Unmarshal(evt, &event); err != nil {
+
continue
+
}
+
+
if event.Kind != jetstream.EventKindCommit {
+
continue
+
}
+
if event.Commit.Operation != jetstream.CommitOperationCreate {
+
// we're missing deletes and updates but this matches how bsky-activity
+
// does it so we stay consistent
+
continue
+
}
+
+
did := event.Did
+
commit := *event.Commit
+
ts := time.Now().UTC().Unix()
+
+
if _, ok := AppBskyAllowlist[commit.Collection]; !ok {
+
continue
+
}
+
+
dbTx.ExecContext(ctx, userTimestampUpdate, did, ts, ts)
+
+
eventCount += 1
+
if eventCount%1000 == 0 {
+
if err := dbTx.Commit(); err != nil {
+
log.Printf("commit failed: %v\n")
+
}
+
+
var results CheckpointResults
+
err := dbCnx.QueryRowContext(ctx, "PRAGMA wal_checkpoint(RESTART)").Scan(&results.Blocked, &results.Pages, &results.Transferred)
+
switch {
+
case err != nil:
+
log.Printf("failed checkpoint: %v\n", err)
+
case results.Blocked == 1:
+
log.Printf("checkpoint: blocked\n")
+
case results.Pages == results.Transferred:
+
log.Printf("checkpoint: %d pages transferred\n", results.Transferred)
+
case results.Pages != results.Transferred:
+
log.Printf("checkpoint: %d pages, %d transferred\n", results.Pages, results.Transferred)
+
}
+
+
dbTx, err = dbCnx.BeginTx(ctx, nil)
+
if err != nil {
+
log.Printf("failed to begin transaction: %v\n", err)
+
}
+
}
+
}
+
}
+
+
func main() {
+
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+
defer stop()
+
+
conn, _, err := websocket.DefaultDialer.Dial(JetstreamUrl, nil)
+
if err != nil {
+
log.Fatalf("failed to open websocket: %v\n", err)
+
}
+
defer func() {
+
if err := conn.Close(); err != nil {
+
log.Printf("failed to close websocket: %v\n", err)
+
}
+
log.Printf("websocket closed\n")
+
}()
+
+
// TODO(ejd): use more readable URL params for this
+
dbCnx, err := sql.Open("sqlite3", "data/bsky-users.db?_journal=WAL&_fk=on&_timeout=5000&_sync=1&_txlock=immediate")
+
if err != nil {
+
log.Fatalf("failed to open database: %v\n", err)
+
}
+
defer func() {
+
if _, err := dbCnx.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
+
log.Printf("error doing final WAL checkpoint: %v\n", err)
+
}
+
if err := dbCnx.Close(); err != nil {
+
log.Printf("failed to close db: %v\n", err)
+
}
+
log.Printf("db closed\n")
+
}()
+
+
jetstreamEvents := make(chan []byte)
+
go handler(ctx, jetstreamEvents, dbCnx)
+
+
log.Printf("starting up\n")
+
go func() {
+
for {
+
_, message, err := conn.ReadMessage()
+
if err != nil {
+
stop()
+
}
+
jetstreamEvents <- message
+
}
+
}()
+
+
<-ctx.Done()
+
log.Printf("shutting down\n")
+
}
+3
cmd/bsky-users/schema.sql
···
+
-- One row per DID, holding the timestamp written by the service's upsert.
CREATE TABLE IF NOT EXISTS users (
    did TEXT,
    ts  TIMESTAMP
);

-- Uniqueness on did is what the upsert's "on conflict (did)" clause targets.
CREATE UNIQUE INDEX IF NOT EXISTS did_idx ON users (did);

-- Index on ts; presumably for time-range queries over recent activity.
CREATE INDEX IF NOT EXISTS ts_idx ON users (ts);
+1
go.mod
···
github.com/bluesky-social/indigo v0.0.0-20240905024844-a4f38639767f
github.com/bluesky-social/jetstream v0.0.0-20241020000921-dcd43344c716
github.com/gorilla/websocket v1.5.1
+
github.com/mattn/go-sqlite3 v1.14.22
github.com/pemistahl/lingua-go v1.4.0
github.com/redis/go-redis/v9 v9.3.0
)
+2
go.sum
···
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
+
github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU=
+
github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM=
github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8=
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
+16
service/bsky-users.service
···
+
[Unit]
+
Description=bsky users
+
After=network.target syslog.target
+
+
[Service]
+
Type=simple
+
User=eric
+
WorkingDirectory=/home/eric/bsky-tools
+
ExecStart=/home/eric/bsky-tools/bin/bsky-users
+
TimeoutSec=10
+
Restart=always
+
RestartSec=5
+
StandardOutput=journal
+
+
[Install]
+
WantedBy=multi-user.target