an app.bsky.* indexer

Misc updates

- Reduce number of parallel backfills
- Reduce logging
- Add ActorProfile Labels, StarterPacks, and PinnedPost
- Use .Assign() and .FirstOrCreate() instead of .Save()
- Use local fork of indigo for now
- Move ActorProfile and friends to models/actor_profile.go
- Add models/strongref.go, Use StrongRef as anonymous struct embed

+2 -2
cmd/monarch/backfill.go
···
func NewBackfillService(store backfill.Store, h *HandlerService) *backfill.Backfiller {
opts := &backfill.BackfillOptions{
-
ParallelBackfills: 10,
ParallelRecordCreates: 1,
NSIDFilter: "",
-
SyncRequestsPerSecond: 5,
RelayHost: "https://bsky.network",
}
···
func NewBackfillService(store backfill.Store, h *HandlerService) *backfill.Backfiller {
opts := &backfill.BackfillOptions{
+
ParallelBackfills: 1,
ParallelRecordCreates: 1,
NSIDFilter: "",
+
SyncRequestsPerSecond: 2,
RelayHost: "https://bsky.network",
}
-1
cmd/monarch/census.go
···
default:
}
-
slog.Info("listing repos", "cursor", curs)
res, err := atproto.SyncListRepos(ctx, xrpcc, curs, 1000)
if err != nil {
slog.Error("error listing repos", "err", err)
···
default:
}
res, err := atproto.SyncListRepos(ctx, xrpcc, curs, 1000)
if err != nil {
slog.Error("error listing repos", "err", err)
-1
cmd/monarch/cursors.go
···
case <-t.C:
}
-
slog.Info("flushing cursors")
if err := cs.Flush(); err != nil {
slog.Error("error flushing cursors", "err", err)
return
···
case <-t.C:
}
if err := cs.Flush(); err != nil {
slog.Error("error flushing cursors", "err", err)
return
+11 -3
cmd/monarch/handlers.go
···
}
func NewHandlerService(store *gorm.DB) *HandlerService {
-
store.AutoMigrate(&models.ActorProfile{})
return &HandlerService{
store: store,
···
switch uri.Collection() {
case syntax.NSID("app.bsky.actor.profile"):
profile := models.NewActorProfile(uri, *rec)
-
if err := hs.store.Save(profile).Error; err != nil {
-
return fmt.Errorf("error saving profile: %w", err)
}
}
···
}
func NewHandlerService(store *gorm.DB) *HandlerService {
+
migrations := []any{
+
&models.ActorProfile{},
+
&models.ActorProfile_Label{},
+
&models.ActorProfile_JoinedViaStarterPack{},
+
&models.ActorProfile_PinnedPost{},
+
}
+
for _, migration := range migrations {
+
store.AutoMigrate(migration)
+
}
return &HandlerService{
store: store,
···
switch uri.Collection() {
case syntax.NSID("app.bsky.actor.profile"):
profile := models.NewActorProfile(uri, *rec)
+
if err := hs.store.Where(models.ActorProfile{ID: string(uri)}).Assign(profile).FirstOrCreate(&models.ActorProfile{}).Error; err != nil {
+
return fmt.Errorf("error upserting profile: %w", err)
}
}
+3 -1
cmd/monarch/main.go
···
}
func (app *App) Start(ctx context.Context) error {
app.cursor = NewCursorService(app.state)
go app.cursor.CheckpointCursors(ctx)
···
// TODO identity
}
-
sched := parallel.NewScheduler(4, 50, "firehose", rsc.EventHandler)
if err := events.HandleRepoStream(ctx, app.wsconn, sched, nil); err != nil {
return fmt.Errorf("error starting repo stream handler: %w", err)
···
}
func (app *App) Start(ctx context.Context) error {
+
slog.Info("starting up")
+
app.cursor = NewCursorService(app.state)
go app.cursor.CheckpointCursors(ctx)
···
// TODO identity
}
+
sched := parallel.NewScheduler(1, 50, "firehose", rsc.EventHandler)
if err := events.HandleRepoStream(ctx, app.wsconn, sched, nil); err != nil {
return fmt.Errorf("error starting repo stream handler: %w", err)
+2
go.mod
···
gorm.io/driver/postgres v1.5.7 // indirect
lukechampine.com/blake3 v1.2.1 // indirect
)
···
gorm.io/driver/postgres v1.5.7 // indirect
lukechampine.com/blake3 v1.2.1 // indirect
)
+
+
replace github.com/bluesky-social/indigo => ../../../bluesky-social/indigo
+87
models/actor_profile.go
···
···
+
package models
+
+
import (
+
"bytes"
+
"log/slog"
+
"time"
+
+
appbsky "github.com/bluesky-social/indigo/api/bsky"
+
"github.com/bluesky-social/indigo/atproto/syntax"
+
)
+
+
type ActorProfile struct {
+
ID string `gorm:"primaryKey"`
+
+
// Avatar
+
// Banner
+
CreatedAt *string
+
Description *string
+
DisplayName *string
+
JoinedViaStarterPack []ActorProfile_JoinedViaStarterPack
+
Labels []ActorProfile_Label
+
PinnedPost []ActorProfile_PinnedPost
+
+
AutoCreatedAt time.Time `gorm:"autoCreateTime"`
+
AutoUpdatedAt time.Time `gorm:"autoUpdateTime"`
+
}
+
+
type ActorProfile_Label struct {
+
ActorProfileID string
+
Value string
+
}
+
+
type ActorProfile_JoinedViaStarterPack struct {
+
ActorProfileID string
+
StrongRef
+
}
+
+
type ActorProfile_PinnedPost struct {
+
ActorProfileID string
+
StrongRef
+
}
+
+
func NewActorProfile(uri syntax.ATURI, rec []byte) *ActorProfile {
+
var out appbsky.ActorProfile
+
if err := out.UnmarshalCBOR(bytes.NewReader(rec)); err != nil {
+
slog.Error("could not unmarshal profile CBOR", "err", err)
+
return nil
+
}
+
+
profile := ActorProfile{
+
ID: string(uri),
+
CreatedAt: out.CreatedAt,
+
Description: out.Description,
+
DisplayName: out.DisplayName,
+
}
+
+
if out.JoinedViaStarterPack != nil {
+
profile.JoinedViaStarterPack = append(profile.JoinedViaStarterPack, ActorProfile_JoinedViaStarterPack{
+
ActorProfileID: profile.ID,
+
StrongRef: StrongRef{
+
Uri: out.JoinedViaStarterPack.Uri,
+
Cid: out.JoinedViaStarterPack.Cid,
+
},
+
})
+
}
+
+
if out.PinnedPost != nil {
+
profile.PinnedPost = append(profile.PinnedPost, ActorProfile_PinnedPost{
+
ActorProfileID: profile.ID,
+
StrongRef: StrongRef{
+
Uri: out.PinnedPost.Uri,
+
Cid: out.PinnedPost.Cid,
+
},
+
})
+
}
+
+
if out.Labels != nil && out.Labels.LabelDefs_SelfLabels != nil && out.Labels.LabelDefs_SelfLabels.Values != nil {
+
for _, label := range out.Labels.LabelDefs_SelfLabels.Values {
+
profile.Labels = append(profile.Labels, ActorProfile_Label{
+
ActorProfileID: profile.ID,
+
Value: label.Val,
+
})
+
}
+
}
+
+
return &profile
+
}
-54
models/models.go
···
package models
-
import (
-
"bytes"
-
"log/slog"
-
"time"
-
-
appbsky "github.com/bluesky-social/indigo/api/bsky"
-
"github.com/bluesky-social/indigo/atproto/syntax"
-
)
-
-
type StrongRef struct {
-
Uri string
-
Cid string
-
}
-
-
type ActorProfile struct {
-
ID string `gorm:"primaryKey"`
-
-
// Avatar
-
// Banner
-
CreatedAt *string
-
Description *string
-
DisplayName *string
-
JoinedViaStarterPack StrongRef `gorm:"embedded;embeddedPrefix:joinedviastarterpack_"`
-
// Labels
-
// PinnedPost StrongRef
-
-
AutoCreatedAt time.Time `gorm:"autoCreateTime"`
-
AutoUpdatedAt time.Time `gorm:"autoUpdateTime"`
-
}
-
-
func NewActorProfile(uri syntax.ATURI, rec []byte) *ActorProfile {
-
var out appbsky.ActorProfile
-
if err := out.UnmarshalCBOR(bytes.NewReader(rec)); err != nil {
-
slog.Error("could not unmarshal profile CBOR", "err", err)
-
return nil
-
}
-
-
profile := ActorProfile{
-
ID: string(uri),
-
CreatedAt: out.CreatedAt,
-
Description: out.Description,
-
DisplayName: out.DisplayName,
-
}
-
-
if out.JoinedViaStarterPack != nil {
-
profile.JoinedViaStarterPack = StrongRef{
-
Uri: out.JoinedViaStarterPack.Uri,
-
Cid: out.JoinedViaStarterPack.Cid,
-
}
-
}
-
-
return &profile
-
}
-
// ActorStatus
// FeedGenerator
// FeedLike
···
package models
// ActorStatus
// FeedGenerator
// FeedLike
+6
models/strongref.go
···
···
+
package models
+
+
type StrongRef struct {
+
Uri string
+
Cid string
+
}