its for when you want to get like notifications for your reposts

feat: shard jetstream connections

ptr.pet 1e47dacb 76405e9f

verified
Changed files
+152 -33
server
+119 -13
server/jetstream.go
···
import (
"context"
+
"fmt"
"log/slog"
+
"sync"
+
"time"
"github.com/bluesky-social/jetstream/pkg/client"
"github.com/bluesky-social/jetstream/pkg/client/schedulers/sequential"
"github.com/bluesky-social/jetstream/pkg/models"
)
+
func Chunk[T any](slice []T, chunkSize int) [][]T {
+
var chunks [][]T
+
for i := 0; i < len(slice); i += chunkSize {
+
end := min(i+chunkSize, len(slice))
+
chunks = append(chunks, slice[i:end])
+
}
+
return chunks
+
}
+
+
type Stream struct {
+
inner *client.Client
+
cancel context.CancelFunc
+
}
+
type StreamManager struct {
+
ctx context.Context
+
logger *slog.Logger
+
streamsLock sync.Mutex
+
streams map[int]Stream
+
name string
+
handleEvent HandleEvent
+
optsFn OptsFn
+
}
+
+
func NewStreamManager(logger *slog.Logger, name string, handleEvent HandleEvent, optsFn OptsFn) StreamManager {
+
return StreamManager{
+
ctx: context.TODO(),
+
logger: logger.With("stream", name),
+
streamsLock: sync.Mutex{},
+
streams: make(map[int]Stream),
+
name: name,
+
handleEvent: handleEvent,
+
optsFn: optsFn,
+
}
+
}
+
+
// doesnt lock streams!!!
+
func (manager *StreamManager) startSingle(id int, opts models.SubscriberOptionsUpdatePayload) {
+
ctx, cancel := context.WithCancel(manager.ctx)
+
stream := Stream{inner: nil, cancel: cancel}
+
// add to streams and put on wait group
+
manager.streams[id] = stream
+
go startJetstreamLoop(ctx, manager.logger.With("streamId", id), &stream.inner, fmt.Sprintf("%s_%d", manager.name, id), manager.handleEvent, opts)
+
}
+
+
func (manager *StreamManager) chunkedOpts() ([]models.SubscriberOptionsUpdatePayload, int) {
+
results := make([]models.SubscriberOptionsUpdatePayload, 0)
+
opts := manager.optsFn()
+
for _, wantedDidsChunk := range Chunk(opts.WantedDIDs, 9999) {
+
results = append(results, models.SubscriberOptionsUpdatePayload{
+
WantedCollections: opts.WantedCollections,
+
WantedDIDs: wantedDidsChunk,
+
MaxMessageSizeBytes: opts.MaxMessageSizeBytes,
+
})
+
}
+
return results, len(opts.WantedDIDs)
+
}
+
+
func (manager *StreamManager) updateOpts() {
+
chunks, userCount := manager.chunkedOpts()
+
manager.streamsLock.Lock()
+
idsSeen := make(map[int]struct{}, 0)
+
// update existing streams or create new ones
+
for id, opts := range chunks {
+
idsSeen[id] = struct{}{}
+
if len(manager.streams) > id {
+
stream := manager.streams[id]
+
if stream.inner == nil {
+
continue
+
}
+
if err := stream.inner.SendOptionsUpdate(opts); err != nil {
+
manager.logger.Error("couldnt update follow stream opts", "error", err, "streamId", id)
+
}
+
} else {
+
manager.startSingle(id, opts)
+
}
+
}
+
// cancel and delete unused streams
+
for k := range manager.streams {
+
if _, exists := idsSeen[k]; !exists {
+
manager.streams[k].cancel()
+
delete(manager.streams, k)
+
}
+
}
+
manager.streamsLock.Unlock()
+
manager.logger.Info("updated opts", "userCount", userCount)
+
}
+
type HandleEvent func(context.Context, *models.Event) error
+
type OptsFn func() models.SubscriberOptionsUpdatePayload
-
func startJetstreamLoop(logger *slog.Logger, outStream **client.Client, name string, handleEvent HandleEvent, optsFn func() models.SubscriberOptionsUpdatePayload) {
+
func startJetstreamLoop(ctx context.Context, logger *slog.Logger, outStream **client.Client, name string, handleEvent HandleEvent, opts models.SubscriberOptionsUpdatePayload) {
+
backoff := time.Second
for {
-
stream, startFn, err := startJetstreamClient(name, optsFn(), handleEvent)
+
done := make(chan struct{})
+
if ctx.Err() != nil {
+
break
+
}
+
stream, startFn, err := startJetstreamClient(ctx, logger, name, handleEvent)
*outStream = stream
if startFn != nil {
-
err = startFn()
+
logger.Info("starting jetstream client", "collections", opts.WantedCollections, "userCount", len(opts.WantedDIDs))
+
go func() {
+
err = startFn()
+
done <- struct{}{}
+
}()
+
// HACK: we need to wait for the websocket connection to start here. so we do
+
// need to upstream something to jetstream client
+
time.Sleep(time.Second * 2)
+
err = stream.SendOptionsUpdate(opts)
+
if err == nil {
+
<-done
+
}
}
if err != nil {
-
logger.Error("stream failed", "name", name, "error", err)
+
logger.Error("stream failed", "error", err, "backoff", backoff)
+
time.Sleep(backoff)
+
backoff = backoff * 2
+
} else {
+
backoff = time.Second
}
}
}
-
func startJetstreamClient(name string, opts models.SubscriberOptionsUpdatePayload, handleEvent HandleEvent) (*client.Client, func() error, error) {
-
ctx := context.Background()
-
+
func startJetstreamClient(ctx context.Context, logger *slog.Logger, name string, handleEvent HandleEvent) (*client.Client, func() error, error) {
config := client.DefaultClientConfig()
config.WebsocketURL = "wss://jetstream1.us-west.bsky.network/subscribe"
config.Compress = true
-
config.WantedCollections = opts.WantedCollections
-
config.WantedDids = opts.WantedDIDs
-
config.RequireHello = false
+
config.RequireHello = true
scheduler := sequential.NewScheduler(name, logger, handleEvent)
c, err := client.NewClient(config, logger, scheduler)
if err != nil {
-
logger.Error("failed to create jetstream client", "name", name, "error", err)
+
logger.Error("failed to create jetstream client", "error", err)
return nil, nil, err
}
startFn := func() error {
-
logger.Info("starting jetstream client", "name", name, "collections", opts.WantedCollections, "wanted_dids", len(opts.WantedDIDs))
if err := c.ConnectAndRead(ctx, nil); err != nil {
-
logger.Error("jetstream client failed", "name", name, "error", err)
+
logger.Error("jetstream client failed", "error", err)
return err
}
+33 -20
server/main.go
···
"github.com/bluesky-social/indigo/api/bsky"
"github.com/bluesky-social/indigo/atproto/syntax"
"github.com/bluesky-social/indigo/xrpc"
-
"github.com/bluesky-social/jetstream/pkg/client"
"github.com/bluesky-social/jetstream/pkg/models"
"github.com/cornelk/hashmap"
"github.com/google/uuid"
···
type ActorData struct {
targets *hashmap.Map[string, *SubscriberData]
-
likes map[syntax.RecordKey]bsky.FeedLike
+
likes *hashmap.Map[syntax.RecordKey, bsky.FeedLike]
follows *hashmap.Map[syntax.RecordKey, bsky.GraphFollow]
followsCursor atomic.Pointer[string]
profile *bsky.ActorDefs_ProfileViewDetailed
···
subscribers = hashmap.New[string, *SubscriberData]()
actorData = hashmap.New[syntax.DID, *ActorData]()
-
likeStream *client.Client
-
followStream *client.Client
+
likeStreams StreamManager
+
followStreams StreamManager
upgrader = websocket.Upgrader{
CheckOrigin: func(r *http.Request) bool {
···
return dids
}
+
func getLikeDids() []string {
+
_dids := make(Set[string], subscribers.Len()*5000)
+
subscribers.Range(func(s string, sd *SubscriberData) bool {
+
for did := range sd.listenTo {
+
_dids[string(did)] = struct{}{}
+
}
+
return true
+
})
+
dids := make([]string, 0, len(_dids))
+
for k := range _dids {
+
dids = append(dids, k)
+
}
+
return dids
+
}
+
func getActorData(did syntax.DID) *ActorData {
ud, _ := actorData.GetOrInsert(did, &ActorData{
targets: hashmap.New[string, *SubscriberData](),
-
likes: make(map[syntax.RecordKey]bsky.FeedLike),
+
likes: hashmap.New[syntax.RecordKey, bsky.FeedLike](),
follows: hashmap.New[syntax.RecordKey, bsky.GraphFollow](),
})
return ud
···
func main() {
logger = slog.Default()
-
go startJetstreamLoop(logger, &likeStream, "like_tracker", HandleLikeEvent, getLikeStreamOpts)
-
go startJetstreamLoop(logger, &followStream, "subscriber", HandleFollowEvent, getFollowStreamOpts)
+
likeStreams = NewStreamManager(logger, "like-tracker", HandleLikeEvent, getLikeStreamOpts)
+
followStreams = NewStreamManager(logger, "subscriber", HandleFollowEvent, getFollowStreamOpts)
r := mux.NewRouter()
r.HandleFunc("/subscribe/{did}", handleSubscribe).Methods("GET")
···
for listenDid := range sd.listenTo {
markActorForLikes(sid, sd, listenDid)
}
-
updateFollowStreamOpts()
+
updateStreamOpts()
// delete subscriber after we are done
defer func() {
for listenDid := range sd.listenTo {
unmarkActorForLikes(sid, listenDid)
}
subscribers.Del(sid)
-
updateFollowStreamOpts()
+
updateStreamOpts()
}()
logger.Info("serving subscriber")
···
logger.Info("invalid message", "error", err)
break
}
+
// remove all current listens and add the ones the user requested
for listenDid := range sd.listenTo {
unmarkActorForLikes(sid, listenDid)
···
sd.listenTo[listenDid] = struct{}{}
markActorForLikes(sid, sd, listenDid)
}
+
+
updateStreamOpts()
}
}
}
···
func getLikeStreamOpts() models.SubscriberOptionsUpdatePayload {
return models.SubscriberOptionsUpdatePayload{
WantedCollections: []string{"app.bsky.feed.like"},
+
WantedDIDs: getLikeDids(),
}
}
···
}
}
-
func updateFollowStreamOpts() {
-
opts := getFollowStreamOpts()
-
err := followStream.SendOptionsUpdate(opts)
-
if err != nil {
-
logger.Error("couldnt update follow stream opts", "error", err)
-
return
-
}
-
logger.Info("updated follow stream opts", "userCount", len(opts.WantedDIDs))
+
func updateStreamOpts() {
+
likeStreams.updateOpts()
+
followStreams.updateOpts()
}
func HandleLikeEvent(ctx context.Context, event *models.Event) error {
···
var like bsky.FeedLike
if deleted {
-
if l, exists := ud.likes[rkey]; exists {
+
if l, exists := ud.likes.Get(rkey); exists {
like = l
-
defer delete(ud.likes, rkey)
+
defer ud.likes.Del(rkey)
} else {
logger.Error("like record not found", "rkey", rkey)
return nil
···
// store for later when it gets deleted so we can fetch the record
if !deleted {
-
ud.likes[rkey] = like
+
ud.likes.Insert(rkey, like)
}
repostURI := syntax.ATURI(like.Via.Uri)