1package jetstream
2
3import (
4 "context"
5 "fmt"
6 "log/slog"
7 "os"
8 "os/signal"
9 "sync"
10 "syscall"
11 "time"
12
13 "github.com/bluesky-social/jetstream/pkg/client"
14 "github.com/bluesky-social/jetstream/pkg/client/schedulers/sequential"
15 "github.com/bluesky-social/jetstream/pkg/models"
16 "tangled.sh/tangled.sh/core/log"
17)
18
// DB is the persistence interface the client uses to store and restore its
// stream cursor, expressed as a time_us value (Unix microseconds).
type DB interface {
	// GetLastTimeUs returns the most recently stored cursor.
	GetLastTimeUs() (int64, error)
	// SaveLastTimeUs stores lastTimeUs as the new cursor.
	SaveLastTimeUs(lastTimeUs int64) error
}
23
24type Set[T comparable] map[T]struct{}
25
26type JetstreamClient struct {
27 cfg *client.ClientConfig
28 client *client.Client
29 ident string
30 l *slog.Logger
31
32 logDids bool
33 wantedDids Set[string]
34 db DB
35 waitForDid bool
36 mu sync.RWMutex
37
38 cancel context.CancelFunc
39 cancelMu sync.Mutex
40}
41
42func (j *JetstreamClient) AddDid(did string) {
43 if did == "" {
44 return
45 }
46
47 if j.logDids {
48 j.l.Info("adding did to in-memory filter", "did", did)
49 }
50 j.mu.Lock()
51 j.wantedDids[did] = struct{}{}
52 j.mu.Unlock()
53}
54
55func (j *JetstreamClient) RemoveDid(did string) {
56 if did == "" {
57 return
58 }
59
60 if j.logDids {
61 j.l.Info("removing did from in-memory filter", "did", did)
62 }
63 j.mu.Lock()
64 delete(j.wantedDids, did)
65 j.mu.Unlock()
66}
67
68type processor func(context.Context, *models.Event) error
69
70func (j *JetstreamClient) withDidFilter(processFunc processor) processor {
71 // empty filter => all dids allowed
72 if len(j.wantedDids) == 0 {
73 return processFunc
74 }
75 // since this closure references j.WantedDids; it should auto-update
76 // existing instances of the closure when j.WantedDids is mutated
77 return func(ctx context.Context, evt *models.Event) error {
78 if _, ok := j.wantedDids[evt.Did]; ok {
79 return processFunc(ctx, evt)
80 } else {
81 return nil
82 }
83 }
84}
85
86func NewJetstreamClient(endpoint, ident string, collections []string, cfg *client.ClientConfig, logger *slog.Logger, db DB, waitForDid, logDids bool) (*JetstreamClient, error) {
87 if cfg == nil {
88 cfg = client.DefaultClientConfig()
89 cfg.WebsocketURL = endpoint
90 cfg.WantedCollections = collections
91 }
92
93 return &JetstreamClient{
94 cfg: cfg,
95 ident: ident,
96 db: db,
97 l: logger,
98 wantedDids: make(map[string]struct{}),
99
100 logDids: logDids,
101
102 // This will make the goroutine in StartJetstream wait until
103 // j.wantedDids has been populated, typically using addDids.
104 waitForDid: waitForDid,
105 }, nil
106}
107
108// StartJetstream starts the jetstream client and processes events using the provided processFunc.
109// The caller is responsible for saving the last time_us to the database (just use your db.UpdateLastTimeUs).
110func (j *JetstreamClient) StartJetstream(ctx context.Context, processFunc func(context.Context, *models.Event) error) error {
111 logger := j.l
112
113 sched := sequential.NewScheduler(j.ident, logger, j.withDidFilter(processFunc))
114
115 client, err := client.NewClient(j.cfg, log.New("jetstream"), sched)
116 if err != nil {
117 return fmt.Errorf("failed to create jetstream client: %w", err)
118 }
119 j.client = client
120
121 go func() {
122 if j.waitForDid {
123 for len(j.wantedDids) == 0 {
124 time.Sleep(time.Second)
125 }
126 }
127 logger.Info("done waiting for did")
128
129 go j.periodicLastTimeSave(ctx)
130 j.saveIfKilled(ctx)
131
132 j.connectAndRead(ctx)
133 }()
134
135 return nil
136}
137
138func (j *JetstreamClient) connectAndRead(ctx context.Context) {
139 l := log.FromContext(ctx)
140 for {
141 cursor := j.getLastTimeUs(ctx)
142
143 connCtx, cancel := context.WithCancel(ctx)
144 j.cancelMu.Lock()
145 j.cancel = cancel
146 j.cancelMu.Unlock()
147
148 if err := j.client.ConnectAndRead(connCtx, cursor); err != nil {
149 l.Error("error reading jetstream", "error", err)
150 cancel()
151 continue
152 }
153
154 select {
155 case <-ctx.Done():
156 l.Info("context done, stopping jetstream")
157 return
158 case <-connCtx.Done():
159 l.Info("connection context done, reconnecting")
160 continue
161 }
162 }
163}
164
165// save cursor periodically
166func (j *JetstreamClient) periodicLastTimeSave(ctx context.Context) {
167 ticker := time.NewTicker(time.Minute)
168 defer ticker.Stop()
169
170 for {
171 select {
172 case <-ctx.Done():
173 return
174 case <-ticker.C:
175 j.db.SaveLastTimeUs(time.Now().UnixMicro())
176 }
177 }
178}
179
180func (j *JetstreamClient) getLastTimeUs(ctx context.Context) *int64 {
181 l := log.FromContext(ctx)
182 lastTimeUs, err := j.db.GetLastTimeUs()
183 if err != nil {
184 l.Warn("couldn't get last time us, starting from now", "error", err)
185 lastTimeUs = time.Now().UnixMicro()
186 err = j.db.SaveLastTimeUs(lastTimeUs)
187 if err != nil {
188 l.Error("failed to save last time us", "error", err)
189 }
190 }
191
192 // If last time is older than 2 days, start from now
193 if time.Now().UnixMicro()-lastTimeUs > 2*24*60*60*1000*1000 {
194 lastTimeUs = time.Now().UnixMicro()
195 l.Warn("last time us is older than 2 days; discarding that and starting from now")
196 err = j.db.SaveLastTimeUs(lastTimeUs)
197 if err != nil {
198 l.Error("failed to save last time us", "error", err)
199 }
200 }
201
202 l.Info("found last time_us", "time_us", lastTimeUs)
203 return &lastTimeUs
204}
205
206func (j *JetstreamClient) saveIfKilled(ctx context.Context) context.Context {
207 ctxWithCancel, cancel := context.WithCancel(ctx)
208
209 sigChan := make(chan os.Signal, 1)
210
211 signal.Notify(sigChan,
212 syscall.SIGINT,
213 syscall.SIGTERM,
214 syscall.SIGQUIT,
215 syscall.SIGHUP,
216 syscall.SIGKILL,
217 syscall.SIGSTOP,
218 )
219
220 go func() {
221 sig := <-sigChan
222 j.l.Info("Received signal, initiating graceful shutdown", "signal", sig)
223
224 lastTimeUs := time.Now().UnixMicro()
225 if err := j.db.SaveLastTimeUs(lastTimeUs); err != nil {
226 j.l.Error("Failed to save last time during shutdown", "error", err)
227 }
228 j.l.Info("Saved lastTimeUs before shutdown", "lastTimeUs", lastTimeUs)
229
230 j.cancelMu.Lock()
231 if j.cancel != nil {
232 j.cancel()
233 }
234 j.cancelMu.Unlock()
235
236 cancel()
237
238 os.Exit(0)
239 }()
240
241 return ctxWithCancel
242}