an app.bsky.* indexer

only crawl hosts once

Changed files
+19 -2
cmd
monarch
+19 -2
cmd/monarch/census.go
···
cursor *CursorService
backfill *backfill.Backfiller
+
seenHosts map[string]bool
+
seenLk sync.Mutex
+
storeLk sync.Mutex
}
···
// NewCensusService returns a CensusService wired to the given cursor and
// backfill services, with an empty seenHosts map ready for use. The mutex
// fields are not set here and so start at their zero value.
func NewCensusService(cursorSvc *CursorService, backfillSvc *backfill.Backfiller) *CensusService {
return &CensusService{
-
cursor: cursorSvc,
-
backfill: backfillSvc,
+
cursor: cursorSvc,
+
backfill: backfillSvc,
+
seenHosts: make(map[string]bool), // allocated up front: assigning into a nil map panics
}
}
···
}
for _, host := range res.Hosts {
+
// don't reprocess hosts already handled
+
cs.seenLk.Lock()
+
_, ok := cs.seenHosts[host.Hostname]
+
cs.seenLk.Unlock()
+
if ok {
+
slog.Info("already processed host, skipping")
+
continue
+
}
+
sem.Acquire(ctx, 1)
wg.Add(1) // TODO wg.Go
go func() {
···
break
}
}
+
+
cs.seenLk.Lock()
+
defer cs.seenLk.Unlock()
+
cs.seenHosts[host] = true
slog.Info("finished listing repos", "host", host)
}