···
···
 24 + type ProcessJob struct {
      type RepoDownloader struct {
 24 -     clients    map[string]*http.Client
 25 -     rateLimits map[string]ratelimit.Limiter
 30 +     clients     map[string]*http.Client
 31 +     rateLimits  map[string]ratelimit.Limiter
 32 +     processChan chan ProcessJob
      func NewRepoDownloader(p *Photocopy) *RepoDownloader {
 32 -     clients:    make(map[string]*http.Client),
 33 -     rateLimits: make(map[string]ratelimit.Limiter),
 39 +     clients:     make(map[string]*http.Client),
 40 +     rateLimits:  make(map[string]ratelimit.Limiter),
 42 +     processChan: make(chan ProcessJob, 1000),
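
The fields of ProcessJob are collapsed in the hunk above, but from how jobs are built and consumed later in the diff (ProcessJob{repoBytes: b, did: did} and j.repoBytes / j.did) the type is presumably just the downloaded repo bytes plus the DID they belong to. A minimal sketch of that inferred shape:

// Inferred from usage elsewhere in the diff; the actual struct body is collapsed above.
type ProcessJob struct {
    repoBytes []byte // raw CAR bytes of the downloaded repo
    did       string // DID the repo belongs to
}

The buffered processChan (capacity 1000) lets the downloaders run ahead of the processing workers without blocking on every handoff.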
···
      bs := atproto_repo.NewTinyBlockstore()
      cs, err := car.NewCarReader(bytes.NewReader(b))
133 +     fmt.Println("error opening car", err)
          return fmt.Errorf("error opening car: %v\n", err)
···
      r, err := repo.OpenRepo(context.TODO(), bs, cs.Header.Roots[0])
      if err != nil || r == nil {
146 +     fmt.Println("error opening repo", err)
          fmt.Printf("could not open repo: %v", err)
···
      Status string `json:"status"`
251 + func (p *Photocopy) runProcessRepoWorker(ctx context.Context, jobs <-chan ProcessJob) {
252 +     for j := range jobs {
253 +         p.processRepo(ctx, j.repoBytes, j.did)
      func (p *Photocopy) runBackfiller(ctx context.Context) error {
          fmt.Println("querying clickhouse for dids and services...")
262 +     var alreadyFetched []string
263 +     if err := p.conn.Select(ctx, &alreadyFetched, "SELECT DISTINCT(did) FROM default.record WHERE created_at < '2025-07-01'"); err != nil {
267 +     alreadyFetchedMap := map[string]bool{}
268 +     for _, d := range alreadyFetched {
269 +         alreadyFetchedMap[d] = true
272 +     fmt.Println("getting dids")
          var sevs []ListServicesResponseItem
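
The closing braces of the new worker are collapsed in the hunk above; filled in, it's roughly:

func (p *Photocopy) runProcessRepoWorker(ctx context.Context, jobs <-chan ProcessJob) {
    for j := range jobs {
        // Note: the error from processRepo is dropped here; the removed
        // inline call further down used to log it.
        p.processRepo(ctx, j.repoBytes, j.did)
    }
}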
···
      downloader := NewRepoDownloader(p)
      serviceDids := map[string][]string{}
330 +     for range runtime.NumCPU() / 2 {
331 +         go p.runProcessRepoWorker(ctx, downloader.processChan)
      for s := range servicesDids {
···
      for _, r := range repos {
347 +     if alreadyFetchedMap[r.Did] {
          dids = append(dids, r.Did)
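
The body of that new if is collapsed, but since alreadyFetchedMap holds the DIDs whose records are already in ClickHouse, the natural reading is that they get skipped before being queued, roughly:

for _, r := range repos {
    if alreadyFetchedMap[r.Did] {
        continue // already backfilled on a previous run, skip it
    }
    dids = append(dids, r.Did)
}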
···
      go func(b []byte, did string) {
368 -     if err := p.processRepo(ctx, b, did); err != nil {
369 -         fmt.Printf("error processing backfill record: %v\n", err)
404 +     downloader.processChan <- ProcessJob{repoBytes: b, did: did}
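
With that, the per-repo goroutines only hand the bytes off; the fixed pool of workers started earlier does the actual processing and ClickHouse writes. One thing these hunks don't show is shutdown: because each worker sits in for j := range jobs, it only returns once processChan is closed. Assuming the download goroutines are tracked with a sync.WaitGroup (the diff doesn't show how they're awaited), the wiring is roughly:

wg.Wait()                     // every download goroutine has queued its job
close(downloader.processChan) // workers drain the buffer, then their range loops return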