···
···
// ProcessJob carries a downloaded repo's raw CAR bytes and its DID to the
// processing workers.
type ProcessJob struct {
	repoBytes []byte
	did       string
}

// RepoDownloader keeps per-service HTTP clients and rate limiters, and hands
// downloaded repos off to the processing workers over processChan.
type RepoDownloader struct {
	clients     map[string]*http.Client
	rateLimits  map[string]ratelimit.Limiter
	processChan chan ProcessJob
}

func NewRepoDownloader(p *Photocopy) *RepoDownloader {
	return &RepoDownloader{
		clients:     make(map[string]*http.Client),
		rateLimits:  make(map[string]ratelimit.Limiter),
		processChan: make(chan ProcessJob, 1000),
···
bs := atproto_repo.NewTinyBlockstore()
cs, err := car.NewCarReader(bytes.NewReader(b))
if err != nil {
	fmt.Println("error opening car", err)
	return fmt.Errorf("error opening car: %w", err)
}
···
r, err := repo.OpenRepo(context.TODO(), bs, cs.Header.Roots[0])
if err != nil || r == nil {
	fmt.Println("error opening repo", err)
	return fmt.Errorf("could not open repo: %v", err)
}
···
Status string `json:"status"`
// runProcessRepoWorker drains the job channel and processes each repo it receives.
func (p *Photocopy) runProcessRepoWorker(ctx context.Context, jobs <-chan ProcessJob) {
	for j := range jobs {
		p.processRepo(ctx, j.repoBytes, j.did)
	}
}
func (p *Photocopy) runBackfiller(ctx context.Context) error {
	fmt.Println("querying clickhouse for dids and services...")

	// Collect the DIDs whose records are already in ClickHouse so we can skip them.
	var alreadyFetched []string
	if err := p.conn.Select(ctx, &alreadyFetched, "SELECT DISTINCT(did) FROM default.record WHERE created_at < '2025-07-01'"); err != nil {
		return err
	}

	alreadyFetchedMap := map[string]bool{}
	for _, d := range alreadyFetched {
		alreadyFetchedMap[d] = true
	}

	fmt.Println("getting dids")
	var sevs []ListServicesResponseItem
···
downloader := NewRepoDownloader(p)
serviceDids := map[string][]string{}

// Start half as many repo-processing workers as there are CPUs.
for range runtime.NumCPU() / 2 {
	go p.runProcessRepoWorker(ctx, downloader.processChan)
}

for s := range serviceDids {
···
for _, r := range repos {
	// Only queue DIDs that haven't already been backfilled.
	if !alreadyFetchedMap[r.Did] {
		dids = append(dids, r.Did)
	}
···
// Hand the downloaded repo off to the processing workers without blocking the download loop.
go func(b []byte, did string) {
	downloader.processChan <- ProcessJob{repoBytes: b, did: did}