From 76f70027c835ce099a2f9799032e31663dada0ad Mon Sep 17 00:00:00 2001
From: oppiliappan
Date: Thu, 17 Jul 2025 14:54:04 +0100
Subject: [PATCH] appview: use repo_language table to access language info
Change-Id: snktzuwttuvuxtnpusopnnqxwxmrwkro

this falls back to calling RepoLanguages on the knot, and caches that info at the appview.

Signed-off-by: oppiliappan
---
Review notes (ignored by git am):
- updateRepoLanguages: the metadata guard used `&&`, which dereferenced
  record.Meta exactly when it was nil; it now uses `||` so either a missing
  Meta or a missing LangBreakdown is rejected before being read.
- Indentation and column alignment of the hunks were restored by hand; if
  context fails to match, apply with `git apply --ignore-whitespace`.

 appview/repo/index.go       | 69 +++++++++++++++++++++++--------------
 appview/state/knotstream.go | 68 +++++++++++++++++++++++++++++-------
 2 files changed, 100 insertions(+), 37 deletions(-)

diff --git a/appview/repo/index.go b/appview/repo/index.go
index b9ea13e..9dcac0f 100644
--- a/appview/repo/index.go
+++ b/appview/repo/index.go
@@ -123,7 +123,7 @@ func (rp *Repo) RepoIndex(w http.ResponseWriter, r *http.Request) {
 		}
 	}
 
-	languageInfo, err := getLanguageInfo(f, signedClient, ref)
+	languageInfo, err := rp.getLanguageInfo(f, signedClient, ref)
 	if err != nil {
 		log.Printf("failed to compute language percentages: %s", err)
 		// non-fatal
@@ -153,41 +153,60 @@ func (rp *Repo) RepoIndex(w http.ResponseWriter, r *http.Request) {
 		Languages: languageInfo,
 		Pipelines: pipelines,
 	})
-	return
 }
 
-func getLanguageInfo(
+func (rp *Repo) getLanguageInfo(
 	f *reporesolver.ResolvedRepo,
 	signedClient *knotclient.SignedClient,
 	ref string,
 ) ([]types.RepoLanguageDetails, error) {
-	repoLanguages, err := signedClient.RepoLanguages(f.OwnerDid(), f.RepoName, ref)
-	if err != nil {
-		return []types.RepoLanguageDetails{}, err
-	}
-	if repoLanguages == nil {
-		repoLanguages = &types.RepoLanguageResponse{Languages: make(map[string]int64)}
+	// first attempt to fetch from db
+	langs, err := db.GetRepoLanguages(
+		rp.db,
+		db.FilterEq("repo_at", f.RepoAt),
+		db.FilterEq("ref", ref),
+	)
+
+	if err != nil || langs == nil {
+		// non-fatal, fetch langs from ks
+		ls, err := signedClient.RepoLanguages(f.OwnerDid(), f.RepoName, ref)
+		if err != nil {
+			return nil, err
+		}
+		if ls == nil {
+			return nil, nil
+		}
+		for l, s := range ls.Languages {
+			langs = append(langs, db.RepoLanguage{
+				RepoAt:   f.RepoAt,
+				Ref:      ref,
+				Language: l,
+				Bytes:    s,
+			})
+		}
+
+		// update appview's cache
+		err = db.InsertRepoLanguages(rp.db, langs)
+		if err != nil {
+			// non-fatal
+			log.Println("failed to cache lang results", err)
+		}
 	}
 
-	var totalSize int64
-	for _, fileSize := range repoLanguages.Languages {
-		totalSize += fileSize
+	var total int64
+	for _, l := range langs {
+		total += l.Bytes
 	}
 
 	var languageStats []types.RepoLanguageDetails
-	var otherPercentage float32 = 0
-
-	for lang, size := range repoLanguages.Languages {
-		percentage := (float32(size) / float32(totalSize)) * 100
-
-		if percentage <= 0.5 {
-			otherPercentage += percentage
-			continue
-		}
-
-		color := enry.GetColor(lang)
-
-		languageStats = append(languageStats, types.RepoLanguageDetails{Name: lang, Percentage: percentage, Color: color})
+	for _, l := range langs {
+		percentage := float32(l.Bytes) / float32(total) * 100
+		color := enry.GetColor(l.Language)
+		languageStats = append(languageStats, types.RepoLanguageDetails{
+			Name:       l.Language,
+			Percentage: percentage,
+			Color:      color,
+		})
 	}
 
 	sort.Slice(languageStats, func(i, j int) bool {
diff --git a/appview/state/knotstream.go b/appview/state/knotstream.go
index 9616a49..5544c7d 100644
--- a/appview/state/knotstream.go
+++ b/appview/state/knotstream.go
@@ -3,6 +3,7 @@ package state
 import (
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"slices"
 	"time"
@@ -18,6 +19,7 @@ import (
 	"tangled.sh/tangled.sh/core/workflow"
 
 	"github.com/bluesky-social/indigo/atproto/syntax"
+	"github.com/go-git/go-git/v5/plumbing"
 	"github.com/posthog/posthog-go"
 )
 
@@ -39,7 +41,7 @@ func Knotstream(ctx context.Context, c *config.Config, d *db.DB, enforcer *rbac.
 
 	cfg := ec.ConsumerConfig{
 		Sources:           srcs,
-		ProcessFunc:       knotIngester(ctx, d, enforcer, posthog, c.Core.Dev),
+		ProcessFunc:       knotIngester(d, enforcer, posthog, c.Core.Dev),
 		RetryInterval:     c.Knotstream.RetryInterval,
 		MaxRetryInterval:  c.Knotstream.MaxRetryInterval,
 		ConnectionTimeout: c.Knotstream.ConnectionTimeout,
@@ -53,7 +55,7 @@ func Knotstream(ctx context.Context, c *config.Config, d *db.DB, enforcer *rbac.
 	return ec.NewConsumer(cfg), nil
 }
 
-func knotIngester(ctx context.Context, d *db.DB, enforcer *rbac.Enforcer, posthog posthog.Client, dev bool) ec.ProcessFunc {
+func knotIngester(d *db.DB, enforcer *rbac.Enforcer, posthog posthog.Client, dev bool) ec.ProcessFunc {
 	return func(ctx context.Context, source ec.Source, msg ec.Message) error {
 		switch msg.Nsid {
 		case tangled.GitRefUpdateNSID:
@@ -81,10 +83,26 @@ func ingestRefUpdate(d *db.DB, enforcer *rbac.Enforcer, pc posthog.Client, dev b
 		return fmt.Errorf("%s does not belong to %s, something is fishy", record.CommitterDid, source.Key())
 	}
 
+	err1 := populatePunchcard(d, record)
+	err2 := updateRepoLanguages(d, record)
+
+	var err3 error
+	if !dev {
+		err3 = pc.Enqueue(posthog.Capture{
+			DistinctId: record.CommitterDid,
+			Event:      "git_ref_update",
+		})
+	}
+
+	return errors.Join(err1, err2, err3)
+}
+
+func populatePunchcard(d *db.DB, record tangled.GitRefUpdate) error {
 	knownEmails, err := db.GetAllEmails(d, record.CommitterDid)
 	if err != nil {
 		return err
 	}
+
 	count := 0
 	for _, ke := range knownEmails {
 		if record.Meta == nil {
@@ -108,21 +126,47 @@ func ingestRefUpdate(d *db.DB, enforcer *rbac.Enforcer, pc posthog.Client, dev b
 		Date:  time.Now(),
 		Count: count,
 	}
-	if err := db.AddPunch(d, punch); err != nil {
-		return err
+	return db.AddPunch(d, punch)
+}
+
+func updateRepoLanguages(d *db.DB, record tangled.GitRefUpdate) error {
+	if record.Meta == nil || record.Meta.LangBreakdown == nil {
+		return fmt.Errorf("empty language data for repo: %s/%s", record.RepoDid, record.RepoName)
 	}
 
-	if !dev {
-		err = pc.Enqueue(posthog.Capture{
-			DistinctId: record.CommitterDid,
-			Event:      "git_ref_update",
-		})
-		if err != nil {
-			// non-fatal, TODO: log this
+	repos, err := db.GetRepos(
+		d,
+		db.FilterEq("did", record.RepoDid),
+		db.FilterEq("name", record.RepoName),
+	)
+	if err != nil {
+		return fmt.Errorf("failed to look for repo in DB (%s/%s): %w", record.RepoDid, record.RepoName, err)
+	}
+	if len(repos) != 1 {
+		return fmt.Errorf("incorrect number of repos returned: %d (expected 1)", len(repos))
+	}
+	repo := repos[0]
+
+	ref := plumbing.ReferenceName(record.Ref)
+	if !ref.IsBranch() {
+		return fmt.Errorf("%s is not a valid reference name", ref)
+	}
+
+	var langs []db.RepoLanguage
+	for _, l := range record.Meta.LangBreakdown.Inputs {
+		if l == nil {
+			continue
 		}
+
+		langs = append(langs, db.RepoLanguage{
+			RepoAt:   repo.RepoAt(),
+			Ref:      ref.Short(),
+			Language: l.Lang,
+			Bytes:    l.Size,
+		})
 	}
 
-	return nil
+	return db.InsertRepoLanguages(d, langs)
 }
 
 func ingestPipeline(d *db.DB, source ec.Source, msg ec.Message) error {
-- 
2.43.0