1package git
2
3import (
4 "context"
5 "path"
6 "strings"
7
8 "github.com/go-enry/go-enry/v2"
9 "github.com/go-git/go-git/v5/plumbing/object"
10)
11
// LangBreakdown maps a language name to the accumulated size of the files
// attributed to that language, as produced by AnalyzeLanguages. The values are
// sums of what object.Tree.Size reports (presumably bytes; confirm against
// go-git).
type LangBreakdown map[string]int64
13
14func (g *GitRepo) AnalyzeLanguages(ctx context.Context) (LangBreakdown, error) {
15 sizes := make(map[string]int64)
16 err := g.Walk(ctx, "", func(node object.TreeEntry, parent *object.Tree, root string) error {
17 filepath := path.Join(root, node.Name)
18
19 content, err := g.FileContentN(filepath, 16*1024) // 16KB
20 if err != nil {
21 return nil
22 }
23
24 if enry.IsGenerated(filepath, content) ||
25 enry.IsBinary(content) ||
26 strings.HasSuffix(filepath, "bun.lock") {
27 return nil
28 }
29
30 language := analyzeLanguage(node, content)
31 if group := enry.GetLanguageGroup(language); group != "" {
32 language = group
33 }
34
35 langType := enry.GetLanguageType(language)
36 if langType != enry.Programming && langType != enry.Markup && langType != enry.Unknown {
37 return nil
38 }
39
40 sz, _ := parent.Size(node.Name)
41 sizes[language] += sz
42
43 return nil
44 })
45
46 if err != nil {
47 return nil, err
48 }
49
50 return sizes, nil
51}
52
53func analyzeLanguage(node object.TreeEntry, content []byte) string {
54 language, ok := enry.GetLanguageByExtension(node.Name)
55 if ok {
56 return language
57 }
58
59 language, ok = enry.GetLanguageByFilename(node.Name)
60 if ok {
61 return language
62 }
63
64 if len(content) == 0 {
65 return enry.OtherLanguage
66 }
67
68 return enry.GetLanguage(node.Name, content)
69}