forked from tangled.org/core
Monorepo for Tangled — https://tangled.org
at master 1.4 kB view raw
1package git 2 3import ( 4 "context" 5 "path" 6 "strings" 7 8 "github.com/go-enry/go-enry/v2" 9 "github.com/go-git/go-git/v5/plumbing/object" 10) 11 12type LangBreakdown map[string]int64 13 14func (g *GitRepo) AnalyzeLanguages(ctx context.Context) (LangBreakdown, error) { 15 sizes := make(map[string]int64) 16 err := g.Walk(ctx, "", func(node object.TreeEntry, parent *object.Tree, root string) error { 17 filepath := path.Join(root, node.Name) 18 19 content, err := g.FileContentN(filepath, 16*1024) // 16KB 20 if err != nil { 21 return nil 22 } 23 24 if enry.IsGenerated(filepath, content) || 25 enry.IsBinary(content) || 26 strings.HasSuffix(filepath, "bun.lock") { 27 return nil 28 } 29 30 language := analyzeLanguage(node, content) 31 if group := enry.GetLanguageGroup(language); group != "" { 32 language = group 33 } 34 35 langType := enry.GetLanguageType(language) 36 if langType != enry.Programming && langType != enry.Markup && langType != enry.Unknown { 37 return nil 38 } 39 40 sz, _ := parent.Size(node.Name) 41 sizes[language] += sz 42 43 return nil 44 }) 45 46 if err != nil { 47 return nil, err 48 } 49 50 return sizes, nil 51} 52 53func analyzeLanguage(node object.TreeEntry, content []byte) string { 54 language, ok := enry.GetLanguageByExtension(node.Name) 55 if ok { 56 return language 57 } 58 59 language, ok = enry.GetLanguageByFilename(node.Name) 60 if ok { 61 return language 62 } 63 64 if len(content) == 0 { 65 return enry.OtherLanguage 66 } 67 68 return enry.GetLanguage(node.Name, content) 69}