1package git
2
import (
	"bufio"
	"context"
	"crypto/sha256"
	"fmt"
	"io"
	"os/exec"
	"path"
	"strings"
	"sync"
	"time"

	"github.com/dgraph-io/ristretto"
	"github.com/go-git/go-git/v5/plumbing"
	"github.com/go-git/go-git/v5/plumbing/object"
)
18
19var (
20 commitCache *ristretto.Cache
21)
22
23func init() {
24 cache, _ := ristretto.NewCache(&ristretto.Config{
25 NumCounters: 1e7,
26 MaxCost: 1 << 30,
27 BufferItems: 64,
28 TtlTickerDurationInSec: 120,
29 })
30 commitCache = cache
31}
32
33func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.Reader, error) {
34 args := []string{}
35 args = append(args, "log")
36 args = append(args, g.h.String())
37 args = append(args, extraArgs...)
38
39 cmd := exec.CommandContext(ctx, "git", args...)
40 cmd.Dir = g.path
41
42 stdout, err := cmd.StdoutPipe()
43 if err != nil {
44 return nil, err
45 }
46
47 if err := cmd.Start(); err != nil {
48 return nil, err
49 }
50
51 return stdout, nil
52}
53
// commit is one entry parsed from the `git log --name-only` output consumed
// by calculateCommitTime. It is also the value type stored in commitCache.
type commit struct {
	// hash is the commit id, taken from the %H field of the log header line.
	hash plumbing.Hash
	// when is the date from the %ad field (requested with --date=iso). It is
	// the zero time if that field could not be parsed.
	when time.Time
	// files holds every path listed for the commit, each followed by its
	// ancestor directories.
	files []string
	// message is the %s field of the log header line.
	message string
}
60
61func cacheKey(g *GitRepo, path string) string {
62 sep := byte(':')
63 hash := sha256.Sum256(fmt.Append([]byte{}, g.path, sep, g.h.String(), sep, path))
64 return fmt.Sprintf("%x", hash)
65}
66
67func (g *GitRepo) calculateCommitTimeIn(ctx context.Context, subtree *object.Tree, parent string, timeout time.Duration) (map[string]commit, error) {
68 ctx, cancel := context.WithTimeout(ctx, timeout)
69 defer cancel()
70 return g.calculateCommitTime(ctx, subtree, parent)
71}
72
73func (g *GitRepo) calculateCommitTime(ctx context.Context, subtree *object.Tree, parent string) (map[string]commit, error) {
74 filesToDo := make(map[string]struct{})
75 filesDone := make(map[string]commit)
76 for _, e := range subtree.Entries {
77 fpath := path.Clean(path.Join(parent, e.Name))
78 filesToDo[fpath] = struct{}{}
79 }
80
81 for _, e := range subtree.Entries {
82 f := path.Clean(path.Join(parent, e.Name))
83 cacheKey := cacheKey(g, f)
84 if cached, ok := commitCache.Get(cacheKey); ok {
85 filesDone[f] = cached.(commit)
86 delete(filesToDo, f)
87 } else {
88 filesToDo[f] = struct{}{}
89 }
90 }
91
92 if len(filesToDo) == 0 {
93 return filesDone, nil
94 }
95
96 ctx, cancel := context.WithCancel(ctx)
97 defer cancel()
98
99 pathSpec := "."
100 if parent != "" {
101 pathSpec = parent
102 }
103 output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec)
104 if err != nil {
105 return nil, err
106 }
107
108 reader := bufio.NewReader(output)
109 var current commit
110 for {
111 line, err := reader.ReadString('\n')
112 if err != nil && err != io.EOF {
113 return nil, err
114 }
115 line = strings.TrimSpace(line)
116
117 if line == "" {
118 if !current.hash.IsZero() {
119 // we have a fully parsed commit
120 for _, f := range current.files {
121 if _, ok := filesToDo[f]; ok {
122 filesDone[f] = current
123 delete(filesToDo, f)
124 commitCache.Set(cacheKey(g, f), current, 0)
125 }
126 }
127
128 if len(filesToDo) == 0 {
129 cancel()
130 break
131 }
132 current = commit{}
133 }
134 } else if current.hash.IsZero() {
135 parts := strings.SplitN(line, ",", 3)
136 if len(parts) == 3 {
137 current.hash = plumbing.NewHash(parts[0])
138 current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1])
139 current.message = parts[2]
140 }
141 } else {
142 // all ancestors along this path should also be included
143 file := path.Clean(line)
144 ancestors := ancestors(file)
145 current.files = append(current.files, file)
146 current.files = append(current.files, ancestors...)
147 }
148
149 if err == io.EOF {
150 break
151 }
152 }
153
154 return filesDone, nil
155}
156
// ancestors returns every directory above p, nearest first. The walk stops
// before the top-level "." or "/" is reached, so a path with no directory
// component yields nil.
func ancestors(p string) []string {
	var dirs []string
	for dir := path.Dir(p); dir != "." && dir != "/"; dir = path.Dir(dir) {
		dirs = append(dirs, dir)
	}
	return dirs
}