forked from tangled.org/core
this repo has no description

knotserver: git: improve performance of last_commit_time

uses `git log --name-only` to speed things along. the performance
tradeoff here is that active repositories tend to load last-commit times
really quickly, but a directory containing even one file that was last
modified 20k commits ago will cause the entire log to be walked.

the git-log output is parsed as it streams, to avoid having to wait for
the entire command to complete; as soon as the directory's entire
listing is hydrated, we cancel the command. most of the time, this also
hydrates subdirectories (the results are cached), resulting in a good
experience when browsing a repo (typically, moving down directories gets
faster with each click, as fewer files need hydration).

Signed-off-by: oppiliappan <me@oppi.li>

Changed files
+205 -97
appview
pages
templates
knotserver
+4
appview/pages/templates/repo/index.html
···
</div>
</a>
+
{{ if .LastCommit }}
<time class="text-xs text-gray-500 dark:text-gray-400"
>{{ timeFmt .LastCommit.When }}</time
>
+
{{ end }}
</div>
</div>
{{ end }}
···
</div>
</a>
+
{{ if .LastCommit }}
<time class="text-xs text-gray-500 dark:text-gray-400"
>{{ timeFmt .LastCommit.When }}</time
>
+
{{ end }}
</div>
</div>
{{ end }}
+6 -2
appview/pages/templates/repo/tree.html
···
{{ i "folder" "size-4 fill-current" }}{{ .Name }}
</div>
</a>
-
<time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time>
+
{{ if .LastCommit}}
+
<time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time>
+
{{ end }}
</div>
</div>
{{ end }}
···
{{ i "file" "size-4" }}{{ .Name }}
</div>
</a>
-
<time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time>
+
{{ if .LastCommit}}
+
<time class="text-xs text-gray-500 dark:text-gray-400">{{ timeFmt .LastCommit.When }}</time>
+
{{ end }}
</div>
</div>
{{ end }}
-69
knotserver/git/git.go
···
import (
"archive/tar"
-
"bytes"
"fmt"
"io"
"io/fs"
···
"sort"
"strconv"
"strings"
-
"sync"
"time"
-
"github.com/dgraph-io/ristretto"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
"tangled.sh/tangled.sh/core/types"
)
-
-
var (
-
commitCache *ristretto.Cache
-
cacheMu sync.RWMutex
-
)
-
-
func init() {
-
cache, _ := ristretto.NewCache(&ristretto.Config{
-
NumCounters: 1e7,
-
MaxCost: 1 << 30,
-
BufferItems: 64,
-
TtlTickerDurationInSec: 120,
-
})
-
commitCache = cache
-
}
var (
ErrBinaryFile = fmt.Errorf("binary file")
···
}
return nil
-
}
-
-
func (g *GitRepo) LastCommitForPath(path string) (*types.LastCommitInfo, error) {
-
cacheKey := fmt.Sprintf("%s:%s", g.h.String(), path)
-
cacheMu.RLock()
-
if commitInfo, found := commitCache.Get(cacheKey); found {
-
cacheMu.RUnlock()
-
return commitInfo.(*types.LastCommitInfo), nil
-
}
-
cacheMu.RUnlock()
-
-
cmd := exec.Command("git", "-C", g.path, "log", g.h.String(), "-1", "--format=%H %ct", "--", path)
-
-
var out bytes.Buffer
-
cmd.Stdout = &out
-
cmd.Stderr = &out
-
-
if err := cmd.Run(); err != nil {
-
return nil, fmt.Errorf("failed to get commit hash: %w", err)
-
}
-
-
output := strings.TrimSpace(out.String())
-
if output == "" {
-
return nil, fmt.Errorf("no commits found for path: %s", path)
-
}
-
-
parts := strings.SplitN(output, " ", 2)
-
if len(parts) < 2 {
-
return nil, fmt.Errorf("unexpected commit log format")
-
}
-
-
commitHash := parts[0]
-
commitTimeUnix, err := strconv.ParseInt(parts[1], 10, 64)
-
if err != nil {
-
return nil, fmt.Errorf("parsing commit time: %w", err)
-
}
-
commitTime := time.Unix(commitTimeUnix, 0)
-
-
hash := plumbing.NewHash(commitHash)
-
-
commitInfo := &types.LastCommitInfo{
-
Hash: hash,
-
Message: "",
-
When: commitTime,
-
}
-
-
cacheMu.Lock()
-
commitCache.Set(cacheKey, commitInfo, 1)
-
cacheMu.Unlock()
-
-
return commitInfo, nil
}
func newInfoWrapper(
+168
knotserver/git/last_commit.go
···
+
package git
+
+
import (
+
"bufio"
+
"context"
+
"crypto/sha256"
+
"fmt"
+
"io"
+
"os/exec"
+
"path"
+
"strings"
+
"time"
+
+
"github.com/dgraph-io/ristretto"
+
"github.com/go-git/go-git/v5/plumbing"
+
"github.com/go-git/go-git/v5/plumbing/object"
+
)
+
+
var (
+
commitCache *ristretto.Cache
+
)
+
+
func init() {
+
cache, _ := ristretto.NewCache(&ristretto.Config{
+
NumCounters: 1e7,
+
MaxCost: 1 << 30,
+
BufferItems: 64,
+
TtlTickerDurationInSec: 120,
+
})
+
commitCache = cache
+
}
+
+
func (g *GitRepo) streamingGitLog(ctx context.Context, extraArgs ...string) (io.Reader, error) {
+
args := []string{}
+
args = append(args, "log")
+
args = append(args, g.h.String())
+
args = append(args, extraArgs...)
+
+
cmd := exec.CommandContext(ctx, "git", args...)
+
cmd.Dir = g.path
+
+
stdout, err := cmd.StdoutPipe()
+
if err != nil {
+
return nil, err
+
}
+
+
if err := cmd.Start(); err != nil {
+
return nil, err
+
}
+
+
return stdout, nil
+
}
+
+
type commit struct {
+
hash plumbing.Hash
+
when time.Time
+
files []string
+
message string
+
}
+
+
func cacheKey(g *GitRepo, path string) string {
+
sep := byte(':')
+
hash := sha256.Sum256(fmt.Append([]byte{}, g.path, sep, g.h.String(), sep, path))
+
return fmt.Sprintf("%x", hash)
+
}
+
+
func (g *GitRepo) calculateCommitTimeIn(ctx context.Context, subtree *object.Tree, parent string, timeout time.Duration) (map[string]commit, error) {
+
ctx, cancel := context.WithTimeout(ctx, timeout)
+
defer cancel()
+
return g.calculateCommitTime(ctx, subtree, parent)
+
}
+
+
func (g *GitRepo) calculateCommitTime(ctx context.Context, subtree *object.Tree, parent string) (map[string]commit, error) {
+
filesToDo := make(map[string]struct{})
+
filesDone := make(map[string]commit)
+
for _, e := range subtree.Entries {
+
fpath := path.Clean(path.Join(parent, e.Name))
+
filesToDo[fpath] = struct{}{}
+
}
+
+
for _, e := range subtree.Entries {
+
f := path.Clean(path.Join(parent, e.Name))
+
cacheKey := cacheKey(g, f)
+
if cached, ok := commitCache.Get(cacheKey); ok {
+
filesDone[f] = cached.(commit)
+
delete(filesToDo, f)
+
} else {
+
filesToDo[f] = struct{}{}
+
}
+
}
+
+
if len(filesToDo) == 0 {
+
return filesDone, nil
+
}
+
+
ctx, cancel := context.WithCancel(ctx)
+
defer cancel()
+
+
pathSpec := "."
+
if parent != "" {
+
pathSpec = parent
+
}
+
output, err := g.streamingGitLog(ctx, "--pretty=format:%H,%ad,%s", "--date=iso", "--name-only", "--", pathSpec)
+
if err != nil {
+
return nil, err
+
}
+
+
reader := bufio.NewReader(output)
+
var current commit
+
for {
+
line, err := reader.ReadString('\n')
+
if err != nil && err != io.EOF {
+
return nil, err
+
}
+
line = strings.TrimSpace(line)
+
+
if line == "" {
+
if !current.hash.IsZero() {
+
// we have a fully parsed commit
+
for _, f := range current.files {
+
if _, ok := filesToDo[f]; ok {
+
filesDone[f] = current
+
delete(filesToDo, f)
+
commitCache.Set(cacheKey(g, f), current, 0)
+
}
+
}
+
+
if len(filesToDo) == 0 {
+
cancel()
+
break
+
}
+
current = commit{}
+
}
+
} else if current.hash.IsZero() {
+
parts := strings.SplitN(line, ",", 3)
+
if len(parts) == 3 {
+
current.hash = plumbing.NewHash(parts[0])
+
current.when, _ = time.Parse("2006-01-02 15:04:05 -0700", parts[1])
+
current.message = parts[2]
+
}
+
} else {
+
// all ancestors along this path should also be included
+
file := path.Clean(line)
+
ancestors := ancestors(file)
+
current.files = append(current.files, file)
+
current.files = append(current.files, ancestors...)
+
}
+
+
if err == io.EOF {
+
break
+
}
+
}
+
+
return filesDone, nil
+
}
+
+
// ancestors returns the parent directories of p, nearest first, stopping
// before the relative root "." or the absolute root "/". A path with no
// directory component yields a nil slice.
func ancestors(p string) []string {
	var dirs []string
	for dir := path.Dir(p); dir != "." && dir != "/"; dir = path.Dir(dir) {
		dirs = append(dirs, dir)
	}
	return dirs
}
+20 -20
knotserver/git/tree.go
···
package git
import (
+
"context"
"fmt"
+
"path"
"time"
"github.com/go-git/go-git/v5/plumbing/object"
"tangled.sh/tangled.sh/core/types"
)
-
func (g *GitRepo) FileTree(path string) ([]types.NiceTree, error) {
+
func (g *GitRepo) FileTree(ctx context.Context, path string) ([]types.NiceTree, error) {
c, err := g.r.CommitObject(g.h)
if err != nil {
return nil, fmt.Errorf("commit object: %w", err)
···
}
if path == "" {
-
files = g.makeNiceTree(tree, "")
+
files = g.makeNiceTree(ctx, tree, "")
} else {
o, err := tree.FindEntry(path)
if err != nil {
···
return nil, err
}
-
files = g.makeNiceTree(subtree, path)
+
files = g.makeNiceTree(ctx, subtree, path)
}
}
return files, nil
}
-
func (g *GitRepo) makeNiceTree(t *object.Tree, parent string) []types.NiceTree {
+
func (g *GitRepo) makeNiceTree(ctx context.Context, subtree *object.Tree, parent string) []types.NiceTree {
nts := []types.NiceTree{}
-
for _, e := range t.Entries {
+
times, err := g.calculateCommitTimeIn(ctx, subtree, parent, 2*time.Second)
+
if err != nil {
+
return nts
+
}
+
+
for _, e := range subtree.Entries {
mode, _ := e.Mode.ToOSFileMode()
-
sz, _ := t.Size(e.Name)
+
sz, _ := subtree.Size(e.Name)
-
var fpath string
-
if parent != "" {
-
fpath = fmt.Sprintf("%s/%s", parent, e.Name)
-
} else {
-
fpath = e.Name
-
}
-
lastCommit, err := g.LastCommitForPath(fpath)
-
if err != nil {
-
fmt.Println("error getting last commit time:", err)
-
// We don't want to skip the file, so worst case lets just
-
// populate it with "defaults".
+
fpath := path.Join(parent, e.Name)
+
+
var lastCommit *types.LastCommitInfo
+
if t, ok := times[fpath]; ok {
lastCommit = &types.LastCommitInfo{
-
Hash: g.h,
-
Message: "",
-
When: time.Now(),
+
Hash: t.hash,
+
Message: t.message,
+
When: t.when,
}
}
+7 -6
knotserver/routes.go
···
import (
"compress/gzip"
+
"context"
"crypto/hmac"
"crypto/sha256"
"encoding/hex"
···
}
}
-
files, err := gr.FileTree("")
+
files, err := gr.FileTree(r.Context(), "")
if err != nil {
writeError(w, err.Error(), http.StatusInternalServerError)
l.Error("file tree", "error", err.Error())
···
return
}
-
files, err := gr.FileTree(treePath)
+
files, err := gr.FileTree(r.Context(), treePath)
if err != nil {
writeError(w, err.Error(), http.StatusInternalServerError)
l.Error("file tree", "error", err.Error())
···
languageFileCount := make(map[string]int)
-
err = recurseEntireTree(gr, func(absPath string) {
+
err = recurseEntireTree(r.Context(), gr, func(absPath string) {
lang, safe := enry.GetLanguageByExtension(absPath)
if len(lang) == 0 || !safe {
content, _ := gr.FileContentN(absPath, 1024)
···
return
}
-
func recurseEntireTree(git *git.GitRepo, callback func(absPath string), filePath string) error {
-
files, err := git.FileTree(filePath)
+
func recurseEntireTree(ctx context.Context, git *git.GitRepo, callback func(absPath string), filePath string) error {
+
files, err := git.FileTree(ctx, filePath)
if err != nil {
log.Println(err)
return err
···
for _, file := range files {
absPath := path.Join(filePath, file.Name)
if !file.IsFile {
-
return recurseEntireTree(git, callback, absPath)
+
return recurseEntireTree(ctx, git, callback, absPath)
}
callback(absPath)
}