From ce7062a422777c00aadf43ad67a90cc8aae689a5 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 2 Feb 2020 03:11:32 +0800 Subject: [PATCH] Cache last commit to accelerate the repository directory page visit (#10069) * Cache last commit to accelerate the repository directory page visit * Default use default cache configuration * add tests for last commit cache * Simplify last commit cache * Revert Enabled back * Change the last commit cache default ttl to 8760h * Fix test --- custom/conf/app.ini.sample | 14 +++- .../doc/advanced/config-cheat-sheet.en-us.md | 7 ++ .../doc/advanced/config-cheat-sheet.zh-cn.md | 7 ++ integrations/repo_test.go | 65 ++++++++++++++++++- modules/cache/cache.go | 26 +++++--- modules/cache/last_commit.go | 64 ++++++++++++++++++ modules/git/cache.go | 6 +- modules/git/commit_info.go | 45 ++++++++++++- modules/setting/cache.go | 54 +++++++++++++-- routers/repo/view.go | 8 ++- 10 files changed, 273 insertions(+), 23 deletions(-) create mode 100644 modules/cache/last_commit.go diff --git a/custom/conf/app.ini.sample b/custom/conf/app.ini.sample index b0aafb8b3..31e8bf0b0 100644 --- a/custom/conf/app.ini.sample +++ b/custom/conf/app.ini.sample @@ -43,7 +43,7 @@ DEFAULT_CLOSE_ISSUES_VIA_COMMITS_IN_ANY_BRANCH = false ENABLE_PUSH_CREATE_USER = false ENABLE_PUSH_CREATE_ORG = false ; Comma separated list of globally disabled repo units. Allowed values: repo.issues, repo.ext_issues, repo.pulls, repo.wiki, repo.ext_wiki -DISABLED_REPO_UNITS = +DISABLED_REPO_UNITS = ; Comma separated list of default repo units. Allowed values: repo.code, repo.releases, repo.issues, repo.pulls, repo.wiki. ; Note: Code and Releases can currently not be deactivated. If you specify default repo units you should still list them for future compatibility. ; External wiki and issue tracker can't be enabled by default as it requires additional settings. 
@@ -632,6 +632,8 @@ SENDMAIL_PATH = sendmail SENDMAIL_ARGS = [cache] +; if the cache is enabled +ENABLED = true ; Either "memory", "redis", or "memcache", default is "memory" ADAPTER = memory ; For "memory" only, GC interval in seconds, default is 60 @@ -644,6 +646,16 @@ HOST = ; Setting it to 0 disables caching ITEM_TTL = 16h +; Last commit cache +[cache.last_commit] +; if the cache is enabled +ENABLED = true +; Time to keep items in cache if not used, default is 8760 hours. +; Setting it to 0 disables caching +ITEM_TTL = 8760h +; Only enable the cache when the repository's commit count is greater than or equal to this value +COMMITS_COUNT = 1000 + [session] ; Either "memory", "file", or "redis", default is "memory" PROVIDER = memory diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index e7b87c527..7e5b39e48 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -383,6 +383,7 @@ relation to port exhaustion. ## Cache (`cache`) +- `ENABLED`: **true**: Enable the cache. - `ADAPTER`: **memory**: Cache engine adapter, either `memory`, `redis`, or `memcache`. - `INTERVAL`: **60**: Garbage Collection interval (sec), for memory cache only. - `HOST`: **\**: Connection string for `redis` and `memcache`. @@ -390,6 +391,12 @@ relation to port exhaustion. - Memcache: `127.0.0.1:9090;127.0.0.1:9091` - `ITEM_TTL`: **16h**: Time to keep items in cache if not used, Setting it to 0 disables caching. +## Cache - LastCommitCache settings (`cache.last_commit`) + +- `ENABLED`: **true**: Enable the cache. +- `ITEM_TTL`: **8760h**: Time to keep items in cache if not used, Setting it to 0 disables caching. +- `COMMITS_COUNT`: **1000**: Only enable the cache when the repository's commit count is greater than or equal to this value. + ## Session (`session`) - `PROVIDER`: **memory**: Session engine provider \[memory, file, redis, mysql, couchbase, memcache, nodb, postgres\]. 
diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md index a095b3bc8..80861f457 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md +++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md @@ -148,6 +148,7 @@ menu: ## Cache (`cache`) +- `ENABLED`: **true**: 是否启用。 - `ADAPTER`: **memory**: 缓存引擎,可以为 `memory`, `redis` 或 `memcache`。 - `INTERVAL`: **60**: 只对内存缓存有效,GC间隔,单位秒。 - `HOST`: **\**: 针对redis和memcache有效,主机地址和端口。 @@ -155,6 +156,12 @@ menu: - Memache: `127.0.0.1:9090;127.0.0.1:9091` - `ITEM_TTL`: **16h**: 缓存项目失效时间,设置为 0 则禁用缓存。 +## Cache - LastCommitCache settings (`cache.last_commit`) + +- `ENABLED`: **true**: 是否启用。 +- `ITEM_TTL`: **8760h**: 缓存项目失效时间,设置为 0 则禁用缓存。 +- `COMMITS_COUNT`: **1000**: 仅当仓库的提交数大于时才启用缓存。 + ## Session (`session`) - `PROVIDER`: Session 内容存储方式,可选 `memory`, `file`, `redis` 或 `mysql`。 diff --git a/integrations/repo_test.go b/integrations/repo_test.go index b5ff072ea..d2e02dd37 100644 --- a/integrations/repo_test.go +++ b/integrations/repo_test.go @@ -7,8 +7,10 @@ package integrations import ( "fmt" "net/http" + "path" "strings" "testing" + "time" "code.gitea.io/gitea/modules/setting" @@ -29,12 +31,71 @@ func TestViewRepo(t *testing.T) { session.MakeRequest(t, req, http.StatusNotFound) } -func TestViewRepo2(t *testing.T) { +func testViewRepo(t *testing.T) { defer prepareTestEnv(t)() req := NewRequest(t, "GET", "/user3/repo3") session := loginUser(t, "user2") - session.MakeRequest(t, req, http.StatusOK) + resp := session.MakeRequest(t, req, http.StatusOK) + + htmlDoc := NewHTMLParser(t, resp.Body) + files := htmlDoc.doc.Find("#repo-files-table > TBODY > TR") + + type file struct { + fileName string + commitID string + commitMsg string + commitTime string + } + + var items []file + + files.Each(func(i int, s *goquery.Selection) { + tds := s.Find("td") + var f file + tds.Each(func(i int, s *goquery.Selection) { + if i == 0 { + f.fileName = strings.TrimSpace(s.Text()) 
+ } else if i == 1 { + a := s.Find("a") + f.commitMsg = strings.TrimSpace(a.Text()) + l, _ := a.Attr("href") + f.commitID = path.Base(l) + } + }) + + f.commitTime, _ = s.Find("span.time-since").Attr("title") + items = append(items, f) + }) + + assert.EqualValues(t, []file{ + { + fileName: "doc", + commitID: "2a47ca4b614a9f5a43abbd5ad851a54a616ffee6", + commitMsg: "init project", + commitTime: time.Date(2017, time.June, 14, 13, 54, 21, 0, time.UTC).Format(time.RFC1123), + }, + { + fileName: "README.md", + commitID: "2a47ca4b614a9f5a43abbd5ad851a54a616ffee6", + commitMsg: "init project", + commitTime: time.Date(2017, time.June, 14, 13, 54, 21, 0, time.UTC).Format(time.RFC1123), + }, + }, items) +} + +func TestViewRepo2(t *testing.T) { + // no last commit cache + testViewRepo(t) + + // enable last commit cache for all repositories + oldCommitsCount := setting.CacheService.LastCommit.CommitsCount + setting.CacheService.LastCommit.CommitsCount = 0 + // first view will not hit the cache + testViewRepo(t) + // second view will hit the cache + testViewRepo(t) + setting.CacheService.LastCommit.CommitsCount = oldCommitsCount } func TestViewRepo3(t *testing.T) { diff --git a/modules/cache/cache.go b/modules/cache/cache.go index 20d23f3b5..e3a905e3f 100644 --- a/modules/cache/cache.go +++ b/modules/cache/cache.go @@ -16,20 +16,28 @@ import ( _ "gitea.com/macaron/cache/redis" ) -var conn mc.Cache +var ( + conn mc.Cache +) + +func newCache(cacheConfig setting.Cache) (mc.Cache, error) { + return mc.NewCacher(cacheConfig.Adapter, mc.Options{ + Adapter: cacheConfig.Adapter, + AdapterConfig: cacheConfig.Conn, + Interval: cacheConfig.Interval, + }) +} // NewContext start cache service func NewContext() error { - if setting.CacheService == nil || conn != nil { - return nil + var err error + + if conn == nil && setting.CacheService.Enabled { + if conn, err = newCache(setting.CacheService.Cache); err != nil { + return err + } } - var err error - conn, err = 
mc.NewCacher(setting.CacheService.Adapter, mc.Options{ - Adapter: setting.CacheService.Adapter, - AdapterConfig: setting.CacheService.Conn, - Interval: setting.CacheService.Interval, - }) return err } diff --git a/modules/cache/last_commit.go b/modules/cache/last_commit.go new file mode 100644 index 000000000..2fd9313bd --- /dev/null +++ b/modules/cache/last_commit.go @@ -0,0 +1,64 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package cache + +import ( + "fmt" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/log" + + mc "gitea.com/macaron/cache" + "gopkg.in/src-d/go-git.v4/plumbing/object" +) + +// LastCommitCache represents a cache to store last commit +type LastCommitCache struct { + repoPath string + ttl int64 + repo *git.Repository + commitCache map[string]*object.Commit + mc.Cache +} + +// NewLastCommitCache creates a new last commit cache for repo +func NewLastCommitCache(repoPath string, gitRepo *git.Repository, ttl int64) *LastCommitCache { + return &LastCommitCache{ + repoPath: repoPath, + repo: gitRepo, + commitCache: make(map[string]*object.Commit), + ttl: ttl, + Cache: conn, + } +} + +// Get get the last commit information by commit id and entry path +func (c LastCommitCache) Get(ref, entryPath string) (*object.Commit, error) { + v := c.Cache.Get(fmt.Sprintf("last_commit:%s:%s:%s", c.repoPath, ref, entryPath)) + if vs, ok := v.(string); ok { + log.Trace("LastCommitCache hit level 1: [%s:%s:%s]", ref, entryPath, vs) + if commit, ok := c.commitCache[vs]; ok { + log.Trace("LastCommitCache hit level 2: [%s:%s:%s]", ref, entryPath, vs) + return commit, nil + } + id, err := c.repo.ConvertToSHA1(vs) + if err != nil { + return nil, err + } + commit, err := c.repo.GoGitRepo().CommitObject(id) + if err != nil { + return nil, err + } + c.commitCache[vs] = commit + return commit, nil + } + return nil, nil +} + +// Put put 
the last commit id with commit and entry path +func (c LastCommitCache) Put(ref, entryPath, commitID string) error { + log.Trace("LastCommitCache save: [%s:%s:%s]", ref, entryPath, commitID) + return c.Cache.Put(fmt.Sprintf("last_commit:%s:%s:%s", c.repoPath, ref, entryPath), commitID, c.ttl) +} diff --git a/modules/git/cache.go b/modules/git/cache.go index dbbbafae4..39daf5c49 100644 --- a/modules/git/cache.go +++ b/modules/git/cache.go @@ -4,8 +4,10 @@ package git +import "gopkg.in/src-d/go-git.v4/plumbing/object" + // LastCommitCache cache type LastCommitCache interface { - Get(repoPath, ref, entryPath string) (*Commit, error) - Put(repoPath, ref, entryPath string, commit *Commit) error + Get(ref, entryPath string) (*object.Commit, error) + Put(ref, entryPath, commitID string) error } diff --git a/modules/git/commit_info.go b/modules/git/commit_info.go index e74ddbfb0..69fd7f356 100644 --- a/modules/git/commit_info.go +++ b/modules/git/commit_info.go @@ -5,6 +5,8 @@ package git import ( + "path" + "github.com/emirpasic/gods/trees/binaryheap" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/object" @@ -30,7 +32,29 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom return nil, nil, err } - revs, err := getLastCommitForPaths(c, treePath, entryPaths) + var revs map[string]*object.Commit + if cache != nil { + var unHitPaths []string + revs, unHitPaths, err = getLastCommitForPathsByCache(commit.ID.String(), treePath, entryPaths, cache) + if err != nil { + return nil, nil, err + } + if len(unHitPaths) > 0 { + revs2, err := getLastCommitForPaths(c, treePath, unHitPaths) + if err != nil { + return nil, nil, err + } + + for k, v := range revs2 { + if err := cache.Put(commit.ID.String(), path.Join(treePath, k), v.ID().String()); err != nil { + return nil, nil, err + } + revs[k] = v + } + } + } else { + revs, err = getLastCommitForPaths(c, treePath, entryPaths) + } if err != nil { return nil, nil, err } @@ -127,6 
+151,25 @@ func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[ return hashes, nil } +func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache LastCommitCache) (map[string]*object.Commit, []string, error) { + var unHitEntryPaths []string + var results = make(map[string]*object.Commit) + for _, p := range paths { + lastCommit, err := cache.Get(commitID, path.Join(treePath, p)) + if err != nil { + return nil, nil, err + } + if lastCommit != nil { + results[p] = lastCommit + continue + } + + unHitEntryPaths = append(unHitEntryPaths, p) + } + + return results, unHitEntryPaths, nil +} + func getLastCommitForPaths(c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { // We do a tree traversal with nodes sorted by commit time heap := binaryheap.NewWith(func(a, b interface{}) int { diff --git a/modules/setting/cache.go b/modules/setting/cache.go index babb62bae..34a212db1 100644 --- a/modules/setting/cache.go +++ b/modules/setting/cache.go @@ -13,31 +13,71 @@ import ( // Cache represents cache settings type Cache struct { + Enabled bool Adapter string Interval int Conn string - TTL time.Duration + TTL time.Duration `ini:"ITEM_TTL"` } var ( // CacheService the global cache - CacheService *Cache + CacheService = struct { + Cache + + LastCommit struct { + Enabled bool + TTL time.Duration `ini:"ITEM_TTL"` + CommitsCount int64 + } `ini:"cache.last_commit"` + }{ + Cache: Cache{ + Enabled: true, + Adapter: "memory", + Interval: 60, + TTL: 16 * time.Hour, + }, + LastCommit: struct { + Enabled bool + TTL time.Duration `ini:"ITEM_TTL"` + CommitsCount int64 + }{ + Enabled: true, + TTL: 8760 * time.Hour, + CommitsCount: 1000, + }, + } ) func newCacheService() { sec := Cfg.Section("cache") - CacheService = &Cache{ - Adapter: sec.Key("ADAPTER").In("memory", []string{"memory", "redis", "memcache"}), + if err := sec.MapTo(&CacheService); err != nil { + log.Fatal("Failed to map Cache settings: %v", 
err) } + + CacheService.Adapter = sec.Key("ADAPTER").In("memory", []string{"memory", "redis", "memcache"}) switch CacheService.Adapter { case "memory": - CacheService.Interval = sec.Key("INTERVAL").MustInt(60) case "redis", "memcache": CacheService.Conn = strings.Trim(sec.Key("HOST").String(), "\" ") + case "": // disable cache + CacheService.Enabled = false default: log.Fatal("Unknown cache adapter: %s", CacheService.Adapter) } - CacheService.TTL = sec.Key("ITEM_TTL").MustDuration(16 * time.Hour) - log.Info("Cache Service Enabled") + if CacheService.Enabled { + log.Info("Cache Service Enabled") + } + + sec = Cfg.Section("cache.last_commit") + if !CacheService.Enabled { + CacheService.LastCommit.Enabled = false + } + + CacheService.LastCommit.CommitsCount = sec.Key("COMMITS_COUNT").MustInt64(1000) + + if CacheService.LastCommit.Enabled { + log.Info("Last Commit Cache Service Enabled") + } } diff --git a/routers/repo/view.go b/routers/repo/view.go index 3fbff007e..f56c52435 100644 --- a/routers/repo/view.go +++ b/routers/repo/view.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/cache" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/git" @@ -49,8 +50,13 @@ func renderDirectory(ctx *context.Context, treeLink string) { } entries.CustomSort(base.NaturalSortLess) + var c git.LastCommitCache + if setting.CacheService.LastCommit.Enabled && ctx.Repo.CommitsCount >= setting.CacheService.LastCommit.CommitsCount { + c = cache.NewLastCommitCache(ctx.Repo.Repository.FullName(), ctx.Repo.GitRepo, int64(setting.CacheService.LastCommit.TTL.Seconds())) + } + var latestCommit *git.Commit - ctx.Data["Files"], latestCommit, err = entries.GetCommitsInfo(ctx.Repo.Commit, ctx.Repo.TreePath, nil) + ctx.Data["Files"], latestCommit, err = entries.GetCommitsInfo(ctx.Repo.Commit, ctx.Repo.TreePath, c) if err != nil { ctx.ServerError("GetCommitsInfo", err) return