Improve listing performance by using go-git (#6478)

* Use go-git for tree reading and commit info lookup.

Signed-off-by: Filip Navara <navara@emclient.com>

* Use TreeEntry.IsRegular() instead of ObjectType that was removed.

Signed-off-by: Filip Navara <navara@emclient.com>

* Use the treePath to optimize commit info search.

Signed-off-by: Filip Navara <navara@emclient.com>

* Extract the latest commit at treePath along with the other commits.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix listing commit info for a directory that was created in one commit and never modified after.

Signed-off-by: Filip Navara <navara@emclient.com>

* Avoid nearly all external 'git' invocations when doing directory listing (.editorconfig code path is still hit).

Signed-off-by: Filip Navara <navara@emclient.com>

* Use go-git for reading blobs.

Signed-off-by: Filip Navara <navara@emclient.com>

* Make SHA1 type alias for plumbing.Hash in go-git.

Signed-off-by: Filip Navara <navara@emclient.com>

* Make Signature type alias for object.Signature in go-git.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix GetCommitsInfo for repository with only one commit.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix PGP signature verification.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix issues with walking commit graph across merges.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix typo in condition.

Signed-off-by: Filip Navara <navara@emclient.com>

* Speed up loading branch list by keeping the repository reference (and thus all the loaded packfile indexes).

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix lising submodules.

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix build

Signed-off-by: Filip Navara <navara@emclient.com>

* Add back commit cache because of name-rev

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix tests

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix code style

* Fix spelling

* Address PR feedback

Signed-off-by: Filip Navara <navara@emclient.com>

* Update vendor module list

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix getting trees by commit id

Signed-off-by: Filip Navara <navara@emclient.com>

* Fix remaining unit test failures

* Fix GetTreeBySHA

* Avoid running `git name-rev` if not necessary

Signed-off-by: Filip Navara <navara@emclient.com>

* Move Branch code to git module

* Clean up GPG signature verification and fix it for tagged commits

* Address PR feedback (import formatting, copyright headers)

* Make blob lookup by SHA working

* Update tests to use public API

* Allow getting content from any type of object through the blob interface

* Change test to actually expect the object content that is in the GIT repository

* Change one more test to actually expect the object content that is in the GIT repository

* Add comments
release/v1.9
Filip Navara 5 years ago committed by Lunny Xiao
parent 19ec2606e9
commit 2af67f6044

@ -32,7 +32,7 @@ require (
github.com/dgrijalva/jwt-go v0.0.0-20161101193935-9ed569b5d1ac
github.com/edsrzf/mmap-go v0.0.0-20170320065105-0bce6a688712 // indirect
github.com/elazarl/go-bindata-assetfs v0.0.0-20151224045452-57eb5e1fc594 // indirect
github.com/emirpasic/gods v1.12.0 // indirect
github.com/emirpasic/gods v1.12.0
github.com/etcd-io/bbolt v1.3.2 // indirect
github.com/ethantkoenig/rupture v0.0.0-20180203182544-0a76f03a811a
github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect
@ -127,7 +127,7 @@ require (
gopkg.in/ldap.v3 v3.0.2
gopkg.in/macaron.v1 v1.3.2
gopkg.in/redis.v2 v2.3.2 // indirect
gopkg.in/src-d/go-billy.v4 v4.3.0 // indirect
gopkg.in/src-d/go-billy.v4 v4.3.0
gopkg.in/src-d/go-git.v4 v4.10.0
gopkg.in/testfixtures.v2 v2.5.0
mvdan.cc/xurls/v2 v2.0.0

@ -38,7 +38,7 @@ func TestAPIReposGitBlobs(t *testing.T) {
var gitBlobResponse api.GitBlobResponse
DecodeJSON(t, resp, &gitBlobResponse)
assert.NotNil(t, gitBlobResponse)
expectedContent := "Y29tbWl0IDY1ZjFiZjI3YmMzYmY3MGY2NDY1NzY1ODYzNWU2NjA5NGVkYmNiNGQKQXV0aG9yOiB1c2VyMSA8YWRkcmVzczFAZXhhbXBsZS5jb20+CkRhdGU6ICAgU3VuIE1hciAxOSAxNjo0Nzo1OSAyMDE3IC0wNDAwCgogICAgSW5pdGlhbCBjb21taXQKCmRpZmYgLS1naXQgYS9SRUFETUUubWQgYi9SRUFETUUubWQKbmV3IGZpbGUgbW9kZSAxMDA2NDQKaW5kZXggMDAwMDAwMC4uNGI0ODUxYQotLS0gL2Rldi9udWxsCisrKyBiL1JFQURNRS5tZApAQCAtMCwwICsxLDMgQEAKKyMgcmVwbzEKKworRGVzY3JpcHRpb24gZm9yIHJlcG8xClwgTm8gbmV3bGluZSBhdCBlbmQgb2YgZmlsZQo="
expectedContent := "dHJlZSAyYTJmMWQ0NjcwNzI4YTJlMTAwNDllMzQ1YmQ3YTI3NjQ2OGJlYWI2CmF1dGhvciB1c2VyMSA8YWRkcmVzczFAZXhhbXBsZS5jb20+IDE0ODk5NTY0NzkgLTA0MDAKY29tbWl0dGVyIEV0aGFuIEtvZW5pZyA8ZXRoYW50a29lbmlnQGdtYWlsLmNvbT4gMTQ4OTk1NjQ3OSAtMDQwMAoKSW5pdGlhbCBjb21taXQK"
assert.Equal(t, expectedContent, gitBlobResponse.Content)
// Tests a private repo with no token so will fail

@ -874,21 +874,6 @@ func (err ErrUserDoesNotHaveAccessToRepo) Error() string {
// |______ / |__| (____ /___| /\___ >___| /
// \/ \/ \/ \/ \/
// ErrBranchNotExist represents a "BranchNotExist" kind of error.
type ErrBranchNotExist struct {
Name string
}
// IsErrBranchNotExist checks if an error is a ErrBranchNotExist.
func IsErrBranchNotExist(err error) bool {
_, ok := err.(ErrBranchNotExist)
return ok
}
func (err ErrBranchNotExist) Error() string {
return fmt.Sprintf("branch does not exist [name: %s]", err.Name)
}
// ErrBranchAlreadyExists represents an error that branch with such name already exists.
type ErrBranchAlreadyExists struct {
BranchName string

@ -1,4 +1,5 @@
// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@ -165,8 +166,8 @@ func (pr *PullRequest) APIFormat() *api.PullRequest {
func (pr *PullRequest) apiFormat(e Engine) *api.PullRequest {
var (
baseBranch *Branch
headBranch *Branch
baseBranch *git.Branch
headBranch *git.Branch
baseCommit *git.Commit
headCommit *git.Commit
err error

@ -1,4 +1,5 @@
// Copyright 2016 The Gogs Authors. All rights reserved.
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
@ -86,53 +87,24 @@ func (repo *Repository) DeleteLocalBranch(branchName string) error {
return deleteLocalBranch(repo.LocalCopyPath(), repo.DefaultBranch, branchName)
}
// Branch holds the branch information
type Branch struct {
Path string
Name string
}
// GetBranchesByPath returns a branch by it's path
func GetBranchesByPath(path string) ([]*Branch, error) {
gitRepo, err := git.OpenRepository(path)
if err != nil {
return nil, err
}
brs, err := gitRepo.GetBranches()
if err != nil {
return nil, err
}
branches := make([]*Branch, len(brs))
for i := range brs {
branches[i] = &Branch{
Path: path,
Name: brs[i],
}
}
return branches, nil
}
// CanCreateBranch returns true if repository meets the requirements for creating new branches.
func (repo *Repository) CanCreateBranch() bool {
return !repo.IsMirror
}
// GetBranch returns a branch by it's name
func (repo *Repository) GetBranch(branch string) (*Branch, error) {
if !git.IsBranchExist(repo.RepoPath(), branch) {
return nil, ErrBranchNotExist{branch}
// GetBranch returns a branch by its name
func (repo *Repository) GetBranch(branch string) (*git.Branch, error) {
gitRepo, err := git.OpenRepository(repo.RepoPath())
if err != nil {
return nil, err
}
return &Branch{
Path: repo.RepoPath(),
Name: branch,
}, nil
return gitRepo.GetBranch(branch)
}
// GetBranches returns all the branches of a repository
func (repo *Repository) GetBranches() ([]*Branch, error) {
return GetBranchesByPath(repo.RepoPath())
func (repo *Repository) GetBranches() ([]*git.Branch, error) {
return git.GetBranchesByPath(repo.RepoPath())
}
// CheckBranchName validates branch name with existing repository branches
@ -257,12 +229,3 @@ func (repo *Repository) CreateNewBranchFromCommit(doer *User, commit, branchName
return nil
}
// GetCommit returns all the commits of a branch
func (branch *Branch) GetCommit() (*git.Commit, error) {
gitRepo, err := git.OpenRepository(branch.Path)
if err != nil {
return nil, err
}
return gitRepo.GetBranchCommit(branch.Name)
}

@ -157,10 +157,11 @@ func (r *Repository) GetEditorconfig() (*editorconfig.Editorconfig, error) {
if treeEntry.Blob().Size() >= setting.UI.MaxDisplayFileSize {
return nil, git.ErrNotExist{ID: "", RelPath: ".editorconfig"}
}
reader, err := treeEntry.Blob().Data()
reader, err := treeEntry.Blob().DataAsync()
if err != nil {
return nil, err
}
defer reader.Close()
data, err := ioutil.ReadAll(reader)
if err != nil {
return nil, err

@ -1,76 +1,40 @@
// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package git
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"gopkg.in/src-d/go-git.v4/plumbing"
)
// Blob represents a Git object.
type Blob struct {
repo *Repository
*TreeEntry
}
// Data gets content of blob all at once and wrap it as io.Reader.
// This can be very slow and memory consuming for huge content.
func (b *Blob) Data() (io.Reader, error) {
stdout := new(bytes.Buffer)
stderr := new(bytes.Buffer)
// Preallocate memory to save ~50% memory usage on big files.
stdout.Grow(int(b.Size() + 2048))
if err := b.DataPipeline(stdout, stderr); err != nil {
return nil, concatenateError(err, stderr.String())
}
return stdout, nil
}
ID SHA1
// DataPipeline gets content of blob and write the result or error to stdout or stderr
func (b *Blob) DataPipeline(stdout, stderr io.Writer) error {
return NewCommand("show", b.ID.String()).RunInDirPipeline(b.repo.Path, stdout, stderr)
}
type cmdReadCloser struct {
cmd *exec.Cmd
stdout io.Reader
}
func (c cmdReadCloser) Read(p []byte) (int, error) {
return c.stdout.Read(p)
}
func (c cmdReadCloser) Close() error {
io.Copy(ioutil.Discard, c.stdout)
return c.cmd.Wait()
gogitEncodedObj plumbing.EncodedObject
name string
}
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
// Calling the Close function on the result will discard all unread output.
func (b *Blob) DataAsync() (io.ReadCloser, error) {
cmd := exec.Command("git", "show", b.ID.String())
cmd.Dir = b.repo.Path
cmd.Stderr = os.Stderr
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, fmt.Errorf("StdoutPipe: %v", err)
}
return b.gogitEncodedObj.Reader()
}
if err = cmd.Start(); err != nil {
return nil, fmt.Errorf("Start: %v", err)
}
// Size returns the uncompressed size of the blob
func (b *Blob) Size() int64 {
return b.gogitEncodedObj.Size()
}
return cmdReadCloser{stdout: stdout, cmd: cmd}, nil
// Name returns name of the tree entry this blob object was created from (or empty string)
func (b *Blob) Name() string {
return b.name
}
// GetBlobContentBase64 Reads the content of the blob with a base64 encode and returns the encoded string

@ -1,11 +1,11 @@
// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package git
import (
"bytes"
"io/ioutil"
"testing"
@ -13,20 +13,6 @@ import (
"github.com/stretchr/testify/require"
)
var repoSelf = &Repository{
Path: "./",
}
var testBlob = &Blob{
repo: repoSelf,
TreeEntry: &TreeEntry{
ID: MustIDFromString("a8d4b49dd073a4a38a7e58385eeff7cc52568697"),
ptree: &Tree{
repo: repoSelf,
},
},
}
func TestBlob_Data(t *testing.T) {
output := `Copyright (c) 2016 The Gitea Authors
Copyright (c) 2015 The Gogs Authors
@ -49,10 +35,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
`
repo, err := OpenRepository("../../.git")
assert.NoError(t, err)
testBlob, err := repo.GetBlob("a8d4b49dd073a4a38a7e58385eeff7cc52568697")
assert.NoError(t, err)
r, err := testBlob.Data()
r, err := testBlob.DataAsync()
assert.NoError(t, err)
require.NotNil(t, r)
defer r.Close()
data, err := ioutil.ReadAll(r)
assert.NoError(t, err)
@ -60,21 +51,21 @@ THE SOFTWARE.
}
func Benchmark_Blob_Data(b *testing.B) {
for i := 0; i < b.N; i++ {
r, err := testBlob.Data()
if err != nil {
b.Fatal(err)
}
ioutil.ReadAll(r)
repo, err := OpenRepository("../../.git")
if err != nil {
b.Fatal(err)
}
testBlob, err := repo.GetBlob("a8d4b49dd073a4a38a7e58385eeff7cc52568697")
if err != nil {
b.Fatal(err)
}
}
func Benchmark_Blob_DataPipeline(b *testing.B) {
stdout := new(bytes.Buffer)
for i := 0; i < b.N; i++ {
stdout.Reset()
if err := testBlob.DataPipeline(stdout, nil); err != nil {
r, err := testBlob.DataAsync()
if err != nil {
b.Fatal(err)
}
defer r.Close()
ioutil.ReadAll(r)
}
}

@ -14,6 +14,8 @@ import (
"net/http"
"strconv"
"strings"
"gopkg.in/src-d/go-git.v4/plumbing/object"
)
// Commit represents a git commit.
@ -36,20 +38,59 @@ type CommitGPGSignature struct {
Payload string //TODO check if can be reconstruct from the rest of commit information to not have duplicate data
}
// similar to https://github.com/git/git/blob/3bc53220cb2dcf709f7a027a3f526befd021d858/commit.c#L1128
func newGPGSignatureFromCommitline(data []byte, signatureStart int, tag bool) (*CommitGPGSignature, error) {
sig := new(CommitGPGSignature)
signatureEnd := bytes.LastIndex(data, []byte("-----END PGP SIGNATURE-----"))
if signatureEnd == -1 {
return nil, fmt.Errorf("end of commit signature not found")
func convertPGPSignature(c *object.Commit) *CommitGPGSignature {
if c.PGPSignature == "" {
return nil
}
var w strings.Builder
var err error
if _, err = fmt.Fprintf(&w, "tree %s\n", c.TreeHash.String()); err != nil {
return nil
}
for _, parent := range c.ParentHashes {
if _, err = fmt.Fprintf(&w, "parent %s\n", parent.String()); err != nil {
return nil
}
}
if _, err = fmt.Fprint(&w, "author "); err != nil {
return nil
}
sig.Signature = strings.Replace(string(data[signatureStart:signatureEnd+27]), "\n ", "\n", -1)
if tag {
sig.Payload = string(data[:signatureStart-1])
} else {
sig.Payload = string(data[:signatureStart-8]) + string(data[signatureEnd+27:])
if err = c.Author.Encode(&w); err != nil {
return nil
}
if _, err = fmt.Fprint(&w, "\ncommitter "); err != nil {
return nil
}
if err = c.Committer.Encode(&w); err != nil {
return nil
}
if _, err = fmt.Fprintf(&w, "\n\n%s", c.Message); err != nil {
return nil
}
return &CommitGPGSignature{
Signature: c.PGPSignature,
Payload: w.String(),
}
}
func convertCommit(c *object.Commit) *Commit {
return &Commit{
ID: c.Hash,
CommitMessage: c.Message,
Committer: &c.Committer,
Author: &c.Author,
Signature: convertPGPSignature(c),
parents: c.ParentHashes,
}
return sig, nil
}
// Message returns the commit message. Same as retrieving CommitMessage directly.
@ -281,11 +322,13 @@ func (c *Commit) GetSubModules() (*ObjectCache, error) {
}
return nil, err
}
rd, err := entry.Blob().Data()
rd, err := entry.Blob().DataAsync()
if err != nil {
return nil, err
}
defer rd.Close()
scanner := bufio.NewScanner(rd)
c.submoduleCache = newObjectCache()
var ismodule bool
@ -326,6 +369,17 @@ func (c *Commit) GetSubModule(entryname string) (*SubModule, error) {
return nil, nil
}
// GetBranchName gets the closes branch name (as returned by 'git name-rev')
func (c *Commit) GetBranchName() (string, error) {
data, err := NewCommand("name-rev", c.ID.String()).RunInDirBytes(c.repo.Path)
if err != nil {
return "", err
}
// name-rev commitID output will be "COMMIT_ID master" or "COMMIT_ID master~12"
return strings.Split(strings.Split(string(data), " ")[1], "~")[0], nil
}
// CommitFileStatus represents status of files in a commit.
type CommitFileStatus struct {
Added []string

@ -5,325 +5,237 @@
package git
import (
"bufio"
"context"
"fmt"
"os/exec"
"path"
"runtime"
"strconv"
"strings"
"sync"
"time"
"github.com/emirpasic/gods/trees/binaryheap"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/object"
)
const (
// parameters for searching for commit infos. If the untargeted search has
// not found any entries in the past 5 commits, and 12 or fewer entries
// remain, then we'll just let the targeted-searching threads finish off,
// and stop the untargeted search to not interfere.
deferToTargetedSearchColdStreak = 5
deferToTargetedSearchNumRemainingEntries = 12
)
// GetCommitsInfo gets information of all commits that are corresponding to these entries
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCommitCache) ([][]interface{}, *Commit, error) {
entryPaths := make([]string, len(tes)+1)
// Get the commit for the treePath itself
entryPaths[0] = ""
for i, entry := range tes {
entryPaths[i+1] = entry.Name()
}
// getCommitsInfoState shared state while getting commit info for entries
type getCommitsInfoState struct {
lock sync.Mutex
/* read-only fields, can be read without the mutex */
// entries and entryPaths are read-only after initialization, so they can
// safely be read without the mutex
entries []*TreeEntry
// set of filepaths to get info for
entryPaths map[string]struct{}
treePath string
headCommit *Commit
c, err := commit.repo.gogitRepo.CommitObject(plumbing.Hash(commit.ID))
if err != nil {
return nil, nil, err
}
/* mutable fields, must hold mutex to read or write */
// map from filepath to commit
commits map[string]*Commit
// set of filepaths that have been or are being searched for in a target search
targetedPaths map[string]struct{}
}
revs, err := getLastCommitForPaths(c, treePath, entryPaths)
if err != nil {
return nil, nil, err
}
func (state *getCommitsInfoState) numRemainingEntries() int {
state.lock.Lock()
defer state.lock.Unlock()
return len(state.entries) - len(state.commits)
}
commit.repo.gogitStorage.Close()
// getTargetEntryPath Returns the next path for a targeted-searching thread to
// search for, or returns the empty string if nothing left to search for
func (state *getCommitsInfoState) getTargetedEntryPath() string {
var targetedEntryPath string
state.lock.Lock()
defer state.lock.Unlock()
for _, entry := range state.entries {
entryPath := path.Join(state.treePath, entry.Name())
if _, ok := state.commits[entryPath]; ok {
continue
} else if _, ok = state.targetedPaths[entryPath]; ok {
continue
commitsInfo := make([][]interface{}, len(tes))
for i, entry := range tes {
if rev, ok := revs[entry.Name()]; ok {
entryCommit := convertCommit(rev)
if entry.IsSubModule() {
subModuleURL := ""
if subModule, err := commit.GetSubModule(entry.Name()); err != nil {
return nil, nil, err
} else if subModule != nil {
subModuleURL = subModule.URL
}
subModuleFile := NewSubModuleFile(entryCommit, subModuleURL, entry.ID.String())
commitsInfo[i] = []interface{}{entry, subModuleFile}
} else {
commitsInfo[i] = []interface{}{entry, entryCommit}
}
} else {
commitsInfo[i] = []interface{}{entry, nil}
}
targetedEntryPath = entryPath
state.targetedPaths[entryPath] = struct{}{}
break
}
return targetedEntryPath
}
// repeatedly perform targeted searches for unpopulated entries
func targetedSearch(state *getCommitsInfoState, done chan error, cache LastCommitCache) {
for {
entryPath := state.getTargetedEntryPath()
if len(entryPath) == 0 {
done <- nil
return
}
if cache != nil {
commit, err := cache.Get(state.headCommit.repo.Path, state.headCommit.ID.String(), entryPath)
if err == nil && commit != nil {
state.update(entryPath, commit)
continue
}
}
command := NewCommand("rev-list", "-1", state.headCommit.ID.String(), "--", entryPath)
output, err := command.RunInDir(state.headCommit.repo.Path)
if err != nil {
done <- err
return
}
id, err := NewIDFromString(strings.TrimSpace(output))
if err != nil {
done <- err
return
}
commit, err := state.headCommit.repo.getCommit(id)
if err != nil {
done <- err
return
}
state.update(entryPath, commit)
if cache != nil {
cache.Put(state.headCommit.repo.Path, state.headCommit.ID.String(), entryPath, commit)
}
// Retrieve the commit for the treePath itself (see above). We basically
// get it for free during the tree traversal and it's used for listing
// pages to display information about newest commit for a given path.
var treeCommit *Commit
if rev, ok := revs[""]; ok {
treeCommit = convertCommit(rev)
}
return commitsInfo, treeCommit, nil
}
func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitsInfoState {
entryPaths := make(map[string]struct{}, len(entries))
for _, entry := range entries {
entryPaths[path.Join(treePath, entry.Name())] = struct{}{}
}
if treePath = path.Clean(treePath); treePath == "." {
treePath = ""
}
return &getCommitsInfoState{
entries: entries,
entryPaths: entryPaths,
commits: make(map[string]*Commit, len(entries)),
targetedPaths: make(map[string]struct{}, len(entries)),
treePath: treePath,
headCommit: headCommit,
}
type commitAndPaths struct {
commit *object.Commit
// Paths that are still on the branch represented by commit
paths []string
// Set of hashes for the paths
hashes map[string]plumbing.Hash
}
// GetCommitsInfo gets information of all commits that are corresponding to these entries
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCommitCache) ([][]interface{}, error) {
state := initGetCommitInfoState(tes, commit, treePath)
if err := getCommitsInfo(state, cache); err != nil {
func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) {
tree, err := c.Tree()
if err != nil {
return nil, err
}
if len(state.commits) < len(state.entryPaths) {
return nil, fmt.Errorf("could not find commits for all entries")
}
commitsInfo := make([][]interface{}, len(tes))
for i, entry := range tes {
commit, ok := state.commits[path.Join(treePath, entry.Name())]
if !ok {
return nil, fmt.Errorf("could not find commit for %s", entry.Name())
}
switch entry.Type {
case ObjectCommit:
subModuleURL := ""
if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
return nil, err
} else if subModule != nil {
subModuleURL = subModule.URL
}
subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
commitsInfo[i] = []interface{}{entry, subModuleFile}
default:
commitsInfo[i] = []interface{}{entry, commit}
}
}
return commitsInfo, nil
}
func (state *getCommitsInfoState) cleanEntryPath(rawEntryPath string) (string, error) {
if rawEntryPath[0] == '"' {
var err error
rawEntryPath, err = strconv.Unquote(rawEntryPath)
// Optimize deep traversals by focusing only on the specific tree
if treePath != "" {
tree, err = tree.Tree(treePath)
if err != nil {
return rawEntryPath, err
return nil, err
}
}
var entryNameStartIndex int
if len(state.treePath) > 0 {
entryNameStartIndex = len(state.treePath) + 1
}
if index := strings.IndexByte(rawEntryPath[entryNameStartIndex:], '/'); index >= 0 {
return rawEntryPath[:entryNameStartIndex+index], nil
}
return rawEntryPath, nil
return tree, nil
}
// update report that the given path was last modified by the given commit.
// Returns whether state.commits was updated
func (state *getCommitsInfoState) update(entryPath string, commit *Commit) bool {
if _, ok := state.entryPaths[entryPath]; !ok {
return false
}
var updated bool
state.lock.Lock()
defer state.lock.Unlock()
if _, ok := state.commits[entryPath]; !ok {
state.commits[entryPath] = commit
updated = true
func getFullPath(treePath, path string) string {
if treePath != "" {
if path != "" {
return treePath + "/" + path
}
return treePath
}
return updated
return path
}
const getCommitsInfoPretty = "--pretty=format:%H %ct %s"
func getCommitsInfo(state *getCommitsInfoState, cache LastCommitCache) error {
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
args := []string{"log", state.headCommit.ID.String(), getCommitsInfoPretty, "--name-status", "-c"}
if len(state.treePath) > 0 {
args = append(args, "--", state.treePath)
func getFileHashes(c *object.Commit, treePath string, paths []string) (map[string]plumbing.Hash, error) {
tree, err := getCommitTree(c, treePath)
if err == object.ErrDirectoryNotFound {
// The whole tree didn't exist, so return empty map
return make(map[string]plumbing.Hash), nil
}
cmd := exec.CommandContext(ctx, "git", args...)
cmd.Dir = state.headCommit.repo.Path
readCloser, err := cmd.StdoutPipe()
if err != nil {
return err
return nil, err
}
if err := cmd.Start(); err != nil {
return err
hashes := make(map[string]plumbing.Hash)
for _, path := range paths {
if path != "" {
entry, err := tree.FindEntry(path)
if err == nil {
hashes[path] = entry.Hash
}
} else {
hashes[path] = tree.Hash
}
}
// it's okay to ignore the error returned by cmd.Wait(); we expect the
// subprocess to sometimes have a non-zero exit status, since we may
// prematurely close stdout, resulting in a broken pipe.
defer cmd.Wait()
numThreads := runtime.NumCPU()
done := make(chan error, numThreads)
for i := 0; i < numThreads; i++ {
go targetedSearch(state, done, cache)
}
return hashes, nil
}
scanner := bufio.NewScanner(readCloser)
err = state.processGitLogOutput(scanner)
func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (map[string]*object.Commit, error) {
// We do a tree traversal with nodes sorted by commit time
seen := make(map[plumbing.Hash]bool)
heap := binaryheap.NewWith(func(a, b interface{}) int {
if a.(*commitAndPaths).commit.Committer.When.Before(b.(*commitAndPaths).commit.Committer.When) {
return 1
}
return -1
})
// it is important that we close stdout here; if we do not close
// stdout, the subprocess will keep running, and the deffered call
// cmd.Wait() may block for a long time.
if closeErr := readCloser.Close(); closeErr != nil && err == nil {
err = closeErr
result := make(map[string]*object.Commit)
initialHashes, err := getFileHashes(c, treePath, paths)
if err != nil {
return nil, err
}
for i := 0; i < numThreads; i++ {
doneErr := <-done
if doneErr != nil && err == nil {
err = doneErr
// Start search from the root commit and with full set of paths
heap.Push(&commitAndPaths{c, paths, initialHashes})
for {
cIn, ok := heap.Pop()
if !ok {
break
}
}
return err
}
current := cIn.(*commitAndPaths)
currentID := current.commit.ID()
func (state *getCommitsInfoState) processGitLogOutput(scanner *bufio.Scanner) error {
// keep a local cache of seen paths to avoid acquiring a lock for paths
// we've already seen
seenPaths := make(map[string]struct{}, len(state.entryPaths))
// number of consecutive commits without any finds
coldStreak := 0
var commit *Commit
var err error
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 { // in-between commits
numRemainingEntries := state.numRemainingEntries()
if numRemainingEntries == 0 {
break
}
if coldStreak >= deferToTargetedSearchColdStreak &&
numRemainingEntries <= deferToTargetedSearchNumRemainingEntries {
// stop this untargeted search, and let the targeted-search threads
// finish the work
break
}
if seen[currentID] {
continue
}
if line[0] >= 'A' && line[0] <= 'X' { // a file was changed by the current commit
// look for the last tab, since for copies (C) and renames (R) two
// filenames are printed: src, then dest
tabIndex := strings.LastIndexByte(line, '\t')
if tabIndex < 1 {
return fmt.Errorf("misformatted line: %s", line)
seen[currentID] = true
// Load the parent commits for the one we are currently examining
numParents := current.commit.NumParents()
var parents []*object.Commit
for i := 0; i < numParents; i++ {
parent, err := current.commit.Parent(i)
if err != nil {
break
}
entryPath, err := state.cleanEntryPath(line[tabIndex+1:])
parents = append(parents, parent)
}
// Examine the current commit and set of interesting paths
numOfParentsWithPath := make([]int, len(current.paths))
pathChanged := make([]bool, len(current.paths))
parentHashes := make([]map[string]plumbing.Hash, len(parents))
for j, parent := range parents {
parentHashes[j], err = getFileHashes(parent, treePath, current.paths)
if err != nil {
return err
break
}
if _, ok := seenPaths[entryPath]; !ok {
if state.update(entryPath, commit) {
coldStreak = 0
for i, path := range current.paths {
if parentHashes[j][path] != plumbing.ZeroHash {
numOfParentsWithPath[i]++
if parentHashes[j][path] != current.hashes[path] {
pathChanged[i] = true
}
}
seenPaths[entryPath] = struct{}{}
}
continue
}
// a new commit
commit, err = parseCommitInfo(line)
if err != nil {
return err
var remainingPaths []string
for i, path := range current.paths {
switch numOfParentsWithPath[i] {
case 0:
// The path didn't exist in any parent, so it must have been created by
// this commit. The results could already contain some newer change from
// different path, so don't override that.
if result[path] == nil {
result[path] = current.commit
}
case 1:
// The file is present on exactly one parent, so check if it was changed
// and save the revision if it did.
if pathChanged[i] {
if result[path] == nil {
result[path] = current.commit
}
} else {
remainingPaths = append(remainingPaths, path)
}
default:
// The file is present on more than one of the parent paths, so this is
// a merge. We have to examine all the parent trees to find out where
// the change occurred. pathChanged[i] would tell us that the file was
// changed during the merge, but it wouldn't tell us the relevant commit
// that introduced it.
remainingPaths = append(remainingPaths, path)
}
}
coldStreak++
}
return scanner.Err()
}
// parseCommitInfo parse a commit from a line of `git log` output. Expects the
// line to be formatted according to getCommitsInfoPretty.
func parseCommitInfo(line string) (*Commit, error) {
if len(line) < 43 {
return nil, fmt.Errorf("invalid git output: %s", line)
}
ref, err := NewIDFromString(line[:40])
if err != nil {
return nil, err
}
spaceIndex := strings.IndexByte(line[41:], ' ')
if spaceIndex < 0 {
return nil, fmt.Errorf("invalid git output: %s", line)
}
unixSeconds, err := strconv.Atoi(line[41 : 41+spaceIndex])
if err != nil {
return nil, err
if len(remainingPaths) > 0 {
// Add the parent nodes along with remaining paths to the heap for further
// processing.
for j, parent := range parents {
if seen[parent.ID()] {
continue
}
// Combine remainingPath with paths available on the parent branch
// and make union of them
var remainingPathsForParent []string
for _, path := range remainingPaths {
if parentHashes[j][path] != plumbing.ZeroHash {
remainingPathsForParent = append(remainingPathsForParent, path)
}
}
heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]})
}
}
}
message := line[spaceIndex+42:]
return &Commit{
ID: ref,
CommitMessage: message,
Committer: &Signature{
When: time.Unix(int64(unixSeconds), 0),
},
}, nil
return result, nil
}

@ -51,7 +51,7 @@ func testGetCommitsInfo(t *testing.T, repo1 *Repository) {
assert.NoError(t, err)
entries, err := tree.ListEntries()
assert.NoError(t, err)
commitsInfo, err := entries.GetCommitsInfo(commit, testCase.Path, nil)
commitsInfo, _, err := entries.GetCommitsInfo(commit, testCase.Path, nil)
assert.NoError(t, err)
assert.Len(t, commitsInfo, len(testCase.ExpectedIDs))
for _, commitInfo := range commitsInfo {
@ -107,7 +107,7 @@ func BenchmarkEntries_GetCommitsInfo(b *testing.B) {
b.ResetTimer()
b.Run(benchmark.name, func(b *testing.B) {
for i := 0; i < b.N; i++ {
_, err := entries.GetCommitsInfo(commit, "", nil)
_, _, err := entries.GetCommitsInfo(commit, "", nil)
if err != nil {
b.Fatal(err)
}

@ -64,3 +64,18 @@ func IsErrUnsupportedVersion(err error) bool {
func (err ErrUnsupportedVersion) Error() string {
return fmt.Sprintf("Operation requires higher version [required: %s]", err.Required)
}
// ErrBranchNotExist represents a "BranchNotExist" kind of error.
type ErrBranchNotExist struct {
Name string
}
// IsErrBranchNotExist checks if an error is a ErrBranchNotExist.
func IsErrBranchNotExist(err error) bool {
_, ok := err.(ErrBranchNotExist)
return ok
}
func (err ErrBranchNotExist) Error() string {
return fmt.Sprintf("branch does not exist [name: %s]", err.Name)
}

@ -8,6 +8,10 @@ import (
"bytes"
"fmt"
"strconv"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
"gopkg.in/src-d/go-git.v4/plumbing/object"
)
// ParseTreeEntries parses the output of a `git ls-tree` command.
@ -20,30 +24,26 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
for pos := 0; pos < len(data); {
// expect line to be of the form "<mode> <type> <sha>\t<filename>"
entry := new(TreeEntry)
entry.gogitTreeEntry = &object.TreeEntry{}
entry.ptree = ptree
if pos+6 > len(data) {
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
}
switch string(data[pos : pos+6]) {
case "100644":
entry.mode = EntryModeBlob
entry.Type = ObjectBlob
entry.gogitTreeEntry.Mode = filemode.Regular
pos += 12 // skip over "100644 blob "
case "100755":
entry.mode = EntryModeExec
entry.Type = ObjectBlob
entry.gogitTreeEntry.Mode = filemode.Executable
pos += 12 // skip over "100755 blob "
case "120000":
entry.mode = EntryModeSymlink
entry.Type = ObjectBlob
entry.gogitTreeEntry.Mode = filemode.Symlink
pos += 12 // skip over "120000 blob "
case "160000":
entry.mode = EntryModeCommit
entry.Type = ObjectCommit
entry.gogitTreeEntry.Mode = filemode.Submodule
pos += 14 // skip over "160000 object "
case "040000":
entry.mode = EntryModeTree
entry.Type = ObjectTree
entry.gogitTreeEntry.Mode = filemode.Dir
pos += 12 // skip over "040000 tree "
default:
return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6]))
@ -57,6 +57,7 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
}
entry.ID = id
entry.gogitTreeEntry.Hash = plumbing.Hash(id)
pos += 41 // skip over sha and trailing space
end := pos + bytes.IndexByte(data[pos:], '\n')
@ -66,12 +67,12 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
// In case entry name is surrounded by double quotes(it happens only in git-shell).
if data[pos] == '"' {
entry.name, err = strconv.Unquote(string(data[pos:end]))
entry.gogitTreeEntry.Name, err = strconv.Unquote(string(data[pos:end]))
if err != nil {
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
}
} else {
entry.name = string(data[pos:end])
entry.gogitTreeEntry.Name = string(data[pos:end])
}
pos = end + 1

@ -8,6 +8,8 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"gopkg.in/src-d/go-git.v4/plumbing/filemode"
"gopkg.in/src-d/go-git.v4/plumbing/object"
)
func TestParseTreeEntries(t *testing.T) {
@ -23,10 +25,12 @@ func TestParseTreeEntries(t *testing.T) {
Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n",
Expected: []*TreeEntry{
{
mode: EntryModeBlob,
Type: ObjectBlob,
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
name: "example/file2.txt",
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
gogitTreeEntry: &object.TreeEntry{
Hash: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
Name: "example/file2.txt",
Mode: filemode.Regular,
},
},
},
},
@ -35,16 +39,20 @@ func TestParseTreeEntries(t *testing.T) {
"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n",
Expected: []*TreeEntry{
{
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
Type: ObjectBlob,
mode: EntryModeSymlink,
name: "example/\n.txt",
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
gogitTreeEntry: &object.TreeEntry{
Hash: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
Name: "example/\n.txt",
Mode: filemode.Symlink,
},
},
{
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
Type: ObjectTree,
mode: EntryModeTree,
name: "example",
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
gogitTreeEntry: &object.TreeEntry{
Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
Name: "example",
Mode: filemode.Dir,
},
},
},
},

@ -16,14 +16,20 @@ import (
"time"
"github.com/Unknwon/com"
"gopkg.in/src-d/go-billy.v4/osfs"
gogit "gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
"gopkg.in/src-d/go-git.v4/storage/filesystem"
)
// Repository represents a Git repository.
type Repository struct {
Path string
commitCache *ObjectCache
tagCache *ObjectCache
tagCache *ObjectCache
gogitRepo *gogit.Repository
gogitStorage *filesystem.Storage
}
const prettyLogFormat = `--pretty=format:%H`
@ -77,10 +83,25 @@ func OpenRepository(repoPath string) (*Repository, error) {
return nil, errors.New("no such file or directory")
}
fs := osfs.New(repoPath)
_, err = fs.Stat(".git")
if err == nil {
fs, err = fs.Chroot(".git")
if err != nil {
return nil, err
}
}
storage := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{KeepDescriptors: true})
gogitRepo, err := gogit.Open(storage, fs)
if err != nil {
return nil, err
}
return &Repository{
Path: repoPath,
commitCache: newObjectCache(),
tagCache: newObjectCache(),
Path: repoPath,
gogitRepo: gogitRepo,
gogitStorage: storage,
tagCache: newObjectCache(),
}, nil
}

@ -4,19 +4,19 @@
package git
import (
"gopkg.in/src-d/go-git.v4/plumbing"
)
func (repo *Repository) getBlob(id SHA1) (*Blob, error) {
if _, err := NewCommand("cat-file", "-p", id.String()).RunInDir(repo.Path); err != nil {
encodedObj, err := repo.gogitRepo.Storer.EncodedObject(plumbing.AnyObject, plumbing.Hash(id))
if err != nil {
return nil, ErrNotExist{id.String(), ""}
}
return &Blob{
repo: repo,
TreeEntry: &TreeEntry{
ID: id,
ptree: &Tree{
repo: repo,
},
},
ID: id,
gogitEncodedObj: encodedObj,
}, nil
}

@ -30,8 +30,9 @@ func TestRepository_GetBlob_Found(t *testing.T) {
blob, err := r.GetBlob(testCase.OID)
assert.NoError(t, err)
dataReader, err := blob.Data()
dataReader, err := blob.DataAsync()
assert.NoError(t, err)
defer dataReader.Close()
data, err := ioutil.ReadAll(dataReader)
assert.NoError(t, err)

@ -9,7 +9,6 @@ import (
"fmt"
"strings"
"gopkg.in/src-d/go-git.v4"
"gopkg.in/src-d/go-git.v4/plumbing"
)
@ -29,13 +28,19 @@ func IsBranchExist(repoPath, name string) bool {
// IsBranchExist returns true if given branch exists in current repository.
func (repo *Repository) IsBranchExist(name string) bool {
return IsBranchExist(repo.Path, name)
_, err := repo.gogitRepo.Reference(plumbing.ReferenceName(BranchPrefix+name), true)
if err != nil {
return false
}
return true
}
// Branch represents a Git branch.
type Branch struct {
Name string
Path string
gitRepo *Repository
}
// GetHEADBranch returns corresponding branch of HEAD.
@ -51,8 +56,9 @@ func (repo *Repository) GetHEADBranch() (*Branch, error) {
}
return &Branch{
Name: stdout[len(BranchPrefix):],
Path: stdout,
Name: stdout[len(BranchPrefix):],
Path: stdout,
gitRepo: repo,
}, nil
}
@ -64,23 +70,56 @@ func (repo *Repository) SetDefaultBranch(name string) error {
// GetBranches returns all branches of the repository.
func (repo *Repository) GetBranches() ([]string, error) {
r, err := git.PlainOpen(repo.Path)
var branchNames []string
branches, err := repo.gogitRepo.Branches()
if err != nil {
return nil, err
}
branchIter, err := r.Branches()
branches.ForEach(func(branch *plumbing.Reference) error {
branchNames = append(branchNames, strings.TrimPrefix(branch.Name().String(), BranchPrefix))
return nil
})
// TODO: Sort?
return branchNames, nil
}
// GetBranch returns a branch by it's name
func (repo *Repository) GetBranch(branch string) (*Branch, error) {
if !repo.IsBranchExist(branch) {
return nil, ErrBranchNotExist{branch}
}
return &Branch{
Path: repo.Path,
Name: branch,
gitRepo: repo,
}, nil
}
// GetBranchesByPath returns a branch by it's path
func GetBranchesByPath(path string) ([]*Branch, error) {
gitRepo, err := OpenRepository(path)
if err != nil {
return nil, err
}
branches := make([]string, 0)
if err = branchIter.ForEach(func(branch *plumbing.Reference) error {
branches = append(branches, branch.Name().Short())
return nil
}); err != nil {
brs, err := gitRepo.GetBranches()
if err != nil {
return nil, err
}
branches := make([]*Branch, len(brs))
for i := range brs {
branches[i] = &Branch{
Path: path,
Name: brs[i],
gitRepo: gitRepo,
}
}
return branches, nil
}
@ -132,3 +171,8 @@ func (repo *Repository) RemoveRemote(name string) error {
_, err := NewCommand("remote", "remove", name).RunInDir(repo.Path)
return err
}
// GetCommit returns the head commit of a branch
func (branch *Branch) GetCommit() (*Commit, error) {
return branch.gitRepo.GetBranchCommit(branch.Name)
}

@ -1,4 +1,5 @@
// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style