Add detected file language to code search (#10256)

Move language detection to a separate module to make it more reusable

Add option to disable vendored file exclusion from file search

Always show all language stats for search
mj
Lauris BH 4 years ago committed by GitHub
parent efbd7ca39b
commit 3c45cf8494
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -242,6 +242,7 @@ relation to port exhaustion.
- `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search.
- `REPO_INDEXER_INCLUDE`: **empty**: A comma separated list of glob patterns (see https://github.com/gobwas/glob) to **include** in the index. Use `**.txt` to match any files with .txt extension. An empty list means include all files. - `REPO_INDEXER_INCLUDE`: **empty**: A comma separated list of glob patterns (see https://github.com/gobwas/glob) to **include** in the index. Use `**.txt` to match any files with .txt extension. An empty list means include all files.
- `REPO_INDEXER_EXCLUDE`: **empty**: A comma separated list of glob patterns (see https://github.com/gobwas/glob) to **exclude** from the index. Files that match this list will not be indexed, even if they match in `REPO_INDEXER_INCLUDE`. - `REPO_INDEXER_EXCLUDE`: **empty**: A comma separated list of glob patterns (see https://github.com/gobwas/glob) to **exclude** from the index. Files that match this list will not be indexed, even if they match in `REPO_INDEXER_INCLUDE`.
- `REPO_INDEXER_EXCLUDE_VENDORED`: **true**: Exclude vendored files from the index.
- `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request. - `UPDATE_BUFFER_LEN`: **20**: Buffer length of index request.
- `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed. - `MAX_FILE_SIZE`: **1048576**: Maximum size in bytes of files to be indexed.
- `STARTUP_TIMEOUT`: **30s**: If the indexer takes longer than this timeout to start - fail. (This timeout will be added to the hammer time above for child processes - as bleve will not start until the previous parent is shutdown.) Set to zero to never timeout. - `STARTUP_TIMEOUT`: **30s**: If the indexer takes longer than this timeout to start - fail. (This timeout will be added to the hammer time above for child processes - as bleve will not start until the previous parent is shutdown.) Set to zero to never timeout.

@ -42,6 +42,8 @@ Gitea applies glob pattern matching from the [`gobwas/glob` library](https://git
Limiting the list of files prevents the indexes from becoming polluted with derived or irrelevant files (e.g. lss, sym, map, etc.), so the search results are more relevant. It can also help reduce the index size. Limiting the list of files prevents the indexes from becoming polluted with derived or irrelevant files (e.g. lss, sym, map, etc.), so the search results are more relevant. It can also help reduce the index size.
`REPO_INDEXER_EXCLUDE_VENDORED` (default: true) excludes vendored files from the index.
`REPO_INDEXER_INCLUDE` (default: empty) is a comma separated list of glob patterns to **include** in the index. An empty list means "_include all files_". `REPO_INDEXER_INCLUDE` (default: empty) is a comma separated list of glob patterns to **include** in the index. An empty list means "_include all files_".
`REPO_INDEXER_EXCLUDE` (default: empty) is a comma separated list of glob patterns to **exclude** from the index. Files that match this list will not be indexed. `REPO_INDEXER_EXCLUDE` takes precedence over `REPO_INDEXER_INCLUDE`. `REPO_INDEXER_EXCLUDE` (default: empty) is a comma separated list of glob patterns to **exclude** from the index. Files that match this list will not be indexed. `REPO_INDEXER_EXCLUDE` takes precedence over `REPO_INDEXER_INCLUDE`.

@ -0,0 +1,36 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package analyze
import (
"path/filepath"
"github.com/src-d/enry/v2"
)
// GetCodeLanguageWithCallback resolves the language of a file, loading its
// content lazily.
//
// The file name is tried first: by extension, then by the name itself. Only
// when neither yields a match is contentFunc invoked to obtain the bytes used
// for content-based detection. If contentFunc reports an error,
// enry.OtherLanguage is returned.
func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, error)) string {
	// Name-based detectors, in priority order; neither needs the file content.
	byName := []func(string) (string, bool){
		enry.GetLanguageByExtension,
		enry.GetLanguageByFilename,
	}
	for _, detect := range byName {
		lang, found := detect(filename)
		if found {
			return lang
		}
	}

	// Fall back to content-based detection, fetching the data only now.
	data, readErr := contentFunc()
	if readErr != nil {
		return enry.OtherLanguage
	}

	base := filepath.Base(filename)
	return enry.GetLanguage(base, data)
}
// GetCodeLanguage resolves the language of a file whose content is already
// in memory. It delegates to GetCodeLanguageWithCallback with a callback that
// simply hands back the supplied content and never fails.
func GetCodeLanguage(filename string, content []byte) string {
	provide := func() ([]byte, error) {
		return content, nil
	}
	return GetCodeLanguageWithCallback(filename, provide)
}

@ -9,7 +9,8 @@ import (
"io" "io"
"io/ioutil" "io/ioutil"
"math" "math"
"path/filepath"
"code.gitea.io/gitea/modules/analyze"
"github.com/src-d/enry/v2" "github.com/src-d/enry/v2"
"gopkg.in/src-d/go-git.v4" "gopkg.in/src-d/go-git.v4"
@ -51,25 +52,15 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
// TODO: Use .gitattributes file for linguist overrides // TODO: Use .gitattributes file for linguist overrides
language, ok := enry.GetLanguageByExtension(f.Name) language := analyze.GetCodeLanguageWithCallback(f.Name, func() ([]byte, error) {
if !ok { return readFile(f, fileSizeLimit)
if language, ok = enry.GetLanguageByFilename(f.Name); !ok { })
content, err := readFile(f, fileSizeLimit) if language == enry.OtherLanguage || language == "" {
if err != nil { return nil
return nil
}
language = enry.GetLanguage(filepath.Base(f.Name), content)
if language == enry.OtherLanguage {
return nil
}
}
} }
if language != "" { sizes[language] += f.Size
sizes[language] += f.Size total += f.Size
total += f.Size
}
return nil return nil
}) })

@ -9,16 +9,20 @@ import (
"os" "os"
"strconv" "strconv"
"strings" "strings"
"time"
"code.gitea.io/gitea/models" "code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
"github.com/blevesearch/bleve" "github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/analyzer/custom" analyzer_custom "github.com/blevesearch/bleve/analysis/analyzer/custom"
analyzer_keyword "github.com/blevesearch/bleve/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/analysis/token/lowercase" "github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/token/unicodenorm" "github.com/blevesearch/bleve/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/analysis/tokenizer/unicode" "github.com/blevesearch/bleve/analysis/tokenizer/unicode"
@ -26,6 +30,7 @@ import (
"github.com/blevesearch/bleve/mapping" "github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search/query" "github.com/blevesearch/bleve/search/query"
"github.com/ethantkoenig/rupture" "github.com/ethantkoenig/rupture"
"github.com/src-d/enry/v2"
) )
const unicodeNormalizeName = "unicodeNormalize" const unicodeNormalizeName = "unicodeNormalize"
@ -86,8 +91,11 @@ func openIndexer(path string, latestVersion int) (bleve.Index, error) {
// RepoIndexerData data stored in the repo indexer // RepoIndexerData data stored in the repo indexer
type RepoIndexerData struct { type RepoIndexerData struct {
RepoID int64 RepoID int64
Content string CommitID string
Content string
Language string
UpdatedAt time.Time
} }
// Type returns the document type, for bleve's mapping.Classifier interface. // Type returns the document type, for bleve's mapping.Classifier interface.
@ -95,7 +103,11 @@ func (d *RepoIndexerData) Type() string {
return repoIndexerDocType return repoIndexerDocType
} }
func addUpdate(update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error { func addUpdate(commitSha string, update fileUpdate, repo *models.Repository, batch rupture.FlushingBatch) error {
// Ignore vendored files in code search
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
return nil
}
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
RunInDir(repo.RepoPath()) RunInDir(repo.RepoPath())
if err != nil { if err != nil {
@ -118,8 +130,11 @@ func addUpdate(update fileUpdate, repo *models.Repository, batch rupture.Flushin
id := filenameIndexerID(repo.ID, update.Filename) id := filenameIndexerID(repo.ID, update.Filename)
return batch.Index(id, &RepoIndexerData{ return batch.Index(id, &RepoIndexerData{
RepoID: repo.ID, RepoID: repo.ID,
Content: string(charset.ToUTF8DropErrors(fileContents)), CommitID: commitSha,
Content: string(charset.ToUTF8DropErrors(fileContents)),
Language: analyze.GetCodeLanguage(update.Filename, fileContents),
UpdatedAt: time.Now().UTC(),
}) })
} }
@ -131,7 +146,7 @@ func addDelete(filename string, repo *models.Repository, batch rupture.FlushingB
const ( const (
repoIndexerAnalyzer = "repoIndexerAnalyzer" repoIndexerAnalyzer = "repoIndexerAnalyzer"
repoIndexerDocType = "repoIndexerDocType" repoIndexerDocType = "repoIndexerDocType"
repoIndexerLatestVersion = 4 repoIndexerLatestVersion = 5
) )
// createRepoIndexer create a repo indexer if one does not already exist // createRepoIndexer create a repo indexer if one does not already exist
@ -145,11 +160,21 @@ func createRepoIndexer(path string, latestVersion int) (bleve.Index, error) {
textFieldMapping.IncludeInAll = false textFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("Content", textFieldMapping) docMapping.AddFieldMappingsAt("Content", textFieldMapping)
termFieldMapping := bleve.NewTextFieldMapping()
termFieldMapping.IncludeInAll = false
termFieldMapping.Analyzer = analyzer_keyword.Name
docMapping.AddFieldMappingsAt("Language", termFieldMapping)
docMapping.AddFieldMappingsAt("CommitID", termFieldMapping)
timeFieldMapping := bleve.NewDateTimeFieldMapping()
timeFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("UpdatedAt", timeFieldMapping)
mapping := bleve.NewIndexMapping() mapping := bleve.NewIndexMapping()
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
return nil, err return nil, err
} else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{ } else if err := mapping.AddCustomAnalyzer(repoIndexerAnalyzer, map[string]interface{}{
"type": custom.Name, "type": analyzer_custom.Name,
"char_filters": []string{}, "char_filters": []string{},
"tokenizer": unicode.Name, "tokenizer": unicode.Name,
"token_filters": []string{unicodeNormalizeName, lowercase.Name}, "token_filters": []string{unicodeNormalizeName, lowercase.Name},
@ -255,7 +280,7 @@ func (b *BleveIndexer) Index(repoID int64) error {
batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize) batch := rupture.NewFlushingBatch(b.indexer, maxBatchSize)
for _, update := range changes.Updates { for _, update := range changes.Updates {
if err := addUpdate(update, repo, batch); err != nil { if err := addUpdate(sha, update, repo, batch); err != nil {
return err return err
} }
} }
@ -289,7 +314,7 @@ func (b *BleveIndexer) Delete(repoID int64) error {
// Search searches for files in the specified repo. // Search searches for files in the specified repo.
// Returns the matching file-paths // Returns the matching file-paths
func (b *BleveIndexer) Search(repoIDs []int64, keyword string, page, pageSize int) (int64, []*SearchResult, error) { func (b *BleveIndexer) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
phraseQuery := bleve.NewMatchPhraseQuery(keyword) phraseQuery := bleve.NewMatchPhraseQuery(keyword)
phraseQuery.FieldVal = "Content" phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer phraseQuery.Analyzer = repoIndexerAnalyzer
@ -309,16 +334,35 @@ func (b *BleveIndexer) Search(repoIDs []int64, keyword string, page, pageSize in
indexerQuery = phraseQuery indexerQuery = phraseQuery
} }
// Save for reuse without language filter
facetQuery := indexerQuery
if len(language) > 0 {
languageQuery := bleve.NewMatchQuery(language)
languageQuery.FieldVal = "Language"
languageQuery.Analyzer = analyzer_keyword.Name
indexerQuery = bleve.NewConjunctionQuery(
indexerQuery,
languageQuery,
)
}
from := (page - 1) * pageSize from := (page - 1) * pageSize
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false) searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
searchRequest.Fields = []string{"Content", "RepoID"} searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
searchRequest.IncludeLocations = true searchRequest.IncludeLocations = true
if len(language) == 0 {
searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
}
result, err := b.indexer.Search(searchRequest) result, err := b.indexer.Search(searchRequest)
if err != nil { if err != nil {
return 0, nil, err return 0, nil, nil, err
} }
total := int64(result.Total)
searchResults := make([]*SearchResult, len(result.Hits)) searchResults := make([]*SearchResult, len(result.Hits))
for i, hit := range result.Hits { for i, hit := range result.Hits {
var startIndex, endIndex int = -1, -1 var startIndex, endIndex int = -1, -1
@ -333,13 +377,47 @@ func (b *BleveIndexer) Search(repoIDs []int64, keyword string, page, pageSize in
endIndex = locationEnd endIndex = locationEnd
} }
} }
language := hit.Fields["Language"].(string)
var updatedUnix timeutil.TimeStamp
if t, err := time.Parse(time.RFC3339, hit.Fields["UpdatedAt"].(string)); err == nil {
updatedUnix = timeutil.TimeStamp(t.Unix())
}
searchResults[i] = &SearchResult{ searchResults[i] = &SearchResult{
RepoID: int64(hit.Fields["RepoID"].(float64)), RepoID: int64(hit.Fields["RepoID"].(float64)),
StartIndex: startIndex, StartIndex: startIndex,
EndIndex: endIndex, EndIndex: endIndex,
Filename: filenameOfIndexerID(hit.ID), Filename: filenameOfIndexerID(hit.ID),
Content: hit.Fields["Content"].(string), Content: hit.Fields["Content"].(string),
CommitID: hit.Fields["CommitID"].(string),
UpdatedUnix: updatedUnix,
Language: language,
Color: enry.GetColor(language),
}
}
searchResultLanguages := make([]*SearchResultLanguages, 0, 10)
if len(language) > 0 {
// Use separate query to go get all language counts
facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
facetRequest.IncludeLocations = true
facetRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
if result, err = b.indexer.Search(facetRequest); err != nil {
return 0, nil, nil, err
}
}
languagesFacet := result.Facets["languages"]
for _, term := range languagesFacet.Terms {
if len(term.Term) == 0 {
continue
} }
searchResultLanguages = append(searchResultLanguages, &SearchResultLanguages{
Language: term.Term,
Color: enry.GetColor(term.Term),
Count: term.Count,
})
} }
return int64(result.Total), searchResults, nil return total, searchResults, searchResultLanguages, nil
} }

@ -49,27 +49,34 @@ func TestIndexAndSearch(t *testing.T) {
keywords = []struct { keywords = []struct {
Keyword string Keyword string
IDs []int64 IDs []int64
Langs int
}{ }{
{ {
Keyword: "Description", Keyword: "Description",
IDs: []int64{1}, IDs: []int64{1},
Langs: 1,
}, },
{ {
Keyword: "repo1", Keyword: "repo1",
IDs: []int64{1}, IDs: []int64{1},
Langs: 1,
}, },
{ {
Keyword: "non-exist", Keyword: "non-exist",
IDs: []int64{}, IDs: []int64{},
Langs: 0,
}, },
} }
) )
for _, kw := range keywords { for _, kw := range keywords {
total, res, err := idx.Search(nil, kw.Keyword, 1, 10) total, res, langs, err := idx.Search(nil, "", kw.Keyword, 1, 10)
assert.NoError(t, err) assert.NoError(t, err)
assert.EqualValues(t, len(kw.IDs), total) assert.EqualValues(t, len(kw.IDs), total)
assert.NotNil(t, langs)
assert.Len(t, langs, kw.Langs)
var ids = make([]int64, 0, len(res)) var ids = make([]int64, 0, len(res))
for _, hit := range res { for _, hit := range res {
ids = append(ids, hit.RepoID) ids = append(ids, hit.RepoID)

@ -12,22 +12,34 @@ import (
"code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/timeutil"
) )
// SearchResult result of performing a search in a repo // SearchResult result of performing a search in a repo
type SearchResult struct { type SearchResult struct {
RepoID int64 RepoID int64
StartIndex int StartIndex int
EndIndex int EndIndex int
Filename string Filename string
Content string Content string
CommitID string
UpdatedUnix timeutil.TimeStamp
Language string
Color string
}
// SearchResultLanguages result of top languages count in search results
type SearchResultLanguages struct {
Language string
Color string
Count int
} }
// Indexer defines an interface to indexer issues contents // Indexer defines an interface to indexer issues contents
type Indexer interface { type Indexer interface {
Index(repoID int64) error Index(repoID int64) error
Delete(repoID int64) error Delete(repoID int64) error
Search(repoIDs []int64, keyword string, page, pageSize int) (int64, []*SearchResult, error) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error)
Close() Close()
} }

@ -11,6 +11,7 @@ import (
"strings" "strings"
"code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/util"
) )
@ -18,6 +19,10 @@ import (
type Result struct { type Result struct {
RepoID int64 RepoID int64
Filename string Filename string
CommitID string
UpdatedUnix timeutil.TimeStamp
Language string
Color string
HighlightClass string HighlightClass string
LineNumbers []int LineNumbers []int
FormattedLines gotemplate.HTML FormattedLines gotemplate.HTML
@ -100,6 +105,10 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
return &Result{ return &Result{
RepoID: result.RepoID, RepoID: result.RepoID,
Filename: result.Filename, Filename: result.Filename,
CommitID: result.CommitID,
UpdatedUnix: result.UpdatedUnix,
Language: result.Language,
Color: result.Color,
HighlightClass: highlight.FileNameToHighlightClass(result.Filename), HighlightClass: highlight.FileNameToHighlightClass(result.Filename),
LineNumbers: lineNumbers, LineNumbers: lineNumbers,
FormattedLines: gotemplate.HTML(formattedLinesBuffer.String()), FormattedLines: gotemplate.HTML(formattedLinesBuffer.String()),
@ -107,14 +116,14 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
} }
// PerformSearch perform a search on a repository // PerformSearch perform a search on a repository
func PerformSearch(repoIDs []int64, keyword string, page, pageSize int) (int, []*Result, error) { func PerformSearch(repoIDs []int64, language, keyword string, page, pageSize int) (int, []*Result, []*SearchResultLanguages, error) {
if len(keyword) == 0 { if len(keyword) == 0 {
return 0, nil, nil return 0, nil, nil, nil
} }
total, results, err := indexer.Search(repoIDs, keyword, page, pageSize) total, results, resultLanguages, err := indexer.Search(repoIDs, language, keyword, page, pageSize)
if err != nil { if err != nil {
return 0, nil, err return 0, nil, nil, err
} }
displayResults := make([]*Result, len(results)) displayResults := make([]*Result, len(results))
@ -123,8 +132,8 @@ func PerformSearch(repoIDs []int64, keyword string, page, pageSize int) (int, []
startIndex, endIndex := indices(result.Content, result.StartIndex, result.EndIndex) startIndex, endIndex := indices(result.Content, result.StartIndex, result.EndIndex)
displayResults[i], err = searchResult(result, startIndex, endIndex) displayResults[i], err = searchResult(result, startIndex, endIndex)
if err != nil { if err != nil {
return 0, nil, err return 0, nil, nil, err
} }
} }
return int(total), displayResults, nil return int(total), displayResults, resultLanguages, nil
} }

@ -71,12 +71,12 @@ func (w *wrappedIndexer) Delete(repoID int64) error {
return indexer.Delete(repoID) return indexer.Delete(repoID)
} }
func (w *wrappedIndexer) Search(repoIDs []int64, keyword string, page, pageSize int) (int64, []*SearchResult, error) { func (w *wrappedIndexer) Search(repoIDs []int64, language, keyword string, page, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
indexer, err := w.get() indexer, err := w.get()
if err != nil { if err != nil {
return 0, nil, err return 0, nil, nil, err
} }
return indexer.Search(repoIDs, keyword, page, pageSize) return indexer.Search(repoIDs, language, keyword, page, pageSize)
} }

@ -41,6 +41,7 @@ var (
MaxIndexerFileSize int64 MaxIndexerFileSize int64
IncludePatterns []glob.Glob IncludePatterns []glob.Glob
ExcludePatterns []glob.Glob ExcludePatterns []glob.Glob
ExcludeVendored bool
}{ }{
IssueType: "bleve", IssueType: "bleve",
IssuePath: "indexers/issues.bleve", IssuePath: "indexers/issues.bleve",
@ -52,6 +53,7 @@ var (
IssueQueueBatchNumber: 20, IssueQueueBatchNumber: 20,
MaxIndexerFileSize: 1024 * 1024, MaxIndexerFileSize: 1024 * 1024,
ExcludeVendored: true,
} }
) )
@ -77,6 +79,7 @@ func newIndexerService() {
} }
Indexer.IncludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_INCLUDE").MustString("")) Indexer.IncludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_INCLUDE").MustString(""))
Indexer.ExcludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_EXCLUDE").MustString("")) Indexer.ExcludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_EXCLUDE").MustString(""))
Indexer.ExcludeVendored = sec.Key("REPO_INDEXER_EXCLUDE_VENDORED").MustBool(true)
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20) Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024) Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
Indexer.StartupTimeout = sec.Key("STARTUP_TIMEOUT").MustDuration(30 * time.Second) Indexer.StartupTimeout = sec.Key("STARTUP_TIMEOUT").MustDuration(30 * time.Second)

@ -215,6 +215,7 @@ user_no_results = No matching users found.
org_no_results = No matching organizations found. org_no_results = No matching organizations found.
code_no_results = No source code matching your search term found. code_no_results = No source code matching your search term found.
code_search_results = Search results for '%s' code_search_results = Search results for '%s'
code_last_indexed_at = Last indexed %s
[auth] [auth]
create_new_account = Register Account create_new_account = Register Account

@ -290,6 +290,7 @@ func ExploreCode(ctx *context.Context) {
ctx.Data["PageIsExplore"] = true ctx.Data["PageIsExplore"] = true
ctx.Data["PageIsExploreCode"] = true ctx.Data["PageIsExploreCode"] = true
language := strings.TrimSpace(ctx.Query("l"))
keyword := strings.TrimSpace(ctx.Query("q")) keyword := strings.TrimSpace(ctx.Query("q"))
page := ctx.QueryInt("page") page := ctx.QueryInt("page")
if page <= 0 { if page <= 0 {
@ -317,8 +318,9 @@ func ExploreCode(ctx *context.Context) {
} }
var ( var (
total int total int
searchResults []*code_indexer.Result searchResults []*code_indexer.Result
searchResultLanguages []*code_indexer.SearchResultLanguages
) )
// if non-admin login user, we need check UnitTypeCode at first // if non-admin login user, we need check UnitTypeCode at first
@ -340,14 +342,14 @@ func ExploreCode(ctx *context.Context) {
ctx.Data["RepoMaps"] = rightRepoMap ctx.Data["RepoMaps"] = rightRepoMap
total, searchResults, err = code_indexer.PerformSearch(repoIDs, keyword, page, setting.UI.RepoSearchPagingNum) total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum)
if err != nil { if err != nil {
ctx.ServerError("SearchResults", err) ctx.ServerError("SearchResults", err)
return return
} }
// if non-login user or isAdmin, no need to check UnitTypeCode // if non-login user or isAdmin, no need to check UnitTypeCode
} else if (ctx.User == nil && len(repoIDs) > 0) || isAdmin { } else if (ctx.User == nil && len(repoIDs) > 0) || isAdmin {
total, searchResults, err = code_indexer.PerformSearch(repoIDs, keyword, page, setting.UI.RepoSearchPagingNum) total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum)
if err != nil { if err != nil {
ctx.ServerError("SearchResults", err) ctx.ServerError("SearchResults", err)
return return
@ -377,12 +379,15 @@ func ExploreCode(ctx *context.Context) {
} }
ctx.Data["Keyword"] = keyword ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
ctx.Data["SearchResults"] = searchResults ctx.Data["SearchResults"] = searchResults
ctx.Data["SearchResultLanguages"] = searchResultLanguages
ctx.Data["RequireHighlightJS"] = true ctx.Data["RequireHighlightJS"] = true
ctx.Data["PageIsViewCode"] = true ctx.Data["PageIsViewCode"] = true
pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5)
pager.SetDefaultParams(ctx) pager.SetDefaultParams(ctx)
pager.AddParam(ctx, "l", "Language")
ctx.Data["Page"] = pager ctx.Data["Page"] = pager
ctx.HTML(200, tplExploreCode) ctx.HTML(200, tplExploreCode)

@ -22,26 +22,30 @@ func Search(ctx *context.Context) {
ctx.Redirect(ctx.Repo.RepoLink, 302) ctx.Redirect(ctx.Repo.RepoLink, 302)
return return
} }
language := strings.TrimSpace(ctx.Query("l"))
keyword := strings.TrimSpace(ctx.Query("q")) keyword := strings.TrimSpace(ctx.Query("q"))
page := ctx.QueryInt("page") page := ctx.QueryInt("page")
if page <= 0 { if page <= 0 {
page = 1 page = 1
} }
total, searchResults, err := code_indexer.PerformSearch([]int64{ctx.Repo.Repository.ID}, total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch([]int64{ctx.Repo.Repository.ID},
keyword, page, setting.UI.RepoSearchPagingNum) language, keyword, page, setting.UI.RepoSearchPagingNum)
if err != nil { if err != nil {
ctx.ServerError("SearchResults", err) ctx.ServerError("SearchResults", err)
return return
} }
ctx.Data["Keyword"] = keyword ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
ctx.Data["SourcePath"] = setting.AppSubURL + "/" + ctx.Data["SourcePath"] = setting.AppSubURL + "/" +
path.Join(ctx.Repo.Repository.Owner.Name, ctx.Repo.Repository.Name, "src", "branch", ctx.Repo.Repository.DefaultBranch) path.Join(ctx.Repo.Repository.Owner.Name, ctx.Repo.Repository.Name)
ctx.Data["SearchResults"] = searchResults ctx.Data["SearchResults"] = searchResults
ctx.Data["SearchResultLanguages"] = searchResultLanguages
ctx.Data["RequireHighlightJS"] = true ctx.Data["RequireHighlightJS"] = true
ctx.Data["PageIsViewCode"] = true ctx.Data["PageIsViewCode"] = true
pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5) pager := context.NewPagination(total, setting.UI.RepoSearchPagingNum, page, 5)
pager.SetDefaultParams(ctx) pager.SetDefaultParams(ctx)
pager.AddParam(ctx, "l", "Language")
ctx.Data["Page"] = pager ctx.Data["Page"] = pager
ctx.HTML(200, tplSearch) ctx.HTML(200, tplSearch)

@ -16,13 +16,22 @@
<h3> <h3>
{{.i18n.Tr "explore.code_search_results" (.Keyword|Escape) | Str2html }} {{.i18n.Tr "explore.code_search_results" (.Keyword|Escape) | Str2html }}
</h3> </h3>
<div>
{{range $term := .SearchResultLanguages}}
<a class="ui {{if eq $.Language $term.Language}}primary {{end}}basic label" href="{{$.AppSubURL}}/explore/code?q={{$.Keyword}}{{if ne $.Language $term.Language}}&l={{$term.Language}}{{end}}">
<i class="color-icon" style="background-color: {{$term.Color}}"></i>
{{$term.Language}}
<div class="detail">{{$term.Count}}</div>
</a>
{{end}}
</div>
<div class="repository search"> <div class="repository search">
{{range $result := .SearchResults}} {{range $result := .SearchResults}}
{{$repo := (index $.RepoMaps .RepoID)}} {{$repo := (index $.RepoMaps .RepoID)}}
<div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result"> <div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result">
<h4 class="ui top attached normal header"> <h4 class="ui top attached normal header">
<span class="file"><a rel="nofollow" href="{{EscapePound $repo.HTMLURL}}">{{$repo.FullName}}</a> - {{.Filename}}</span> <span class="file"><a rel="nofollow" href="{{EscapePound $repo.HTMLURL}}">{{$repo.FullName}}</a> - {{.Filename}}</span>
<a class="ui basic grey tiny button" rel="nofollow" href="{{EscapePound $repo.HTMLURL}}/src/branch/{{$repo.DefaultBranch}}/{{EscapePound .Filename}}">{{$.i18n.Tr "repo.diff.view_file"}}</a> <a class="ui basic grey tiny button" rel="nofollow" href="{{EscapePound $repo.HTMLURL}}/src/commit/{{$result.CommitID}}/{{EscapePound .Filename}}">{{$.i18n.Tr "repo.diff.view_file"}}</a>
</h4> </h4>
<div class="ui attached table segment"> <div class="ui attached table segment">
<div class="file-body file-code code-view"> <div class="file-body file-code code-view">
@ -31,7 +40,7 @@
<tr> <tr>
<td class="lines-num"> <td class="lines-num">
{{range .LineNumbers}} {{range .LineNumbers}}
<a href="{{EscapePound $repo.HTMLURL}}/src/branch/{{$repo.DefaultBranch}}/{{EscapePound $result.Filename}}#L{{.}}"><span>{{.}}</span></a> <a href="{{EscapePound $repo.HTMLURL}}/src/commit/{{$result.CommitID}}/{{EscapePound $result.Filename}}#L{{.}}"><span>{{.}}</span></a>
{{end}} {{end}}
</td> </td>
<td class="lines-code"><pre><code class="{{.HighlightClass}}"><ol class="linenums">{{.FormattedLines}}</ol></code></pre></td> <td class="lines-code"><pre><code class="{{.HighlightClass}}"><ol class="linenums">{{.FormattedLines}}</ol></code></pre></td>
@ -40,6 +49,15 @@
</table> </table>
</div> </div>
</div> </div>
<div class="ui bottom attached table segment">
{{if $result.Language}}
<i class="color-icon" style="background-color: {{$result.Color}}"></i>{{$result.Language}}
{{end}}
&nbsp;
{{if not $result.UpdatedUnix.IsZero}}
<span class="ui small grey text pull right">{{$.i18n.Tr "explore.code_last_indexed_at" (TimeSinceUnix $result.UpdatedUnix $.i18n.Lang) | Safe}} &nbsp;</span>
{{end}}
</div>
</div> </div>
{{end}} {{end}}
</div> </div>

@ -16,12 +16,21 @@
<h3> <h3>
{{.i18n.Tr "repo.search.results" (.Keyword|Escape) .RepoLink .RepoName | Str2html }} {{.i18n.Tr "repo.search.results" (.Keyword|Escape) .RepoLink .RepoName | Str2html }}
</h3> </h3>
<div>
{{range $term := .SearchResultLanguages}}
<a class="ui {{if eq $.Language $term.Language}}primary {{end}}basic label" href="{{EscapePound $.SourcePath}}/search?q={{$.Keyword}}{{if ne $.Language $term.Language}}&l={{$term.Language}}{{end}}">
<i class="color-icon" style="background-color: {{$term.Color}}"></i>
{{$term.Language}}
<div class="detail">{{$term.Count}}</div>
</a>
{{end}}
</div>
<div class="repository search"> <div class="repository search">
{{range $result := .SearchResults}} {{range $result := .SearchResults}}
<div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result"> <div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result">
<h4 class="ui top attached normal header"> <h4 class="ui top attached normal header">
<span class="file">{{.Filename}}</span> <span class="file">{{.Filename}}</span>
<a class="ui basic grey tiny button" rel="nofollow" href="{{EscapePound $.SourcePath}}/{{EscapePound .Filename}}">{{$.i18n.Tr "repo.diff.view_file"}}</a> <a class="ui basic grey tiny button" rel="nofollow" href="{{EscapePound $.SourcePath}}/src/commit/{{$result.CommitID}}/{{EscapePound .Filename}}">{{$.i18n.Tr "repo.diff.view_file"}}</a>
</h4> </h4>
<div class="ui attached table segment"> <div class="ui attached table segment">
<div class="file-body file-code code-view"> <div class="file-body file-code code-view">
@ -30,7 +39,7 @@
<tr> <tr>
<td class="lines-num"> <td class="lines-num">
{{range .LineNumbers}} {{range .LineNumbers}}
<a href="{{EscapePound $.SourcePath}}/{{EscapePound $result.Filename}}#L{{.}}"><span>{{.}}</span></a> <a href="{{EscapePound $.SourcePath}}/src/commit/{{$result.CommitID}}/{{EscapePound $result.Filename}}#L{{.}}"><span>{{.}}</span></a>
{{end}} {{end}}
</td> </td>
<td class="lines-code"><pre><code class="{{.HighlightClass}}"><ol class="linenums">{{.FormattedLines}}</ol></code></pre></td> <td class="lines-code"><pre><code class="{{.HighlightClass}}"><ol class="linenums">{{.FormattedLines}}</ol></code></pre></td>
@ -39,6 +48,15 @@
</table> </table>
</div> </div>
</div> </div>
{{/* Result footer: shows the color icon and language name when $result.Language is set, and the "last indexed at" text when $result.UpdatedUnix is not zero. */}}
<div class="ui bottom attached table segment">
{{if $result.Language}}
<i class="color-icon" style="background-color: {{$result.Color}}"></i>{{$result.Language}}
{{end}}
&nbsp;
{{if not $result.UpdatedUnix.IsZero}}
<span class="ui small grey text pull right">{{$.i18n.Tr "explore.code_last_indexed_at" (TimeSinceUnix $result.UpdatedUnix $.i18n.Lang) | Safe}} &nbsp;</span>
{{end}}
</div>
</div> </div>
{{end}} {{end}}
</div> </div>

@ -0,0 +1,38 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package keyword
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/tokenizer/single"
"github.com/blevesearch/bleve/registry"
)
// Name is the identifier under which this package's analyzer constructor is
// registered with the bleve registry (see init).
const Name = "keyword"
// AnalyzerConstructor builds the keyword analyzer.
//
// It looks up the tokenizer registered under single.Name in cache and returns
// an analyzer that has only that tokenizer set; no other Analyzer fields are
// populated. The config argument is accepted to satisfy the constructor
// signature but is not read. Any lookup error from the cache is returned
// unchanged together with a nil analyzer.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, err := cache.TokenizerNamed(single.Name)
	if err != nil {
		return nil, err
	}
	return &analysis.Analyzer{Tokenizer: tok}, nil
}
// init registers AnalyzerConstructor with the bleve registry under Name when
// the package is loaded.
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}

@ -0,0 +1,49 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package single
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
// Name is the identifier under which this package's tokenizer constructor is
// registered with the bleve registry (see init).
const Name = "single"
// SingleTokenTokenizer is a stateless tokenizer whose Tokenize method returns
// the entire input as one token.
type SingleTokenTokenizer struct {
}
// NewSingleTokenTokenizer returns a pointer to a fresh SingleTokenTokenizer.
// The type has no fields, so there is nothing to configure.
func NewSingleTokenTokenizer() *SingleTokenTokenizer {
	return new(SingleTokenTokenizer)
}
// Tokenize wraps the whole input in a single token and returns it as a
// one-element token stream.
//
// The token is at position 1, spans byte offsets 0 through len(input), and is
// typed analysis.AlphaNumeric. Term refers to the input slice itself; the
// bytes are not copied.
func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream {
	whole := &analysis.Token{
		Term:     input,
		Position: 1,
		Start:    0,
		End:      len(input),
		Type:     analysis.AlphaNumeric,
	}
	return analysis.TokenStream{whole}
}
// SingleTokenTokenizerConstructor is the registry constructor for the single
// tokenizer. It reads neither config nor cache, and always returns a new
// SingleTokenTokenizer with a nil error.
func SingleTokenTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	tokenizer := NewSingleTokenTokenizer()
	return tokenizer, nil
}
// init registers SingleTokenTokenizerConstructor with the bleve registry under
// Name when the package is loaded.
func init() {
registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor)
}

@ -54,6 +54,7 @@ github.com/beorn7/perks/quantile
github.com/blevesearch/bleve github.com/blevesearch/bleve
github.com/blevesearch/bleve/analysis github.com/blevesearch/bleve/analysis
github.com/blevesearch/bleve/analysis/analyzer/custom github.com/blevesearch/bleve/analysis/analyzer/custom
github.com/blevesearch/bleve/analysis/analyzer/keyword
github.com/blevesearch/bleve/analysis/analyzer/standard github.com/blevesearch/bleve/analysis/analyzer/standard
github.com/blevesearch/bleve/analysis/datetime/flexible github.com/blevesearch/bleve/analysis/datetime/flexible
github.com/blevesearch/bleve/analysis/datetime/optional github.com/blevesearch/bleve/analysis/datetime/optional
@ -62,6 +63,7 @@ github.com/blevesearch/bleve/analysis/token/lowercase
github.com/blevesearch/bleve/analysis/token/porter github.com/blevesearch/bleve/analysis/token/porter
github.com/blevesearch/bleve/analysis/token/stop github.com/blevesearch/bleve/analysis/token/stop
github.com/blevesearch/bleve/analysis/token/unicodenorm github.com/blevesearch/bleve/analysis/token/unicodenorm
github.com/blevesearch/bleve/analysis/tokenizer/single
github.com/blevesearch/bleve/analysis/tokenizer/unicode github.com/blevesearch/bleve/analysis/tokenizer/unicode
github.com/blevesearch/bleve/document github.com/blevesearch/bleve/document
github.com/blevesearch/bleve/geo github.com/blevesearch/bleve/geo

@ -1193,6 +1193,10 @@ i.icon.centerlock {
border-radius: 500em; border-radius: 500em;
} }
/* Remove the left margin from a color icon that is a direct child of a .ui.label. */
.ui.label > .color-icon {
margin-left: 0;
}
.invisible { .invisible {
visibility: hidden; visibility: hidden;
} }

@ -1502,6 +1502,11 @@
clear: right; clear: right;
} }
/* Use 5px vertical padding on bottom attached table segments. */
.ui.bottom.attached.table.segment {
padding-top: 5px;
padding-bottom: 5px;
}
} }
.diff-stats { .diff-stats {

Loading…
Cancel
Save