Improve issue search (#2387)

* Improve issue indexer

* Fix new issue sqlite bug

* Different test indexer paths for each db

* Add integration indexer paths to make clean
release/v1.3
Ethan Koenig 6 years ago committed by Lauris BH
parent 52e11b24bf
commit b0f7457d9e

3
.gitignore vendored

@ -53,5 +53,8 @@ coverage.all
/integrations/gitea-integration-mysql
/integrations/gitea-integration-pgsql
/integrations/gitea-integration-sqlite
/integrations/indexers-mysql
/integrations/indexers-pgsql
/integrations/indexers-sqlite
/integrations/mysql.ini
/integrations/pgsql.ini

@ -63,7 +63,11 @@ all: build
.PHONY: clean
clean:
$(GO) clean -i ./...
rm -rf $(EXECUTABLE) $(DIST) $(BINDATA) integrations*.test integrations/gitea-integration-pgsql/ integrations/gitea-integration-mysql/ integrations/gitea-integration-sqlite/ integrations/mysql.ini integrations/pgsql.ini
rm -rf $(EXECUTABLE) $(DIST) $(BINDATA) \
integrations*.test \
integrations/gitea-integration-pgsql/ integrations/gitea-integration-mysql/ integrations/gitea-integration-sqlite/ \
integrations/indexers-mysql/ integrations/indexers-pgsql integrations/indexers-sqlite \
integrations/mysql.ini integrations/pgsql.ini
required-gofmt-version:
@$(GO) version | grep -q '\(1.7\|1.8\)' || { echo "We require go version 1.7 or 1.8 to format code" >&2 && exit 1; }

@ -57,7 +57,14 @@ func TestMain(m *testing.M) {
fmt.Printf("Error initializing test database: %v\n", err)
os.Exit(1)
}
os.Exit(m.Run())
exitCode := m.Run()
if err = os.RemoveAll(setting.Indexer.IssuePath); err != nil {
fmt.Printf("os.RemoveAll: %v\n", err)
os.Exit(1)
}
os.Exit(exitCode)
}
func initIntegrationTest() {

@ -18,8 +18,10 @@ import (
"github.com/stretchr/testify/assert"
)
func getIssuesSelection(htmlDoc *HTMLDoc) *goquery.Selection {
return htmlDoc.doc.Find(".issue.list").Find("li").Find(".title")
func getIssuesSelection(t testing.TB, htmlDoc *HTMLDoc) *goquery.Selection {
issueList := htmlDoc.doc.Find(".issue.list")
assert.EqualValues(t, 1, issueList.Length())
return issueList.Find("li").Find(".title")
}
func getIssue(t *testing.T, repoID int64, issueSelection *goquery.Selection) *models.Issue {
@ -31,6 +33,18 @@ func getIssue(t *testing.T, repoID int64, issueSelection *goquery.Selection) *mo
return models.AssertExistsAndLoadBean(t, &models.Issue{RepoID: repoID, Index: int64(index)}).(*models.Issue)
}
// assertMatch asserts that keyword occurs in the lower-cased title, content,
// or any comment content of issue. The keyword itself is compared as given.
func assertMatch(t testing.TB, issue *models.Issue, keyword string) {
	texts := make([]string, 0, len(issue.Comments)+2)
	texts = append(texts, issue.Title, issue.Content)
	for _, c := range issue.Comments {
		texts = append(texts, c.Content)
	}

	found := false
	for _, text := range texts {
		if strings.Contains(strings.ToLower(text), keyword) {
			found = true
			break
		}
	}
	assert.True(t, found)
}
func TestNoLoginViewIssues(t *testing.T) {
prepareTestEnv(t)
@ -38,19 +52,18 @@ func TestNoLoginViewIssues(t *testing.T) {
MakeRequest(t, req, http.StatusOK)
}
func TestNoLoginViewIssuesSortByType(t *testing.T) {
func TestViewIssuesSortByType(t *testing.T) {
prepareTestEnv(t)
user := models.AssertExistsAndLoadBean(t, &models.User{ID: 1}).(*models.User)
repo := models.AssertExistsAndLoadBean(t, &models.Repository{ID: 1}).(*models.Repository)
repo.Owner = models.AssertExistsAndLoadBean(t, &models.User{ID: repo.OwnerID}).(*models.User)
session := loginUser(t, user.Name)
req := NewRequest(t, "GET", repo.RelLink()+"/issues?type=created_by")
resp := session.MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body)
issuesSelection := getIssuesSelection(htmlDoc)
issuesSelection := getIssuesSelection(t, htmlDoc)
expectedNumIssues := models.GetCount(t,
&models.Issue{RepoID: repo.ID, PosterID: user.ID},
models.Cond("is_closed=?", false),
@ -67,6 +80,26 @@ func TestNoLoginViewIssuesSortByType(t *testing.T) {
})
}
// TestViewIssuesKeyword requests the issue list of repository 1 filtered by
// the keyword "first" (via the package-level MakeRequest helper, not a login
// session) and checks that exactly one issue is listed, that it is open and
// not a pull request, and that it actually contains the keyword.
func TestViewIssuesKeyword(t *testing.T) {
prepareTestEnv(t)
repo := models.AssertExistsAndLoadBean(t, &models.Repository{ID: 1}).(*models.Repository)
const keyword = "first"
// The keyword is passed through the "q" query parameter of the issues page.
req := NewRequestf(t, "GET", "%s/issues?q=%s", repo.RelLink(), keyword)
resp := MakeRequest(t, req, http.StatusOK)
htmlDoc := NewHTMLParser(t, resp.Body)
issuesSelection := getIssuesSelection(t, htmlDoc)
assert.EqualValues(t, 1, issuesSelection.Length())
// Every listed issue must be an open, non-pull issue matching the keyword.
issuesSelection.Each(func(_ int, selection *goquery.Selection) {
issue := getIssue(t, repo.ID, selection)
assert.False(t, issue.IsClosed)
assert.False(t, issue.IsPull)
assertMatch(t, issue, keyword)
})
}
func TestNoLoginViewIssue(t *testing.T) {
prepareTestEnv(t)

@ -10,6 +10,9 @@ PASSWD = {{TEST_MYSQL_PASSWORD}}
SSL_MODE = disable
PATH = data/gitea.db
[indexer]
ISSUE_INDEXER_PATH = integrations/indexers-mysql/issues.bleve
[repository]
ROOT = integrations/gitea-integration-mysql/gitea-repositories

@ -10,6 +10,9 @@ PASSWD = {{TEST_PGSQL_PASSWORD}}
SSL_MODE = disable
PATH = data/gitea.db
[indexer]
ISSUE_INDEXER_PATH = integrations/indexers-pgsql/issues.bleve
[repository]
ROOT = integrations/gitea-integration-pgsql/gitea-repositories

@ -5,6 +5,9 @@ RUN_MODE = prod
DB_TYPE = sqlite3
PATH = :memory:
[indexer]
ISSUE_INDEXER_PATH = integrations/indexers-sqlite/issues.bleve
[repository]
ROOT = integrations/gitea-integration-sqlite/gitea-repositories

@ -5,7 +5,7 @@
poster_id: 1
assignee_id: 1
name: issue1
content: content1
content: content for the first issue
is_closed: false
is_pull: false
num_comments: 2
@ -18,7 +18,7 @@
index: 2
poster_id: 1
name: issue2
content: content2
content: content for the second issue
milestone_id: 1
is_closed: false
is_pull: true
@ -32,7 +32,7 @@
index: 3
poster_id: 1
name: issue3
content: content4
content: content for the third issue
is_closed: false
is_pull: true
created_unix: 946684820
@ -44,7 +44,7 @@
index: 1
poster_id: 2
name: issue4
content: content4
content: content for the fourth issue
is_closed: true
is_pull: false
@ -54,7 +54,7 @@
index: 4
poster_id: 2
name: issue5
content: content5
content: content for the fifth issue
is_closed: true
is_pull: false
-

@ -155,6 +155,17 @@ func (issue *Issue) loadPullRequest(e Engine) (err error) {
return nil
}
// loadComments fills issue.Comments through findComments when it has not been
// populated yet; an already non-nil Comments slice is left untouched.
func (issue *Issue) loadComments(e Engine) (err error) {
	if issue.Comments == nil {
		opts := FindCommentsOptions{
			IssueID: issue.ID,
			Type:    CommentTypeUnknown,
		}
		issue.Comments, err = findComments(e, opts)
	}
	return err
}
func (issue *Issue) loadAttributes(e Engine) (err error) {
if err = issue.loadRepo(e); err != nil {
return
@ -191,14 +202,8 @@ func (issue *Issue) loadAttributes(e Engine) (err error) {
}
}
if issue.Comments == nil {
issue.Comments, err = findComments(e, FindCommentsOptions{
IssueID: issue.ID,
Type: CommentTypeUnknown,
})
if err != nil {
return fmt.Errorf("getCommentsByIssueID [%d]: %v", issue.ID, err)
}
if err = issue.loadComments(e); err != nil {
return
}
return nil
@ -577,7 +582,7 @@ func updateIssueCols(e Engine, issue *Issue, cols ...string) error {
if _, err := e.Id(issue.ID).Cols(cols...).Update(issue); err != nil {
return err
}
UpdateIssueIndexer(issue)
UpdateIssueIndexer(issue.ID)
return nil
}
@ -907,8 +912,6 @@ func newIssue(e *xorm.Session, doer *User, opts NewIssueOptions) (err error) {
return err
}
UpdateIssueIndexer(opts.Issue)
if len(opts.Attachments) > 0 {
attachments, err := getAttachmentsByUUIDs(e, opts.Attachments)
if err != nil {
@ -947,6 +950,8 @@ func NewIssue(repo *Repository, issue *Issue, labelIDs []int64, uuids []string)
return fmt.Errorf("Commit: %v", err)
}
UpdateIssueIndexer(issue.ID)
if err = NotifyWatchers(&Action{
ActUserID: issue.Poster.ID,
ActUser: issue.Poster,
@ -1448,7 +1453,7 @@ func updateIssue(e Engine, issue *Issue) error {
if err != nil {
return err
}
UpdateIssueIndexer(issue)
UpdateIssueIndexer(issue.ID)
return nil
}

@ -520,7 +520,14 @@ func CreateComment(opts *CreateCommentOptions) (comment *Comment, err error) {
return nil, err
}
return comment, sess.Commit()
if err = sess.Commit(); err != nil {
return nil, err
}
if opts.Type == CommentTypeComment {
UpdateIssueIndexer(opts.Issue.ID)
}
return comment, nil
}
// CreateIssueComment creates a plain issue comment.
@ -645,8 +652,12 @@ func GetCommentsByRepoIDSince(repoID, since int64) ([]*Comment, error) {
// UpdateComment updates information of comment.
func UpdateComment(c *Comment) error {
_, err := x.Id(c.ID).AllCols().Update(c)
return err
if _, err := x.Id(c.ID).AllCols().Update(c); err != nil {
return err
} else if c.Type == CommentTypeComment {
UpdateIssueIndexer(c.IssueID)
}
return nil
}
// DeleteComment deletes the comment
@ -672,5 +683,10 @@ func DeleteComment(comment *Comment) error {
return err
}
return sess.Commit()
if err := sess.Commit(); err != nil {
return err
} else if comment.Type == CommentTypeComment {
UpdateIssueIndexer(comment.IssueID)
}
return nil
}

@ -6,112 +6,21 @@ package models
import (
"fmt"
"os"
"strconv"
"strings"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/analyzer/simple"
"github.com/blevesearch/bleve/search/query"
)
// issueIndexerUpdateQueue queue of issues that need to be updated in the issues
// indexer
var issueIndexerUpdateQueue chan *Issue
// issueIndexer (thread-safe) index for searching issues
var issueIndexer bleve.Index
// issueIndexerData data stored in the issue indexer
type issueIndexerData struct {
ID int64
RepoID int64
Title string
Content string
}
// numericQuery a numeric-equality query for the given value and field
func numericQuery(value int64, field string) *query.NumericRangeQuery {
f := float64(value)
tru := true
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru)
q.SetField(field)
return q
}
// SearchIssuesByKeyword searches for issues by given conditions.
// Returns the matching issue IDs
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) {
terms := strings.Fields(strings.ToLower(keyword))
indexerQuery := bleve.NewConjunctionQuery(
numericQuery(repoID, "RepoID"),
bleve.NewDisjunctionQuery(
bleve.NewPhraseQuery(terms, "Title"),
bleve.NewPhraseQuery(terms, "Content"),
))
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false)
search.Fields = []string{"ID"}
result, err := issueIndexer.Search(search)
if err != nil {
return nil, err
}
issueIDs := make([]int64, len(result.Hits))
for i, hit := range result.Hits {
issueIDs[i] = int64(hit.Fields["ID"].(float64))
}
return issueIDs, nil
}
// issueIndexerUpdateQueue queue of issue ids to be updated
var issueIndexerUpdateQueue chan int64
// InitIssueIndexer initialize issue indexer
func InitIssueIndexer() {
_, err := os.Stat(setting.Indexer.IssuePath)
if err != nil {
if os.IsNotExist(err) {
if err = createIssueIndexer(); err != nil {
log.Fatal(4, "CreateIssuesIndexer: %v", err)
}
if err = populateIssueIndexer(); err != nil {
log.Fatal(4, "PopulateIssuesIndex: %v", err)
}
} else {
log.Fatal(4, "InitIssuesIndexer: %v", err)
}
} else {
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath)
if err != nil {
log.Fatal(4, "InitIssuesIndexer, open index: %v", err)
}
}
issueIndexerUpdateQueue = make(chan *Issue, setting.Indexer.UpdateQueueLength)
indexer.InitIssueIndexer(populateIssueIndexer)
issueIndexerUpdateQueue = make(chan int64, setting.Indexer.UpdateQueueLength)
go processIssueIndexerUpdateQueue()
// TODO close issueIndexer when Gitea closes
}
// createIssueIndexer create an issue indexer if one does not already exist
func createIssueIndexer() error {
mapping := bleve.NewIndexMapping()
docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("ID", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Analyzer = simple.Name
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
mapping.AddDocumentMapping("issues", docMapping)
var err error
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping)
return err
}
// populateIssueIndexer populate the issue indexer with issue data
@ -127,57 +36,64 @@ func populateIssueIndexer() error {
if len(repos) == 0 {
return nil
}
batch := issueIndexer.NewBatch()
for _, repo := range repos {
issues, err := Issues(&IssuesOptions{
RepoID: repo.ID,
IsClosed: util.OptionalBoolNone,
IsPull: util.OptionalBoolNone,
})
if err != nil {
return fmt.Errorf("Issues: %v", err)
updates := make([]indexer.IssueIndexerUpdate, len(issues))
for i, issue := range issues {
updates[i] = issue.update()
}
for _, issue := range issues {
err = batch.Index(issue.indexUID(), issue.issueData())
if err != nil {
return fmt.Errorf("batch.Index: %v", err)
}
if err = indexer.BatchUpdateIssues(updates...); err != nil {
return fmt.Errorf("BatchUpdate: %v", err)
}
}
if err = issueIndexer.Batch(batch); err != nil {
return fmt.Errorf("index.Batch: %v", err)
}
}
}
func processIssueIndexerUpdateQueue() {
for {
select {
case issue := <-issueIndexerUpdateQueue:
if err := issueIndexer.Index(issue.indexUID(), issue.issueData()); err != nil {
case issueID := <-issueIndexerUpdateQueue:
issue, err := GetIssueByID(issueID)
if err != nil {
log.Error(4, "issuesIndexer.Index: %v", err)
continue
}
if err = indexer.UpdateIssue(issue.update()); err != nil {
log.Error(4, "issuesIndexer.Index: %v", err)
}
}
}
}
// indexUID a unique identifier for an issue used in full-text indices
func (issue *Issue) indexUID() string {
return strconv.FormatInt(issue.ID, 36)
}
func (issue *Issue) issueData() *issueIndexerData {
return &issueIndexerData{
ID: issue.ID,
RepoID: issue.RepoID,
Title: issue.Title,
Content: issue.Content,
// update builds the indexer update for this issue: its ID plus the repo ID,
// title, content and the content of every comment of type CommentTypeComment
// currently held in issue.Comments.
func (issue *Issue) update() indexer.IssueIndexerUpdate {
	commentBodies := make([]string, 0, 5)
	for _, c := range issue.Comments {
		if c.Type != CommentTypeComment {
			continue
		}
		commentBodies = append(commentBodies, c.Content)
	}

	data := &indexer.IssueIndexerData{
		RepoID:   issue.RepoID,
		Title:    issue.Title,
		Content:  issue.Content,
		Comments: commentBodies,
	}
	return indexer.IssueIndexerUpdate{IssueID: issue.ID, Data: data}
}
// UpdateIssueIndexer add/update an issue to the issue indexer
func UpdateIssueIndexer(issue *Issue) {
go func() {
issueIndexerUpdateQueue <- issue
}()
func UpdateIssueIndexer(issueID int64) {
select {
case issueIndexerUpdateQueue <- issueID:
default:
go func() {
issueIndexerUpdateQueue <- issueID
}()
}
}

@ -640,6 +640,8 @@ func NewPullRequest(repo *Repository, pull *Issue, labelIDs []int64, uuids []str
return fmt.Errorf("Commit: %v", err)
}
UpdateIssueIndexer(pull.ID)
if err = NotifyWatchers(&Action{
ActUserID: pull.Poster.ID,
ActUser: pull.Poster,

@ -5,10 +5,39 @@
package indexer
import (
"code.gitea.io/gitea/models"
"fmt"
"strconv"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/search/query"
)
// NewContext start indexer service
func NewContext() {
models.InitIssueIndexer()
// indexerID encodes an integer id as a base-36 string, the form used as the
// document identifier in the bleve index.
func indexerID(id int64) string {
	const base = 36
	encoded := strconv.AppendInt(nil, id, base)
	return string(encoded)
}
// idOfIndexerID decodes a base-36 indexer id back into the integer id it
// represents. It returns 0 and a descriptive error when the string cannot be
// parsed as a base-36 int64.
func idOfIndexerID(indexerID string) (int64, error) {
	parsed, parseErr := strconv.ParseInt(indexerID, 36, 64)
	if parseErr == nil {
		return parsed, nil
	}
	return 0, fmt.Errorf("Unexpected indexer ID %s: %v", indexerID, parseErr)
}
// numericEqualityQuery returns a query matching documents whose numeric field
// equals value. It is expressed as a range query whose lower and upper bounds
// are both value and both inclusive.
func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
	bound := float64(value)
	inclusive := true
	rangeQuery := bleve.NewNumericRangeInclusiveQuery(&bound, &bound, &inclusive, &inclusive)
	rangeQuery.SetField(field)
	return rangeQuery
}
// newMatchPhraseQuery returns a match-phrase query for matchPhrase with its
// field and analyzer set to the given values.
func newMatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
	phraseQuery := bleve.NewMatchPhraseQuery(matchPhrase)
	phraseQuery.FieldVal, phraseQuery.Analyzer = field, analyzer
	return phraseQuery
}

@ -0,0 +1,143 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package indexer
import (
"os"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/analyzer/custom"
"github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
)
// issueIndexer (thread-safe) index for searching issues
var issueIndexer bleve.Index
// IssueIndexerData is the document stored in the issue indexer for one issue.
// createIssueIndexer maps RepoID as a numeric field and the remaining fields
// as text fields.
type IssueIndexerData struct {
// RepoID is used by SearchIssuesByKeyword to restrict hits to one repository.
RepoID int64
// Title, Content and Comments are the text fields searched for the keyword.
Title string
Content string
Comments []string
}
// IssueIndexerUpdate is a single update to the issue indexer: the data to
// store together with the issue it belongs to.
type IssueIndexerUpdate struct {
// IssueID is converted with indexerID to form the document's index ID.
IssueID int64
// Data is the document indexed under that ID.
Data *IssueIndexerData
}
const issueIndexerAnalyzer = "issueIndexer"
// InitIssueIndexer prepares the package-level issue indexer.
//
// If nothing exists at setting.Indexer.IssuePath the index is created and
// populateIndexer is invoked to fill it; if the path exists the index is
// opened. Any failure is reported through log.Fatal.
func InitIssueIndexer(populateIndexer func() error) {
	_, statErr := os.Stat(setting.Indexer.IssuePath)
	switch {
	case statErr == nil:
		// Index files are already present: open them.
		var openErr error
		issueIndexer, openErr = bleve.Open(setting.Indexer.IssuePath)
		if openErr != nil {
			log.Error(4, "Unable to open issues indexer (%s)."+
				" If the error is due to incompatible versions, try deleting the indexer files;"+
				" gitea will recreate them with the appropriate version the next time it runs."+
				" Deleting the indexer files will not result in loss of data.",
				setting.Indexer.IssuePath)
			log.Fatal(4, "InitIssuesIndexer, open index: %v", openErr)
		}
	case os.IsNotExist(statErr):
		// No index yet: create an empty one, then fill it.
		if err := createIssueIndexer(); err != nil {
			log.Fatal(4, "CreateIssuesIndexer: %v", err)
		}
		if err := populateIndexer(); err != nil {
			log.Fatal(4, "PopulateIssuesIndex: %v", err)
		}
	default:
		log.Fatal(4, "InitIssuesIndexer: %v", statErr)
	}
}
// createIssueIndexer creates a new bleve index at setting.Indexer.IssuePath
// and stores it in issueIndexer. The "issues" document mapping has a numeric
// RepoID field and text Title, Content and Comments fields; the default
// analyzer is a custom one (unicode tokenizer, NFC normalization, lowercase).
func createIssueIndexer() error {
	indexMapping := bleve.NewIndexMapping()

	issueMapping := bleve.NewDocumentMapping()
	issueMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
	textMapping := bleve.NewTextFieldMapping()
	for _, field := range []string{"Title", "Content", "Comments"} {
		issueMapping.AddFieldMappingsAt(field, textMapping)
	}

	const unicodeNormNFC = "unicodeNormNFC"
	normalizerConfig := map[string]interface{}{
		"type": unicodenorm.Name,
		"form": unicodenorm.NFC,
	}
	if err := indexMapping.AddCustomTokenFilter(unicodeNormNFC, normalizerConfig); err != nil {
		return err
	}

	analyzerConfig := map[string]interface{}{
		"type":          custom.Name,
		"char_filters":  []string{},
		"tokenizer":     unicode.Name,
		"token_filters": []string{unicodeNormNFC, lowercase.Name},
	}
	if err := indexMapping.AddCustomAnalyzer(issueIndexerAnalyzer, analyzerConfig); err != nil {
		return err
	}

	indexMapping.DefaultAnalyzer = issueIndexerAnalyzer
	indexMapping.AddDocumentMapping("issues", issueMapping)

	var err error
	issueIndexer, err = bleve.New(setting.Indexer.IssuePath, indexMapping)
	return err
}
// UpdateIssue indexes update.Data in the issue indexer under the indexer ID
// derived from update.IssueID.
func UpdateIssue(update IssueIndexerUpdate) error {
	docID := indexerID(update.IssueID)
	return issueIndexer.Index(docID, update.Data)
}
// BatchUpdateIssues adds every update to a single batch and executes that
// batch against the issue indexer. If adding any update fails, the error is
// returned and the batch is not executed.
func BatchUpdateIssues(updates ...IssueIndexerUpdate) error {
	pending := issueIndexer.NewBatch()
	for i := range updates {
		docID := indexerID(updates[i].IssueID)
		if err := pending.Index(docID, updates[i].Data); err != nil {
			return err
		}
	}
	return issueIndexer.Batch(pending)
}
// SearchIssuesByKeyword searches for issues by given conditions.
// Returns the matching issue IDs
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) {
indexerQuery := bleve.NewConjunctionQuery(
numericEqualityQuery(repoID, "RepoID"),
bleve.NewDisjunctionQuery(
newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
))
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false)
result, err := issueIndexer.Search(search)
if err != nil {
return nil, err
}
issueIDs := make([]int64, len(result.Hits))
for i, hit := range result.Hits {
issueIDs[i], err = idOfIndexerID(hit.ID)
if err != nil {
return nil, err
}
}
return issueIDs, nil
}

@ -13,7 +13,6 @@ import (
"code.gitea.io/gitea/models/migrations"
"code.gitea.io/gitea/modules/cron"
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/mailer"
"code.gitea.io/gitea/modules/markup"
@ -63,7 +62,7 @@ func GlobalInit() {
// Booting long running goroutines.
cron.NewContext()
indexer.NewContext()
models.InitIssueIndexer()
models.InitSyncMirrors()
models.InitDeliverHooks()
models.InitTestPullRequests()

@ -22,6 +22,7 @@ import (
"code.gitea.io/gitea/modules/auth"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markdown"
"code.gitea.io/gitea/modules/notification"
@ -142,7 +143,7 @@ func Issues(ctx *context.Context) {
var issueIDs []int64
var err error
if len(keyword) > 0 {
issueIDs, err = models.SearchIssuesByKeyword(repo.ID, keyword)
issueIDs, err = indexer.SearchIssuesByKeyword(repo.ID, keyword)
if len(issueIDs) == 0 {
forceEmpty = true
}

@ -4,6 +4,7 @@
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve)
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)
@ -33,29 +34,33 @@ Discuss usage and development of bleve in the [google group](https://groups.goog
## Indexing
message := struct{
Id string
From string
Body string
}{
Id: "example",
From: "marty.schoch@gmail.com",
Body: "bleve indexing is easy",
}
mapping := bleve.NewIndexMapping()
index, err := bleve.New("example.bleve", mapping)
if err != nil {
panic(err)
}
index.Index(message.Id, message)
```go
message := struct{
Id string
From string
Body string
}{
Id: "example",
From: "marty.schoch@gmail.com",
Body: "bleve indexing is easy",
}
mapping := bleve.NewIndexMapping()
index, err := bleve.New("example.bleve", mapping)
if err != nil {
panic(err)
}
index.Index(message.Id, message)
```
## Querying
index, _ := bleve.Open("example.bleve")
query := bleve.NewQueryStringQuery("bleve")
searchRequest := bleve.NewSearchRequest(query)
searchResult, _ := index.Search(searchRequest)
```go
index, _ := bleve.Open("example.bleve")
query := bleve.NewQueryStringQuery("bleve")
searchRequest := bleve.NewSearchRequest(query)
searchResult, _ := index.Search(searchRequest)
```
## License

@ -0,0 +1,145 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package custom
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
const Name = "custom"
// AnalyzerConstructor builds an analysis.Analyzer from config, resolving every
// component by name through cache. It reads three keys:
//
//   - "char_filters" (optional): names resolved with getCharFilters
//   - "tokenizer" (required): a string resolved with cache.TokenizerNamed
//   - "token_filters" (optional): names resolved with getTokenFilters
//
// The two filter lists may be a []string or a []interface{} whose elements
// are all strings; any other type is an error. A missing or non-string
// tokenizer is an error, as is any name the cache cannot resolve.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
var err error
var charFilters []analysis.CharFilter
charFiltersValue, ok := config["char_filters"]
if ok {
switch charFiltersValue := charFiltersValue.(type) {
case []string:
charFilters, err = getCharFilters(charFiltersValue, cache)
if err != nil {
return nil, err
}
case []interface{}:
// Convert the loosely typed list to names first; err is shadowed within this case.
charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersValue, "char filter")
if err != nil {
return nil, err
}
charFilters, err = getCharFilters(charFiltersNames, cache)
if err != nil {
return nil, err
}
default:
return nil, fmt.Errorf("unsupported type for char_filters, must be slice")
}
}
var tokenizerName string
tokenizerValue, ok := config["tokenizer"]
if ok {
tokenizerName, ok = tokenizerValue.(string)
if !ok {
return nil, fmt.Errorf("must specify tokenizer as string")
}
} else {
return nil, fmt.Errorf("must specify tokenizer")
}
tokenizer, err := cache.TokenizerNamed(tokenizerName)
if err != nil {
return nil, err
}
var tokenFilters []analysis.TokenFilter
tokenFiltersValue, ok := config["token_filters"]
if ok {
switch tokenFiltersValue := tokenFiltersValue.(type) {
case []string:
tokenFilters, err = getTokenFilters(tokenFiltersValue, cache)
if err != nil {
return nil, err
}
case []interface{}:
// Same handling as for char_filters above.
tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersValue, "token filter")
if err != nil {
return nil, err
}
tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
if err != nil {
return nil, err
}
default:
return nil, fmt.Errorf("unsupported type for token_filters, must be slice")
}
}
rv := analysis.Analyzer{
Tokenizer: tokenizer,
}
// Only set the filter lists when the corresponding slice is non-nil.
if charFilters != nil {
rv.CharFilters = charFilters
}
if tokenFilters != nil {
rv.TokenFilters = tokenFilters
}
return &rv, nil
}
// init registers AnalyzerConstructor with the registry under Name.
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}
// getCharFilters resolves each name through cache.CharFilterNamed and returns
// the filters in the same order. The first lookup failure aborts and is
// returned. The result is non-nil even for an empty name list.
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
	resolved := make([]analysis.CharFilter, 0, len(charFilterNames))
	for _, name := range charFilterNames {
		filter, err := cache.CharFilterNamed(name)
		if err != nil {
			return nil, err
		}
		resolved = append(resolved, filter)
	}
	return resolved, nil
}
// getTokenFilters resolves each name through cache.TokenFilterNamed and
// returns the filters in the same order. The first lookup failure aborts and
// is returned. The result is non-nil even for an empty name list.
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
	resolved := make([]analysis.TokenFilter, 0, len(tokenFilterNames))
	for _, name := range tokenFilterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		resolved = append(resolved, filter)
	}
	return resolved, nil
}
// convertInterfaceSliceToStringSlice converts a slice of interface values into
// a slice of strings of the same length and order.
//
// objType names the kind of object being listed (for example "token filter")
// and is used only in the error message. An error is returned as soon as an
// element that is not a string is encountered.
func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
	stringSlice := make([]string, len(interfaceSlice))
	for i, interfaceObj := range interfaceSlice {
		stringObj, ok := interfaceObj.(string)
		if !ok {
			// objType is passed as an argument, never as the format string, so
			// a '%' in it cannot be misread as a formatting verb.
			return nil, fmt.Errorf("%s name must be a string", objType)
		}
		stringSlice[i] = stringObj
	}
	return stringSlice, nil
}

@ -1,46 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/tokenizer/letter"
"github.com/blevesearch/bleve/registry"
)
const Name = "simple"
// AnalyzerConstructor builds the "simple" analyzer: the letter tokenizer
// followed by the lowercase token filter, both looked up by name in cache.
// The config argument is not used.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	letterTokenizer, err := cache.TokenizerNamed(letter.Name)
	if err != nil {
		return nil, err
	}

	lowercaseFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}

	return &analysis.Analyzer{
		Tokenizer:    letterTokenizer,
		TokenFilters: []analysis.TokenFilter{lowercaseFilter},
	}, nil
}
// init registers AnalyzerConstructor with the registry under Name.
func init() {
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
}

@ -0,0 +1,79 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unicodenorm
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
"golang.org/x/text/unicode/norm"
)
// Name is the name under which this token filter is registered.
const Name = "normalize_unicode"
// NFC, NFD, NFKC and NFKD are the form names accepted by
// NewUnicodeNormalizeFilter; each is a key of forms below.
const NFC = "nfc"
const NFD = "nfd"
const NFKC = "nfkc"
const NFKD = "nfkd"
// forms maps each supported form name to the corresponding norm.Form.
var forms = map[string]norm.Form{
NFC: norm.NFC,
NFD: norm.NFD,
NFKC: norm.NFKC,
NFKD: norm.NFKD,
}
// UnicodeNormalizeFilter is a token filter that rewrites every token's term
// using the configured Unicode normalization form.
type UnicodeNormalizeFilter struct {
// form is the normalization applied by Filter.
form norm.Form
}
// NewUnicodeNormalizeFilter returns a filter for the normalization form named
// formName, or an error when formName is not a key of forms.
func NewUnicodeNormalizeFilter(formName string) (*UnicodeNormalizeFilter, error) {
	if form, known := forms[formName]; known {
		return &UnicodeNormalizeFilter{form: form}, nil
	}
	return nil, fmt.Errorf("no form named %s", formName)
}
// MustNewUnicodeNormalizeFilter is like NewUnicodeNormalizeFilter but panics
// with the returned error instead of handing it back to the caller.
func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter {
	filter, err := NewUnicodeNormalizeFilter(formName)
	if err == nil {
		return filter
	}
	panic(err)
}
// Filter replaces the Term of every token in input with its normalized form
// and returns the same stream; the tokens are modified in place.
func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		input[i].Term = s.form.Bytes(input[i].Term)
	}
	return input
}
// UnicodeNormalizeFilterConstructor builds the filter from config. The "form"
// key must hold a string naming the normalization form; the cache argument is
// not used.
func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	if form, isString := config["form"].(string); isString {
		return NewUnicodeNormalizeFilter(form)
	}
	return nil, fmt.Errorf("must specify form")
}
// init registers UnicodeNormalizeFilterConstructor with the registry under Name.
func init() {
registry.RegisterTokenFilter(Name, UnicodeNormalizeFilterConstructor)
}

@ -1,76 +0,0 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package character
import (
"unicode/utf8"
"github.com/blevesearch/bleve/analysis"
)
// IsTokenRune reports whether r belongs inside a token; CharacterTokenizer
// treats every rune for which it returns false as a separator.
type IsTokenRune func(r rune) bool
// CharacterTokenizer splits input into runs of runes accepted by a predicate.
type CharacterTokenizer struct {
// isTokenRun is the predicate consulted for each decoded rune.
isTokenRun IsTokenRune
}
// NewCharacterTokenizer returns a tokenizer that uses f to decide which runes
// are part of a token.
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer {
	tokenizer := new(CharacterTokenizer)
	tokenizer.isTokenRun = f
	return tokenizer
}
// Tokenize splits input into tokens made of consecutive runes accepted by the
// tokenizer's predicate; every other rune acts as a separator.
//
// Each token's Term is a sub-slice of input (not a copy), Start and End are
// byte offsets into input, Position is 1-based and Type is always
// analysis.AlphaNumeric.
//
// NOTE(review): the loop stops at the first utf8.RuneError. utf8.DecodeRune
// returns that value at the end of input, but also for invalid UTF-8 and for
// a literal U+FFFD, so bytes after such a rune are not scanned. Only the
// token in progress at that point is still emitted.
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream {
rv := make(analysis.TokenStream, 0, 1024)
// offset is the read position; [start, end) is the token being accumulated.
offset := 0
start := 0
end := 0
count := 0
for currRune, size := utf8.DecodeRune(input[offset:]); currRune != utf8.RuneError; currRune, size = utf8.DecodeRune(input[offset:]) {
isToken := c.isTokenRun(currRune)
if isToken {
// extend the current token to include this rune
end = offset + size
} else {
if end-start > 0 {
// build token
rv = append(rv, &analysis.Token{
Term: input[start:end],
Start: start,
End: end,
Position: count + 1,
Type: analysis.AlphaNumeric,
})
count++
}
// restart the (empty) token just past the separator rune
start = offset + size
end = start
}
offset += size
}
// if we ended in the middle of a token, finish it
if end-start > 0 {
// build token
rv = append(rv, &analysis.Token{
Term: input[start:end],
Start: start,
End: end,
Position: count + 1,
Type: analysis.AlphaNumeric,
})
}
return rv
}

@ -1,33 +0,0 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package letter
import (
"unicode"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/analysis/tokenizer/character"
"github.com/blevesearch/bleve/registry"
)
// Name is the name under which this package registers its tokenizer
// constructor with the registry.
const Name = "letter"
// TokenizerConstructor builds the "letter" tokenizer: a character tokenizer
// whose tokens consist of runes for which unicode.IsLetter is true. Neither
// config nor cache is consulted, and the returned error is always nil.
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	letterTokenizer := character.NewCharacterTokenizer(unicode.IsLetter)
	return letterTokenizer, nil
}
// init registers TokenizerConstructor with the registry under Name when the
// package is loaded.
func init() {
registry.RegisterTokenizer(Name, TokenizerConstructor)
}

@ -1,23 +0,0 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +build appengine appenginevm
package bleve
// initDisk is intentionally a no-op in this build (appengine/appenginevm):
// disk based indexes cannot be supported in the App Engine environment, so
// no extra configuration is performed here.
func initDisk() {
}

@ -0,0 +1,137 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/numeric"
)
// GeoPrecisionStep is both the first shift and the shift increment used by
// GeoPointField.Analyze when it emits additional prefix-coded terms for a
// point; terms are produced for every multiple of this step below 64.
var GeoPrecisionStep uint = 9
// GeoPointField is a document field holding a single geo point as a
// prefix-coded integer. The lon/lat constructors store the point's Morton
// hash; Lon and Lat decode it again.
type GeoPointField struct {
// name is the field name returned by Name.
name string
// arrayPositions is returned by ArrayPositions and forwarded to
// analysis.TokenFrequency during Analyze.
arrayPositions []uint64
// options controls indexing behaviour (e.g. whether term vectors are included).
options IndexingOptions
// value is the prefix-coded encoding of the point.
value numeric.PrefixCoded
// numPlainTextBytes is the size reported by NumPlainTextBytes.
numPlainTextBytes uint64
}
// Name returns the name of the field.
func (n *GeoPointField) Name() string {
return n.name
}
// ArrayPositions returns the array positions the field was created with.
// The returned slice is the field's own slice, not a copy.
func (n *GeoPointField) ArrayPositions() []uint64 {
return n.arrayPositions
}
// Options returns the indexing options of the field.
func (n *GeoPointField) Options() IndexingOptions {
return n.options
}
// Analyze produces the terms to index for this point. The first term is the
// stored prefix-coded value itself; if that value decodes to an int64, one
// further term is added for each shift GeoPrecisionStep, 2*GeoPrecisionStep,
// ... below 64, stopping early if encoding at a given shift fails. All terms
// share Position 1 and Type analysis.Numeric.
//
// It returns the number of terms generated together with their frequencies
// as computed by analysis.TokenFrequency.
func (n *GeoPointField) Analyze() (int, analysis.TokenFrequencies) {
	// Start with the term for the value exactly as stored.
	tokens := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(n.value),
			Term:     n.value,
			Position: 1,
			Type:     analysis.Numeric,
		},
	}

	// A value that cannot be decoded simply yields no additional terms.
	if original, err := n.value.Int64(); err == nil {
		for shift := GeoPrecisionStep; shift < 64; shift += GeoPrecisionStep {
			shiftEncoded, encErr := numeric.NewPrefixCodedInt64(original, shift)
			if encErr != nil {
				break
			}
			tokens = append(tokens, &analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.Numeric,
			})
		}
	}

	fieldLength := len(tokens)
	freqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
	return fieldLength, freqs
}
// Value returns the field's prefix-coded value as raw bytes. The returned
// slice is the field's own storage, not a copy.
func (n *GeoPointField) Value() []byte {
return n.value
}
// Lon decodes the stored prefix-coded value and returns the longitude
// recovered from it via geo.MortonUnhashLon. If the stored value cannot be
// decoded to an int64, it returns 0 and the decoding error.
func (n *GeoPointField) Lon() (float64, error) {
	hash, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	lon := geo.MortonUnhashLon(uint64(hash))
	return lon, nil
}
// Lat decodes the stored prefix-coded value and returns the latitude
// recovered from it via geo.MortonUnhashLat. If the stored value cannot be
// decoded to an int64, it returns 0 and the decoding error.
func (n *GeoPointField) Lat() (float64, error) {
	hash, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	lat := geo.MortonUnhashLat(uint64(hash))
	return lat, nil
}
// GoString returns a Go-syntax-like description of the field containing its
// name, indexing options and stored value, each formatted with %s.
func (n *GeoPointField) GoString() string {
	const format = "&document.GeoPointField{Name:%s, Options: %s, Value: %s}"
	return fmt.Sprintf(format, n.name, n.options, n.value)
}
// NumPlainTextBytes returns the byte count recorded when the field was
// constructed: the length of the supplied value for fields built from
// bytes, or a fixed placeholder of 8 for fields built from lon/lat.
func (n *GeoPointField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// NewGeoPointFieldFromBytes builds a GeoPointField directly from an already
// encoded value. The bytes are stored as given (not copied or validated),
// the field uses DefaultNumericIndexingOptions, and its plain-text size is
// the length of value.
func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField {
	field := new(GeoPointField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.value = value
	field.options = DefaultNumericIndexingOptions
	field.numPlainTextBytes = uint64(len(value))
	return field
}
// NewGeoPointField builds a GeoPointField for the point (lon, lat) using
// DefaultNumericIndexingOptions. It is shorthand for calling
// NewGeoPointFieldWithIndexingOptions with those options.
func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField {
return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions)
}
// NewGeoPointFieldWithIndexingOptions builds a GeoPointField for the point
// (lon, lat) with the given indexing options. The point is hashed with
// geo.MortonHash and the hash is stored prefix-coded with a shift of 0.
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options IndexingOptions) *GeoPointField {
	encoded := numeric.MustNewPrefixCodedInt64(int64(geo.MortonHash(lon, lat)), 0)

	field := &GeoPointField{
		name:           name,
		arrayPositions: arrayPositions,
		options:        options,
		value:          encoded,
		// not correct, just a place holder until we revisit how fields are
		// represented and can fix this better
		numPlainTextBytes: uint64(8),
	}
	return field
}

@ -0,0 +1,9 @@
# geo support in bleve
First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html).
## Notes
- All of the APIs will use float64 for lon/lat values.
- When describing a point in function arguments or return values, we always use the order lon, lat.
- High level APIs will use TopLeft and BottomRight to describe bounding boxes. This may not map cleanly to min/max lon/lat when crossing the dateline. The lower level APIs will use min/max lon/lat and require the higher-level code to split boxes accordingly.