Browse Source
Improve issue search (#2387)
* Improve issue indexer * Fix new issue sqlite bug * Different test indexer paths for each db * Add integration indexer paths to make clean
(branch: release/v1.3)
committed by
Lauris BH
122 changed files with 15275 additions and 1453 deletions
-
3.gitignore
-
6Makefile
-
9integrations/integration_test.go
-
43integrations/issue_test.go
-
3integrations/mysql.ini.tmpl
-
3integrations/pgsql.ini.tmpl
-
3integrations/sqlite.ini
-
10models/fixtures/issue.yml
-
29models/issue.go
-
24models/issue_comment.go
-
164models/issue_indexer.go
-
2models/pull.go
-
37modules/indexer/indexer.go
-
143modules/indexer/issue.go
-
3routers/init.go
-
3routers/repo/issue.go
-
45vendor/github.com/blevesearch/bleve/README.md
-
145vendor/github.com/blevesearch/bleve/analysis/analyzer/custom/custom.go
-
46vendor/github.com/blevesearch/bleve/analysis/analyzer/simple/simple.go
-
79vendor/github.com/blevesearch/bleve/analysis/token/unicodenorm/unicodenorm.go
-
76vendor/github.com/blevesearch/bleve/analysis/tokenizer/character/character.go
-
33vendor/github.com/blevesearch/bleve/analysis/tokenizer/letter/letter.go
-
23vendor/github.com/blevesearch/bleve/config_app.go
-
137vendor/github.com/blevesearch/bleve/document/field_geopoint.go
-
9vendor/github.com/blevesearch/bleve/geo/README.md
-
170vendor/github.com/blevesearch/bleve/geo/geo.go
-
98vendor/github.com/blevesearch/bleve/geo/geo_dist.go
-
140vendor/github.com/blevesearch/bleve/geo/parse.go
-
212vendor/github.com/blevesearch/bleve/geo/sloppy.go
-
22vendor/github.com/blevesearch/bleve/index.go
-
4vendor/github.com/blevesearch/bleve/index/index.go
-
6vendor/github.com/blevesearch/bleve/index/upsidedown/analysis.go
-
10vendor/github.com/blevesearch/bleve/index/upsidedown/dump.go
-
42vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go
-
63vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go
-
309vendor/github.com/blevesearch/bleve/index/upsidedown/row.go
-
121vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
-
142vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.pb.go
-
8vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.proto
-
17vendor/github.com/blevesearch/bleve/index_alias_impl.go
-
31vendor/github.com/blevesearch/bleve/index_impl.go
-
4vendor/github.com/blevesearch/bleve/mapping.go
-
51vendor/github.com/blevesearch/bleve/mapping/document.go
-
25vendor/github.com/blevesearch/bleve/mapping/field.go
-
7vendor/github.com/blevesearch/bleve/mapping/index.go
-
13vendor/github.com/blevesearch/bleve/mapping/mapping.go
-
43vendor/github.com/blevesearch/bleve/numeric/bin.go
-
32vendor/github.com/blevesearch/bleve/query.go
-
92vendor/github.com/blevesearch/bleve/search.go
-
30vendor/github.com/blevesearch/bleve/search/collector/heap.go
-
15vendor/github.com/blevesearch/bleve/search/collector/list.go
-
15vendor/github.com/blevesearch/bleve/search/collector/slice.go
-
83vendor/github.com/blevesearch/bleve/search/collector/topn.go
-
53vendor/github.com/blevesearch/bleve/search/facet/facet_builder_datetime.go
-
53vendor/github.com/blevesearch/bleve/search/facet/facet_builder_numeric.go
-
29vendor/github.com/blevesearch/bleve/search/facet/facet_builder_terms.go
-
41vendor/github.com/blevesearch/bleve/search/facets_builder.go
-
2vendor/github.com/blevesearch/bleve/search/highlight/format/html/html.go
-
2vendor/github.com/blevesearch/bleve/search/highlight/highlighter/simple/fragment_scorer_simple.go
-
2vendor/github.com/blevesearch/bleve/search/highlight/highlighter/simple/highlighter_simple.go
-
14vendor/github.com/blevesearch/bleve/search/highlight/term_locations.go
-
12vendor/github.com/blevesearch/bleve/search/pool.go
-
13vendor/github.com/blevesearch/bleve/search/query/bool_field.go
-
69vendor/github.com/blevesearch/bleve/search/query/boolean.go
-
26vendor/github.com/blevesearch/bleve/search/query/conjunction.go
-
9vendor/github.com/blevesearch/bleve/search/query/date_range.go
-
29vendor/github.com/blevesearch/bleve/search/query/disjunction.go
-
6vendor/github.com/blevesearch/bleve/search/query/docid.go
-
8vendor/github.com/blevesearch/bleve/search/query/fuzzy.go
-
113vendor/github.com/blevesearch/bleve/search/query/geo_boundingbox.go
-
100vendor/github.com/blevesearch/bleve/search/query/geo_distance.go
-
12vendor/github.com/blevesearch/bleve/search/query/match.go
-
8vendor/github.com/blevesearch/bleve/search/query/match_all.go
-
4vendor/github.com/blevesearch/bleve/search/query/match_none.go
-
21vendor/github.com/blevesearch/bleve/search/query/match_phrase.go
-
80vendor/github.com/blevesearch/bleve/search/query/multi_phrase.go
-
8vendor/github.com/blevesearch/bleve/search/query/numeric_range.go
-
38vendor/github.com/blevesearch/bleve/search/query/phrase.go
-
8vendor/github.com/blevesearch/bleve/search/query/prefix.go
-
52vendor/github.com/blevesearch/bleve/search/query/query.go
-
10vendor/github.com/blevesearch/bleve/search/query/query_string.go
-
63vendor/github.com/blevesearch/bleve/search/query/query_string.y
-
148vendor/github.com/blevesearch/bleve/search/query/query_string.y.go
-
10vendor/github.com/blevesearch/bleve/search/query/query_string_parser.go
-
24vendor/github.com/blevesearch/bleve/search/query/regexp.go
-
8vendor/github.com/blevesearch/bleve/search/query/term.go
-
95vendor/github.com/blevesearch/bleve/search/query/term_range.go
-
10vendor/github.com/blevesearch/bleve/search/query/wildcard.go
-
12vendor/github.com/blevesearch/bleve/search/scorer/scorer_conjunction.go
-
14vendor/github.com/blevesearch/bleve/search/scorer/scorer_constant.go
-
14vendor/github.com/blevesearch/bleve/search/scorer/scorer_disjunction.go
-
56vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go
-
38vendor/github.com/blevesearch/bleve/search/search.go
-
4vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go
-
8vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go
-
25vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go
-
4vendor/github.com/blevesearch/bleve/search/searcher/search_docid.go
-
88vendor/github.com/blevesearch/bleve/search/searcher/search_filter.go
-
87vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go
-
173vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
@ -0,0 +1,143 @@ |
|||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
|||
// Use of this source code is governed by a MIT-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package indexer |
|||
|
|||
import ( |
|||
"os" |
|||
|
|||
"code.gitea.io/gitea/modules/log" |
|||
"code.gitea.io/gitea/modules/setting" |
|||
|
|||
"github.com/blevesearch/bleve" |
|||
"github.com/blevesearch/bleve/analysis/analyzer/custom" |
|||
"github.com/blevesearch/bleve/analysis/token/lowercase" |
|||
"github.com/blevesearch/bleve/analysis/token/unicodenorm" |
|||
"github.com/blevesearch/bleve/analysis/tokenizer/unicode" |
|||
) |
|||
|
|||
// issueIndexer (thread-safe) index for searching issues
|
|||
var issueIndexer bleve.Index |
|||
|
|||
// IssueIndexerData data stored in the issue indexer
|
|||
type IssueIndexerData struct { |
|||
RepoID int64 |
|||
Title string |
|||
Content string |
|||
Comments []string |
|||
} |
|||
|
|||
// IssueIndexerUpdate an update to the issue indexer
|
|||
type IssueIndexerUpdate struct { |
|||
IssueID int64 |
|||
Data *IssueIndexerData |
|||
} |
|||
|
|||
const issueIndexerAnalyzer = "issueIndexer" |
|||
|
|||
// InitIssueIndexer initialize issue indexer
|
|||
func InitIssueIndexer(populateIndexer func() error) { |
|||
_, err := os.Stat(setting.Indexer.IssuePath) |
|||
if err != nil { |
|||
if os.IsNotExist(err) { |
|||
if err = createIssueIndexer(); err != nil { |
|||
log.Fatal(4, "CreateIssuesIndexer: %v", err) |
|||
} |
|||
if err = populateIndexer(); err != nil { |
|||
log.Fatal(4, "PopulateIssuesIndex: %v", err) |
|||
} |
|||
} else { |
|||
log.Fatal(4, "InitIssuesIndexer: %v", err) |
|||
} |
|||
} else { |
|||
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath) |
|||
if err != nil { |
|||
log.Error(4, "Unable to open issues indexer (%s)."+ |
|||
" If the error is due to incompatible versions, try deleting the indexer files;"+ |
|||
" gitea will recreate them with the appropriate version the next time it runs."+ |
|||
" Deleting the indexer files will not result in loss of data.", |
|||
setting.Indexer.IssuePath) |
|||
log.Fatal(4, "InitIssuesIndexer, open index: %v", err) |
|||
} |
|||
} |
|||
} |
|||
|
|||
// createIssueIndexer create an issue indexer if one does not already exist
|
|||
func createIssueIndexer() error { |
|||
mapping := bleve.NewIndexMapping() |
|||
docMapping := bleve.NewDocumentMapping() |
|||
|
|||
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping()) |
|||
|
|||
textFieldMapping := bleve.NewTextFieldMapping() |
|||
docMapping.AddFieldMappingsAt("Title", textFieldMapping) |
|||
docMapping.AddFieldMappingsAt("Content", textFieldMapping) |
|||
docMapping.AddFieldMappingsAt("Comments", textFieldMapping) |
|||
|
|||
const unicodeNormNFC = "unicodeNormNFC" |
|||
if err := mapping.AddCustomTokenFilter(unicodeNormNFC, map[string]interface{}{ |
|||
"type": unicodenorm.Name, |
|||
"form": unicodenorm.NFC, |
|||
}); err != nil { |
|||
return err |
|||
} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{ |
|||
"type": custom.Name, |
|||
"char_filters": []string{}, |
|||
"tokenizer": unicode.Name, |
|||
"token_filters": []string{unicodeNormNFC, lowercase.Name}, |
|||
}); err != nil { |
|||
return err |
|||
} |
|||
|
|||
mapping.DefaultAnalyzer = issueIndexerAnalyzer |
|||
mapping.AddDocumentMapping("issues", docMapping) |
|||
|
|||
var err error |
|||
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping) |
|||
return err |
|||
} |
|||
|
|||
// UpdateIssue update the issue indexer
|
|||
func UpdateIssue(update IssueIndexerUpdate) error { |
|||
return issueIndexer.Index(indexerID(update.IssueID), update.Data) |
|||
} |
|||
|
|||
// BatchUpdateIssues perform a batch update of the issue indexer
|
|||
func BatchUpdateIssues(updates ...IssueIndexerUpdate) error { |
|||
batch := issueIndexer.NewBatch() |
|||
for _, update := range updates { |
|||
err := batch.Index(indexerID(update.IssueID), update.Data) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
} |
|||
return issueIndexer.Batch(batch) |
|||
} |
|||
|
|||
// SearchIssuesByKeyword searches for issues by given conditions.
|
|||
// Returns the matching issue IDs
|
|||
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) { |
|||
indexerQuery := bleve.NewConjunctionQuery( |
|||
numericEqualityQuery(repoID, "RepoID"), |
|||
bleve.NewDisjunctionQuery( |
|||
newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer), |
|||
newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer), |
|||
newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer), |
|||
)) |
|||
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false) |
|||
|
|||
result, err := issueIndexer.Search(search) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
issueIDs := make([]int64, len(result.Hits)) |
|||
for i, hit := range result.Hits { |
|||
issueIDs[i], err = idOfIndexerID(hit.ID) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
} |
|||
return issueIDs, nil |
|||
} |
@ -0,0 +1,145 @@ |
|||
// Copyright (c) 2014 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
package custom |
|||
|
|||
import ( |
|||
"fmt" |
|||
|
|||
"github.com/blevesearch/bleve/analysis" |
|||
"github.com/blevesearch/bleve/registry" |
|||
) |
|||
|
|||
const Name = "custom" |
|||
|
|||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { |
|||
|
|||
var err error |
|||
var charFilters []analysis.CharFilter |
|||
charFiltersValue, ok := config["char_filters"] |
|||
if ok { |
|||
switch charFiltersValue := charFiltersValue.(type) { |
|||
case []string: |
|||
charFilters, err = getCharFilters(charFiltersValue, cache) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
case []interface{}: |
|||
charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersValue, "char filter") |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
charFilters, err = getCharFilters(charFiltersNames, cache) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
default: |
|||
return nil, fmt.Errorf("unsupported type for char_filters, must be slice") |
|||
} |
|||
} |
|||
|
|||
var tokenizerName string |
|||
tokenizerValue, ok := config["tokenizer"] |
|||
if ok { |
|||
tokenizerName, ok = tokenizerValue.(string) |
|||
if !ok { |
|||
return nil, fmt.Errorf("must specify tokenizer as string") |
|||
} |
|||
} else { |
|||
return nil, fmt.Errorf("must specify tokenizer") |
|||
} |
|||
|
|||
tokenizer, err := cache.TokenizerNamed(tokenizerName) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
var tokenFilters []analysis.TokenFilter |
|||
tokenFiltersValue, ok := config["token_filters"] |
|||
if ok { |
|||
switch tokenFiltersValue := tokenFiltersValue.(type) { |
|||
case []string: |
|||
tokenFilters, err = getTokenFilters(tokenFiltersValue, cache) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
case []interface{}: |
|||
tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersValue, "token filter") |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
tokenFilters, err = getTokenFilters(tokenFiltersNames, cache) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
default: |
|||
return nil, fmt.Errorf("unsupported type for token_filters, must be slice") |
|||
} |
|||
} |
|||
|
|||
rv := analysis.Analyzer{ |
|||
Tokenizer: tokenizer, |
|||
} |
|||
if charFilters != nil { |
|||
rv.CharFilters = charFilters |
|||
} |
|||
if tokenFilters != nil { |
|||
rv.TokenFilters = tokenFilters |
|||
} |
|||
return &rv, nil |
|||
} |
|||
|
|||
func init() { |
|||
registry.RegisterAnalyzer(Name, AnalyzerConstructor) |
|||
} |
|||
|
|||
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) { |
|||
charFilters := make([]analysis.CharFilter, len(charFilterNames)) |
|||
for i, charFilterName := range charFilterNames { |
|||
charFilter, err := cache.CharFilterNamed(charFilterName) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
charFilters[i] = charFilter |
|||
} |
|||
|
|||
return charFilters, nil |
|||
} |
|||
|
|||
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) { |
|||
tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames)) |
|||
for i, tokenFilterName := range tokenFilterNames { |
|||
tokenFilter, err := cache.TokenFilterNamed(tokenFilterName) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
tokenFilters[i] = tokenFilter |
|||
} |
|||
|
|||
return tokenFilters, nil |
|||
} |
|||
|
|||
// convertInterfaceSliceToStringSlice converts a slice of empty interfaces
// (as produced by generic JSON decoding) into a slice of strings. It fails
// if any element is not a string; objType names the element kind used in
// the error message.
func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
	stringSlice := make([]string, len(interfaceSlice))
	for i, obj := range interfaceSlice {
		s, ok := obj.(string)
		if !ok {
			// Use a format verb instead of string concatenation so the
			// call site is vet/printf-clean; output is unchanged.
			return nil, fmt.Errorf("%s name must be a string", objType)
		}
		stringSlice[i] = s
	}
	return stringSlice, nil
}
@ -1,46 +0,0 @@ |
|||
// Copyright (c) 2014 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
package simple |
|||
|
|||
import ( |
|||
"github.com/blevesearch/bleve/analysis" |
|||
"github.com/blevesearch/bleve/analysis/token/lowercase" |
|||
"github.com/blevesearch/bleve/analysis/tokenizer/letter" |
|||
"github.com/blevesearch/bleve/registry" |
|||
) |
|||
|
|||
const Name = "simple" |
|||
|
|||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) { |
|||
tokenizer, err := cache.TokenizerNamed(letter.Name) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
rv := analysis.Analyzer{ |
|||
Tokenizer: tokenizer, |
|||
TokenFilters: []analysis.TokenFilter{ |
|||
toLowerFilter, |
|||
}, |
|||
} |
|||
return &rv, nil |
|||
} |
|||
|
|||
func init() { |
|||
registry.RegisterAnalyzer(Name, AnalyzerConstructor) |
|||
} |
@ -0,0 +1,79 @@ |
|||
// Copyright (c) 2014 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
package unicodenorm |
|||
|
|||
import ( |
|||
"fmt" |
|||
|
|||
"github.com/blevesearch/bleve/analysis" |
|||
"github.com/blevesearch/bleve/registry" |
|||
"golang.org/x/text/unicode/norm" |
|||
) |
|||
|
|||
const Name = "normalize_unicode" |
|||
|
|||
const NFC = "nfc" |
|||
const NFD = "nfd" |
|||
const NFKC = "nfkc" |
|||
const NFKD = "nfkd" |
|||
|
|||
var forms = map[string]norm.Form{ |
|||
NFC: norm.NFC, |
|||
NFD: norm.NFD, |
|||
NFKC: norm.NFKC, |
|||
NFKD: norm.NFKD, |
|||
} |
|||
|
|||
type UnicodeNormalizeFilter struct { |
|||
form norm.Form |
|||
} |
|||
|
|||
func NewUnicodeNormalizeFilter(formName string) (*UnicodeNormalizeFilter, error) { |
|||
form, ok := forms[formName] |
|||
if !ok { |
|||
return nil, fmt.Errorf("no form named %s", formName) |
|||
} |
|||
return &UnicodeNormalizeFilter{ |
|||
form: form, |
|||
}, nil |
|||
} |
|||
|
|||
func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter { |
|||
filter, err := NewUnicodeNormalizeFilter(formName) |
|||
if err != nil { |
|||
panic(err) |
|||
} |
|||
return filter |
|||
} |
|||
|
|||
func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream { |
|||
for _, token := range input { |
|||
token.Term = s.form.Bytes(token.Term) |
|||
} |
|||
return input |
|||
} |
|||
|
|||
func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) { |
|||
formVal, ok := config["form"].(string) |
|||
if !ok { |
|||
return nil, fmt.Errorf("must specify form") |
|||
} |
|||
form := formVal |
|||
return NewUnicodeNormalizeFilter(form) |
|||
} |
|||
|
|||
func init() { |
|||
registry.RegisterTokenFilter(Name, UnicodeNormalizeFilterConstructor) |
|||
} |
@ -1,76 +0,0 @@ |
|||
// Copyright (c) 2016 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
package character |
|||
|
|||
import ( |
|||
"unicode/utf8" |
|||
|
|||
"github.com/blevesearch/bleve/analysis" |
|||
) |
|||
|
|||
type IsTokenRune func(r rune) bool |
|||
|
|||
type CharacterTokenizer struct { |
|||
isTokenRun IsTokenRune |
|||
} |
|||
|
|||
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer { |
|||
return &CharacterTokenizer{ |
|||
isTokenRun: f, |
|||
} |
|||
} |
|||
|
|||
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream { |
|||
|
|||
rv := make(analysis.TokenStream, 0, 1024) |
|||
|
|||
offset := 0 |
|||
start := 0 |
|||
end := 0 |
|||
count := 0 |
|||
for currRune, size := utf8.DecodeRune(input[offset:]); currRune != utf8.RuneError; currRune, size = utf8.DecodeRune(input[offset:]) { |
|||
isToken := c.isTokenRun(currRune) |
|||
if isToken { |
|||
end = offset + size |
|||
} else { |
|||
if end-start > 0 { |
|||
// build token
|
|||
rv = append(rv, &analysis.Token{ |
|||
Term: input[start:end], |
|||
Start: start, |
|||
End: end, |
|||
Position: count + 1, |
|||
Type: analysis.AlphaNumeric, |
|||
}) |
|||
count++ |
|||
} |
|||
start = offset + size |
|||
end = start |
|||
} |
|||
offset += size |
|||
} |
|||
// if we ended in the middle of a token, finish it
|
|||
if end-start > 0 { |
|||
// build token
|
|||
rv = append(rv, &analysis.Token{ |
|||
Term: input[start:end], |
|||
Start: start, |
|||
End: end, |
|||
Position: count + 1, |
|||
Type: analysis.AlphaNumeric, |
|||
}) |
|||
} |
|||
return rv |
|||
} |
@ -1,33 +0,0 @@ |
|||
// Copyright (c) 2016 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
package letter |
|||
|
|||
import ( |
|||
"unicode" |
|||
|
|||
"github.com/blevesearch/bleve/analysis" |
|||
"github.com/blevesearch/bleve/analysis/tokenizer/character" |
|||
"github.com/blevesearch/bleve/registry" |
|||
) |
|||
|
|||
const Name = "letter" |
|||
|
|||
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) { |
|||
return character.NewCharacterTokenizer(unicode.IsLetter), nil |
|||
} |
|||
|
|||
func init() { |
|||
registry.RegisterTokenizer(Name, TokenizerConstructor) |
|||
} |
@ -1,23 +0,0 @@ |
|||
// Copyright (c) 2014 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// +build appengine appenginevm
|
|||
|
|||
package bleve |
|||
|
|||
// in the appengine environment we cannot support disk based indexes
|
|||
// so we do no extra configuration in this method
|
|||
func initDisk() { |
|||
|
|||
} |
@ -0,0 +1,137 @@ |
|||
// Copyright (c) 2017 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
package document |
|||
|
|||
import ( |
|||
"fmt" |
|||
|
|||
"github.com/blevesearch/bleve/analysis" |
|||
"github.com/blevesearch/bleve/geo" |
|||
"github.com/blevesearch/bleve/numeric" |
|||
) |
|||
|
|||
var GeoPrecisionStep uint = 9 |
|||
|
|||
type GeoPointField struct { |
|||
name string |
|||
arrayPositions []uint64 |
|||
options IndexingOptions |
|||
value numeric.PrefixCoded |
|||
numPlainTextBytes uint64 |
|||
} |
|||
|
|||
func (n *GeoPointField) Name() string { |
|||
return n.name |
|||
} |
|||
|
|||
func (n *GeoPointField) ArrayPositions() []uint64 { |
|||
return n.arrayPositions |
|||
} |
|||
|
|||
func (n *GeoPointField) Options() IndexingOptions { |
|||
return n.options |
|||
} |
|||
|
|||
func (n *GeoPointField) Analyze() (int, analysis.TokenFrequencies) { |
|||
tokens := make(analysis.TokenStream, 0) |
|||
tokens = append(tokens, &analysis.Token{ |
|||
Start: 0, |
|||
End: len(n.value), |
|||
Term: n.value, |
|||
Position: 1, |
|||
Type: analysis.Numeric, |
|||
}) |
|||
|
|||
original, err := n.value.Int64() |
|||
if err == nil { |
|||
|
|||
shift := GeoPrecisionStep |
|||
for shift < 64 { |
|||
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift) |
|||
if err != nil { |
|||
break |
|||
} |
|||
token := analysis.Token{ |
|||
Start: 0, |
|||
End: len(shiftEncoded), |
|||
Term: shiftEncoded, |
|||
Position: 1, |
|||
Type: analysis.Numeric, |
|||
} |
|||
tokens = append(tokens, &token) |
|||
shift += GeoPrecisionStep |
|||
} |
|||
} |
|||
|
|||
fieldLength := len(tokens) |
|||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors()) |
|||
return fieldLength, tokenFreqs |
|||
} |
|||
|
|||
func (n *GeoPointField) Value() []byte { |
|||
return n.value |
|||
} |
|||
|
|||
func (n *GeoPointField) Lon() (float64, error) { |
|||
i64, err := n.value.Int64() |
|||
if err != nil { |
|||
return 0.0, err |
|||
} |
|||
return geo.MortonUnhashLon(uint64(i64)), nil |
|||
} |
|||
|
|||
func (n *GeoPointField) Lat() (float64, error) { |
|||
i64, err := n.value.Int64() |
|||
if err != nil { |
|||
return 0.0, err |
|||
} |
|||
return geo.MortonUnhashLat(uint64(i64)), nil |
|||
} |
|||
|
|||
func (n *GeoPointField) GoString() string { |
|||
return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value) |
|||
} |
|||
|
|||
func (n *GeoPointField) NumPlainTextBytes() uint64 { |
|||
return n.numPlainTextBytes |
|||
} |
|||
|
|||
func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField { |
|||
return &GeoPointField{ |
|||
name: name, |
|||
arrayPositions: arrayPositions, |
|||
value: value, |
|||
options: DefaultNumericIndexingOptions, |
|||
numPlainTextBytes: uint64(len(value)), |
|||
} |
|||
} |
|||
|
|||
func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField { |
|||
return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions) |
|||
} |
|||
|
|||
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options IndexingOptions) *GeoPointField { |
|||
mhash := geo.MortonHash(lon, lat) |
|||
prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0) |
|||
return &GeoPointField{ |
|||
name: name, |
|||
arrayPositions: arrayPositions, |
|||
value: prefixCoded, |
|||
options: options, |
|||
// not correct, just a place holder until we revisit how fields are
|
|||
// represented and can fix this better
|
|||
numPlainTextBytes: uint64(8), |
|||
} |
|||
} |
@ -0,0 +1,9 @@ |
|||
# geo support in bleve |
|||
|
|||
First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html). |
|||
|
|||
## Notes |
|||
|
|||
- All of the APIs will use float64 for lon/lat values. |
|||
- When describing a point in function arguments or return values, we always use the order lon, lat. |
|||
- High level APIs will use TopLeft and BottomRight to describe bounding boxes. This may not map cleanly to min/max lon/lat when crossing the dateline. The lower level APIs will use min/max lon/lat and require the higher-level code to split boxes accordingly. |
@ -0,0 +1,170 @@ |
|||
// Copyright (c) 2017 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
package geo |
|||
|
|||
import ( |
|||
"fmt" |
|||
"math" |
|||
|
|||
"github.com/blevesearch/bleve/numeric" |
|||
) |
|||
|
|||
// GeoBits is the number of bits used for a single geo point
|
|||
// Currently this is 32bits for lon and 32bits for lat
|
|||
var GeoBits uint = 32 |
|||
|
|||
var minLon = -180.0 |
|||
var minLat = -90.0 |
|||
var maxLon = 180.0 |
|||
var maxLat = 90.0 |
|||
var minLonRad = minLon * degreesToRadian |
|||
var minLatRad = minLat * degreesToRadian |
|||
var maxLonRad = maxLon * degreesToRadian |
|||
var maxLatRad = maxLat * degreesToRadian |
|||
var geoTolerance = 1E-6 |
|||
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0 |
|||
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0 |
|||
|
|||
// MortonHash computes the morton hash value for the provided geo point
|
|||
// This point is ordered as lon, lat.
|
|||
func MortonHash(lon, lat float64) uint64 { |
|||
return numeric.Interleave(scaleLon(lon), scaleLat(lat)) |
|||
} |
|||
|
|||
func scaleLon(lon float64) uint64 { |
|||
rv := uint64((lon - minLon) * lonScale) |
|||
return rv |
|||
} |
|||
|
|||
func scaleLat(lat float64) uint64 { |
|||
rv := uint64((lat - minLat) * latScale) |
|||
return rv |
|||
} |
|||
|
|||
// MortonUnhashLon extracts the longitude value from the provided morton hash.
|
|||
func MortonUnhashLon(hash uint64) float64 { |
|||
return unscaleLon(numeric.Deinterleave(hash)) |
|||
} |
|||
|
|||
// MortonUnhashLat extracts the latitude value from the provided morton hash.
|
|||
func MortonUnhashLat(hash uint64) float64 { |
|||
return unscaleLat(numeric.Deinterleave(hash >> 1)) |
|||
} |
|||
|
|||