Improve issue search (#2387)
* Improve issue indexer * Fix new issue sqlite bug * Different test indexer paths for each db * Add integration indexer paths to make cleanrelease/v1.3
parent
52e11b24bf
commit
b0f7457d9e
@ -0,0 +1,143 @@
|
||||
// Copyright 2017 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package indexer
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
|
||||
"github.com/blevesearch/bleve"
|
||||
"github.com/blevesearch/bleve/analysis/analyzer/custom"
|
||||
"github.com/blevesearch/bleve/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/analysis/token/unicodenorm"
|
||||
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
|
||||
)
|
||||
|
||||
// issueIndexer (thread-safe) index for searching issues
|
||||
var issueIndexer bleve.Index
|
||||
|
||||
// IssueIndexerData data stored in the issue indexer
|
||||
type IssueIndexerData struct {
|
||||
RepoID int64
|
||||
Title string
|
||||
Content string
|
||||
Comments []string
|
||||
}
|
||||
|
||||
// IssueIndexerUpdate an update to the issue indexer
|
||||
type IssueIndexerUpdate struct {
|
||||
IssueID int64
|
||||
Data *IssueIndexerData
|
||||
}
|
||||
|
||||
const issueIndexerAnalyzer = "issueIndexer"
|
||||
|
||||
// InitIssueIndexer initialize issue indexer
|
||||
func InitIssueIndexer(populateIndexer func() error) {
|
||||
_, err := os.Stat(setting.Indexer.IssuePath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if err = createIssueIndexer(); err != nil {
|
||||
log.Fatal(4, "CreateIssuesIndexer: %v", err)
|
||||
}
|
||||
if err = populateIndexer(); err != nil {
|
||||
log.Fatal(4, "PopulateIssuesIndex: %v", err)
|
||||
}
|
||||
} else {
|
||||
log.Fatal(4, "InitIssuesIndexer: %v", err)
|
||||
}
|
||||
} else {
|
||||
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath)
|
||||
if err != nil {
|
||||
log.Error(4, "Unable to open issues indexer (%s)."+
|
||||
" If the error is due to incompatible versions, try deleting the indexer files;"+
|
||||
" gitea will recreate them with the appropriate version the next time it runs."+
|
||||
" Deleting the indexer files will not result in loss of data.",
|
||||
setting.Indexer.IssuePath)
|
||||
log.Fatal(4, "InitIssuesIndexer, open index: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// createIssueIndexer create an issue indexer if one does not already exist
|
||||
func createIssueIndexer() error {
|
||||
mapping := bleve.NewIndexMapping()
|
||||
docMapping := bleve.NewDocumentMapping()
|
||||
|
||||
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
|
||||
|
||||
textFieldMapping := bleve.NewTextFieldMapping()
|
||||
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
|
||||
docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
|
||||
|
||||
const unicodeNormNFC = "unicodeNormNFC"
|
||||
if err := mapping.AddCustomTokenFilter(unicodeNormNFC, map[string]interface{}{
|
||||
"type": unicodenorm.Name,
|
||||
"form": unicodenorm.NFC,
|
||||
}); err != nil {
|
||||
return err
|
||||
} else if err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]interface{}{
|
||||
"type": custom.Name,
|
||||
"char_filters": []string{},
|
||||
"tokenizer": unicode.Name,
|
||||
"token_filters": []string{unicodeNormNFC, lowercase.Name},
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
mapping.DefaultAnalyzer = issueIndexerAnalyzer
|
||||
mapping.AddDocumentMapping("issues", docMapping)
|
||||
|
||||
var err error
|
||||
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping)
|
||||
return err
|
||||
}
|
||||
|
||||
// UpdateIssue update the issue indexer
|
||||
func UpdateIssue(update IssueIndexerUpdate) error {
|
||||
return issueIndexer.Index(indexerID(update.IssueID), update.Data)
|
||||
}
|
||||
|
||||
// BatchUpdateIssues perform a batch update of the issue indexer
|
||||
func BatchUpdateIssues(updates ...IssueIndexerUpdate) error {
|
||||
batch := issueIndexer.NewBatch()
|
||||
for _, update := range updates {
|
||||
err := batch.Index(indexerID(update.IssueID), update.Data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return issueIndexer.Batch(batch)
|
||||
}
|
||||
|
||||
// SearchIssuesByKeyword searches for issues by given conditions.
|
||||
// Returns the matching issue IDs
|
||||
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) {
|
||||
indexerQuery := bleve.NewConjunctionQuery(
|
||||
numericEqualityQuery(repoID, "RepoID"),
|
||||
bleve.NewDisjunctionQuery(
|
||||
newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
|
||||
newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
|
||||
newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
|
||||
))
|
||||
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false)
|
||||
|
||||
result, err := issueIndexer.Search(search)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
issueIDs := make([]int64, len(result.Hits))
|
||||
for i, hit := range result.Hits {
|
||||
issueIDs[i], err = idOfIndexerID(hit.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return issueIDs, nil
|
||||
}
|
@ -0,0 +1,145 @@
|
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package custom
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "custom"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
|
||||
var err error
|
||||
var charFilters []analysis.CharFilter
|
||||
charFiltersValue, ok := config["char_filters"]
|
||||
if ok {
|
||||
switch charFiltersValue := charFiltersValue.(type) {
|
||||
case []string:
|
||||
charFilters, err = getCharFilters(charFiltersValue, cache)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
case []interface{}:
|
||||
charFiltersNames, err := convertInterfaceSliceToStringSlice(charFiltersValue, "char filter")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
charFilters, err = getCharFilters(charFiltersNames, cache)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type for char_filters, must be slice")
|
||||
}
|
||||
}
|
||||
|
||||
var tokenizerName string
|
||||
tokenizerValue, ok := config["tokenizer"]
|
||||
if ok {
|
||||
tokenizerName, ok = tokenizerValue.(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify tokenizer as string")
|
||||
}
|
||||
} else {
|
||||
return nil, fmt.Errorf("must specify tokenizer")
|
||||
}
|
||||
|
||||
tokenizer, err := cache.TokenizerNamed(tokenizerName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var tokenFilters []analysis.TokenFilter
|
||||
tokenFiltersValue, ok := config["token_filters"]
|
||||
if ok {
|
||||
switch tokenFiltersValue := tokenFiltersValue.(type) {
|
||||
case []string:
|
||||
tokenFilters, err = getTokenFilters(tokenFiltersValue, cache)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
case []interface{}:
|
||||
tokenFiltersNames, err := convertInterfaceSliceToStringSlice(tokenFiltersValue, "token filter")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tokenFilters, err = getTokenFilters(tokenFiltersNames, cache)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported type for token_filters, must be slice")
|
||||
}
|
||||
}
|
||||
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: tokenizer,
|
||||
}
|
||||
if charFilters != nil {
|
||||
rv.CharFilters = charFilters
|
||||
}
|
||||
if tokenFilters != nil {
|
||||
rv.TokenFilters = tokenFilters
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
||||
|
||||
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
|
||||
charFilters := make([]analysis.CharFilter, len(charFilterNames))
|
||||
for i, charFilterName := range charFilterNames {
|
||||
charFilter, err := cache.CharFilterNamed(charFilterName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
charFilters[i] = charFilter
|
||||
}
|
||||
|
||||
return charFilters, nil
|
||||
}
|
||||
|
||||
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
|
||||
tokenFilters := make([]analysis.TokenFilter, len(tokenFilterNames))
|
||||
for i, tokenFilterName := range tokenFilterNames {
|
||||
tokenFilter, err := cache.TokenFilterNamed(tokenFilterName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tokenFilters[i] = tokenFilter
|
||||
}
|
||||
|
||||
return tokenFilters, nil
|
||||
}
|
||||
|
||||
func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
|
||||
stringSlice := make([]string, len(interfaceSlice))
|
||||
for i, interfaceObj := range interfaceSlice {
|
||||
stringObj, ok := interfaceObj.(string)
|
||||
if ok {
|
||||
stringSlice[i] = stringObj
|
||||
} else {
|
||||
return nil, fmt.Errorf(objType + " name must be a string")
|
||||
}
|
||||
}
|
||||
|
||||
return stringSlice, nil
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package simple
|
||||
|
||||
import (
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/analysis/tokenizer/letter"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "simple"
|
||||
|
||||
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
|
||||
tokenizer, err := cache.TokenizerNamed(letter.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
toLowerFilter, err := cache.TokenFilterNamed(lowercase.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := analysis.Analyzer{
|
||||
Tokenizer: tokenizer,
|
||||
TokenFilters: []analysis.TokenFilter{
|
||||
toLowerFilter,
|
||||
},
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterAnalyzer(Name, AnalyzerConstructor)
|
||||
}
|
@ -0,0 +1,79 @@
|
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package unicodenorm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
const Name = "normalize_unicode"
|
||||
|
||||
const NFC = "nfc"
|
||||
const NFD = "nfd"
|
||||
const NFKC = "nfkc"
|
||||
const NFKD = "nfkd"
|
||||
|
||||
var forms = map[string]norm.Form{
|
||||
NFC: norm.NFC,
|
||||
NFD: norm.NFD,
|
||||
NFKC: norm.NFKC,
|
||||
NFKD: norm.NFKD,
|
||||
}
|
||||
|
||||
type UnicodeNormalizeFilter struct {
|
||||
form norm.Form
|
||||
}
|
||||
|
||||
func NewUnicodeNormalizeFilter(formName string) (*UnicodeNormalizeFilter, error) {
|
||||
form, ok := forms[formName]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no form named %s", formName)
|
||||
}
|
||||
return &UnicodeNormalizeFilter{
|
||||
form: form,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter {
|
||||
filter, err := NewUnicodeNormalizeFilter(formName)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return filter
|
||||
}
|
||||
|
||||
func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
|
||||
for _, token := range input {
|
||||
token.Term = s.form.Bytes(token.Term)
|
||||
}
|
||||
return input
|
||||
}
|
||||
|
||||
func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
|
||||
formVal, ok := config["form"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify form")
|
||||
}
|
||||
form := formVal
|
||||
return NewUnicodeNormalizeFilter(form)
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenFilter(Name, UnicodeNormalizeFilterConstructor)
|
||||
}
|
@ -1,76 +0,0 @@
|
||||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package character
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
)
|
||||
|
||||
type IsTokenRune func(r rune) bool
|
||||
|
||||
type CharacterTokenizer struct {
|
||||
isTokenRun IsTokenRune
|
||||
}
|
||||
|
||||
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer {
|
||||
return &CharacterTokenizer{
|
||||
isTokenRun: f,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
|
||||
rv := make(analysis.TokenStream, 0, 1024)
|
||||
|
||||
offset := 0
|
||||
start := 0
|
||||
end := 0
|
||||
count := 0
|
||||
for currRune, size := utf8.DecodeRune(input[offset:]); currRune != utf8.RuneError; currRune, size = utf8.DecodeRune(input[offset:]) {
|
||||
isToken := c.isTokenRun(currRune)
|
||||
if isToken {
|
||||
end = offset + size
|
||||
} else {
|
||||
if end-start > 0 {
|
||||
// build token
|
||||
rv = append(rv, &analysis.Token{
|
||||
Term: input[start:end],
|
||||
Start: start,
|
||||
End: end,
|
||||
Position: count + 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
})
|
||||
count++
|
||||
}
|
||||
start = offset + size
|
||||
end = start
|
||||
}
|
||||
offset += size
|
||||
}
|
||||
// if we ended in the middle of a token, finish it
|
||||
if end-start > 0 {
|
||||
// build token
|
||||
rv = append(rv, &analysis.Token{
|
||||
Term: input[start:end],
|
||||
Start: start,
|
||||
End: end,
|
||||
Position: count + 1,
|
||||
Type: analysis.AlphaNumeric,
|
||||
})
|
||||
}
|
||||
return rv
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
// Copyright (c) 2016 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package letter
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/analysis/tokenizer/character"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
)
|
||||
|
||||
const Name = "letter"
|
||||
|
||||
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
|
||||
return character.NewCharacterTokenizer(unicode.IsLetter), nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
registry.RegisterTokenizer(Name, TokenizerConstructor)
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// +build appengine appenginevm
|
||||
|
||||
package bleve
|
||||
|
||||
// in the appengine environment we cannot support disk based indexes
|
||||
// so we do no extra configuration in this method
|
||||
func initDisk() {
|
||||
|
||||
}
|
@ -0,0 +1,137 @@
|
||||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/geo"
|
||||
"github.com/blevesearch/bleve/numeric"
|
||||
)
|
||||
|
||||
var GeoPrecisionStep uint = 9
|
||||
|
||||
type GeoPointField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value numeric.PrefixCoded
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
||||
func (n *GeoPointField) ArrayPositions() []uint64 {
|
||||
return n.arrayPositions
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Options() IndexingOptions {
|
||||
return n.options
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Analyze() (int, analysis.TokenFrequencies) {
|
||||
tokens := make(analysis.TokenStream, 0)
|
||||
tokens = append(tokens, &analysis.Token{
|
||||
Start: 0,
|
||||
End: len(n.value),
|
||||
Term: n.value,
|
||||
Position: 1,
|
||||
Type: analysis.Numeric,
|
||||
})
|
||||
|
||||
original, err := n.value.Int64()
|
||||
if err == nil {
|
||||
|
||||
shift := GeoPrecisionStep
|
||||
for shift < 64 {
|
||||
shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
token := analysis.Token{
|
||||
Start: 0,
|
||||
End: len(shiftEncoded),
|
||||
Term: shiftEncoded,
|
||||
Position: 1,
|
||||
Type: analysis.Numeric,
|
||||
}
|
||||
tokens = append(tokens, &token)
|
||||
shift += GeoPrecisionStep
|
||||
}
|
||||
}
|
||||
|
||||
fieldLength := len(tokens)
|
||||
tokenFreqs := analysis.TokenFrequency(tokens, n.arrayPositions, n.options.IncludeTermVectors())
|
||||
return fieldLength, tokenFreqs
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Value() []byte {
|
||||
return n.value
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Lon() (float64, error) {
|
||||
i64, err := n.value.Int64()
|
||||
if err != nil {
|
||||
return 0.0, err
|
||||
}
|
||||
return geo.MortonUnhashLon(uint64(i64)), nil
|
||||
}
|
||||
|
||||
func (n *GeoPointField) Lat() (float64, error) {
|
||||
i64, err := n.value.Int64()
|
||||
if err != nil {
|
||||
return 0.0, err
|
||||
}
|
||||
return geo.MortonUnhashLat(uint64(i64)), nil
|
||||
}
|
||||
|
||||
func (n *GeoPointField) GoString() string {
|
||||
return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func (n *GeoPointField) NumPlainTextBytes() uint64 {
|
||||
return n.numPlainTextBytes
|
||||
}
|
||||
|
||||
func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField {
|
||||
return &GeoPointField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField {
|
||||
return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions)
|
||||
}
|
||||
|
||||
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options IndexingOptions) *GeoPointField {
|
||||
mhash := geo.MortonHash(lon, lat)
|
||||
prefixCoded := numeric.MustNewPrefixCodedInt64(int64(mhash), 0)
|
||||
return &GeoPointField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
// not correct, just a place holder until we revisit how fields are
|
||||
// represented and can fix this better
|
||||
numPlainTextBytes: uint64(8),
|
||||
}
|
||||
}
|
@ -0,0 +1,9 @@
|
||||
# geo support in bleve
|
||||
|
||||
First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html).
|
||||
|
||||
## Notes
|
||||
|
||||
- All of the APIs will use float64 for lon/lat values.
|
||||
- When describing a point in function arguments or return values, we always use the order lon, lat.
|
||||
- High level APIs will use TopLeft and BottomRight to describe bounding boxes. This may not map cleanly to min/max lon/lat when crossing the dateline. The lower level APIs will use min/max lon/lat and require the higher-level code to split boxes accordingly.
|