Browse Source
Upgrade bleve to v1.0.10 (#12737)
Upgrade bleve to v1.0.10 (#12737)
* Fix bug on migration 111
* Upgrade bleve to 1.0.10

Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>

mj-v1.14.3
committed by
GitHub
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
90 changed files with 12116 additions and 433 deletions
-
3go.mod
-
26go.sum
-
7vendor/github.com/blevesearch/bleve/README.md
-
94vendor/github.com/blevesearch/bleve/builder.go
-
13vendor/github.com/blevesearch/bleve/go.mod
-
14vendor/github.com/blevesearch/bleve/index.go
-
7vendor/github.com/blevesearch/bleve/index/index.go
-
334vendor/github.com/blevesearch/bleve/index/scorch/builder.go
-
8vendor/github.com/blevesearch/bleve/index/scorch/event.go
-
18vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
-
168vendor/github.com/blevesearch/bleve/index/scorch/merge.go
-
13vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
-
60vendor/github.com/blevesearch/bleve/index/scorch/optimize.go
-
85vendor/github.com/blevesearch/bleve/index/scorch/persister.go
-
0vendor/github.com/blevesearch/bleve/index/scorch/rollback.go
-
62vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
-
22vendor/github.com/blevesearch/bleve/index/scorch/segment/unadorned.go
-
26vendor/github.com/blevesearch/bleve/index/scorch/segment_plugin.go
-
21vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
-
3vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go
-
13vendor/github.com/blevesearch/bleve/index/scorch/stats.go
-
16vendor/github.com/blevesearch/bleve/index_alias_impl.go
-
3vendor/github.com/blevesearch/bleve/index_impl.go
-
3vendor/github.com/blevesearch/bleve/mapping/document.go
-
16vendor/github.com/blevesearch/bleve/mapping/index.go
-
23vendor/github.com/blevesearch/bleve/search.go
-
14vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go
-
4vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_heap.go
-
4vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_slice.go
-
4vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go
-
184vendor/github.com/blevesearch/bleve/search/searcher/search_geoboundingbox.go
-
180vendor/github.com/blevesearch/bleve/search/searcher/search_multi_term.go
-
7vendor/github.com/blevesearch/bleve/search/searcher/search_numeric_range.go
-
2vendor/github.com/blevesearch/bleve/search/searcher/search_regexp.go
-
2vendor/github.com/blevesearch/bleve/search/searcher/search_term.go
-
2vendor/github.com/blevesearch/bleve/search/searcher/search_term_prefix.go
-
9vendor/github.com/blevesearch/bleve/search/sort.go
-
6vendor/github.com/blevesearch/zap/v11/go.mod
-
6vendor/github.com/blevesearch/zap/v12/go.mod
-
12vendor/github.com/blevesearch/zap/v13/.gitignore
-
202vendor/github.com/blevesearch/zap/v13/LICENSE
-
158vendor/github.com/blevesearch/zap/v13/README.md
-
156vendor/github.com/blevesearch/zap/v13/build.go
-
54vendor/github.com/blevesearch/zap/v13/chunk.go
-
243vendor/github.com/blevesearch/zap/v13/contentcoder.go
-
61vendor/github.com/blevesearch/zap/v13/count.go
-
263vendor/github.com/blevesearch/zap/v13/dict.go
-
312vendor/github.com/blevesearch/zap/v13/docvalues.go
-
138vendor/github.com/blevesearch/zap/v13/enumerator.go
-
12vendor/github.com/blevesearch/zap/v13/go.mod
-
111vendor/github.com/blevesearch/zap/v13/intDecoder.go
-
206vendor/github.com/blevesearch/zap/v13/intcoder.go
-
847vendor/github.com/blevesearch/zap/v13/merge.go
-
860vendor/github.com/blevesearch/zap/v13/new.go
-
37vendor/github.com/blevesearch/zap/v13/plugin.go
-
798vendor/github.com/blevesearch/zap/v13/posting.go
-
43vendor/github.com/blevesearch/zap/v13/read.go
-
572vendor/github.com/blevesearch/zap/v13/segment.go
-
145vendor/github.com/blevesearch/zap/v13/write.go
-
177vendor/github.com/blevesearch/zap/v13/zap.md
-
12vendor/github.com/blevesearch/zap/v14/.gitignore
-
202vendor/github.com/blevesearch/zap/v14/LICENSE
-
158vendor/github.com/blevesearch/zap/v14/README.md
-
156vendor/github.com/blevesearch/zap/v14/build.go
-
67vendor/github.com/blevesearch/zap/v14/chunk.go
-
243vendor/github.com/blevesearch/zap/v14/contentcoder.go
-
61vendor/github.com/blevesearch/zap/v14/count.go
-
263vendor/github.com/blevesearch/zap/v14/dict.go
-
312vendor/github.com/blevesearch/zap/v14/docvalues.go
-
138vendor/github.com/blevesearch/zap/v14/enumerator.go
-
12vendor/github.com/blevesearch/zap/v14/go.mod
-
118vendor/github.com/blevesearch/zap/v14/intDecoder.go
-
206vendor/github.com/blevesearch/zap/v14/intcoder.go
-
847vendor/github.com/blevesearch/zap/v14/merge.go
-
860vendor/github.com/blevesearch/zap/v14/new.go
-
37vendor/github.com/blevesearch/zap/v14/plugin.go
-
796vendor/github.com/blevesearch/zap/v14/posting.go
-
43vendor/github.com/blevesearch/zap/v14/read.go
-
572vendor/github.com/blevesearch/zap/v14/segment.go
-
145vendor/github.com/blevesearch/zap/v14/write.go
-
177vendor/github.com/blevesearch/zap/v14/zap.md
-
2vendor/github.com/couchbase/vellum/README.md
-
2vendor/github.com/couchbase/vellum/fst_iterator.go
-
11vendor/go.etcd.io/bbolt/README.md
-
57vendor/go.etcd.io/bbolt/freelist.go
-
25vendor/go.etcd.io/bbolt/node.go
-
57vendor/go.etcd.io/bbolt/page.go
-
27vendor/go.etcd.io/bbolt/tx.go
-
39vendor/go.etcd.io/bbolt/unsafe.go
-
15vendor/modules.txt
@ -0,0 +1,94 @@ |
|||
// Copyright (c) 2019 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
package bleve |
|||
|
|||
import ( |
|||
"encoding/json" |
|||
"fmt" |
|||
|
|||
"github.com/blevesearch/bleve/document" |
|||
"github.com/blevesearch/bleve/index" |
|||
"github.com/blevesearch/bleve/index/scorch" |
|||
"github.com/blevesearch/bleve/mapping" |
|||
) |
|||
|
|||
type builderImpl struct { |
|||
b index.IndexBuilder |
|||
m mapping.IndexMapping |
|||
} |
|||
|
|||
func (b *builderImpl) Index(id string, data interface{}) error { |
|||
if id == "" { |
|||
return ErrorEmptyID |
|||
} |
|||
|
|||
doc := document.NewDocument(id) |
|||
err := b.m.MapDocument(doc, data) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
err = b.b.Index(doc) |
|||
return err |
|||
} |
|||
|
|||
func (b *builderImpl) Close() error { |
|||
return b.b.Close() |
|||
} |
|||
|
|||
func newBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) { |
|||
if path == "" { |
|||
return nil, fmt.Errorf("builder requires path") |
|||
} |
|||
|
|||
err := mapping.Validate() |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
if config == nil { |
|||
config = map[string]interface{}{} |
|||
} |
|||
|
|||
// the builder does not have an API to interact with internal storage
|
|||
// however we can pass k/v pairs through the config
|
|||
mappingBytes, err := json.Marshal(mapping) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
config["internal"] = map[string][]byte{ |
|||
string(mappingInternalKey): mappingBytes, |
|||
} |
|||
|
|||
// do not use real config, as these are options for the builder,
|
|||
// not the resulting index
|
|||
meta := newIndexMeta(scorch.Name, scorch.Name, map[string]interface{}{}) |
|||
err = meta.Save(path) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
config["path"] = indexStorePath(path) |
|||
|
|||
b, err := scorch.NewBuilder(config) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
rv := &builderImpl{ |
|||
b: b, |
|||
m: mapping, |
|||
} |
|||
|
|||
return rv, nil |
|||
} |
@ -0,0 +1,334 @@ |
|||
// Copyright (c) 2019 Couchbase, Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
package scorch |
|||
|
|||
import ( |
|||
"fmt" |
|||
"io/ioutil" |
|||
"os" |
|||
"sync" |
|||
|
|||
"github.com/RoaringBitmap/roaring" |
|||
"github.com/blevesearch/bleve/document" |
|||
"github.com/blevesearch/bleve/index" |
|||
"github.com/blevesearch/bleve/index/scorch/segment" |
|||
bolt "go.etcd.io/bbolt" |
|||
) |
|||
|
|||
// Defaults applied by NewBuilder when the config does not override them.
const (
	// DefaultBuilderBatchSize is the number of pending documents at which
	// the builder flushes its batch into a new segment.
	DefaultBuilderBatchSize = 1000

	// DefaultBuilderMergeMax is the largest number of segments combined in
	// a single merge step.
	DefaultBuilderMergeMax = 10
)
|||
|
|||
type Builder struct { |
|||
m sync.Mutex |
|||
segCount uint64 |
|||
path string |
|||
buildPath string |
|||
segPaths []string |
|||
batchSize int |
|||
mergeMax int |
|||
batch *index.Batch |
|||
internal map[string][]byte |
|||
segPlugin segment.Plugin |
|||
} |
|||
|
|||
func NewBuilder(config map[string]interface{}) (*Builder, error) { |
|||
path, ok := config["path"].(string) |
|||
if !ok { |
|||
return nil, fmt.Errorf("must specify path") |
|||
} |
|||
|
|||
buildPathPrefix, _ := config["buildPathPrefix"].(string) |
|||
buildPath, err := ioutil.TempDir(buildPathPrefix, "scorch-offline-build") |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
rv := &Builder{ |
|||
path: path, |
|||
buildPath: buildPath, |
|||
mergeMax: DefaultBuilderMergeMax, |
|||
batchSize: DefaultBuilderBatchSize, |
|||
batch: index.NewBatch(), |
|||
segPlugin: defaultSegmentPlugin, |
|||
} |
|||
|
|||
err = rv.parseConfig(config) |
|||
if err != nil { |
|||
return nil, fmt.Errorf("error parsing builder config: %v", err) |
|||
} |
|||
|
|||
return rv, nil |
|||
} |
|||
|
|||
func (o *Builder) parseConfig(config map[string]interface{}) (err error) { |
|||
if v, ok := config["mergeMax"]; ok { |
|||
var t int |
|||
if t, err = parseToInteger(v); err != nil { |
|||
return fmt.Errorf("mergeMax parse err: %v", err) |
|||
} |
|||
if t > 0 { |
|||
o.mergeMax = t |
|||
} |
|||
} |
|||
|
|||
if v, ok := config["batchSize"]; ok { |
|||
var t int |
|||
if t, err = parseToInteger(v); err != nil { |
|||
return fmt.Errorf("batchSize parse err: %v", err) |
|||
} |
|||
if t > 0 { |
|||
o.batchSize = t |
|||
} |
|||
} |
|||
|
|||
if v, ok := config["internal"]; ok { |
|||
if vinternal, ok := v.(map[string][]byte); ok { |
|||
o.internal = vinternal |
|||
} |
|||
} |
|||
|
|||
forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
if forcedSegmentType != "" && forcedSegmentVersion != 0 { |
|||
segPlugin, err := chooseSegmentPlugin(forcedSegmentType, |
|||
uint32(forcedSegmentVersion)) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
o.segPlugin = segPlugin |
|||
} |
|||
|
|||
return nil |
|||
} |
|||
|
|||
// Index will place the document into the index.
|
|||
// It is invalid to index the same document multiple times.
|
|||
func (o *Builder) Index(doc *document.Document) error { |
|||
o.m.Lock() |
|||
defer o.m.Unlock() |
|||
|
|||
o.batch.Update(doc) |
|||
|
|||
return o.maybeFlushBatchLOCKED(o.batchSize) |
|||
} |
|||
|
|||
func (o *Builder) maybeFlushBatchLOCKED(moreThan int) error { |
|||
if len(o.batch.IndexOps) >= moreThan { |
|||
defer o.batch.Reset() |
|||
return o.executeBatchLOCKED(o.batch) |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
func (o *Builder) executeBatchLOCKED(batch *index.Batch) (err error) { |
|||
analysisResults := make([]*index.AnalysisResult, 0, len(batch.IndexOps)) |
|||
for _, doc := range batch.IndexOps { |
|||
if doc != nil { |
|||
// insert _id field
|
|||
doc.AddField(document.NewTextFieldCustom("_id", nil, []byte(doc.ID), document.IndexField|document.StoreField, nil)) |
|||
// perform analysis directly
|
|||
analysisResult := analyze(doc) |
|||
analysisResults = append(analysisResults, analysisResult) |
|||
} |
|||
} |
|||
|
|||
seg, _, err := o.segPlugin.New(analysisResults) |
|||
if err != nil { |
|||
return fmt.Errorf("error building segment base: %v", err) |
|||
} |
|||
|
|||
filename := zapFileName(o.segCount) |
|||
o.segCount++ |
|||
path := o.buildPath + string(os.PathSeparator) + filename |
|||
|
|||
if segUnpersisted, ok := seg.(segment.UnpersistedSegment); ok { |
|||
err = segUnpersisted.Persist(path) |
|||
if err != nil { |
|||
return fmt.Errorf("error persisting segment base to %s: %v", path, err) |
|||
} |
|||
|
|||
o.segPaths = append(o.segPaths, path) |
|||
return nil |
|||
} |
|||
|
|||
return fmt.Errorf("new segment does not implement unpersisted: %T", seg) |
|||
} |
|||
|
|||
func (o *Builder) doMerge() error { |
|||
// as long as we have more than 1 segment, keep merging
|
|||
for len(o.segPaths) > 1 { |
|||
|
|||
// merge the next <mergeMax> number of segments into one new one
|
|||
// or, if there are fewer than <mergeMax> remaining, merge them all
|
|||
mergeCount := o.mergeMax |
|||
if mergeCount > len(o.segPaths) { |
|||
mergeCount = len(o.segPaths) |
|||
} |
|||
|
|||
mergePaths := o.segPaths[0:mergeCount] |
|||
o.segPaths = o.segPaths[mergeCount:] |
|||
|
|||
// open each of the segments to be merged
|
|||
mergeSegs := make([]segment.Segment, 0, mergeCount) |
|||
|
|||
// closeOpenedSegs attempts to close all opened
|
|||
// segments even if an error occurs, in which case
|
|||
// the first error is returned
|
|||
closeOpenedSegs := func() error { |
|||
var err error |
|||
for _, seg := range mergeSegs { |
|||
clErr := seg.Close() |
|||
if clErr != nil && err == nil { |
|||
err = clErr |
|||
} |
|||
} |
|||
return err |
|||
} |
|||
|
|||
for _, mergePath := range mergePaths { |
|||
seg, err := o.segPlugin.Open(mergePath) |
|||
if err != nil { |
|||
_ = closeOpenedSegs() |
|||
return fmt.Errorf("error opening segment (%s) for merge: %v", mergePath, err) |
|||
} |
|||
mergeSegs = append(mergeSegs, seg) |
|||
} |
|||
|
|||
// do the merge
|
|||
mergedSegPath := o.buildPath + string(os.PathSeparator) + zapFileName(o.segCount) |
|||
drops := make([]*roaring.Bitmap, mergeCount) |
|||
_, _, err := o.segPlugin.Merge(mergeSegs, drops, mergedSegPath, nil, nil) |
|||
if err != nil { |
|||
_ = closeOpenedSegs() |
|||
return fmt.Errorf("error merging segments (%v): %v", mergePaths, err) |
|||
} |
|||
o.segCount++ |
|||
o.segPaths = append(o.segPaths, mergedSegPath) |
|||
|
|||
// close segments opened for merge
|
|||
err = closeOpenedSegs() |
|||
if err != nil { |
|||
return fmt.Errorf("error closing opened segments: %v", err) |
|||
} |
|||
|
|||
// remove merged segments
|
|||
for _, mergePath := range mergePaths { |
|||
err = os.RemoveAll(mergePath) |
|||
if err != nil { |
|||
return fmt.Errorf("error removing segment %s after merge: %v", mergePath, err) |
|||
} |
|||
} |
|||
} |
|||
|
|||
return nil |
|||
} |
|||
|
|||
func (o *Builder) Close() error { |
|||
o.m.Lock() |
|||
defer o.m.Unlock() |
|||
|
|||
// see if there is a partial batch
|
|||
err := o.maybeFlushBatchLOCKED(1) |
|||
if err != nil { |
|||
return fmt.Errorf("error flushing batch before close: %v", err) |
|||
} |
|||
|
|||
// perform all the merging
|
|||
err = o.doMerge() |
|||
if err != nil { |
|||
return fmt.Errorf("error while merging: %v", err) |
|||
} |
|||
|
|||
// ensure the store path exists
|
|||
err = os.MkdirAll(o.path, 0700) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
|
|||
// move final segment into place
|
|||
// segment id 2 is chosen to match the behavior of a scorch
|
|||
// index which indexes a single batch of data
|
|||
finalSegPath := o.path + string(os.PathSeparator) + zapFileName(2) |
|||
err = os.Rename(o.segPaths[0], finalSegPath) |
|||
if err != nil { |
|||
return fmt.Errorf("error moving final segment into place: %v", err) |
|||
} |
|||
|
|||
// remove the buildPath, as it is no longer needed
|
|||
err = os.RemoveAll(o.buildPath) |
|||
if err != nil { |
|||
return fmt.Errorf("error removing build path: %v", err) |
|||
} |
|||
|
|||
// prepare wrapping
|
|||
seg, err := o.segPlugin.Open(finalSegPath) |
|||
if err != nil { |
|||
return fmt.Errorf("error opening final segment") |
|||
} |
|||
|
|||
// create a segment snapshot for this segment
|
|||
ss := &SegmentSnapshot{ |
|||
segment: seg, |
|||
} |
|||
is := &IndexSnapshot{ |
|||
epoch: 3, // chosen to match scorch behavior when indexing a single batch
|
|||
segment: []*SegmentSnapshot{ss}, |
|||
creator: "scorch-builder", |
|||
internal: o.internal, |
|||
} |
|||
|
|||
// create the root bolt
|
|||
rootBoltPath := o.path + string(os.PathSeparator) + "root.bolt" |
|||
rootBolt, err := bolt.Open(rootBoltPath, 0600, nil) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
|
|||
// start a write transaction
|
|||
tx, err := rootBolt.Begin(true) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
|
|||
// fill the root bolt with this fake index snapshot
|
|||
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin) |
|||
if err != nil { |
|||
_ = tx.Rollback() |
|||
_ = rootBolt.Close() |
|||
return fmt.Errorf("error preparing bolt snapshot in root.bolt: %v", err) |
|||
} |
|||
|
|||
// commit bolt data
|
|||
err = tx.Commit() |
|||
if err != nil { |
|||
_ = rootBolt.Close() |
|||
return fmt.Errorf("error committing bolt tx in root.bolt: %v", err) |
|||
} |
|||
|
|||
// close bolt
|
|||
err = rootBolt.Close() |
|||
if err != nil { |
|||
return fmt.Errorf("error closing root.bolt: %v", err) |
|||
} |
|||
|
|||
// close final segment
|
|||
err = seg.Close() |
|||
if err != nil { |
|||
return fmt.Errorf("error closing final segment: %v", err) |
|||
} |
|||
return nil |
|||
} |