Upgrade bleve to v1.0.10 (#12737)
* Fix bug on migration 111 * Upgrade bleve to 1.0.10 Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io>mj-v1.14.3
parent
1b9d5074a7
commit
d17efaa114
@ -0,0 +1,94 @@
|
||||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package bleve
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch"
|
||||
"github.com/blevesearch/bleve/mapping"
|
||||
)
|
||||
|
||||
type builderImpl struct {
|
||||
b index.IndexBuilder
|
||||
m mapping.IndexMapping
|
||||
}
|
||||
|
||||
func (b *builderImpl) Index(id string, data interface{}) error {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
|
||||
doc := document.NewDocument(id)
|
||||
err := b.m.MapDocument(doc, data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = b.b.Index(doc)
|
||||
return err
|
||||
}
|
||||
|
||||
func (b *builderImpl) Close() error {
|
||||
return b.b.Close()
|
||||
}
|
||||
|
||||
func newBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) {
|
||||
if path == "" {
|
||||
return nil, fmt.Errorf("builder requires path")
|
||||
}
|
||||
|
||||
err := mapping.Validate()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if config == nil {
|
||||
config = map[string]interface{}{}
|
||||
}
|
||||
|
||||
// the builder does not have an API to interact with internal storage
|
||||
// however we can pass k/v pairs through the config
|
||||
mappingBytes, err := json.Marshal(mapping)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config["internal"] = map[string][]byte{
|
||||
string(mappingInternalKey): mappingBytes,
|
||||
}
|
||||
|
||||
// do not use real config, as these are options for the builder,
|
||||
// not the resulting index
|
||||
meta := newIndexMeta(scorch.Name, scorch.Name, map[string]interface{}{})
|
||||
err = meta.Save(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
config["path"] = indexStorePath(path)
|
||||
|
||||
b, err := scorch.NewBuilder(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := &builderImpl{
|
||||
b: b,
|
||||
m: mapping,
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
@ -0,0 +1,334 @@
|
||||
// Copyright (c) 2019 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package scorch
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"sync"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/document"
|
||||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
bolt "go.etcd.io/bbolt"
|
||||
)
|
||||
|
||||
const DefaultBuilderBatchSize = 1000
|
||||
const DefaultBuilderMergeMax = 10
|
||||
|
||||
type Builder struct {
|
||||
m sync.Mutex
|
||||
segCount uint64
|
||||
path string
|
||||
buildPath string
|
||||
segPaths []string
|
||||
batchSize int
|
||||
mergeMax int
|
||||
batch *index.Batch
|
||||
internal map[string][]byte
|
||||
segPlugin segment.Plugin
|
||||
}
|
||||
|
||||
func NewBuilder(config map[string]interface{}) (*Builder, error) {
|
||||
path, ok := config["path"].(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("must specify path")
|
||||
}
|
||||
|
||||
buildPathPrefix, _ := config["buildPathPrefix"].(string)
|
||||
buildPath, err := ioutil.TempDir(buildPathPrefix, "scorch-offline-build")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv := &Builder{
|
||||
path: path,
|
||||
buildPath: buildPath,
|
||||
mergeMax: DefaultBuilderMergeMax,
|
||||
batchSize: DefaultBuilderBatchSize,
|
||||
batch: index.NewBatch(),
|
||||
segPlugin: defaultSegmentPlugin,
|
||||
}
|
||||
|
||||
err = rv.parseConfig(config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing builder config: %v", err)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (o *Builder) parseConfig(config map[string]interface{}) (err error) {
|
||||
if v, ok := config["mergeMax"]; ok {
|
||||
var t int
|
||||
if t, err = parseToInteger(v); err != nil {
|
||||
return fmt.Errorf("mergeMax parse err: %v", err)
|
||||
}
|
||||
if t > 0 {
|
||||
o.mergeMax = t
|
||||
}
|
||||
}
|
||||
|
||||
if v, ok := config["batchSize"]; ok {
|
||||
var t int
|
||||
if t, err = parseToInteger(v); err != nil {
|
||||
return fmt.Errorf("batchSize parse err: %v", err)
|
||||
}
|
||||
if t > 0 {
|
||||
o.batchSize = t
|
||||
}
|
||||
}
|
||||
|
||||
if v, ok := config["internal"]; ok {
|
||||
if vinternal, ok := v.(map[string][]byte); ok {
|
||||
o.internal = vinternal
|
||||
}
|
||||
}
|
||||
|
||||
forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if forcedSegmentType != "" && forcedSegmentVersion != 0 {
|
||||
segPlugin, err := chooseSegmentPlugin(forcedSegmentType,
|
||||
uint32(forcedSegmentVersion))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
o.segPlugin = segPlugin
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Index will place the document into the index.
|
||||
// It is invalid to index the same document multiple times.
|
||||
func (o *Builder) Index(doc *document.Document) error {
|
||||
o.m.Lock()
|
||||
defer o.m.Unlock()
|
||||
|
||||
o.batch.Update(doc)
|
||||
|
||||
return o.maybeFlushBatchLOCKED(o.batchSize)
|
||||
}
|
||||
|
||||
func (o *Builder) maybeFlushBatchLOCKED(moreThan int) error {
|
||||
if len(o.batch.IndexOps) >= moreThan {
|
||||
defer o.batch.Reset()
|
||||
return o.executeBatchLOCKED(o.batch)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *Builder) executeBatchLOCKED(batch *index.Batch) (err error) {
|
||||
analysisResults := make([]*index.AnalysisResult, 0, len(batch.IndexOps))
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
// insert _id field
|
||||
doc.AddField(document.NewTextFieldCustom("_id", nil, []byte(doc.ID), document.IndexField|document.StoreField, nil))
|
||||
// perform analysis directly
|
||||
analysisResult := analyze(doc)
|
||||
analysisResults = append(analysisResults, analysisResult)
|
||||
}
|
||||
}
|
||||
|
||||
seg, _, err := o.segPlugin.New(analysisResults)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error building segment base: %v", err)
|
||||
}
|
||||
|
||||
filename := zapFileName(o.segCount)
|
||||
o.segCount++
|
||||
path := o.buildPath + string(os.PathSeparator) + filename
|
||||
|
||||
if segUnpersisted, ok := seg.(segment.UnpersistedSegment); ok {
|
||||
err = segUnpersisted.Persist(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error persisting segment base to %s: %v", path, err)
|
||||
}
|
||||
|
||||
o.segPaths = append(o.segPaths, path)
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("new segment does not implement unpersisted: %T", seg)
|
||||
}
|
||||
|
||||
func (o *Builder) doMerge() error {
|
||||
// as long as we have more than 1 segment, keep merging
|
||||
for len(o.segPaths) > 1 {
|
||||
|
||||
// merge the next <mergeMax> number of segments into one new one
|
||||
// or, if there are fewer than <mergeMax> remaining, merge them all
|
||||
mergeCount := o.mergeMax
|
||||
if mergeCount > len(o.segPaths) {
|
||||
mergeCount = len(o.segPaths)
|
||||
}
|
||||
|
||||
mergePaths := o.segPaths[0:mergeCount]
|
||||
o.segPaths = o.segPaths[mergeCount:]
|
||||
|
||||
// open each of the segments to be merged
|
||||
mergeSegs := make([]segment.Segment, 0, mergeCount)
|
||||
|
||||
// closeOpenedSegs attempts to close all opened
|
||||
// segments even if an error occurs, in which case
|
||||
// the first error is returned
|
||||
closeOpenedSegs := func() error {
|
||||
var err error
|
||||
for _, seg := range mergeSegs {
|
||||
clErr := seg.Close()
|
||||
if clErr != nil && err == nil {
|
||||
err = clErr
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
for _, mergePath := range mergePaths {
|
||||
seg, err := o.segPlugin.Open(mergePath)
|
||||
if err != nil {
|
||||
_ = closeOpenedSegs()
|
||||
return fmt.Errorf("error opening segment (%s) for merge: %v", mergePath, err)
|
||||
}
|
||||
mergeSegs = append(mergeSegs, seg)
|
||||
}
|
||||
|
||||
// do the merge
|
||||
mergedSegPath := o.buildPath + string(os.PathSeparator) + zapFileName(o.segCount)
|
||||
drops := make([]*roaring.Bitmap, mergeCount)
|
||||
_, _, err := o.segPlugin.Merge(mergeSegs, drops, mergedSegPath, nil, nil)
|
||||
if err != nil {
|
||||
_ = closeOpenedSegs()
|
||||
return fmt.Errorf("error merging segments (%v): %v", mergePaths, err)
|
||||
}
|
||||
o.segCount++
|
||||
o.segPaths = append(o.segPaths, mergedSegPath)
|
||||
|
||||
// close segments opened for merge
|
||||
err = closeOpenedSegs()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error closing opened segments: %v", err)
|
||||
}
|
||||
|
||||
// remove merged segments
|
||||
for _, mergePath := range mergePaths {
|
||||
err = os.RemoveAll(mergePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error removing segment %s after merge: %v", mergePath, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *Builder) Close() error {
|
||||
o.m.Lock()
|
||||
defer o.m.Unlock()
|
||||
|
||||
// see if there is a partial batch
|
||||
err := o.maybeFlushBatchLOCKED(1)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error flushing batch before close: %v", err)
|
||||
}
|
||||
|
||||
// perform all the merging
|
||||
err = o.doMerge()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while merging: %v", err)
|
||||
}
|
||||
|
||||
// ensure the store path exists
|
||||
err = os.MkdirAll(o.path, 0700)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// move final segment into place
|
||||
// segment id 2 is chosen to match the behavior of a scorch
|
||||
// index which indexes a single batch of data
|
||||
finalSegPath := o.path + string(os.PathSeparator) + zapFileName(2)
|
||||
err = os.Rename(o.segPaths[0], finalSegPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error moving final segment into place: %v", err)
|
||||
}
|
||||
|
||||
// remove the buildPath, as it is no longer needed
|
||||
err = os.RemoveAll(o.buildPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error removing build path: %v", err)
|
||||
}
|
||||
|
||||
// prepare wrapping
|
||||
seg, err := o.segPlugin.Open(finalSegPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening final segment")
|
||||
}
|
||||
|
||||
// create a segment snapshot for this segment
|
||||
ss := &SegmentSnapshot{
|
||||
segment: seg,
|
||||
}
|
||||
is := &IndexSnapshot{
|
||||
epoch: 3, // chosen to match scorch behavior when indexing a single batch
|
||||
segment: []*SegmentSnapshot{ss},
|
||||
creator: "scorch-builder",
|
||||
internal: o.internal,
|
||||
}
|
||||
|
||||
// create the root bolt
|
||||
rootBoltPath := o.path + string(os.PathSeparator) + "root.bolt"
|
||||
rootBolt, err := bolt.Open(rootBoltPath, 0600, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// start a write transaction
|
||||
tx, err := rootBolt.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// fill the root bolt with this fake index snapshot
|
||||
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin)
|
||||
if err != nil {
|
||||
_ = tx.Rollback()
|
||||
_ = rootBolt.Close()
|
||||
return fmt.Errorf("error preparing bolt snapshot in root.bolt: %v", err)
|
||||
}
|
||||
|
||||
// commit bolt data
|
||||
err = tx.Commit()
|
||||
if err != nil {
|
||||
_ = rootBolt.Close()
|
||||
return fmt.Errorf("error committing bolt tx in root.bolt: %v", err)
|
||||
}
|
||||
|
||||
// close bolt
|
||||
err = rootBolt.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error closing root.bolt: %v", err)
|
||||
}
|
||||
|
||||
// close final segment
|
||||
err = seg.Close()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error closing final segment: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|