You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gitea-fork-majority-judgment/vendor/github.com/blevesearch/bleve/index/scorch/builder.go

335 lines
8.3 KiB

// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"io/ioutil"
"os"
"sync"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
bolt "go.etcd.io/bbolt"
)
const DefaultBuilderBatchSize = 1000
const DefaultBuilderMergeMax = 10
type Builder struct {
m sync.Mutex
segCount uint64
path string
buildPath string
segPaths []string
batchSize int
mergeMax int
batch *index.Batch
internal map[string][]byte
segPlugin segment.Plugin
}
func NewBuilder(config map[string]interface{}) (*Builder, error) {
path, ok := config["path"].(string)
if !ok {
return nil, fmt.Errorf("must specify path")
}
buildPathPrefix, _ := config["buildPathPrefix"].(string)
buildPath, err := ioutil.TempDir(buildPathPrefix, "scorch-offline-build")
if err != nil {
return nil, err
}
rv := &Builder{
path: path,
buildPath: buildPath,
mergeMax: DefaultBuilderMergeMax,
batchSize: DefaultBuilderBatchSize,
batch: index.NewBatch(),
segPlugin: defaultSegmentPlugin,
}
err = rv.parseConfig(config)
if err != nil {
return nil, fmt.Errorf("error parsing builder config: %v", err)
}
return rv, nil
}
func (o *Builder) parseConfig(config map[string]interface{}) (err error) {
if v, ok := config["mergeMax"]; ok {
var t int
if t, err = parseToInteger(v); err != nil {
return fmt.Errorf("mergeMax parse err: %v", err)
}
if t > 0 {
o.mergeMax = t
}
}
if v, ok := config["batchSize"]; ok {
var t int
if t, err = parseToInteger(v); err != nil {
return fmt.Errorf("batchSize parse err: %v", err)
}
if t > 0 {
o.batchSize = t
}
}
if v, ok := config["internal"]; ok {
if vinternal, ok := v.(map[string][]byte); ok {
o.internal = vinternal
}
}
forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config)
if err != nil {
return err
}
if forcedSegmentType != "" && forcedSegmentVersion != 0 {
segPlugin, err := chooseSegmentPlugin(forcedSegmentType,
uint32(forcedSegmentVersion))
if err != nil {
return err
}
o.segPlugin = segPlugin
}
return nil
}
// Index will place the document into the index.
// It is invalid to index the same document multiple times.
func (o *Builder) Index(doc *document.Document) error {
o.m.Lock()
defer o.m.Unlock()
o.batch.Update(doc)
return o.maybeFlushBatchLOCKED(o.batchSize)
}
func (o *Builder) maybeFlushBatchLOCKED(moreThan int) error {
if len(o.batch.IndexOps) >= moreThan {
defer o.batch.Reset()
return o.executeBatchLOCKED(o.batch)
}
return nil
}
func (o *Builder) executeBatchLOCKED(batch *index.Batch) (err error) {
analysisResults := make([]*index.AnalysisResult, 0, len(batch.IndexOps))
for _, doc := range batch.IndexOps {
if doc != nil {
// insert _id field
doc.AddField(document.NewTextFieldCustom("_id", nil, []byte(doc.ID), document.IndexField|document.StoreField, nil))
// perform analysis directly
analysisResult := analyze(doc)
analysisResults = append(analysisResults, analysisResult)
}
}
seg, _, err := o.segPlugin.New(analysisResults)
if err != nil {
return fmt.Errorf("error building segment base: %v", err)
}
filename := zapFileName(o.segCount)
o.segCount++
path := o.buildPath + string(os.PathSeparator) + filename
if segUnpersisted, ok := seg.(segment.UnpersistedSegment); ok {
err = segUnpersisted.Persist(path)
if err != nil {
return fmt.Errorf("error persisting segment base to %s: %v", path, err)
}
o.segPaths = append(o.segPaths, path)
return nil
}
return fmt.Errorf("new segment does not implement unpersisted: %T", seg)
}
func (o *Builder) doMerge() error {
// as long as we have more than 1 segment, keep merging
for len(o.segPaths) > 1 {
// merge the next <mergeMax> number of segments into one new one
// or, if there are fewer than <mergeMax> remaining, merge them all
mergeCount := o.mergeMax
if mergeCount > len(o.segPaths) {
mergeCount = len(o.segPaths)
}
mergePaths := o.segPaths[0:mergeCount]
o.segPaths = o.segPaths[mergeCount:]
// open each of the segments to be merged
mergeSegs := make([]segment.Segment, 0, mergeCount)
// closeOpenedSegs attempts to close all opened
// segments even if an error occurs, in which case
// the first error is returned
closeOpenedSegs := func() error {
var err error
for _, seg := range mergeSegs {
clErr := seg.Close()
if clErr != nil && err == nil {
err = clErr
}
}
return err
}
for _, mergePath := range mergePaths {
seg, err := o.segPlugin.Open(mergePath)
if err != nil {
_ = closeOpenedSegs()
return fmt.Errorf("error opening segment (%s) for merge: %v", mergePath, err)
}
mergeSegs = append(mergeSegs, seg)
}
// do the merge
mergedSegPath := o.buildPath + string(os.PathSeparator) + zapFileName(o.segCount)
drops := make([]*roaring.Bitmap, mergeCount)
_, _, err := o.segPlugin.Merge(mergeSegs, drops, mergedSegPath, nil, nil)
if err != nil {
_ = closeOpenedSegs()
return fmt.Errorf("error merging segments (%v): %v", mergePaths, err)
}
o.segCount++
o.segPaths = append(o.segPaths, mergedSegPath)
// close segments opened for merge
err = closeOpenedSegs()
if err != nil {
return fmt.Errorf("error closing opened segments: %v", err)
}
// remove merged segments
for _, mergePath := range mergePaths {
err = os.RemoveAll(mergePath)
if err != nil {
return fmt.Errorf("error removing segment %s after merge: %v", mergePath, err)
}
}
}
return nil
}
func (o *Builder) Close() error {
o.m.Lock()
defer o.m.Unlock()
// see if there is a partial batch
err := o.maybeFlushBatchLOCKED(1)
if err != nil {
return fmt.Errorf("error flushing batch before close: %v", err)
}
// perform all the merging
err = o.doMerge()
if err != nil {
return fmt.Errorf("error while merging: %v", err)
}
// ensure the store path exists
err = os.MkdirAll(o.path, 0700)
if err != nil {
return err
}
// move final segment into place
// segment id 2 is chosen to match the behavior of a scorch
// index which indexes a single batch of data
finalSegPath := o.path + string(os.PathSeparator) + zapFileName(2)
err = os.Rename(o.segPaths[0], finalSegPath)
if err != nil {
return fmt.Errorf("error moving final segment into place: %v", err)
}
// remove the buildPath, as it is no longer needed
err = os.RemoveAll(o.buildPath)
if err != nil {
return fmt.Errorf("error removing build path: %v", err)
}
// prepare wrapping
seg, err := o.segPlugin.Open(finalSegPath)
if err != nil {
return fmt.Errorf("error opening final segment")
}
// create a segment snapshot for this segment
ss := &SegmentSnapshot{
segment: seg,
}
is := &IndexSnapshot{
epoch: 3, // chosen to match scorch behavior when indexing a single batch
segment: []*SegmentSnapshot{ss},
creator: "scorch-builder",
internal: o.internal,
}
// create the root bolt
rootBoltPath := o.path + string(os.PathSeparator) + "root.bolt"
rootBolt, err := bolt.Open(rootBoltPath, 0600, nil)
if err != nil {
return err
}
// start a write transaction
tx, err := rootBolt.Begin(true)
if err != nil {
return err
}
// fill the root bolt with this fake index snapshot
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin)
if err != nil {
_ = tx.Rollback()
_ = rootBolt.Close()
return fmt.Errorf("error preparing bolt snapshot in root.bolt: %v", err)
}
// commit bolt data
err = tx.Commit()
if err != nil {
_ = rootBolt.Close()
return fmt.Errorf("error committing bolt tx in root.bolt: %v", err)
}
// close bolt
err = rootBolt.Close()
if err != nil {
return fmt.Errorf("error closing root.bolt: %v", err)
}
// close final segment
err = seg.Close()
if err != nil {
return fmt.Errorf("error closing final segment: %v", err)
}
return nil
}