Upgrade bleve to v1.0.10 (#12737)

* Fix bug in migration 111

* Upgrade bleve to 1.0.10

Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Lunny Xiao 3 years ago committed by GitHub
parent 1b9d5074a7
commit d17efaa114

@@ -18,9 +18,8 @@ require (
gitea.com/macaron/toolbox v0.0.0-20190822013122-05ff0fc766b7
github.com/BurntSushi/toml v0.3.1
github.com/PuerkitoBio/goquery v1.5.1
github.com/RoaringBitmap/roaring v0.4.23 // indirect
github.com/alecthomas/chroma v0.8.0
github.com/blevesearch/bleve v1.0.7
github.com/blevesearch/bleve v1.0.10
github.com/couchbase/gomemcached v0.0.0-20191004160342-7b5da2ec40b2 // indirect
github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect
github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect

@@ -63,8 +63,6 @@ github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tN
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/RoaringBitmap/roaring v0.4.21 h1:WJ/zIlNX4wQZ9x8Ey33O1UaD9TCTakYsdLFSBcTwH+8=
github.com/RoaringBitmap/roaring v0.4.21/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
github.com/RoaringBitmap/roaring v0.4.23 h1:gpyfd12QohbqhFO4NVDUdoPOCXsyahYRQhINmlHxKeo=
github.com/RoaringBitmap/roaring v0.4.23/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
@@ -117,8 +115,8 @@ github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6r
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bketelsen/crypt v0.0.3-0.20200106085610-5cbc8cc4026c/go.mod h1:MKsuJmJgSg28kpZDP6UIiPt0e0Oz0kqKNGyRaWEPv84=
github.com/blevesearch/bleve v1.0.7 h1:4PspZE7XABMSKcVpzAKp0E05Yer1PIYmTWk+1ngNr/c=
github.com/blevesearch/bleve v1.0.7/go.mod h1:3xvmBtaw12Y4C9iA1RTzwWCof5j5HjydjCTiDE2TeE0=
github.com/blevesearch/bleve v1.0.10 h1:DxFXeC+faL+5LVTlljUDpP9eXj3mleiQem3DuSjepqQ=
github.com/blevesearch/bleve v1.0.10/go.mod h1:KHAOH5HuVGn9fo+dN5TkqcA1HcuOQ89goLWVWXZDl8w=
github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040 h1:SjYVcfJVZoCfBlg+fkaq2eoZHTf5HaJfaTeTkOtyfHQ=
github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040/go.mod h1:WH+MU2F4T0VmSdaPX+Wu5GYoZBrYWdOZWSjzvYcDmqQ=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
@@ -129,10 +127,14 @@ github.com/blevesearch/segment v0.9.0 h1:5lG7yBCx98or7gK2cHMKPukPZ/31Kag7nONpoBt
github.com/blevesearch/segment v0.9.0/go.mod h1:9PfHYUdQCgHktBgvtUOF4x+pc4/l8rdH0u5spnW85UQ=
github.com/blevesearch/snowballstem v0.9.0 h1:lMQ189YspGP6sXvZQ4WZ+MLawfV8wOmPoD/iWeNXm8s=
github.com/blevesearch/snowballstem v0.9.0/go.mod h1:PivSj3JMc8WuaFkTSRDW2SlrulNWPl4ABg1tC/hlgLs=
github.com/blevesearch/zap/v11 v11.0.7 h1:nnmAOP6eXBkqEa1Srq1eqA5Wmn4w+BZjLdjynNxvd+M=
github.com/blevesearch/zap/v11 v11.0.7/go.mod h1:bJoY56fdU2m/IP4LLz/1h4jY2thBoREvoqbuJ8zhm9k=
github.com/blevesearch/zap/v12 v12.0.7 h1:y8FWSAYkdc4p1dn4YLxNNr1dxXlSUsakJh2Fc/r6cj4=
github.com/blevesearch/zap/v12 v12.0.7/go.mod h1:70DNK4ZN4tb42LubeDbfpp6xnm8g3ROYVvvZ6pEoXD8=
github.com/blevesearch/zap/v11 v11.0.10 h1:zJdl+cnxT0Yt2hA6meG+OIat3oSA4rERfrNX2CSchII=
github.com/blevesearch/zap/v11 v11.0.10/go.mod h1:BdqdgKy6u0Jgw/CqrMfP2Gue/EldcfvB/3eFzrzhIfw=
github.com/blevesearch/zap/v12 v12.0.10 h1:T1/GXNBxC9eetfuMwCM5RLWXeharSMyAdNEdXVtBuHA=
github.com/blevesearch/zap/v12 v12.0.10/go.mod h1:QtKkjpmV/sVFEnKSaIWPXZJAaekL97TrTV3ImhNx+nw=
github.com/blevesearch/zap/v13 v13.0.2 h1:quhI5OVFX33dhPpUW+nLyXGpu7QT8qTgzu6qA/fRRXM=
github.com/blevesearch/zap/v13 v13.0.2/go.mod h1:/9QLKla8/8mloJvQQutPhB+tw6y35urvKeAFeun2JGA=
github.com/blevesearch/zap/v14 v14.0.1 h1:s8KeqX53Vc4eRaziHsnY2bYUE+8IktWqRL9W5H5VDMY=
github.com/blevesearch/zap/v14 v14.0.1/go.mod h1:Y+tUL9TypMca5+96m7iJb2lpcntETXSeDoI5BBX2tvY=
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc h1:biVzkmvwrH8WK8raXaxBx6fRVTlJILwEwQGL1I/ByEI=
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/bradfitz/gomemcache v0.0.0-20190329173943-551aad21a668 h1:U/lr3Dgy4WK+hNk4tyD+nuGjpVLPEHuJSFXMw11/HPA=
@@ -164,8 +166,8 @@ github.com/couchbase/goutils v0.0.0-20190315194238-f9d42b11473b/go.mod h1:BQwMFl
github.com/couchbase/goutils v0.0.0-20191018232750-b49639060d85 h1:0WMIDtuXCKEm4wtAJgAAXa/qtM5O9MariLwgHaRlYmk=
github.com/couchbase/goutils v0.0.0-20191018232750-b49639060d85/go.mod h1:BQwMFlJzDjFDG3DJUdU0KORxn88UlsOULuxLExMh3Hs=
github.com/couchbase/moss v0.1.0/go.mod h1:9MaHIaRuy9pvLPUJxB8sh8OrLfyDczECVL37grCIubs=
github.com/couchbase/vellum v1.0.1 h1:qrj9ohvZedvc51S5KzPfJ6P6z0Vqzv7Lx7k3mVc2WOk=
github.com/couchbase/vellum v1.0.1/go.mod h1:FcwrEivFpNi24R3jLOs3n+fs5RnuQnQqCLBJ1uAg1W4=
github.com/couchbase/vellum v1.0.2 h1:BrbP0NKiyDdndMPec8Jjhy0U47CZ0Lgx3xUC2r9rZqw=
github.com/couchbase/vellum v1.0.2/go.mod h1:FcwrEivFpNi24R3jLOs3n+fs5RnuQnQqCLBJ1uAg1W4=
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7 h1:1XjEY/gnjQ+AfXef2U6dxCquhiRzkEpxZuWqs+QxTL8=
github.com/couchbaselabs/go-couchbase v0.0.0-20190708161019-23e7ca2ce2b7/go.mod h1:mby/05p8HE5yHEAKiIH/555NoblMs7PtW6NrYshDruc=
github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE=
@@ -914,8 +916,8 @@ github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxt
github.com/ziutek/mymysql v1.5.4 h1:GB0qdRGsTwQSBVYuVShFBKaXSnSnYYC2d9knnE1LHFs=
github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/bbolt v1.3.4 h1:hi1bXHMVrlQh6WwxAy+qZCV/SYIlqo+Ushwdpa4tAKg=
go.etcd.io/bbolt v1.3.4/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
go.etcd.io/bbolt v1.3.5 h1:XAzx9gjCb0Rxj7EoqcClPD1d5ZBxZJk0jbuoPHenBt0=
go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ=
go.mongodb.org/mongo-driver v1.0.3/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM=
go.mongodb.org/mongo-driver v1.1.1 h1:Sq1fR+0c58RME5EoqKdjkiQAmPjmfHlZOoRI6fTUOcs=
go.mongodb.org/mongo-driver v1.1.1/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM=

@@ -1,10 +1,13 @@
# ![bleve](docs/bleve.png) bleve
[![Build Status](https://travis-ci.org/blevesearch/bleve.svg?branch=master)](https://travis-ci.org/blevesearch/bleve) [![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg?branch=master)](https://coveralls.io/github/blevesearch/bleve?branch=master) [![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve)
[![Tests](https://github.com/blevesearch/bleve/workflows/Tests/badge.svg?branch=master&event=push)](https://github.com/blevesearch/bleve/actions?query=workflow%3ATests+event%3Apush+branch%3Amaster)
[![Coverage Status](https://coveralls.io/repos/github/blevesearch/bleve/badge.svg?branch=master)](https://coveralls.io/github/blevesearch/bleve?branch=master)
[![GoDoc](https://godoc.org/github.com/blevesearch/bleve?status.svg)](https://godoc.org/github.com/blevesearch/bleve)
[![Join the chat at https://gitter.im/blevesearch/bleve](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/blevesearch/bleve?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
[![codebeat](https://codebeat.co/badges/38a7cbc9-9cf5-41c0-a315-0746178230f4)](https://codebeat.co/projects/github-com-blevesearch-bleve)
[![Go Report Card](https://goreportcard.com/badge/blevesearch/bleve)](https://goreportcard.com/report/blevesearch/bleve)
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
[![Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve/-/badge.svg)](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
modern text indexing in go - [blevesearch.com](http://www.blevesearch.com/)

@@ -0,0 +1,94 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch"
"github.com/blevesearch/bleve/mapping"
)
type builderImpl struct {
b index.IndexBuilder
m mapping.IndexMapping
}
func (b *builderImpl) Index(id string, data interface{}) error {
if id == "" {
return ErrorEmptyID
}
doc := document.NewDocument(id)
err := b.m.MapDocument(doc, data)
if err != nil {
return err
}
err = b.b.Index(doc)
return err
}
func (b *builderImpl) Close() error {
return b.b.Close()
}
func newBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) {
if path == "" {
return nil, fmt.Errorf("builder requires path")
}
err := mapping.Validate()
if err != nil {
return nil, err
}
if config == nil {
config = map[string]interface{}{}
}
// the builder does not have an API to interact with internal storage;
// however, we can pass k/v pairs through the config
mappingBytes, err := json.Marshal(mapping)
if err != nil {
return nil, err
}
config["internal"] = map[string][]byte{
string(mappingInternalKey): mappingBytes,
}
// do not use real config, as these are options for the builder,
// not the resulting index
meta := newIndexMeta(scorch.Name, scorch.Name, map[string]interface{}{})
err = meta.Save(path)
if err != nil {
return nil, err
}
config["path"] = indexStorePath(path)
b, err := scorch.NewBuilder(config)
if err != nil {
return nil, err
}
rv := &builderImpl{
b: b,
m: mapping,
}
return rv, nil
}

@@ -3,16 +3,17 @@ module github.com/blevesearch/bleve
go 1.13
require (
github.com/RoaringBitmap/roaring v0.4.21
github.com/RoaringBitmap/roaring v0.4.23
github.com/blevesearch/blevex v0.0.0-20190916190636-152f0fe5c040
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/segment v0.9.0
github.com/blevesearch/snowballstem v0.9.0
github.com/blevesearch/zap/v11 v11.0.7
github.com/blevesearch/zap/v12 v12.0.7
github.com/couchbase/ghistogram v0.1.0 // indirect
github.com/blevesearch/zap/v11 v11.0.10
github.com/blevesearch/zap/v12 v12.0.10
github.com/blevesearch/zap/v13 v13.0.2
github.com/blevesearch/zap/v14 v14.0.1
github.com/couchbase/moss v0.1.0
github.com/couchbase/vellum v1.0.1
github.com/couchbase/vellum v1.0.2
github.com/golang/protobuf v1.3.2
github.com/kljensen/snowball v0.6.0
github.com/rcrowley/go-metrics v0.0.0-20190826022208-cac0b30c2563
@@ -20,6 +21,6 @@ require (
github.com/steveyen/gtreap v0.1.0
github.com/syndtr/goleveldb v1.0.0
github.com/willf/bitset v1.1.10
go.etcd.io/bbolt v1.3.4
go.etcd.io/bbolt v1.3.5
golang.org/x/text v0.3.0
)

@@ -293,3 +293,17 @@ func Open(path string) (Index, error) {
func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) {
return openIndexUsing(path, runtimeConfig)
}
// Builder is a limited interface, used to build indexes in an offline mode.
// Items cannot be updated or deleted, and the caller MUST ensure a document is
// indexed only once.
type Builder interface {
Index(id string, data interface{}) error
Close() error
}
// NewBuilder creates a builder, which will build an index at the specified path,
// using the specified mapping and options.
func NewBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) {
return newBuilder(path, mapping, config)
}
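The new offline-builder API is easiest to see from the caller's side. Below is a minimal sketch; the path and documents are hypothetical, while `NewBuilder`, `Index`, and `Close` are the API added above. Note the path must not already exist, and each document ID may be indexed only once.

```go
package main

import (
	"fmt"
	"log"

	"github.com/blevesearch/bleve"
)

func main() {
	// Build an index offline at a hypothetical path that does not yet exist.
	// Items cannot be updated or deleted through this interface.
	builder, err := bleve.NewBuilder("/tmp/offline.bleve", bleve.NewIndexMapping(), nil)
	if err != nil {
		log.Fatal(err)
	}
	for i := 0; i < 10000; i++ {
		data := map[string]interface{}{"body": fmt.Sprintf("document %d", i)}
		if err := builder.Index(fmt.Sprintf("doc-%d", i), data); err != nil {
			log.Fatal(err)
		}
	}
	// Close flushes the last batch, merges segments down to one,
	// and writes out the final index.
	if err := builder.Close(); err != nil {
		log.Fatal(err)
	}
	// The result can then be opened like any other bleve index.
	idx, err := bleve.Open("/tmp/offline.bleve")
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()
}
```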

@@ -367,3 +367,10 @@ type OptimizableContext interface {
type DocValueReader interface {
VisitDocValues(id IndexInternalID, visitor DocumentFieldTermVisitor) error
}
// IndexBuilder is an interface supported by some index schemes
// to allow direct write-only index building
type IndexBuilder interface {
Index(doc *document.Document) error
Close() error
}
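IndexBuilder is the internal counterpart that the bleve-level builderImpl above adapts to. A toy sketch, purely illustrative and not part of this change, showing what satisfying the write-only contract looks like (the countingBuilder type is hypothetical):

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index"
)

// countingBuilder is a hypothetical IndexBuilder that only counts documents;
// it illustrates the contract: documents go in, nothing is read back,
// updated, or deleted.
type countingBuilder struct{ n int }

func (c *countingBuilder) Index(doc *document.Document) error { c.n++; return nil }

func (c *countingBuilder) Close() error {
	fmt.Printf("indexed %d docs\n", c.n)
	return nil
}

func main() {
	var b index.IndexBuilder = &countingBuilder{}
	_ = b.Index(document.NewDocument("doc-1"))
	_ = b.Close()
}
```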

@@ -0,0 +1,334 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"io/ioutil"
"os"
"sync"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
bolt "go.etcd.io/bbolt"
)
const DefaultBuilderBatchSize = 1000
const DefaultBuilderMergeMax = 10
type Builder struct {
m sync.Mutex
segCount uint64
path string
buildPath string
segPaths []string
batchSize int
mergeMax int
batch *index.Batch
internal map[string][]byte
segPlugin segment.Plugin
}
func NewBuilder(config map[string]interface{}) (*Builder, error) {
path, ok := config["path"].(string)
if !ok {
return nil, fmt.Errorf("must specify path")
}
buildPathPrefix, _ := config["buildPathPrefix"].(string)
buildPath, err := ioutil.TempDir(buildPathPrefix, "scorch-offline-build")
if err != nil {
return nil, err
}
rv := &Builder{
path: path,
buildPath: buildPath,
mergeMax: DefaultBuilderMergeMax,
batchSize: DefaultBuilderBatchSize,
batch: index.NewBatch(),
segPlugin: defaultSegmentPlugin,
}
err = rv.parseConfig(config)
if err != nil {
return nil, fmt.Errorf("error parsing builder config: %v", err)
}
return rv, nil
}
func (o *Builder) parseConfig(config map[string]interface{}) (err error) {
if v, ok := config["mergeMax"]; ok {
var t int
if t, err = parseToInteger(v); err != nil {
return fmt.Errorf("mergeMax parse err: %v", err)
}
if t > 0 {
o.mergeMax = t
}
}
if v, ok := config["batchSize"]; ok {
var t int
if t, err = parseToInteger(v); err != nil {
return fmt.Errorf("batchSize parse err: %v", err)
}
if t > 0 {
o.batchSize = t
}
}
if v, ok := config["internal"]; ok {
if vinternal, ok := v.(map[string][]byte); ok {
o.internal = vinternal
}
}
forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config)
if err != nil {
return err
}
if forcedSegmentType != "" && forcedSegmentVersion != 0 {
segPlugin, err := chooseSegmentPlugin(forcedSegmentType,
uint32(forcedSegmentVersion))
if err != nil {
return err
}
o.segPlugin = segPlugin
}
return nil
}
// Index will place the document into the index.
// It is invalid to index the same document multiple times.
func (o *Builder) Index(doc *document.Document) error {
o.m.Lock()
defer o.m.Unlock()
o.batch.Update(doc)
return o.maybeFlushBatchLOCKED(o.batchSize)
}
func (o *Builder) maybeFlushBatchLOCKED(moreThan int) error {
if len(o.batch.IndexOps) >= moreThan {
defer o.batch.Reset()
return o.executeBatchLOCKED(o.batch)
}
return nil
}
func (o *Builder) executeBatchLOCKED(batch *index.Batch) (err error) {
analysisResults := make([]*index.AnalysisResult, 0, len(batch.IndexOps))
for _, doc := range batch.IndexOps {
if doc != nil {
// insert _id field
doc.AddField(document.NewTextFieldCustom("_id", nil, []byte(doc.ID), document.IndexField|document.StoreField, nil))
// perform analysis directly
analysisResult := analyze(doc)
analysisResults = append(analysisResults, analysisResult)
}
}
seg, _, err := o.segPlugin.New(analysisResults)
if err != nil {
return fmt.Errorf("error building segment base: %v", err)
}
filename := zapFileName(o.segCount)
o.segCount++
path := o.buildPath + string(os.PathSeparator) + filename
if segUnpersisted, ok := seg.(segment.UnpersistedSegment); ok {
err = segUnpersisted.Persist(path)
if err != nil {
return fmt.Errorf("error persisting segment base to %s: %v", path, err)
}
o.segPaths = append(o.segPaths, path)
return nil
}
return fmt.Errorf("new segment does not implement unpersisted: %T", seg)
}
func (o *Builder) doMerge() error {
// as long as we have more than 1 segment, keep merging
for len(o.segPaths) > 1 {
// merge the next <mergeMax> segments into one new segment,
// or, if fewer than <mergeMax> remain, merge them all
mergeCount := o.mergeMax
if mergeCount > len(o.segPaths) {
mergeCount = len(o.segPaths)
}
mergePaths := o.segPaths[0:mergeCount]
o.segPaths = o.segPaths[mergeCount:]
// open each of the segments to be merged
mergeSegs := make([]segment.Segment, 0, mergeCount)
// closeOpenedSegs attempts to close all opened
// segments even if an error occurs, in which case
// the first error is returned
closeOpenedSegs := func() error {
var err error
for _, seg := range mergeSegs {
clErr := seg.Close()
if clErr != nil && err == nil {
err = clErr
}
}
return err
}
for _, mergePath := range mergePaths {
seg, err := o.segPlugin.Open(mergePath)
if err != nil {
_ = closeOpenedSegs()
return fmt.Errorf("error opening segment (%s) for merge: %v", mergePath, err)
}
mergeSegs = append(mergeSegs, seg)
}
// do the merge
mergedSegPath := o.buildPath + string(os.PathSeparator) + zapFileName(o.segCount)
drops := make([]*roaring.Bitmap, mergeCount)
_, _, err := o.segPlugin.Merge(mergeSegs, drops, mergedSegPath, nil, nil)
if err != nil {
_ = closeOpenedSegs()
return fmt.Errorf("error merging segments (%v): %v", mergePaths, err)
}
o.segCount++
o.segPaths = append(o.segPaths, mergedSegPath)
// close segments opened for merge
err = closeOpenedSegs()
if err != nil {
return fmt.Errorf("error closing opened segments: %v", err)
}
// remove merged segments
for _, mergePath := range mergePaths {
err = os.RemoveAll(mergePath)
if err != nil {
return fmt.Errorf("error removing segment %s after merge: %v", mergePath, err)
}
}
}
return nil
}
func (o *Builder) Close() error {
o.m.Lock()
defer o.m.Unlock()
// see if there is a partial batch
err := o.maybeFlushBatchLOCKED(1)
if err != nil {
return fmt.Errorf("error flushing batch before close: %v", err)
}
// perform all the merging
err = o.doMerge()
if err != nil {
return fmt.Errorf("error while merging: %v", err)
}
// ensure the store path exists
err = os.MkdirAll(o.path, 0700)
if err != nil {
return err
}
// move final segment into place
// segment id 2 is chosen to match the behavior of a scorch
// index which indexes a single batch of data
finalSegPath := o.path + string(os.PathSeparator) + zapFileName(2)
err = os.Rename(o.segPaths[0], finalSegPath)
if err != nil {
return fmt.Errorf("error moving final segment into place: %v", err)
}
// remove the buildPath, as it is no longer needed
err = os.RemoveAll(o.buildPath)
if err != nil {
return fmt.Errorf("error removing build path: %v", err)
}
// prepare wrapping
seg, err := o.segPlugin.Open(finalSegPath)
if err != nil {
return fmt.Errorf("error opening final segment")
}
// create a segment snapshot for this segment
ss := &SegmentSnapshot{
segment: seg,
}
is := &IndexSnapshot{
epoch: 3, // chosen to match scorch behavior when indexing a single batch
segment: []*SegmentSnapshot{ss},
creator: "scorch-builder",
internal: o.internal,
}
// create the root bolt
rootBoltPath := o.path + string(os.PathSeparator) + "root.bolt"
rootBolt, err := bolt.Open(rootBoltPath, 0600, nil)
if err != nil {
return err
}
// start a write transaction
tx, err := rootBolt.Begin(true)
if err != nil {
return err
}
// fill the root bolt with this fake index snapshot
_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin)
if err != nil {
_ = tx.Rollback()
_ = rootBolt.Close()
return fmt.Errorf("error preparing bolt snapshot in root.bolt: %v", err)
}
// commit bolt data
err = tx.Commit()
if err != nil {
_ = rootBolt.Close()
return fmt.Errorf("error committing bolt tx in root.bolt: %v", err)
}
// close bolt
err = rootBolt.Close()
if err != nil {
return fmt.Errorf("error closing root.bolt: %v", err)
}
// close final segment
err = seg.Close()
if err != nil {
return fmt.Errorf("error closing final segment: %v", err)
}
return nil
}
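For reference, all of the scorch builder's config knobs appear in NewBuilder and parseConfig above. A sketch of driving the builder directly (normally bleve's newBuilder does this for you); the paths and tuning values below are hypothetical:

```go
package main

import (
	"log"

	"github.com/blevesearch/bleve/document"
	"github.com/blevesearch/bleve/index/scorch"
)

func main() {
	config := map[string]interface{}{
		"path":            "/tmp/index/store", // required: final index location
		"buildPathPrefix": "/tmp/scratch",     // optional temp dir prefix (must exist; "" means system temp)
		"batchSize":       5000,               // flush a new segment every N docs
		"mergeMax":        8,                  // merge up to N segments per pass
	}
	b, err := scorch.NewBuilder(config)
	if err != nil {
		log.Fatal(err)
	}
	// Each document must be indexed exactly once.
	doc := document.NewDocument("doc-1")
	doc.AddField(document.NewTextField("body", nil, []byte("hello scorch")))
	if err := b.Index(doc); err != nil {
		log.Fatal(err)
	}
	// Close flushes the partial batch, merges everything down to a single
	// segment, and writes root.bolt alongside it.
	if err := b.Close(); err != nil {
		log.Fatal(err)
	}
}
```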

@@ -54,3 +54,11 @@ var EventKindBatchIntroductionStart = EventKind(5)
// EventKindBatchIntroduction is fired when Batch() completes.
var EventKindBatchIntroduction = EventKind(6)
// EventKindMergeTaskIntroductionStart is fired when the merger is about to
// start the introduction of a merged segment from a single merge task.
var EventKindMergeTaskIntroductionStart = EventKind(7)
// EventKindMergeTaskIntroduction is fired when the merger has completed
// the introduction of a merged segment from a single merge task.
var EventKindMergeTaskIntroduction = EventKind(8)
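These two EventKinds can be observed through scorch's existing event-callback mechanism. A hedged sketch, assuming the RegisterEventCallback hook and the "eventCallbackName" config key already used for the other EventKinds; the callback name is hypothetical:

```go
package main

import (
	"log"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/index/scorch"
)

func main() {
	// Assumption: RegisterEventCallback plus the "eventCallbackName" config
	// entry is how scorch delivers EventKind notifications.
	err := scorch.RegisterEventCallback("merge-logger", func(e scorch.Event) {
		switch e.Kind {
		case scorch.EventKindMergeTaskIntroductionStart:
			log.Println("merge task introduction starting")
		case scorch.EventKindMergeTaskIntroduction:
			log.Printf("merge task introduced (duration %v)", e.Duration)
		}
	})
	if err != nil {
		log.Fatal(err)
	}
	idx, err := bleve.NewUsing("/tmp/events.bleve", bleve.NewIndexMapping(),
		scorch.Name, scorch.Name, map[string]interface{}{
			"eventCallbackName": "merge-logger",
		})
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()
}
```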

@@ -45,13 +45,7 @@ type epochWatcher struct {
notifyCh notificationChan
}
type snapshotReversion struct {
snapshot *IndexSnapshot
applied chan error
persisted chan error
}
func (s *Scorch) mainLoop() {
func (s *Scorch) introducerLoop() {
var epochWatchers []*epochWatcher
OUTER:
for {
@@ -389,6 +383,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
}
}
}
var skipped bool
// If all the docs in the newly merged segment have been deleted by the
// time we reach here, the introduction can be skipped.
if nextMerge.new != nil &&
@@ -411,6 +406,9 @@
docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality()
memSegments++
}
} else {
skipped = true
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsObsoleted, 1)
}
atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
@@ -435,8 +433,10 @@
}
// notify requester that we incorporated this
nextMerge.notify <- newSnapshot
close(nextMerge.notify)
nextMerge.notifyCh <- &mergeTaskIntroStatus{
indexSnapshot: newSnapshot,
skipped: skipped}
close(nextMerge.notifyCh)
}
func isMemorySegment(s *SegmentSnapshot) bool {

@@ -15,6 +15,7 @@
package scorch
import (
"context"
"encoding/json"
"fmt"
"os"
@@ -29,12 +30,16 @@ import (
func (s *Scorch) mergerLoop() {
var lastEpochMergePlanned uint64
var ctrlMsg *mergerCtrl
mergePlannerOptions, err := s.parseMergePlannerOptions()
if err != nil {
s.fireAsyncError(fmt.Errorf("mergePlannerOption json parsing err: %v", err))
s.asyncTasks.Done()
return
}
ctrlMsgDflt := &mergerCtrl{ctx: context.Background(),
options: mergePlannerOptions,
doneCh: nil}
OUTER:
for {
@@ -53,16 +58,30 @@ OUTER:
atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
s.rootLock.Unlock()
if ourSnapshot.epoch != lastEpochMergePlanned {
if ctrlMsg == nil && ourSnapshot.epoch != lastEpochMergePlanned {
ctrlMsg = ctrlMsgDflt
}
if ctrlMsg != nil {
startTime := time.Now()
// let's get started
err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions)
err := s.planMergeAtSnapshot(ctrlMsg.ctx, ctrlMsg.options,
ourSnapshot)
if err != nil {
atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
if err == segment.ErrClosed {
// index has been closed
_ = ourSnapshot.DecRef()
// continue the workloop on a user triggered cancel
if ctrlMsg.doneCh != nil {
close(ctrlMsg.doneCh)
ctrlMsg = nil
continue OUTER
}
// exit the workloop on index closure
ctrlMsg = nil
break OUTER
}
s.fireAsyncError(fmt.Errorf("merging err: %v", err))
@@ -70,6 +89,12 @@ OUTER:
atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1)
continue OUTER
}
if ctrlMsg.doneCh != nil {
close(ctrlMsg.doneCh)
}
ctrlMsg = nil
lastEpochMergePlanned = ourSnapshot.epoch
atomic.StoreUint64(&s.stats.LastMergedEpoch, ourSnapshot.epoch)
@@ -90,6 +115,8 @@ OUTER:
case <-s.closeCh:
break OUTER
case s.persisterNotifier <- ew:
case ctrlMsg = <-s.forceMergeRequestCh:
continue OUTER
}
// now wait for persister (but also detect close)
@@ -97,6 +124,7 @@ OUTER:
case <-s.closeCh:
break OUTER
case <-ew.notifyCh:
case ctrlMsg = <-s.forceMergeRequestCh:
}
}
@@ -106,6 +134,58 @@ OUTER:
s.asyncTasks.Done()
}
type mergerCtrl struct {
ctx context.Context
options *mergeplan.MergePlanOptions
doneCh chan struct{}
}
// ForceMerge helps users trigger a merge operation on
// an online scorch index.
func (s *Scorch) ForceMerge(ctx context.Context,
mo *mergeplan.MergePlanOptions) error {
// check whether a force merge is already in progress
s.rootLock.Lock()
if s.stats.TotFileMergeForceOpsStarted >
s.stats.TotFileMergeForceOpsCompleted {
s.rootLock.Unlock()
return fmt.Errorf("force merge already in progress")
}
s.stats.TotFileMergeForceOpsStarted++
s.rootLock.Unlock()
if mo != nil {
err := mergeplan.ValidateMergePlannerOptions(mo)
if err != nil {
return err
}
} else {
// assume the default single segment merge policy
mo = &mergeplan.SingleSegmentMergePlanOptions
}
msg := &mergerCtrl{options: mo,
doneCh: make(chan struct{}),
ctx: ctx,
}
// request the merger perform a force merge
select {
case s.forceMergeRequestCh <- msg:
case <-s.closeCh:
return nil
}
// wait for the force merge operation completion
select {
case <-msg.doneCh:
atomic.AddUint64(&s.stats.TotFileMergeForceOpsCompleted, 1)
case <-s.closeCh:
}
return nil
}
func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
error) {
mergePlannerOptions := mergeplan.DefaultMergePlanOptions
@@ -128,8 +208,39 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
return &mergePlannerOptions, nil
}
func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
options *mergeplan.MergePlanOptions) error {
type closeChWrapper struct {
ch1 chan struct{}
ctx context.Context
closeCh chan struct{}
}
func newCloseChWrapper(ch1 chan struct{},
ctx context.Context) *closeChWrapper {
return &closeChWrapper{ch1: ch1,
ctx: ctx,
closeCh: make(chan struct{})}
}
func (w *closeChWrapper) close() {
select {
case <-w.closeCh:
default:
close(w.closeCh)
}
}
func (w *closeChWrapper) listen() {
select {
case <-w.ch1:
w.close()
case <-w.ctx.Done():
w.close()
case <-w.closeCh:
}
}
func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
options *mergeplan.MergePlanOptions, ourSnapshot *IndexSnapshot) error {
// build list of persisted segments in this snapshot
var onlyPersistedSnapshots []mergeplan.Segment
for _, segmentSnapshot := range ourSnapshot.segment {
@@ -158,6 +269,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
// process tasks in serial for now
var filenames []string
cw := newCloseChWrapper(s.closeCh, ctx)
defer cw.close()
go cw.listen()
for _, task := range resultMergePlan.Tasks {
if len(task.Segments) == 0 {
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
@@ -194,8 +310,9 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
var oldNewDocNums map[uint64][]uint64
var seg segment.Segment
var filename string
if len(segmentsToMerge) > 0 {
filename := zapFileName(newSegmentID)
filename = zapFileName(newSegmentID)
s.markIneligibleForRemoval(filename)
path := s.path + string(os.PathSeparator) + filename
@@ -203,7 +320,7 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path,
s.closeCh, s)
cw.closeCh, s)
atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
@@ -240,9 +357,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
old: oldMap,
oldNewDocNums: oldNewDocNums,
new: seg,
notify: make(chan *IndexSnapshot),
notifyCh: make(chan *mergeTaskIntroStatus),
}
s.fireEvent(EventKindMergeTaskIntroductionStart, 0)
// give it to the introducer
select {
case <-s.closeCh:
@@ -255,18 +374,25 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
introStartTime := time.Now()
// it is safe to block waiting for the merge introduction
// here, as the introducer is bound to handle the notify channel.
newSnapshot := <-sm.notify
introStatus := <-sm.notifyCh
introTime := uint64(time.Since(introStartTime))
atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime)
if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime {
atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime)
}
atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
if newSnapshot != nil {
_ = newSnapshot.DecRef()
if introStatus != nil && introStatus.indexSnapshot != nil {
_ = introStatus.indexSnapshot.DecRef()
if introStatus.skipped {
// close the segment on skipping introduction.
s.unmarkIneligibleForRemoval(filename)
_ = seg.Close()
}
}
atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
s.fireEvent(EventKindMergeTaskIntroduction, 0)
}
// once all the newly merged segment introductions are done,
@@ -279,12 +405,17 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
return nil
}
type mergeTaskIntroStatus struct {
indexSnapshot *IndexSnapshot
skipped bool
}
type segmentMerge struct {
id uint64
old map[uint64]*SegmentSnapshot
oldNewDocNums map[uint64][]uint64
new segment.Segment
notify chan *IndexSnapshot
notifyCh chan *mergeTaskIntroStatus
}
// perform a merging of the given SegmentBase instances into a new,
@@ -334,7 +465,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
old: make(map[uint64]*SegmentSnapshot),
oldNewDocNums: make(map[uint64][]uint64),
new: seg,
notify: make(chan *IndexSnapshot),
notifyCh: make(chan *mergeTaskIntroStatus),
}
for i, idx := range sbsIndexes {
@@ -351,11 +482,20 @@
}
// blockingly wait for the introduction to complete
newSnapshot := <-sm.notify
if newSnapshot != nil {
var newSnapshot *IndexSnapshot
introStatus := <-sm.notifyCh
if introStatus != nil && introStatus.indexSnapshot != nil {
newSnapshot = introStatus.indexSnapshot
atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
if introStatus.skipped {
// close the segment on skipping introduction.
_ = newSnapshot.DecRef()
_ = seg.Close()
newSnapshot = nil
}
}
return newSnapshot, newSegmentID, nil
}
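ForceMerge is exported, so a caller holding the underlying *scorch.Scorch can compact an online index. A sketch, assuming the scorch index is reachable via bleve's Advanced() accessor; the index path is hypothetical, and passing nil options selects the single-segment policy (see SingleSegmentMergePlanOptions below):

```go
package main

import (
	"context"
	"log"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/index/scorch"
)

func main() {
	idx, err := bleve.Open("/tmp/example.bleve") // hypothetical existing index
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()

	// Advanced exposes the underlying index implementation; for a scorch
	// index it can be asserted to *scorch.Scorch.
	internal, _, err := idx.Advanced()
	if err != nil {
		log.Fatal(err)
	}
	s, ok := internal.(*scorch.Scorch)
	if !ok {
		log.Fatal("not a scorch index")
	}

	// nil options means the default single-segment merge policy; the call
	// blocks until the forced merge completes or the index closes.
	if err := s.ForceMerge(context.Background(), nil); err != nil {
		log.Fatal(err)
	}
}
```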

@@ -134,6 +134,17 @@ var DefaultMergePlanOptions = MergePlanOptions{
ReclaimDeletesWeight: 2.0,
}
// SingleSegmentMergePlanOptions helps in creating a
// single segment index.
var SingleSegmentMergePlanOptions = MergePlanOptions{
MaxSegmentsPerTier: 1,
MaxSegmentSize: 1 << 30,
TierGrowth: 1.0,
SegmentsPerMergeTask: 10,
FloorSegmentSize: 1 << 30,
ReclaimDeletesWeight: 2.0,
}
// -------------------------------------------
func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
@@ -173,7 +184,7 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
calcBudget = CalcBudget
}
budgetNumSegments := CalcBudget(eligiblesLiveSize, minLiveSize, o)
budgetNumSegments := calcBudget(eligiblesLiveSize, minLiveSize, o)
scoreSegments := o.ScoreSegments
if scoreSegments == nil {

@@ -16,10 +16,10 @@ package scorch
import (
"fmt"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"sync/atomic"
)
var OptimizeConjunction = true
@@ -40,7 +40,7 @@ func (s *IndexSnapshotTermFieldReader) Optimize(kind string,
return s.optimizeDisjunctionUnadorned(octx)
}
return octx, nil
return nil, nil
}
var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256)
@@ -161,16 +161,8 @@ func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err erro
// We use an artificial term and field because the optimized
// termFieldReader can represent multiple terms and fields.
oTFR := &IndexSnapshotTermFieldReader{
term: OptimizeTFRConjunctionUnadornedTerm,
field: OptimizeTFRConjunctionUnadornedField,
snapshot: o.snapshot,
iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
segmentOffset: 0,
includeFreq: false,
includeNorm: false,
includeTermVectors: false,
}
oTFR := o.snapshot.unadornedTermFieldReader(
OptimizeTFRConjunctionUnadornedTerm, OptimizeTFRConjunctionUnadornedField)
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
@@ -265,6 +257,7 @@ OUTER:
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
}
atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1))
return oTFR, nil
}
@@ -277,7 +270,9 @@ OUTER:
func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned(
octx index.OptimizableContext) (index.OptimizableContext, error) {
if octx == nil {
octx = &OptimizeTFRDisjunctionUnadorned{snapshot: s.snapshot}
octx = &OptimizeTFRDisjunctionUnadorned{
snapshot: s.snapshot,
}
}
o, ok := octx.(*OptimizeTFRDisjunctionUnadorned)
@@ -328,27 +323,12 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro
}
}
}
// Heuristic to skip the optimization if all the constituent
// bitmaps are too small, where the processing & resource
// overhead to create the OR'ed bitmap outweighs the benefit.
if cMax < OptimizeDisjunctionUnadornedMinChildCardinality {
return nil, nil
}
}
// We use an artificial term and field because the optimized
// termFieldReader can represent multiple terms and fields.
oTFR := &IndexSnapshotTermFieldReader{
term: OptimizeTFRDisjunctionUnadornedTerm,
field: OptimizeTFRDisjunctionUnadornedField,
snapshot: o.snapshot,
iterators: make([]segment.PostingsIterator, len(o.snapshot.segment)),
segmentOffset: 0,
includeFreq: false,
includeNorm: false,
includeTermVectors: false,
}
oTFR := o.snapshot.unadornedTermFieldReader(
OptimizeTFRDisjunctionUnadornedTerm, OptimizeTFRDisjunctionUnadornedField)
var docNums []uint32 // Collected docNum's from 1-hit posting lists.
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
@@ -392,5 +372,25 @@ func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err erro
oTFR.iterators[i] = segment.NewUnadornedPostingsIteratorFromBitmap(bm)
}
atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1))
return oTFR, nil
}
// ----------------------------------------------------------------
func (i *IndexSnapshot) unadornedTermFieldReader(
term []byte, field string) *IndexSnapshotTermFieldReader {
// This IndexSnapshotTermFieldReader will not be recycled; more
// conversation here: https://github.com/blevesearch/bleve/pull/1438
return &IndexSnapshotTermFieldReader{
term: term,
field: field,
snapshot: i,
iterators: make([]segment.PostingsIterator, len(i.segment)),
segmentOffset: 0,
includeFreq: false,
includeNorm: false,
includeTermVectors: false,
recycle: false,
}
}

@@ -256,7 +256,7 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
// for sufficient in-memory segments to pile up for the next
// memory merge cum persist loop.
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
po.PersisterNapTimeMSec > 0 && s.NumEventsBlocking() == 0 {
select {
case <-s.closeCh:
case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)):
@@ -333,7 +333,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot,
// Perform in-memory segment merging only when the memory pressure is
// below the configured threshold, else the persister performs the
// direct persistence of segments.
if s.paused() < po.MemoryPressurePauseThreshold {
if s.NumEventsBlocking() < po.MemoryPressurePauseThreshold {
persisted, err := s.persistSnapshotMaybeMerge(snapshot)
if err != nil {
return err
@@ -428,55 +428,44 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) (
return true, nil
}
func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
// start a write transaction
tx, err := s.rootBolt.Begin(true)
if err != nil {
return err
}
// defer rollback on error
defer func() {
if err != nil {
_ = tx.Rollback()
}
}()
func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
segPlugin segment.Plugin) ([]string, map[uint64]string, error) {
snapshotsBucket, err := tx.CreateBucketIfNotExists(boltSnapshotsBucket)
if err != nil {
return err
return nil, nil, err
}
newSnapshotKey := segment.EncodeUvarintAscending(nil, snapshot.epoch)
snapshotBucket, err := snapshotsBucket.CreateBucketIfNotExists(newSnapshotKey)
if err != nil {
return err
return nil, nil, err
}
// persist meta values
metaBucket, err := snapshotBucket.CreateBucketIfNotExists(boltMetaDataKey)
if err != nil {
return err
return nil, nil, err
}
err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(s.segPlugin.Type()))
err = metaBucket.Put(boltMetaDataSegmentTypeKey, []byte(segPlugin.Type()))
if err != nil {
return err
return nil, nil, err
}
buf := make([]byte, binary.MaxVarintLen32)
binary.BigEndian.PutUint32(buf, s.segPlugin.Version())
binary.BigEndian.PutUint32(buf, segPlugin.Version())
err = metaBucket.Put(boltMetaDataSegmentVersionKey, buf)
if err != nil {
return err
return nil, nil, err
}
// persist internal values
internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey)
if err != nil {
return err
return nil, nil, err
}
// TODO optimize writing these in order?
for k, v := range snapshot.internal {
err = internalBucket.Put([]byte(k), v)
if err != nil {
return err
return nil, nil, err
}
}
@@ -488,49 +477,69 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
snapshotSegmentKey := segment.EncodeUvarintAscending(nil, segmentSnapshot.id)
snapshotSegmentBucket, err := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
if err != nil {
return err
return nil, nil, err
}
switch seg := segmentSnapshot.segment.(type) {
case segment.PersistedSegment:
path := seg.Path()
filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
segPath := seg.Path()
filename := strings.TrimPrefix(segPath, path+string(os.PathSeparator))
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
if err != nil {
return err
return nil, nil, err
}
filenames = append(filenames, filename)
case segment.UnpersistedSegment:
// need to persist this to disk
filename := zapFileName(segmentSnapshot.id)
path := s.path + string(os.PathSeparator) + filename
path := path + string(os.PathSeparator) + filename
err = seg.Persist(path)
if err != nil {
return fmt.Errorf("error persisting segment: %v", err)
return nil, nil, fmt.Errorf("error persisting segment: %v", err)
}
newSegmentPaths[segmentSnapshot.id] = path
err = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
if err != nil {
return err
return nil, nil, err
}
filenames = append(filenames, filename)
default:
return fmt.Errorf("unknown segment type: %T", seg)
return nil, nil, fmt.Errorf("unknown segment type: %T", seg)
}
// store current deleted bits
var roaringBuf bytes.Buffer
if segmentSnapshot.deleted != nil {
_, err = segmentSnapshot.deleted.WriteTo(&roaringBuf)
if err != nil {
return fmt.Errorf("error persisting roaring bytes: %v", err)
return nil, nil, fmt.Errorf("error persisting roaring bytes: %v", err)
}
err = snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes())
if err != nil {
return err
return nil, nil, err
}
}
}
return filenames, newSegmentPaths, nil
}
func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) {
// start a write transaction
tx, err := s.rootBolt.Begin(true)
if err != nil {
return err
}
// defer rollback on error
defer func() {
if err != nil {
_ = tx.Rollback()
}
}()
filenames, newSegmentPaths, err := prepareBoltSnapshot(snapshot, tx, s.path, s.segPlugin)
if err != nil {
return err
}
// we need to swap in a new root only when we've persisted 1 or
// more segments -- whereby the new root would have 1-for-1
// replacements of in-memory segments with file-based segments
@@ -780,12 +789,6 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
return rv, nil
}
type uint64Descending []uint64
func (p uint64Descending) Len() int { return len(p) }
func (p uint64Descending) Less(i, j int) bool { return p[i] > p[j] }
func (p uint64Descending) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (s *Scorch) removeOldData() {
removed, err := s.removeOldBoltSnapshots()
if err != nil {
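The NumEventsBlocking-based nap and memory-pressure checks above are driven by persister options. A hedged sketch of tuning them at open time; the "scorchPersisterOptions" key and the values shown are assumptions to verify against your bleve version:

```go
package main

import (
	"log"

	"github.com/blevesearch/bleve"
	"github.com/blevesearch/bleve/index/scorch"
)

func main() {
	// Assumption: scorch JSON-decodes persister tuning from the
	// "scorchPersisterOptions" entry of the index config.
	config := map[string]interface{}{
		"scorchPersisterOptions": map[string]interface{}{
			"PersisterNapTimeMSec":         100, // nap between persists
			"PersisterNapUnderNumFiles":    10,  // only nap below this many on-disk files
			"MemoryPressurePauseThreshold": 5,   // skip in-memory merging above this many blocked events
		},
	}
	idx, err := bleve.NewUsing("/tmp/tuned.bleve", bleve.NewIndexMapping(),
		scorch.Name, scorch.Name, config)
	if err != nil {
		log.Fatal(err)
	}
	defer idx.Close()
}
```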

@@ -73,9 +73,7 @@ type Scorch struct {
onEvent func(event Event)
onAsyncError func(err error)
pauseLock sync.RWMutex
pauseCount uint64
forceMergeRequestCh chan *mergerCtrl
segPlugin segment.Plugin
}
@@ -101,18 +99,15 @@ func NewScorch(storeName string,
nextSnapshotEpoch: 1,
closeCh: make(chan struct{}),
ineligibleForRemoval: map[string]bool{},
forceMergeRequestCh: make(chan *mergerCtrl, 1),
segPlugin: defaultSegmentPlugin,
}
// check if the caller has requested a specific segment type/version
forcedSegmentVersion, ok := config["forceSegmentVersion"].(int)
if ok {
forcedSegmentType, ok2 := config["forceSegmentType"].(string)
if !ok2 {
return nil, fmt.Errorf(
"forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion)
}
forcedSegmentType, forcedSegmentVersion, err := configForceSegmentTypeVersion(config)
if err != nil {
return nil, err
}
if forcedSegmentType != "" && forcedSegmentVersion != 0 {
err := rv.loadSegmentPlugin(forcedSegmentType,
uint32(forcedSegmentVersion))
if err != nil {
@@ -140,30 +135,34 @@ func NewScorch(storeName string,
return rv, nil
}
func (s *Scorch) paused() uint64 {
s.pauseLock.Lock()
pc := s.pauseCount
s.pauseLock.Unlock()
return pc
}
// configForceSegmentTypeVersion checks if the caller has requested a
// specific segment type/version
func configForceSegmentTypeVersion(config map[string]interface{}) (string, uint32, error) {
forcedSegmentVersion, err := parseToInteger(config["forceSegmentVersion"])
if err != nil {
return "", 0, nil
}
func (s *Scorch) incrPause() {
s.pauseLock.Lock()
s.pauseCount++
s.pauseLock.Unlock()
forcedSegmentType, ok := config["forceSegmentType"].(string)
if !ok {
return "", 0, fmt.Errorf(
"forceSegmentVersion set to %d, must also specify forceSegmentType", forcedSegmentVersion)