Update bleve dependency to latest master revision (#6100)

* update bleve to master b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2

* remove unused pkg from dep file

* change bleve from master to recent revision
release/v1.8
Lunny Xiao 5 years ago committed by techknowlogick
parent 11e316654e
commit a380cfd8e0

36
Gopkg.lock generated

@ -40,14 +40,6 @@
revision = "1a28a7fa985680f9f4e1644c0a857ec359a444b0"
version = "v0.4.7"
[[projects]]
branch = "master"
digest = "1:93367b6d47a8ccc7d14f9f493ccf103ccf5afb698559ff8e8f1999427ce27ace"
name = "github.com/Smerity/govarint"
packages = ["."]
pruneopts = "NUT"
revision = "7265e41f48f15fd61751e16da866af3c704bb3ab"
[[projects]]
branch = "master"
digest = "1:d290f4b25abbf574f80f60c8a5603ddada784f13f436b91a9a927bc7ce5a0146"
@ -98,7 +90,8 @@
revision = "3a771d992973f24aa725d07868b467d1ddfceafb"
[[projects]]
digest = "1:c10f35be6200b09e26da267ca80f837315093ecaba27e7a223071380efb9dd32"
branch = "master"
digest = "1:b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2"
name = "github.com/blevesearch/bleve"
packages = [
".",
@ -121,7 +114,6 @@
"index/scorch",
"index/scorch/mergeplan",
"index/scorch/segment",
"index/scorch/segment/mem",
"index/scorch/segment/zap",
"index/store",
"index/store/boltdb",
@ -141,9 +133,10 @@
"search/query",
"search/scorer",
"search/searcher",
"size",
]
pruneopts = "NUT"
revision = "c74e08f039e56cef576e4336382b2a2d12d9e026"
revision = "05d86ea8f6e30456949f612cf68cf4a27ce8c9c5"
[[projects]]
branch = "master"
@ -160,14 +153,6 @@
pruneopts = "NUT"
revision = "db70c57796cc8c310613541dfade3dce627d09c7"
[[projects]]
digest = "1:c7e0968c05659f3973148cd5c5387d6ee960a6ae1b2eaaec0b1d435d806458bb"
name = "github.com/boltdb/bolt"
packages = ["."]
pruneopts = "NUT"
revision = "ccd680d8c1a0179ac3d68f692b01e1a1589cbfc7"
source = "github.com/go-gitea/bolt"
[[projects]]
digest = "1:7c96cf7bf7f52af67f7a8222185813b9b665f5172ec2ac5f7d49ed96e5fcf3e5"
name = "github.com/boombuler/barcode"
@ -217,15 +202,16 @@
[[projects]]
branch = "master"
digest = "1:82e1ad11d777f7bff9a1fc678a8a534a318f85e5026a8a4d6f4a94a6b0678bb6"
digest = "1:6a658ac7d23204dc743c7155557c45273747d78e05ae0579742bd6b744bce215"
name = "github.com/couchbase/vellum"
packages = [
".",
"levenshtein2",
"regexp",
"utf8",
]
pruneopts = "NUT"
revision = "eb6ae3743b3f300f2136f83ca78c08cc071edbd4"
revision = "e91b68ff3efe3cc11723aa25dd315cbc9276cd65"
[[projects]]
branch = "master"
@ -287,6 +273,14 @@
revision = "1615341f118ae12f353cc8a983f35b584342c9b3"
version = "v1.12.0"
[[projects]]
digest = "1:ae8eea1a24ae43a46c2e96631b6303fcc4210ca0ac9d643e4da965029d1b511d"
name = "github.com/etcd-io/bbolt"
packages = ["."]
pruneopts = "NUT"
revision = "63597a96ec0ad9e6d43c3fc81e809909e0237461"
version = "v1.3.2"
[[projects]]
digest = "1:8603f74d35c93b37c615a02ba297be2cf2efc9ff6f1ff2b458a903990b568e48"
name = "github.com/ethantkoenig/rupture"

@ -15,10 +15,8 @@ ignored = ["google.golang.org/appengine*"]
name = "code.gitea.io/sdk"
[[constraint]]
# branch = "master"
revision = "c74e08f039e56cef576e4336382b2a2d12d9e026"
revision = "05d86ea8f6e30456949f612cf68cf4a27ce8c9c5"
name = "github.com/blevesearch/bleve"
#Not targetting v0.7.0 since standard where use only just after this tag
[[constraint]]
revision = "12dd70caea0268ac0d6c2707d0611ef601e7c64e"
@ -108,11 +106,6 @@ ignored = ["google.golang.org/appengine*"]
name = "gopkg.in/testfixtures.v2"
version = "2.0.0"
[[override]]
name = "github.com/boltdb/bolt"
revision = "ccd680d8c1a0179ac3d68f692b01e1a1589cbfc7"
source = "github.com/go-gitea/bolt"
[[override]]
branch = "master"
name = "golang.org/x/oauth2"

@ -1,22 +0,0 @@
The MIT License (MIT)
Copyright (c) 2015 Stephen Merity
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

@ -1,229 +0,0 @@
package govarint
import "encoding/binary"
import "io"
// U32VarintEncoder describes a sink that accepts uint32 values one at a time
// and is closed when the caller is done.
// NOTE(review): PutU32 is declared here as returning only an int, while the
// concrete encoders in this file declare PutU32 as returning (int, error), so
// as written they do not satisfy this interface — confirm which is intended.
type U32VarintEncoder interface {
PutU32(x uint32) int
Close()
}
// U32VarintDecoder describes a source that yields uint32 values one at a time.
// The decoders in this file report exhausted input by returning io.EOF.
type U32VarintDecoder interface {
GetU32() (uint32, error)
}
///
// U64VarintEncoder describes a sink that accepts uint64 values one at a time
// and is closed when the caller is done.
// NOTE(review): PutU64 is declared here as returning only an int, while
// Base128Encoder.PutU64 in this file returns (int, error) — confirm which
// signature is intended.
type U64VarintEncoder interface {
PutU64(x uint64) int
Close()
}
// U64VarintDecoder describes a source that yields uint64 values one at a time,
// reporting failure through the error result.
type U64VarintDecoder interface {
GetU64() (uint64, error)
}
///
// U32GroupVarintEncoder buffers uint32 values in groups of four and writes
// each group to w as one size byte followed by the big-endian bytes of every
// value. The size byte holds four 2-bit fields (first value in the top two
// bits); each field stores the value's byte count minus one.
type U32GroupVarintEncoder struct {
	w     io.Writer
	index int       // number of values currently buffered in store
	store [4]uint32 // pending group
	temp  [17]byte  // scratch: 1 size byte + up to 4*4 value bytes
}

// groupVarintShifts lists the byte positions of a uint32, most significant first.
var groupVarintShifts = [...]uint{24, 16, 8, 0}

// NewU32GroupVarintEncoder returns an encoder that writes to w.
func NewU32GroupVarintEncoder(w io.Writer) *U32GroupVarintEncoder {
	return &U32GroupVarintEncoder{w: w}
}

// Flush writes the buffered values (if any) as one group and empties the
// buffer. It returns the number of bytes handed to the writer and the
// writer's error.
//
// The buffer is emptied even when the write fails. Previously a failed flush
// left index at 4, so the next PutU32 indexed past the end of store and
// panicked; likewise a repeated Flush/Close re-emitted the same partial group.
func (b *U32GroupVarintEncoder) Flush() (int, error) {
	// If index is zero, there are no integers to flush.
	if b.index == 0 {
		return 0, nil
	}
	// Unused slots of a partial group must be zero so that their size bits
	// in the size byte are zero as well.
	for i := b.index; i < 4; i++ {
		b.store[i] = 0
	}
	length := 1
	// The size byte is only ever OR-ed into, so clear it first.
	b.temp[0] = 0
	for i, x := range b.store {
		size := byte(0)
		for _, shift := range groupVarintShifts {
			// Always emit the lowest byte; emit higher bytes from the
			// first non-zero one downwards.
			if (x>>shift) != 0 || shift == 0 {
				size++
				b.temp[length] = byte(x >> shift)
				length++
			}
		}
		// Two bits per value; 0 means one byte, hence size-1.
		b.temp[0] |= (size - 1) << (uint8(3-i) * 2)
	}
	// For a partial group, drop the single zero byte emitted for each unused
	// slot; the decoder recognises the short group by hitting EOF.
	if b.index != 4 {
		length -= 4 - b.index
	}
	_, err := b.w.Write(b.temp[:length])
	b.index = 0
	return length, err
}

// PutU32 buffers x. When x completes a group of four the group is flushed and
// the number of bytes written is returned; otherwise it returns 0.
func (b *U32GroupVarintEncoder) PutU32(x uint32) (int, error) {
	b.store[b.index] = x
	b.index++
	if b.index < 4 {
		return 0, nil
	}
	return b.Flush()
}

// Close flushes any values left over from an incomplete group. The flush
// error is discarded because Close has no error result.
func (b *U32GroupVarintEncoder) Close() {
	// On Close, we flush any remaining values that might not have been in a full group
	_, _ = b.Flush()
}
///
// U32GroupVarintDecoder reads uint32 values written in the group varint
// layout: a size byte carrying four 2-bit (length-1) fields, followed by the
// big-endian bytes of up to four values.
type U32GroupVarintDecoder struct {
	r        io.ByteReader
	group    [4]uint32 // values decoded from the current group
	pos      int       // index of the next value to hand out
	finished bool      // set once a short (final) group has been seen
	capacity int       // number of valid entries in group
}

// NewU32GroupVarintDecoder returns a decoder reading from r. pos starts equal
// to capacity so that the first GetU32 loads a group.
func NewU32GroupVarintDecoder(r io.ByteReader) *U32GroupVarintDecoder {
	return &U32GroupVarintDecoder{r: r, pos: 4, capacity: 4}
}

// getGroup reads one size byte and then as many values as the input holds.
// Hitting io.EOF while reading a value marks the group as partial: capacity
// becomes the number of complete values read and finished is set. Any other
// read error is returned immediately (errors on the leading bytes of a value
// are no longer dropped).
func (b *U32GroupVarintDecoder) getGroup() error {
	// We should always receive a size byte if there are more values to read.
	sizeByte, err := b.r.ReadByte()
	if err != nil {
		return err
	}
	for index := range b.group {
		// 0b00 means 1 byte to read, 0b01 means 2, and so on.
		width := int(sizeByte>>(uint(3-index)*2))&3 + 1
		var value uint32
		for n := 0; n < width; n++ {
			var c byte
			if c, err = b.r.ReadByte(); err != nil {
				break
			}
			value = value<<8 | uint32(c)
		}
		if err == io.EOF {
			// Partial group: hand out what was read, then report EOF.
			b.capacity = index
			b.finished = true
			break
		}
		if err != nil {
			return err
		}
		b.group[index] = value
	}
	// Reset the pos pointer to the beginning of the read values.
	b.pos = 0
	return nil
}

// GetU32 returns the next decoded value, or io.EOF once the input is
// exhausted.
//
// A stream that ends right after a size byte yields a group with zero values.
// Previously that case returned a bogus 0 with a nil error and eventually
// indexed past the end of group; it now reports io.EOF.
func (b *U32GroupVarintDecoder) GetU32() (uint32, error) {
	// Check if we have any more values to give out - if not, let's get them.
	if b.pos == b.capacity {
		// If finished is set, there is nothing else to do.
		if b.finished {
			return 0, io.EOF
		}
		if err := b.getGroup(); err != nil {
			return 0, err
		}
		// The freshly read group may be empty (truncated input).
		if b.pos == b.capacity {
			return 0, io.EOF
		}
	}
	// Increment pointer and return the value stored at that point.
	b.pos++
	return b.group[b.pos-1], nil
}
///
// Base128Encoder writes unsigned integers to w in the variable-length format
// produced by encoding/binary's PutUvarint.
type Base128Encoder struct {
	w        io.Writer
	tmpBytes []byte // scratch buffer reused across Put calls
}

// NewU32Base128Encoder returns an encoder whose scratch buffer is sized for
// uint32 values.
func NewU32Base128Encoder(w io.Writer) *Base128Encoder {
	return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen32)}
}

// NewU64Base128Encoder returns an encoder whose scratch buffer is sized for
// uint64 values.
func NewU64Base128Encoder(w io.Writer) *Base128Encoder {
	return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen64)}
}

// scratch returns the reusable buffer, growing it when it is shorter than n.
// Without this, PutU64 on an encoder built by NewU32Base128Encoder panicked
// inside binary.PutUvarint for values needing more than MaxVarintLen32 bytes.
func (b *Base128Encoder) scratch(n int) []byte {
	if len(b.tmpBytes) < n {
		b.tmpBytes = make([]byte, n)
	}
	return b.tmpBytes
}

// PutU32 writes x and returns the writer's byte count and error.
func (b *Base128Encoder) PutU32(x uint32) (int, error) {
	buf := b.scratch(binary.MaxVarintLen32)
	writtenBytes := binary.PutUvarint(buf, uint64(x))
	return b.w.Write(buf[:writtenBytes])
}

// PutU64 writes x and returns the writer's byte count and error.
func (b *Base128Encoder) PutU64(x uint64) (int, error) {
	buf := b.scratch(binary.MaxVarintLen64)
	writtenBytes := binary.PutUvarint(buf, x)
	return b.w.Write(buf[:writtenBytes])
}

// Close is a no-op: every Put call writes straight through to w.
func (b *Base128Encoder) Close() {
}
///
// Base128Decoder reads unsigned varints (as understood by
// encoding/binary.ReadUvarint) from a byte reader.
type Base128Decoder struct {
	r io.ByteReader
}

// NewU32Base128Decoder returns a decoder reading from r.
func NewU32Base128Decoder(r io.ByteReader) *Base128Decoder {
	dec := new(Base128Decoder)
	dec.r = r
	return dec
}

// NewU64Base128Decoder returns a decoder reading from r; it builds the same
// value as NewU32Base128Decoder.
func NewU64Base128Decoder(r io.ByteReader) *Base128Decoder {
	dec := new(Base128Decoder)
	dec.r = r
	return dec
}

// GetU32 reads the next varint and converts it to uint32; bits above the low
// 32 are discarded by the conversion.
func (b *Base128Decoder) GetU32() (uint32, error) {
	wide, err := b.GetU64()
	return uint32(wide), err
}

// GetU64 reads the next varint as a uint64.
func (b *Base128Decoder) GetU64() (uint64, error) {
	value, err := binary.ReadUvarint(b.r)
	return value, err
}

@ -14,6 +14,22 @@
package analysis
import (
"reflect"
"github.com/blevesearch/bleve/size"
)
// Shallow in-memory sizes, in bytes, of the TokenLocation and TokenFreq
// struct types as reported by reflect. They are computed once in init and
// used by the Size methods.
var (
	reflectStaticSizeTokenLocation int
	reflectStaticSizeTokenFreq     int
)

func init() {
	reflectStaticSizeTokenLocation = int(reflect.TypeOf(TokenLocation{}).Size())
	reflectStaticSizeTokenFreq = int(reflect.TypeOf(TokenFreq{}).Size())
}
// TokenLocation represents one occurrence of a term at a particular location in
// a field. Start, End and Position have the same meaning as in analysis.Token.
// Field and ArrayPositions identify the field value in the source document.
@ -26,6 +42,12 @@ type TokenLocation struct {
Position int
}
// Size estimates the bytes used by tl: the fixed struct size plus one uint64
// per ArrayPositions entry.
func (tl *TokenLocation) Size() int {
	return reflectStaticSizeTokenLocation + size.SizeOfUint64*len(tl.ArrayPositions)
}
// TokenFreq represents all the occurrences of a term in all fields of a
// document.
type TokenFreq struct {
@ -34,6 +56,15 @@ type TokenFreq struct {
frequency int
}
// Size estimates the bytes used by tf: the fixed struct size, the length of
// Term, and the Size of every entry in Locations.
func (tf *TokenFreq) Size() int {
	total := reflectStaticSizeTokenFreq + len(tf.Term)
	for _, location := range tf.Locations {
		total += location.Size()
	}
	return total
}
// Frequency returns the value of the unexported frequency field.
func (tf *TokenFreq) Frequency() int {
return tf.frequency
}
@ -42,6 +73,16 @@ func (tf *TokenFreq) Frequency() int {
// fields.
type TokenFrequencies map[string]*TokenFreq
// Size estimates the bytes used by the map: a fixed map overhead, a string
// header and pointer per entry, the bytes of each key, and the Size of each
// TokenFreq value.
func (tfs TokenFrequencies) Size() int {
	perEntry := size.SizeOfString + size.SizeOfPtr
	total := size.SizeOfMap + len(tfs)*perEntry
	for term, freq := range tfs {
		total += len(term) + freq.Size()
	}
	return total
}
func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) {
// walk the new token frequencies
for tfk, tf := range other {

@ -46,11 +46,11 @@ type Parser struct {
index int
}
func NewParser(len, position, index int) *Parser {
func NewParser(length, position, index int) *Parser {
return &Parser{
bufferLen: len,
buffer: make([]rune, 0, len),
tokens: make([]*analysis.Token, 0, len),
bufferLen: length,
buffer: make([]rune, 0, length),
tokens: make([]*analysis.Token, 0, length),
position: position,
index: index,
}

@ -21,7 +21,7 @@ import (
const Name = "unique"
// UniqueTermFilter retains only the tokens which mark the first occurence of
// UniqueTermFilter retains only the tokens which mark the first occurrence of
// a term. Tokens whose term appears in a preceding token are dropped.
type UniqueTermFilter struct{}

@ -14,7 +14,19 @@
package document
import "fmt"
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeDocument is the shallow size in bytes of the Document
// struct as reported by reflect, computed once in init.
var reflectStaticSizeDocument int

func init() {
	reflectStaticSizeDocument = int(reflect.TypeOf(Document{}).Size())
}
type Document struct {
ID string `json:"id"`
@ -30,6 +42,21 @@ func NewDocument(id string) *Document {
}
}
// Size estimates the bytes used by d: the fixed struct size, one pointer, the
// length of ID, and the Size of every entry in Fields and CompositeFields.
func (d *Document) Size() int {
	total := reflectStaticSizeDocument + size.SizeOfPtr + len(d.ID)
	for _, field := range d.Fields {
		total += field.Size()
	}
	for _, composite := range d.CompositeFields {
		total += composite.Size()
	}
	return total
}
func (d *Document) AddField(f Field) *Document {
switch f := f.(type) {
case *CompositeField:

@ -36,4 +36,6 @@ type Field interface {
// that this field represents - this is a common metric for tracking
// the rate of indexing
NumPlainTextBytes() uint64
Size() int
}

@ -16,10 +16,19 @@ package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeBooleanField is the shallow size in bytes of the
// BooleanField struct as reported by reflect, computed once in init.
var reflectStaticSizeBooleanField int

func init() {
	reflectStaticSizeBooleanField = int(reflect.TypeOf(BooleanField{}).Size())
}
const DefaultBooleanIndexingOptions = StoreField | IndexField | DocValues
type BooleanField struct {
@ -30,6 +39,13 @@ type BooleanField struct {
numPlainTextBytes uint64
}
// Size estimates the bytes used by b: the fixed struct size, one pointer, the
// name and value lengths, and one uint64 per array position.
func (b *BooleanField) Size() int {
	total := reflectStaticSizeBooleanField + size.SizeOfPtr
	total += len(b.name)
	total += len(b.arrayPositions) * size.SizeOfUint64
	total += len(b.value)
	return total
}
// Name returns the field's name.
func (b *BooleanField) Name() string {
return b.name
}

@ -15,9 +15,19 @@
package document
import (
"reflect"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeCompositeField is the shallow size in bytes of the
// CompositeField struct as reported by reflect, computed once in init.
var reflectStaticSizeCompositeField int

func init() {
	reflectStaticSizeCompositeField = int(reflect.TypeOf(CompositeField{}).Size())
}
const DefaultCompositeIndexingOptions = IndexField
type CompositeField struct {
@ -54,6 +64,21 @@ func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, incl
return rv
}
// Size estimates the bytes used by c: the fixed struct size, one pointer, the
// name length, and for every key in includedFields and excludedFields a
// string header, the key bytes and one bool.
func (c *CompositeField) Size() int {
	total := reflectStaticSizeCompositeField + size.SizeOfPtr + len(c.name)
	for included := range c.includedFields {
		total += size.SizeOfString + len(included) + size.SizeOfBool
	}
	for excluded := range c.excludedFields {
		total += size.SizeOfString + len(excluded) + size.SizeOfBool
	}
	return total
}
// Name returns the field's name.
func (c *CompositeField) Name() string {
return c.name
}

@ -17,12 +17,21 @@ package document
import (
"fmt"
"math"
"reflect"
"time"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeDateTimeField is the shallow size in bytes of the
// DateTimeField struct as reported by reflect, computed once in init.
var reflectStaticSizeDateTimeField int

func init() {
	reflectStaticSizeDateTimeField = int(reflect.TypeOf(DateTimeField{}).Size())
}
const DefaultDateTimeIndexingOptions = StoreField | IndexField | DocValues
const DefaultDateTimePrecisionStep uint = 4
@ -37,6 +46,12 @@ type DateTimeField struct {
numPlainTextBytes uint64
}
// Size estimates the bytes used by n: the fixed struct size, one pointer, the
// name length, and one uint64 per array position.
// NOTE(review): unlike BooleanField and TextField, the value is not counted
// here — confirm whether that is intentional.
func (n *DateTimeField) Size() int {
	total := reflectStaticSizeDateTimeField + size.SizeOfPtr
	total += len(n.name)
	total += len(n.arrayPositions) * size.SizeOfUint64
	return total
}
// Name returns the field's name.
func (n *DateTimeField) Name() string {
return n.name
}

@ -16,12 +16,21 @@ package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeGeoPointField is the shallow size in bytes of the
// GeoPointField struct as reported by reflect, computed once in init.
var reflectStaticSizeGeoPointField int

func init() {
	reflectStaticSizeGeoPointField = int(reflect.TypeOf(GeoPointField{}).Size())
}
var GeoPrecisionStep uint = 9
type GeoPointField struct {
@ -32,6 +41,12 @@ type GeoPointField struct {
numPlainTextBytes uint64
}
// Size estimates the bytes used by n: the fixed struct size, one pointer, the
// name length, and one uint64 per array position.
func (n *GeoPointField) Size() int {
	total := reflectStaticSizeGeoPointField + size.SizeOfPtr
	total += len(n.name)
	total += len(n.arrayPositions) * size.SizeOfUint64
	return total
}
// Name returns the field's name.
func (n *GeoPointField) Name() string {
return n.name
}

@ -16,11 +16,20 @@ package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeNumericField is the shallow size in bytes of the
// NumericField struct as reported by reflect, computed once in init.
var reflectStaticSizeNumericField int

func init() {
	reflectStaticSizeNumericField = int(reflect.TypeOf(NumericField{}).Size())
}
const DefaultNumericIndexingOptions = StoreField | IndexField | DocValues
const DefaultPrecisionStep uint = 4
@ -33,6 +42,12 @@ type NumericField struct {
numPlainTextBytes uint64
}
// Size estimates the bytes used by n: the fixed struct size, one pointer, the
// name length, and one uint64 per array position.
//
// The per-position cost uses size.SizeOfUint64, matching the other field
// types in this package (BooleanField, DateTimeField, GeoPointField,
// TextField). The previous size.SizeOfPtr factor under-counted wherever
// pointers are narrower than 8 bytes.
// NOTE(review): assumes arrayPositions is a []uint64 as in the sibling field
// types — confirm against the struct definition.
func (n *NumericField) Size() int {
	return reflectStaticSizeNumericField + size.SizeOfPtr +
		len(n.name) +
		len(n.arrayPositions)*size.SizeOfUint64
}
// Name returns the field's name.
func (n *NumericField) Name() string {
return n.name
}

@ -16,10 +16,19 @@ package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeTextField is the shallow size in bytes of the TextField
// struct as reported by reflect, computed once in init.
var reflectStaticSizeTextField int

func init() {
	reflectStaticSizeTextField = int(reflect.TypeOf(TextField{}).Size())
}
const DefaultTextIndexingOptions = IndexField | DocValues
type TextField struct {
@ -31,6 +40,13 @@ type TextField struct {
numPlainTextBytes uint64
}
// Size estimates the bytes used by t: the fixed struct size, one pointer, the
// name and value lengths, and one uint64 per array position.
func (t *TextField) Size() int {
	total := reflectStaticSizeTextField + size.SizeOfPtr
	total += len(t.name)
	total += len(t.arrayPositions) * size.SizeOfUint64
	total += len(t.value)
	return total
}
// Name returns the field's name.
func (t *TextField) Name() string {
return t.name
}

@ -0,0 +1,174 @@
// The code here was obtained from:
// https://github.com/mmcloughlin/geohash
// The MIT License (MIT)
// Copyright (c) 2015 Michael McLoughlin
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
package geo
import (
"math"
)
// encoding holds a base32 alphabet together with its reverse lookup table.
type encoding struct {
	enc string    // alphabet: character for each 5-bit value
	dec [256]byte // 5-bit value for each byte, 0xff where the byte is not in enc
}

// newEncoding builds an encoding from the given alphabet, which must be a
// 32-byte string.
func newEncoding(encoder string) *encoding {
	e := &encoding{enc: encoder}
	for i := range e.dec {
		e.dec[i] = 0xff
	}
	for pos, ch := range []byte(encoder) {
		e.dec[ch] = byte(pos)
	}
	return e
}

// decode packs the characters of s, five bits each, into a 64-bit word with
// the first character in the most significant position. The string s may be
// at most 12 characters. Bytes outside the alphabet contribute their table
// value of 0xff without any error being reported.
func (e *encoding) decode(s string) uint64 {
	var word uint64
	for _, ch := range []byte(s) {
		word = word<<5 | uint64(e.dec[ch])
	}
	return word
}

// encode renders the low 60 bits of x as a 12-character string, most
// significant 5-bit group first.
func (e *encoding) encode(x uint64) string {
	var out [12]byte
	for i := len(out) - 1; i >= 0; i-- {
		out[i] = e.enc[x&0x1f]
		x >>= 5
	}
	return string(out[:])
}
// Base32Encoding with the Geohash alphabet.
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
// geoBoundingBox returns the region encoded by the given string geohash. Each
// character contributes five bits of precision.
func geoBoundingBox(hash string) geoBox {
	precision := uint(len(hash) * 5)
	return geoBoundingBoxIntWithPrecision(base32encoding.decode(hash), precision)
}
// geoBox represents a rectangle in latitude/longitude space, described by its
// minimum and maximum latitude and longitude.
type geoBox struct {
minLat float64
maxLat float64
minLng float64
maxLng float64
}
// round returns a point inside the box, making an effort to round to minimal
// precision: each coordinate is the box minimum rounded up to a multiple of
// the power of ten chosen by maxDecimalPower for the box's extent.
func (b geoBox) round() (lat, lng float64) {
	latStep := maxDecimalPower(b.maxLat - b.minLat)
	lat = math.Ceil(b.minLat/latStep) * latStep
	lngStep := maxDecimalPower(b.maxLng - b.minLng)
	lng = math.Ceil(b.minLng/lngStep) * lngStep
	return lat, lng
}
// precalculated for performance
var exp232 = math.Exp2(32)
// errorWithPrecision returns the error range in latitude and longitude for an
// integer geohash with the given number of bits of precision. Latitude gets
// half of the bits (rounded down) and longitude gets the rest; each bit
// halves the corresponding range (180 degrees for latitude, 360 for
// longitude).
func errorWithPrecision(bits uint) (latErr, lngErr float64) {
	total := int(bits)
	latBits := total / 2
	lngBits := total - latBits
	return math.Ldexp(180.0, -latBits), math.Ldexp(360.0, -lngBits)
}
// maxDecimalPower returns the largest power of ten that is less than or equal
// to r, i.e. 10 raised to floor(log10(r)). It is used to pick the rounding
// step for returning minimal precision coordinates inside a box of width r.
func maxDecimalPower(r float64) float64 {
	exponent := math.Floor(math.Log10(r))
	return math.Pow10(int(exponent))
}
// encodeRange encodes the position of x within the range -r to +r as a 32-bit
// integer: the fractional position in the range, scaled by 2^32.
func encodeRange(x, r float64) uint32 {
	fraction := (x + r) / (2 * r)
	scaled := fraction * exp232
	return uint32(scaled)
}
// decodeRange decodes the 32-bit range encoding X back to a value in the
// range -r to +r.
func decodeRange(X uint32, r float64) float64 {
	fraction := float64(X) / exp232
	return 2*r*fraction - r
}
// squash gathers the even bitlevels of X into a 32-bit word. Odd bitlevels of
// X are ignored, and may take any value.
func squash(X uint64) uint32 {
	// Drop the odd bits, then repeatedly fold neighbouring groups together,
	// doubling the group width at every step.
	X &= 0x5555555555555555
	steps := [...]struct {
		shift uint
		mask  uint64
	}{
		{1, 0x3333333333333333},
		{2, 0x0f0f0f0f0f0f0f0f},
		{4, 0x00ff00ff00ff00ff},
		{8, 0x0000ffff0000ffff},
		{16, 0x00000000ffffffff},
	}
	for _, step := range steps {
		X = (X | (X >> step.shift)) & step.mask
	}
	return uint32(X)
}
// deinterleave splits the bits of X into two 32-bit words holding the even
// and odd bitlevels of X, respectively.
func deinterleave(X uint64) (uint32, uint32) {
	even := squash(X)
	odd := squash(X >> 1)
	return even, odd
}
// geoBoundingBoxIntWithPrecision returns the region encoded by the integer
// geohash with the specified precision. The hash is left-aligned to 64 bits,
// deinterleaved, and each half decoded to the box's minimum corner; the
// maximum corner adds the error range for the given precision.
func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
	latInt, lngInt := deinterleave(hash << (64 - bits))
	var box geoBox
	box.minLat = decodeRange(latInt, 90)
	box.minLng = decodeRange(lngInt, 180)
	latErr, lngErr := errorWithPrecision(bits)
	box.maxLat = box.minLat + latErr
	box.maxLng = box.minLng + lngErr
	return box
}
// ----------------------------------------------------------------------
// GeoHashDecode decodes the string geohash to a (lat, lng) point: it computes
// the hash's bounding box and returns that box's rounded point.
func GeoHashDecode(hash string) (lat, lng float64) {
	return geoBoundingBox(hash).round()
}

@ -16,6 +16,7 @@ package geo
import (
"reflect"
"strconv"
"strings"
)
@ -24,6 +25,8 @@ import (
// Container:
// slice length 2 (GeoJSON)
// first element lon, second element lat
// string (coordinates separated by comma, or a geohash)
// first element lat, second element lon
// map[string]interface{}
// exact keys lat and lon or lng
// struct
@ -36,10 +39,14 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
var foundLon, foundLat bool
thingVal := reflect.ValueOf(thing)
if !thingVal.IsValid() {
return lon, lat, false
}
thingTyp := thingVal.Type()
// is it a slice
if thingVal.IsValid() && thingVal.Kind() == reflect.Slice {
if thingVal.Kind() == reflect.Slice {
// must be length 2
if thingVal.Len() == 2 {
first := thingVal.Index(0)
@ -55,6 +62,35 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
}
}
// is it a string
if thingVal.Kind() == reflect.String {
geoStr := thingVal.Interface().(string)
if strings.Contains(geoStr, ",") {
// geo point with coordinates split by comma
points := strings.Split(geoStr, ",")
for i, point := range points {
// trim any leading or trailing white spaces
points[i] = strings.TrimSpace(point)
}
if len(points) == 2 {
var err error
lat, err = strconv.ParseFloat(points[0], 64)
if err == nil {
foundLat = true
}
lon, err = strconv.ParseFloat(points[1], 64)
if err == nil {
foundLon = true
}
}
} else {
// geohash
lat, lon = GeoHashDecode(geoStr)
foundLat = true
foundLon = true
}
}
// is it a map
if l, ok := thing.(map[string]interface{}); ok {
if lval, ok := l["lon"]; ok {
@ -68,7 +104,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
}
// now try reflection on struct fields
if thingVal.IsValid() && thingVal.Kind() == reflect.Struct {
if thingVal.Kind() == reflect.Struct {
for i := 0; i < thingVal.NumField(); i++ {
fieldName := thingTyp.Field(i).Name
if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
@ -113,6 +149,9 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
// extract numeric value (if possible) and returns a float64
func extractNumericVal(v interface{}) (float64, bool) {
val := reflect.ValueOf(v)
if !val.IsValid() {
return 0, false
}
typ := val.Type()
switch typ.Kind() {
case reflect.Float32, reflect.Float64:

@ -21,6 +21,7 @@ import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/size"
)
// A Batch groups together multiple Index and Delete
@ -32,6 +33,9 @@ import (
type Batch struct {
index Index
internal *index.Batch
lastDocSize uint64
totalSize uint64
}
// Index adds the specified index operation to the
@ -47,9 +51,22 @@ func (b *Batch) Index(id string, data interface{}) error {
return err
}
b.internal.Update(doc)
b.lastDocSize = uint64(doc.Size() +
len(id) + size.SizeOfString) // overhead from internal
b.totalSize += b.lastDocSize
return nil
}
// LastDocSize returns the size recorded for the most recent document added
// through Index (or taken over from another batch by Merge).
func (b *Batch) LastDocSize() uint64 {
return b.lastDocSize
}
// TotalDocsSize returns the running total of document sizes tracked by this
// batch.
func (b *Batch) TotalDocsSize() uint64 {
return b.totalSize
}
// IndexAdvanced adds the specified index operation to the
// batch which skips the mapping. NOTE: the bleve Index is not updated
// until the batch is executed.
@ -102,6 +119,24 @@ func (b *Batch) Reset() {
b.internal.Reset()
}
// Merge folds the operations of o into b. A nil o, or one without an
// internal batch, is ignored. After merging, lastDocSize is taken from o when
// o recorded a non-zero value, and totalSize is recomputed from the merged
// internal batch.
func (b *Batch) Merge(o *Batch) {
	if o == nil || o.internal == nil {
		return
	}
	b.internal.Merge(o.internal)
	if last := o.LastDocSize(); last > 0 {
		b.lastDocSize = last
	}
	b.totalSize = uint64(b.internal.TotalDocSize())
}
// SetPersistedCallback stores f on the underlying internal batch.
func (b *Batch) SetPersistedCallback(f index.BatchCallback) {
b.internal.SetPersistedCallback(f)
}
// PersistedCallback returns the callback held by the underlying internal
// batch.
func (b *Batch) PersistedCallback() index.BatchCallback {
return b.internal.PersistedCallback()
}
// An Index implements all the indexing and searching
// capabilities of bleve. An Index can be created
// using the New() and Open() methods.

@ -15,10 +15,20 @@
package index
import (
"reflect"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/size"
)
// reflectStaticSizeAnalysisResult is the shallow size in bytes of the
// AnalysisResult struct as reported by reflect, computed once in init.
var reflectStaticSizeAnalysisResult int

func init() {
	reflectStaticSizeAnalysisResult = int(reflect.TypeOf(AnalysisResult{}).Size())
}
type IndexRow interface {
KeySize() int
KeyTo([]byte) (int, error)
@ -39,6 +49,15 @@ type AnalysisResult struct {
Length []int
}
// Size estimates the bytes used by a: the fixed struct size, the Size of each
// entry in Analyzed, and one int per entry in Length.
func (a *AnalysisResult) Size() int {
	total := reflectStaticSizeAnalysisResult + len(a.Length)*size.SizeOfInt
	for _, frequencies := range a.Analyzed {
		total += frequencies.Size()
	}
	return total
}
type AnalysisWork struct {
i Index
d *document.Document

@ -18,11 +18,23 @@ import (
"bytes"
"encoding/json"
"fmt"
"reflect"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/size"
)
// Shallow in-memory sizes, in bytes, of the TermFieldDoc and TermFieldVector
// struct types as reported by reflect, computed once in init.
var (
	reflectStaticSizeTermFieldDoc    int
	reflectStaticSizeTermFieldVector int
)

func init() {
	reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(TermFieldDoc{}).Size())
	reflectStaticSizeTermFieldVector = int(reflect.TypeOf(TermFieldVector{}).Size())
}
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
type Index interface {
@ -68,6 +80,8 @@ type IndexReader interface {
Document(id string) (*document.Document, error)
DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
DocValueReader(fields []string) (DocValueReader, error)
Fields() ([]string, error)
GetInternal(key []byte) ([]byte, error)
@ -84,6 +98,29 @@ type IndexReader interface {
Close() error
}
// The Regexp interface defines the subset of the regexp.Regexp API
// methods that are used by bleve indexes, allowing callers to pass in
// alternate implementations. The three methods have the same names and
// signatures as the corresponding methods of the standard library's
// *regexp.Regexp.
type Regexp interface {
FindStringIndex(s string) (loc []int)
LiteralPrefix() (prefix string, complete bool)
String() string
}
// IndexReaderRegexp exposes a FieldDict for a field given a regular
// expression string.
// NOTE(review): presumably an optional capability of some IndexReader
// implementations, restricting the dictionary to matching terms — confirm.
type IndexReaderRegexp interface {
FieldDictRegexp(field string, regex string) (FieldDict, error)
}
// IndexReaderFuzzy exposes a FieldDict for a field given a term, a fuzziness
// value and a prefix.
// NOTE(review): presumably yields terms within the given edit distance of
// term — confirm against an implementation.
type IndexReaderFuzzy interface {
FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error)
}
// IndexReaderOnly exposes a FieldDict for a field given an explicit list of
// terms and an includeCount flag.
// NOTE(review): presumably restricts the dictionary to onlyTerms — confirm.
type IndexReaderOnly interface {
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
}
// FieldTerms contains the terms used by a document, keyed by field
type FieldTerms map[string][]string
@ -115,6 +152,11 @@ type TermFieldVector struct {
End uint64
}
// Size estimates the bytes used by tfv: the fixed struct size, one pointer,
// the length of Field, and one uint64 per ArrayPositions entry.
func (tfv *TermFieldVector) Size() int {
	total := reflectStaticSizeTermFieldVector + size.SizeOfPtr
	total += len(tfv.Field)
	total += len(tfv.ArrayPositions) * size.SizeOfUint64
	return total
}
// IndexInternalID is an opaque document identifier internal to the index impl
type IndexInternalID []byte
@ -134,14 +176,27 @@ type TermFieldDoc struct {
Vectors []*TermFieldVector
}
// Size estimates the bytes used by tfd: the fixed struct size, one pointer,
// the lengths of Term and ID, and the Size of every entry in Vectors.
func (tfd *TermFieldDoc) Size() int {
	total := reflectStaticSizeTermFieldDoc + size.SizeOfPtr
	total += len(tfd.Term) + len(tfd.ID)
	for _, vector := range tfd.Vectors {
		total += vector.Size()
	}
	return total
}
// Reset allows an already allocated TermFieldDoc to be reused. Every field is
// returned to its zero value except ID and Vectors, which keep their existing
// backing storage but are truncated to length 0.
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
	// The right-hand side is evaluated before the assignment, so the old
	// slices are captured before the struct is overwritten.
	*tfd = TermFieldDoc{
		ID:      tfd.ID[:0],
		Vectors: tfd.Vectors[:0],
	}
	return tfd
}
@ -161,6 +216,8 @@ type TermFieldReader interface {
// Count returns the number of documents containing the term in this field.
Count() uint64
Close() error
Size() int
}
type DictEntry struct {
@ -185,12 +242,18 @@ type DocIDReader interface {
// will start there instead. If ID is greater than or equal to the end of
// the range, Next() call will return io.EOF.
Advance(ID IndexInternalID) (IndexInternalID, error)
Size() int
Close() error
}
// BatchCallback is a function taking an error; a Batch stores one through
// SetPersistedCallback.
// NOTE(review): presumably invoked once the batch has been persisted, with
// any persistence error — confirm at the call site.
type BatchCallback func(error)
type Batch struct {
IndexOps map[string]*document.Document
InternalOps map[string][]byte
IndexOps map[string]*document.Document
InternalOps map[string][]byte
persistedCallback BatchCallback
}
func NewBatch() *Batch {
@ -216,6 +279,14 @@ func (b *Batch) DeleteInternal(key []byte) {
b.InternalOps[string(key)] = nil
}
// SetPersistedCallback stores f as the batch's persisted callback, replacing
// any previous one.
func (b *Batch) SetPersistedCallback(f BatchCallback) {
b.persistedCallback = f
}
// PersistedCallback returns the callback stored by SetPersistedCallback, or
// nil if none is set.
func (b *Batch) PersistedCallback() BatchCallback {
return b.persistedCallback
}
func (b *Batch) String() string {
rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps))
for k, v := range b.IndexOps {
@ -238,4 +309,53 @@ func (b *Batch) String() string {
// Reset empties the batch: IndexOps and InternalOps are replaced with fresh
// empty maps and the persisted callback is cleared.
func (b *Batch) Reset() {
b.IndexOps = make(map[string]*document.Document)
b.InternalOps = make(map[string][]byte)
b.persistedCallback = nil
}
// Merge copies every index and internal operation of o into b. Where both
// batches hold an entry under the same key, the entry from o wins. The
// persisted callback is not touched.
func (b *Batch) Merge(o *Batch) {
	for docID, doc := range o.IndexOps {
		b.IndexOps[docID] = doc
	}
	for key, val := range o.InternalOps {
		b.InternalOps[key] = val
	}
}
// TotalDocSize sums, over all IndexOps entries, the length of the key plus,
// for non-nil documents, the document's Size and one string header.
func (b *Batch) TotalDocSize() int {
	total := 0
	for id, doc := range b.IndexOps {
		total += len(id)
		if doc == nil {
			continue
		}
		total += doc.Size() + size.SizeOfString
	}
	return total
}
// Optimizable represents an optional interface that is implementable by
// optimizable resources (e.g., TermFieldReaders, Searchers). These
// optimizable resources are provided the same OptimizableContext
// instance, so that they can coordinate via dynamic interface
// casting.
type Optimizable interface {
Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
}
// Optimized represents a result of optimization -- see the Finish() method
// of OptimizableContext.
type Optimized interface{}
// OptimizableContext is the shared state handed to each Optimizable resource
// through its Optimize method.
type OptimizableContext interface {
// Once all the optimizable resources have been provided the same
// OptimizableContext instance, the optimization preparations are
// finished or completed via the Finish() method.
//
// Depending on the optimization being performed, the Finish()
// method might return a non-nil Optimized instance. For example,
// the Optimized instance might represent an optimized
// TermFieldReader instance.
Finish() (Optimized, error)
}
// DocValueReader visits the doc values of a document identified by its
// internal ID, passing them to the supplied visitor.
// NOTE(review): instances are obtained from IndexReader.DocValueReader(fields);
// presumably only those fields are visited — confirm.
type DocValueReader interface {
VisitDocValues(id IndexInternalID, visitor DocumentFieldTermVisitor) error
}

@ -19,7 +19,9 @@ import (
"sync/atomic"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/index/scorch/segment/zap"
)
type segmentIntroduction struct {
@ -29,8 +31,14 @@ type segmentIntroduction struct {
ids []string
internal map[string][]byte
applied chan error
persisted chan error
applied chan error
persisted chan error
persistedCallback index.BatchCallback
}
// persistIntroduction bundles a set of segments keyed by uint64 with a
// notification channel.
// NOTE(review): presumably the keys are segment IDs and applied is signalled
// once the persisted segments have been introduced — confirm in
// introducePersist.
type persistIntroduction struct {
persisted map[uint64]segment.Segment
applied notificationChan
}
type epochWatcher struct {
@ -48,6 +56,8 @@ func (s *Scorch) mainLoop() {
var epochWatchers []*epochWatcher
OUTER:
for {
atomic.AddUint64(&s.stats.TotIntroduceLoop, 1)
select {
case <-s.closeCh:
break OUTER
@ -64,6 +74,9 @@ OUTER:
continue OUTER
}
case persist := <-s.persists:
s.introducePersist(persist)
case revertTo := <-s.revertToSnapshots:
err := s.revertToSnapshot(revertTo)
if err != nil {
@ -92,32 +105,38 @@ OUTER:
}
func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
// acquire lock