1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zap 16 17import ( 18 "fmt" 19 20 "github.com/RoaringBitmap/roaring" 21 "github.com/blevesearch/bleve/index" 22 "github.com/blevesearch/bleve/index/scorch/segment" 23 "github.com/couchbase/vellum" 24 "github.com/couchbase/vellum/regexp" 25) 26 27// Dictionary is the zap representation of the term dictionary 28type Dictionary struct { 29 sb *SegmentBase 30 field string 31 fieldID uint16 32 fst *vellum.FST 33} 34 35// PostingsList returns the postings list for the specified term 36func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { 37 return d.postingsList([]byte(term), except, nil) 38} 39 40func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { 41 if d.fst == nil { 42 return d.postingsListInit(rv, except), nil 43 } 44 45 postingsOffset, exists, err := d.fst.Get(term) 46 if err != nil { 47 return nil, fmt.Errorf("vellum err: %v", err) 48 } 49 if !exists { 50 return d.postingsListInit(rv, except), nil 51 } 52 53 return d.postingsListFromOffset(postingsOffset, except, rv) 54} 55 56func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { 57 rv = d.postingsListInit(rv, except) 58 59 err := rv.read(postingsOffset, d) 60 if err != nil { 61 return nil, err 62 } 63 64 return rv, nil 65} 66 67func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList { 68 if rv == nil { 69 rv = &PostingsList{} 70 } else { 71 *rv = PostingsList{} // clear the struct 72 } 73 rv.sb = d.sb 74 rv.except = except 75 return rv 76} 77 78// Iterator returns an iterator for this dictionary 79func (d *Dictionary) Iterator() segment.DictionaryIterator { 80 rv := &DictionaryIterator{ 81 d: d, 82 } 83 84 if d.fst != nil { 85 itr, err := d.fst.Iterator(nil, nil) 86 if err == nil { 87 rv.itr = itr 88 } 89 } 90 91 return rv 92} 93 94// PrefixIterator returns an iterator which only visits terms having the 95// the specified prefix 96func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { 97 rv := &DictionaryIterator{ 98 d: d, 99 } 100 101 if d.fst != nil { 102 r, err := regexp.New(prefix + ".*") 103 if err == nil { 104 itr, err := d.fst.Search(r, nil, nil) 105 if err == nil { 106 rv.itr = itr 107 } 108 } 109 } 110 111 return rv 112} 113 114// RangeIterator returns an iterator which only visits terms between the 115// start and end terms. NOTE: bleve.index API specifies the end is inclusive. 116func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { 117 rv := &DictionaryIterator{ 118 d: d, 119 } 120 121 // need to increment the end position to be inclusive 122 endBytes := []byte(end) 123 if endBytes[len(endBytes)-1] < 0xff { 124 endBytes[len(endBytes)-1]++ 125 } else { 126 endBytes = append(endBytes, 0xff) 127 } 128 129 if d.fst != nil { 130 itr, err := d.fst.Iterator([]byte(start), endBytes) 131 if err == nil { 132 rv.itr = itr 133 } 134 } 135 136 return rv 137} 138 139// DictionaryIterator is an iterator for term dictionary 140type DictionaryIterator struct { 141 d *Dictionary 142 itr vellum.Iterator 143 err error 144 tmp PostingsList 145} 146 147// Next returns the next entry in the dictionary 148func (i *DictionaryIterator) Next() (*index.DictEntry, error) { 149 if i.itr == nil || i.err == vellum.ErrIteratorDone { 150 return nil, nil 151 } else if i.err != nil { 152 return nil, i.err 153 } 154 term, postingsOffset := i.itr.Current() 155 i.err = i.tmp.read(postingsOffset, i.d) 156 if i.err != nil { 157 return nil, i.err 158 } 159 rv := &index.DictEntry{ 160 Term: string(term), 161 Count: i.tmp.Count(), 162 } 163 i.err = i.itr.Next() 164 return rv, nil 165} 166