1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package mem 16 17import ( 18 "github.com/RoaringBitmap/roaring" 19 "github.com/blevesearch/bleve/index/scorch/segment" 20) 21 22// PostingsList is an in-memory represenation of a postings list 23type PostingsList struct { 24 dictionary *Dictionary 25 term string 26 postingsID uint64 27 except *roaring.Bitmap 28} 29 30// Count returns the number of items on this postings list 31func (p *PostingsList) Count() uint64 { 32 var rv uint64 33 if p.postingsID > 0 { 34 rv = p.dictionary.segment.Postings[p.postingsID-1].GetCardinality() 35 if p.except != nil { 36 except := p.except.GetCardinality() 37 if except > rv { 38 // avoid underflow 39 except = rv 40 } 41 rv -= except 42 } 43 } 44 return rv 45} 46 47// Iterator returns an iterator for this postings list 48func (p *PostingsList) Iterator() segment.PostingsIterator { 49 rv := &PostingsIterator{ 50 postings: p, 51 } 52 if p.postingsID > 0 { 53 allbits := p.dictionary.segment.Postings[p.postingsID-1] 54 rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1] 55 rv.all = allbits.Iterator() 56 if p.except != nil { 57 allExcept := allbits.Clone() 58 allExcept.AndNot(p.except) 59 rv.actual = allExcept.Iterator() 60 } else { 61 rv.actual = allbits.Iterator() 62 } 63 } 64 65 return rv 66} 67 68// PostingsIterator provides a way to iterate through the postings list 69type PostingsIterator struct { 70 postings *PostingsList 71 all roaring.IntIterable 72 locations *roaring.Bitmap 73 offset int 74 locoffset int 75 actual roaring.IntIterable 76} 77 78// Next returns the next posting on the postings list, or nil at the end 79func (i *PostingsIterator) Next() (segment.Posting, error) { 80 if i.actual == nil || !i.actual.HasNext() { 81 return nil, nil 82 } 83 n := i.actual.Next() 84 allN := i.all.Next() 85 86 // n is the next actual hit (excluding some postings) 87 // allN is the next hit in the full postings 88 // if they don't match, adjust offsets to factor in item we're skipping over 89 // incr the all iterator, and check again 90 for allN != n { 91 i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) 92 i.offset++ 93 allN = i.all.Next() 94 } 95 rv := &Posting{ 96 iterator: i, 97 docNum: uint64(n), 98 offset: i.offset, 99 locoffset: i.locoffset, 100 hasLoc: i.locations.Contains(n), 101 } 102 103 i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) 104 i.offset++ 105 return rv, nil 106} 107 108// Posting is a single entry in a postings list 109type Posting struct { 110 iterator *PostingsIterator 111 docNum uint64 112 offset int 113 locoffset int 114 hasLoc bool 115} 116 117// Number returns the document number of this posting in this segment 118func (p *Posting) Number() uint64 { 119 return p.docNum 120} 121 122// Frequency returns the frequence of occurance of this term in this doc/field 123func (p *Posting) Frequency() uint64 { 124 return p.iterator.postings.dictionary.segment.Freqs[p.iterator.postings.postingsID-1][p.offset] 125} 126 127// Norm returns the normalization factor for this posting 128func (p *Posting) Norm() float64 { 129 return float64(p.iterator.postings.dictionary.segment.Norms[p.iterator.postings.postingsID-1][p.offset]) 130} 131 132// Locations returns the location information for each occurance 133func (p *Posting) Locations() []segment.Location { 134 if !p.hasLoc { 135 return nil 136 } 137 freq := int(p.Frequency()) 138 rv := make([]segment.Location, freq) 139 for i := 0; i < freq; i++ { 140 rv[i] = &Location{ 141 p: p, 142 offset: p.locoffset + i, 143 } 144 } 145 return rv 146} 147 148// Location represents the location of a single occurance 149type Location struct { 150 p *Posting 151 offset int 152} 153 154// Field returns the name of the field (useful in composite fields to know 155// which original field the value came from) 156func (l *Location) Field() string { 157 return l.p.iterator.postings.dictionary.segment.FieldsInv[l.p.iterator.postings.dictionary.segment.Locfields[l.p.iterator.postings.postingsID-1][l.offset]] 158} 159 160// Start returns the start byte offset of this occurance 161func (l *Location) Start() uint64 { 162 return l.p.iterator.postings.dictionary.segment.Locstarts[l.p.iterator.postings.postingsID-1][l.offset] 163} 164 165// End returns the end byte offset of this occurance 166func (l *Location) End() uint64 { 167 return l.p.iterator.postings.dictionary.segment.Locends[l.p.iterator.postings.postingsID-1][l.offset] 168} 169 170// Pos returns the 1-based phrase position of this occurance 171func (l *Location) Pos() uint64 { 172 return l.p.iterator.postings.dictionary.segment.Locpos[l.p.iterator.postings.postingsID-1][l.offset] 173} 174 175// ArrayPositions returns the array position vector associated with this occurance 176func (l *Location) ArrayPositions() []uint64 { 177 return l.p.iterator.postings.dictionary.segment.Locarraypos[l.p.iterator.postings.postingsID-1][l.offset] 178} 179