1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package vellum 16 17import ( 18 "bytes" 19) 20 21// Iterator represents a means of visiting key/value pairs in order. 22type Iterator interface { 23 24 // Current() returns the key/value pair currently pointed to. 25 // The []byte of the key is ONLY guaranteed to be valid until 26 // another call to Next/Seek/Close. If you need it beyond that 27 // point you MUST make a copy. 28 Current() ([]byte, uint64) 29 30 // Next() advances the iterator to the next key/value pair. 31 // If no more key/value pairs exist, ErrIteratorDone is returned. 32 Next() error 33 34 // Seek() advances the iterator the specified key, or the next key 35 // if it does not exist. 36 // If no keys exist after that point, ErrIteratorDone is returned. 37 Seek(key []byte) error 38 39 // Reset resets the Iterator' internal state to allow for iterator 40 // reuse (e.g. pooling). 41 Reset(f *FST, startKeyInclusive, endKeyExclusive []byte, aut Automaton) error 42 43 // Close() frees any resources held by this iterator. 44 Close() error 45} 46 47// FSTIterator is a structure for iterating key/value pairs in this FST in 48// lexicographic order. Iterators should be constructed with the FSTIterator 49// method on the parent FST structure. 50type FSTIterator struct { 51 f *FST 52 aut Automaton 53 54 startKeyInclusive []byte 55 endKeyExclusive []byte 56 57 statesStack []fstState 58 keysStack []byte 59 keysPosStack []int 60 valsStack []uint64 61 autStatesStack []int 62 63 nextStart []byte 64} 65 66func newIterator(f *FST, startKeyInclusive, endKeyExclusive []byte, 67 aut Automaton) (*FSTIterator, error) { 68 69 rv := &FSTIterator{} 70 err := rv.Reset(f, startKeyInclusive, endKeyExclusive, aut) 71 if err != nil { 72 return nil, err 73 } 74 return rv, nil 75} 76 77// Reset resets the Iterator' internal state to allow for iterator 78// reuse (e.g. pooling). 79func (i *FSTIterator) Reset(f *FST, 80 startKeyInclusive, endKeyExclusive []byte, aut Automaton) error { 81 if aut == nil { 82 aut = alwaysMatchAutomaton 83 } 84 85 i.f = f 86 i.startKeyInclusive = startKeyInclusive 87 i.endKeyExclusive = endKeyExclusive 88 i.aut = aut 89 90 return i.pointTo(startKeyInclusive) 91} 92 93// pointTo attempts to point us to the specified location 94func (i *FSTIterator) pointTo(key []byte) error { 95 // tried to seek before start 96 if bytes.Compare(key, i.startKeyInclusive) < 0 { 97 key = i.startKeyInclusive 98 } 99 100 // tried to see past end 101 if i.endKeyExclusive != nil && 102 bytes.Compare(key, i.endKeyExclusive) > 0 { 103 key = i.endKeyExclusive 104 } 105 106 // reset any state, pointTo always starts over 107 i.statesStack = i.statesStack[:0] 108 i.keysStack = i.keysStack[:0] 109 i.keysPosStack = i.keysPosStack[:0] 110 i.valsStack = i.valsStack[:0] 111 i.autStatesStack = i.autStatesStack[:0] 112 113 root, err := i.f.decoder.stateAt(i.f.decoder.getRoot(), nil) 114 if err != nil { 115 return err 116 } 117 118 autStart := i.aut.Start() 119 120 maxQ := -1 121 // root is always part of the path 122 i.statesStack = append(i.statesStack, root) 123 i.autStatesStack = append(i.autStatesStack, autStart) 124 for j := 0; j < len(key); j++ { 125 keyJ := key[j] 126 curr := i.statesStack[len(i.statesStack)-1] 127 autCurr := i.autStatesStack[len(i.autStatesStack)-1] 128 129 pos, nextAddr, nextVal := curr.TransitionFor(keyJ) 130 if nextAddr == noneAddr { 131 // needed transition doesn't exist 132 // find last trans before the one we needed 133 for q := curr.NumTransitions() - 1; q >= 0; q-- { 134 if curr.TransitionAt(q) < keyJ { 135 maxQ = q 136 break 137 } 138 } 139 break 140 } 141 autNext := i.aut.Accept(autCurr, keyJ) 142 143 next, err := i.f.decoder.stateAt(nextAddr, nil) 144 if err != nil { 145 return err 146 } 147 148 i.statesStack = append(i.statesStack, next) 149 i.keysStack = append(i.keysStack, keyJ) 150 i.keysPosStack = append(i.keysPosStack, pos) 151 i.valsStack = append(i.valsStack, nextVal) 152 i.autStatesStack = append(i.autStatesStack, autNext) 153 continue 154 } 155 156 if !i.statesStack[len(i.statesStack)-1].Final() || 157 !i.aut.IsMatch(i.autStatesStack[len(i.autStatesStack)-1]) || 158 bytes.Compare(i.keysStack, key) < 0 { 159 return i.next(maxQ) 160 } 161 162 return nil 163} 164 165// Current returns the key and value currently pointed to by the iterator. 166// If the iterator is not pointing at a valid value (because Iterator/Next/Seek) 167// returned an error previously, it may return nil,0. 168func (i *FSTIterator) Current() ([]byte, uint64) { 169 curr := i.statesStack[len(i.statesStack)-1] 170 if curr.Final() { 171 var total uint64 172 for _, v := range i.valsStack { 173 total += v 174 } 175 total += curr.FinalOutput() 176 return i.keysStack, total 177 } 178 return nil, 0 179} 180 181// Next advances this iterator to the next key/value pair. If there is none 182// or the advancement goes beyond the configured endKeyExclusive, then 183// ErrIteratorDone is returned. 184func (i *FSTIterator) Next() error { 185 return i.next(-1) 186} 187 188func (i *FSTIterator) next(lastOffset int) error { 189 // remember where we started with keysStack in this next() call 190 i.nextStart = append(i.nextStart[:0], i.keysStack...) 191 192 nextOffset := lastOffset + 1 193 allowCompare := false 194 195OUTER: 196 for true { 197 curr := i.statesStack[len(i.statesStack)-1] 198 autCurr := i.autStatesStack[len(i.autStatesStack)-1] 199 200 if curr.Final() && i.aut.IsMatch(autCurr) && allowCompare { 201 // check to see if new keystack might have gone too far 202 if i.endKeyExclusive != nil && 203 bytes.Compare(i.keysStack, i.endKeyExclusive) >= 0 { 204 return ErrIteratorDone 205 } 206 207 cmp := bytes.Compare(i.keysStack, i.nextStart) 208 if cmp > 0 { 209 // in final state greater than start key 210 return nil 211 } 212 } 213 214 numTrans := curr.NumTransitions() 215 216 INNER: 217 for nextOffset < numTrans { 218 t := curr.TransitionAt(nextOffset) 219 220 autNext := i.aut.Accept(autCurr, t) 221 if !i.aut.CanMatch(autNext) { 222 // TODO: potential optimization to skip nextOffset 223 // forwards more directly to something that the 224 // automaton likes rather than a linear scan? 225 nextOffset += 1 226 continue INNER 227 } 228 229 pos, nextAddr, v := curr.TransitionFor(t) 230 231 // the next slot in the statesStack might have an 232 // fstState instance that we can reuse 233 var nextPrealloc fstState 234 if len(i.statesStack) < cap(i.statesStack) { 235 nextPrealloc = i.statesStack[0:cap(i.statesStack)][len(i.statesStack)] 236 } 237 238 // push onto stack 239 next, err := i.f.decoder.stateAt(nextAddr, nextPrealloc) 240 if err != nil { 241 return err 242 } 243 244 i.statesStack = append(i.statesStack, next) 245 i.keysStack = append(i.keysStack, t) 246 i.keysPosStack = append(i.keysPosStack, pos) 247 i.valsStack = append(i.valsStack, v) 248 i.autStatesStack = append(i.autStatesStack, autNext) 249 250 nextOffset = 0 251 allowCompare = true 252 253 continue OUTER 254 } 255 256 // no more transitions, so need to backtrack and stack pop 257 if len(i.statesStack) <= 1 { 258 // stack len is 1 (root), can't go back further, we're done 259 break 260 } 261 262 // if the top of the stack represents a linear chain of states 263 // (i.e., a suffix of nodes linked by single transitions), 264 // then optimize by popping the suffix in one shot without 265 // going back all the way to the OUTER loop 266 var popNum int 267 for j := len(i.statesStack) - 1; j > 0; j-- { 268 if j == 1 || i.statesStack[j].NumTransitions() != 1 { 269 popNum = len(i.statesStack) - 1 - j 270 break 271 } 272 } 273 if popNum < 1 { // always pop at least 1 entry from the stacks 274 popNum = 1 275 } 276 277 nextOffset = i.keysPosStack[len(i.keysPosStack)-popNum] + 1 278 allowCompare = false 279 280 i.statesStack = i.statesStack[:len(i.statesStack)-popNum] 281 i.keysStack = i.keysStack[:len(i.keysStack)-popNum] 282 i.keysPosStack = i.keysPosStack[:len(i.keysPosStack)-popNum] 283 i.valsStack = i.valsStack[:len(i.valsStack)-popNum] 284 i.autStatesStack = i.autStatesStack[:len(i.autStatesStack)-popNum] 285 } 286 287 return ErrIteratorDone 288} 289 290// Seek advances this iterator to the specified key/value pair. If this key 291// is not in the FST, Current() will return the next largest key. If this 292// seek operation would go past the last key, or outside the configured 293// startKeyInclusive/endKeyExclusive then ErrIteratorDone is returned. 294func (i *FSTIterator) Seek(key []byte) error { 295 return i.pointTo(key) 296} 297 298// Close will free any resources held by this iterator. 299func (i *FSTIterator) Close() error { 300 // at the moment we don't do anything, 301 // but wanted this for API completeness 302 return nil 303} 304