1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package vellum 16 17import ( 18 "io" 19 "sync" 20 21 "github.com/willf/bitset" 22) 23 24// FST is an in-memory representation of a finite state transducer, 25// capable of returning the uint64 value associated with 26// each []byte key stored, as well as enumerating all of the keys 27// in order. 28type FST struct { 29 f io.Closer 30 ver int 31 len int 32 typ int 33 data []byte 34 decoder decoder 35} 36 37var fstPool = sync.Pool{New: func() interface{} { return &FST{} }} 38 39func new(data []byte, f io.Closer) (*FST, error) { 40 rv := fstPool.Get().(*FST) 41 rv.data = data 42 rv.f = f 43 44 ver, typ, err := decodeHeader(data) 45 if err != nil { 46 return nil, err 47 } 48 49 if rv.ver == 0 || rv.decoder == nil || // first use 50 (rv.ver != ver || rv.typ != typ) { 51 rv.decoder, err = loadDecoder(ver, rv.data) 52 if err != nil { 53 return nil, err 54 } 55 rv.ver = ver 56 rv.typ = typ 57 } else { 58 rv.decoder.reload(rv.data) 59 } 60 61 rv.len = rv.decoder.getLen() 62 63 return rv, nil 64} 65 66// Contains returns true if this FST contains the specified key. 67func (f *FST) Contains(val []byte) (bool, error) { 68 _, exists, err := f.Get(val) 69 return exists, err 70} 71 72// Get returns the value associated with the key. NOTE: a value of zero 73// does not imply the key does not exist, you must consult the second 74// return value as well. 75func (f *FST) Get(input []byte) (uint64, bool, error) { 76 return f.get(input, nil) 77} 78 79func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) { 80 var total uint64 81 curr := f.decoder.getRoot() 82 state, err := f.decoder.stateAt(curr, prealloc) 83 if err != nil { 84 return 0, false, err 85 } 86 for _, c := range input { 87 _, curr, output := state.TransitionFor(c) 88 if curr == noneAddr { 89 return 0, false, nil 90 } 91 92 state, err = f.decoder.stateAt(curr, state) 93 if err != nil { 94 return 0, false, err 95 } 96 97 total += output 98 } 99 100 if state.Final() { 101 total += state.FinalOutput() 102 return total, true, nil 103 } 104 return 0, false, nil 105} 106 107// Version returns the encoding version used by this FST instance. 108func (f *FST) Version() int { 109 return f.ver 110} 111 112// Len returns the number of entries in this FST instance. 113func (f *FST) Len() int { 114 return f.len 115} 116 117// Type returns the type of this FST instance. 118func (f *FST) Type() int { 119 return f.typ 120} 121 122// Close will unmap any mmap'd data (if managed by vellum) and it will close 123// the backing file (if managed by vellum). You MUST call Close() for any 124// FST instance that is created. 125func (f *FST) Close() error { 126 if f.f != nil { 127 err := f.f.Close() 128 if err != nil { 129 return err 130 } 131 } 132 f.data = nil 133 f.decoder.clear() 134 f.f = nil 135 f.len = 0 136 fstPool.Put(f) 137 return nil 138} 139 140// Start returns the start state of this Automaton 141func (f *FST) Start() int { 142 return f.decoder.getRoot() 143} 144 145// IsMatch returns if this state is a matching state in this Automaton 146func (f *FST) IsMatch(addr int) bool { 147 match, _ := f.IsMatchWithVal(addr) 148 return match 149} 150 151// CanMatch returns if this state can ever transition to a matching state 152// in this Automaton 153func (f *FST) CanMatch(addr int) bool { 154 if addr == noneAddr { 155 return false 156 } 157 return true 158} 159 160// WillAlwaysMatch returns if from this state the Automaton will always 161// be in a matching state 162func (f *FST) WillAlwaysMatch(int) bool { 163 return false 164} 165 166// Accept returns the next state for this Automaton on input of byte b 167func (f *FST) Accept(addr int, b byte) int { 168 next, _ := f.AcceptWithVal(addr, b) 169 return next 170} 171 172// IsMatchWithVal returns if this state is a matching state in this Automaton 173// and also returns the final output value for this state 174func (f *FST) IsMatchWithVal(addr int) (bool, uint64) { 175 s, err := f.decoder.stateAt(addr, nil) 176 if err != nil { 177 return false, 0 178 } 179 return s.Final(), s.FinalOutput() 180} 181 182// AcceptWithVal returns the next state for this Automaton on input of byte b 183// and also returns the output value for the transition 184func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) { 185 s, err := f.decoder.stateAt(addr, nil) 186 if err != nil { 187 return noneAddr, 0 188 } 189 _, next, output := s.TransitionFor(b) 190 return next, output 191} 192 193// Iterator returns a new Iterator capable of enumerating the key/value pairs 194// between the provided startKeyInclusive and endKeyExclusive. 195func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { 196 return newIterator(f, startKeyInclusive, endKeyExclusive, nil) 197} 198 199// Search returns a new Iterator capable of enumerating the key/value pairs 200// between the provided startKeyInclusive and endKeyExclusive that also 201// satisfy the provided automaton. 202func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { 203 return newIterator(f, startKeyInclusive, endKeyExclusive, aut) 204} 205 206// Debug is only intended for debug purposes, it simply asks the underlying 207// decoder visit each state, and pass it to the provided callback. 208func (f *FST) Debug(callback func(int, interface{}) error) error { 209 210 addr := f.decoder.getRoot() 211 set := bitset.New(uint(addr)) 212 stack := addrStack{addr} 213 214 stateNumber := 0 215 stack, addr = stack[:len(stack)-1], stack[len(stack)-1] 216 for addr != noneAddr { 217 if set.Test(uint(addr)) { 218 stack, addr = stack.Pop() 219 continue 220 } 221 set.Set(uint(addr)) 222 state, err := f.decoder.stateAt(addr, nil) 223 if err != nil { 224 return err 225 } 226 err = callback(stateNumber, state) 227 if err != nil { 228 return err 229 } 230 for i := 0; i < state.NumTransitions(); i++ { 231 tchar := state.TransitionAt(i) 232 _, dest, _ := state.TransitionFor(tchar) 233 stack = append(stack, dest) 234 } 235 stateNumber++ 236 stack, addr = stack.Pop() 237 } 238 239 return nil 240} 241 242type addrStack []int 243 244func (a addrStack) Pop() (addrStack, int) { 245 l := len(a) 246 if l < 1 { 247 return a, noneAddr 248 } 249 return a[:l-1], a[l-1] 250} 251 252// A Reader is meant for a single threaded use 253type Reader struct { 254 f *FST 255 prealloc fstStateV1 256} 257 258var fstReaderPool = sync.Pool{New: func() interface{} { return &Reader{} }} 259 260// Reader() returns a Reader instance that a single thread may use to 261// retrieve data from the FST 262func (f *FST) Reader() (*Reader, error) { 263 rv := fstReaderPool.Get().(*Reader) 264 rv.f = f 265 return rv, nil 266} 267 268func (r *Reader) Get(input []byte) (uint64, bool, error) { 269 return r.f.get(input, &r.prealloc) 270} 271 272func (r *Reader) Close() { 273 *r = Reader{} 274 fstReaderPool.Put(r) 275} 276