1// Copyright (c) 2017 Couchbase, Inc. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package vellum 16 17import ( 18 "io" 19 20 "github.com/willf/bitset" 21) 22 23// FST is an in-memory representation of a finite state transducer, 24// capable of returning the uint64 value associated with 25// each []byte key stored, as well as enumerating all of the keys 26// in order. 27type FST struct { 28 f io.Closer 29 ver int 30 len int 31 typ int 32 data []byte 33 decoder decoder 34} 35 36func new(data []byte, f io.Closer) (rv *FST, err error) { 37 rv = &FST{ 38 data: data, 39 f: f, 40 } 41 42 rv.ver, rv.typ, err = decodeHeader(data) 43 if err != nil { 44 return nil, err 45 } 46 47 rv.decoder, err = loadDecoder(rv.ver, rv.data) 48 if err != nil { 49 return nil, err 50 } 51 52 rv.len = rv.decoder.getLen() 53 54 return rv, nil 55} 56 57// Contains returns true if this FST contains the specified key. 58func (f *FST) Contains(val []byte) (bool, error) { 59 _, exists, err := f.Get(val) 60 return exists, err 61} 62 63// Get returns the value associated with the key. NOTE: a value of zero 64// does not imply the key does not exist, you must consult the second 65// return value as well. 66func (f *FST) Get(input []byte) (uint64, bool, error) { 67 return f.get(input, nil) 68} 69 70func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) { 71 var total uint64 72 curr := f.decoder.getRoot() 73 state, err := f.decoder.stateAt(curr, prealloc) 74 if err != nil { 75 return 0, false, err 76 } 77 for _, c := range input { 78 _, curr, output := state.TransitionFor(c) 79 if curr == noneAddr { 80 return 0, false, nil 81 } 82 83 state, err = f.decoder.stateAt(curr, state) 84 if err != nil { 85 return 0, false, err 86 } 87 88 total += output 89 } 90 91 if state.Final() { 92 total += state.FinalOutput() 93 return total, true, nil 94 } 95 return 0, false, nil 96} 97 98// Version returns the encoding version used by this FST instance. 99func (f *FST) Version() int { 100 return f.ver 101} 102 103// Len returns the number of entries in this FST instance. 104func (f *FST) Len() int { 105 return f.len 106} 107 108// Type returns the type of this FST instance. 109func (f *FST) Type() int { 110 return f.typ 111} 112 113// Close will unmap any mmap'd data (if managed by vellum) and it will close 114// the backing file (if managed by vellum). You MUST call Close() for any 115// FST instance that is created. 116func (f *FST) Close() error { 117 if f.f != nil { 118 err := f.f.Close() 119 if err != nil { 120 return err 121 } 122 } 123 f.data = nil 124 f.decoder = nil 125 return nil 126} 127 128// Start returns the start state of this Automaton 129func (f *FST) Start() int { 130 return f.decoder.getRoot() 131} 132 133// IsMatch returns if this state is a matching state in this Automaton 134func (f *FST) IsMatch(addr int) bool { 135 match, _ := f.IsMatchWithVal(addr) 136 return match 137} 138 139// CanMatch returns if this state can ever transition to a matching state 140// in this Automaton 141func (f *FST) CanMatch(addr int) bool { 142 if addr == noneAddr { 143 return false 144 } 145 return true 146} 147 148// WillAlwaysMatch returns if from this state the Automaton will always 149// be in a matching state 150func (f *FST) WillAlwaysMatch(int) bool { 151 return false 152} 153 154// Accept returns the next state for this Automaton on input of byte b 155func (f *FST) Accept(addr int, b byte) int { 156 next, _ := f.AcceptWithVal(addr, b) 157 return next 158} 159 160// IsMatchWithVal returns if this state is a matching state in this Automaton 161// and also returns the final output value for this state 162func (f *FST) IsMatchWithVal(addr int) (bool, uint64) { 163 s, err := f.decoder.stateAt(addr, nil) 164 if err != nil { 165 return false, 0 166 } 167 return s.Final(), s.FinalOutput() 168} 169 170// AcceptWithVal returns the next state for this Automaton on input of byte b 171// and also returns the output value for the transition 172func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) { 173 s, err := f.decoder.stateAt(addr, nil) 174 if err != nil { 175 return noneAddr, 0 176 } 177 _, next, output := s.TransitionFor(b) 178 return next, output 179} 180 181// Iterator returns a new Iterator capable of enumerating the key/value pairs 182// between the provided startKeyInclusive and endKeyExclusive. 183func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { 184 return newIterator(f, startKeyInclusive, endKeyExclusive, nil) 185} 186 187// Search returns a new Iterator capable of enumerating the key/value pairs 188// between the provided startKeyInclusive and endKeyExclusive that also 189// satisfy the provided automaton. 190func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) { 191 return newIterator(f, startKeyInclusive, endKeyExclusive, aut) 192} 193 194// Debug is only intended for debug purposes, it simply asks the underlying 195// decoder visit each state, and pass it to the provided callback. 196func (f *FST) Debug(callback func(int, interface{}) error) error { 197 198 addr := f.decoder.getRoot() 199 set := bitset.New(uint(addr)) 200 stack := addrStack{addr} 201 202 stateNumber := 0 203 stack, addr = stack[:len(stack)-1], stack[len(stack)-1] 204 for addr != noneAddr { 205 if set.Test(uint(addr)) { 206 stack, addr = stack.Pop() 207 continue 208 } 209 set.Set(uint(addr)) 210 state, err := f.decoder.stateAt(addr, nil) 211 if err != nil { 212 return err 213 } 214 err = callback(stateNumber, state) 215 if err != nil { 216 return err 217 } 218 for i := 0; i < state.NumTransitions(); i++ { 219 tchar := state.TransitionAt(i) 220 _, dest, _ := state.TransitionFor(tchar) 221 stack = append(stack, dest) 222 } 223 stateNumber++ 224 stack, addr = stack.Pop() 225 } 226 227 return nil 228} 229 230type addrStack []int 231 232func (a addrStack) Pop() (addrStack, int) { 233 l := len(a) 234 if l < 1 { 235 return a, noneAddr 236 } 237 return a[:l-1], a[l-1] 238} 239 240// Reader() returns a Reader instance that a single thread may use to 241// retrieve data from the FST 242func (f *FST) Reader() (*Reader, error) { 243 return &Reader{f: f}, nil 244} 245 246func (f *FST) GetMinKey() ([]byte, error) { 247 var rv []byte 248 249 curr := f.decoder.getRoot() 250 state, err := f.decoder.stateAt(curr, nil) 251 if err != nil { 252 return nil, err 253 } 254 255 for !state.Final() { 256 nextTrans := state.TransitionAt(0) 257 _, curr, _ = state.TransitionFor(nextTrans) 258 state, err = f.decoder.stateAt(curr, state) 259 if err != nil { 260 return nil, err 261 } 262 263 rv = append(rv, nextTrans) 264 } 265 266 return rv, nil 267} 268 269func (f *FST) GetMaxKey() ([]byte, error) { 270 var rv []byte 271 272 curr := f.decoder.getRoot() 273 state, err := f.decoder.stateAt(curr, nil) 274 if err != nil { 275 return nil, err 276 } 277 278 for state.NumTransitions() > 0 { 279 nextTrans := state.TransitionAt(state.NumTransitions() - 1) 280 _, curr, _ = state.TransitionFor(nextTrans) 281 state, err = f.decoder.stateAt(curr, state) 282 if err != nil { 283 return nil, err 284 } 285 286 rv = append(rv, nextTrans) 287 } 288 289 return rv, nil 290} 291 292// A Reader is meant for a single threaded use 293type Reader struct { 294 f *FST 295 prealloc fstStateV1 296} 297 298func (r *Reader) Get(input []byte) (uint64, bool, error) { 299 return r.f.get(input, &r.prealloc) 300} 301