1//  Copyright (c) 2017 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package vellum
16
17import (
18	"io"
19
20	"github.com/willf/bitset"
21)
22
23// FST is an in-memory representation of a finite state transducer,
24// capable of returning the uint64 value associated with
25// each []byte key stored, as well as enumerating all of the keys
26// in order.
27type FST struct {
28	f       io.Closer
29	ver     int
30	len     int
31	typ     int
32	data    []byte
33	decoder decoder
34}
35
36func new(data []byte, f io.Closer) (rv *FST, err error) {
37	rv = &FST{
38		data: data,
39		f:    f,
40	}
41
42	rv.ver, rv.typ, err = decodeHeader(data)
43	if err != nil {
44		return nil, err
45	}
46
47	rv.decoder, err = loadDecoder(rv.ver, rv.data)
48	if err != nil {
49		return nil, err
50	}
51
52	rv.len = rv.decoder.getLen()
53
54	return rv, nil
55}
56
57// Contains returns true if this FST contains the specified key.
58func (f *FST) Contains(val []byte) (bool, error) {
59	_, exists, err := f.Get(val)
60	return exists, err
61}
62
63// Get returns the value associated with the key.  NOTE: a value of zero
64// does not imply the key does not exist, you must consult the second
65// return value as well.
66func (f *FST) Get(input []byte) (uint64, bool, error) {
67	return f.get(input, nil)
68}
69
70func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
71	var total uint64
72	curr := f.decoder.getRoot()
73	state, err := f.decoder.stateAt(curr, prealloc)
74	if err != nil {
75		return 0, false, err
76	}
77	for _, c := range input {
78		_, curr, output := state.TransitionFor(c)
79		if curr == noneAddr {
80			return 0, false, nil
81		}
82
83		state, err = f.decoder.stateAt(curr, state)
84		if err != nil {
85			return 0, false, err
86		}
87
88		total += output
89	}
90
91	if state.Final() {
92		total += state.FinalOutput()
93		return total, true, nil
94	}
95	return 0, false, nil
96}
97
98// Version returns the encoding version used by this FST instance.
99func (f *FST) Version() int {
100	return f.ver
101}
102
103// Len returns the number of entries in this FST instance.
104func (f *FST) Len() int {
105	return f.len
106}
107
108// Type returns the type of this FST instance.
109func (f *FST) Type() int {
110	return f.typ
111}
112
113// Close will unmap any mmap'd data (if managed by vellum) and it will close
114// the backing file (if managed by vellum).  You MUST call Close() for any
115// FST instance that is created.
116func (f *FST) Close() error {
117	if f.f != nil {
118		err := f.f.Close()
119		if err != nil {
120			return err
121		}
122	}
123	f.data = nil
124	f.decoder = nil
125	return nil
126}
127
128// Start returns the start state of this Automaton
129func (f *FST) Start() int {
130	return f.decoder.getRoot()
131}
132
133// IsMatch returns if this state is a matching state in this Automaton
134func (f *FST) IsMatch(addr int) bool {
135	match, _ := f.IsMatchWithVal(addr)
136	return match
137}
138
139// CanMatch returns if this state can ever transition to a matching state
140// in this Automaton
141func (f *FST) CanMatch(addr int) bool {
142	if addr == noneAddr {
143		return false
144	}
145	return true
146}
147
148// WillAlwaysMatch returns if from this state the Automaton will always
149// be in a matching state
150func (f *FST) WillAlwaysMatch(int) bool {
151	return false
152}
153
154// Accept returns the next state for this Automaton on input of byte b
155func (f *FST) Accept(addr int, b byte) int {
156	next, _ := f.AcceptWithVal(addr, b)
157	return next
158}
159
160// IsMatchWithVal returns if this state is a matching state in this Automaton
161// and also returns the final output value for this state
162func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
163	s, err := f.decoder.stateAt(addr, nil)
164	if err != nil {
165		return false, 0
166	}
167	return s.Final(), s.FinalOutput()
168}
169
170// AcceptWithVal returns the next state for this Automaton on input of byte b
171// and also returns the output value for the transition
172func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
173	s, err := f.decoder.stateAt(addr, nil)
174	if err != nil {
175		return noneAddr, 0
176	}
177	_, next, output := s.TransitionFor(b)
178	return next, output
179}
180
181// Iterator returns a new Iterator capable of enumerating the key/value pairs
182// between the provided startKeyInclusive and endKeyExclusive.
183func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
184	return newIterator(f, startKeyInclusive, endKeyExclusive, nil)
185}
186
187// Search returns a new Iterator capable of enumerating the key/value pairs
188// between the provided startKeyInclusive and endKeyExclusive that also
189// satisfy the provided automaton.
190func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
191	return newIterator(f, startKeyInclusive, endKeyExclusive, aut)
192}
193
194// Debug is only intended for debug purposes, it simply asks the underlying
195// decoder visit each state, and pass it to the provided callback.
196func (f *FST) Debug(callback func(int, interface{}) error) error {
197
198	addr := f.decoder.getRoot()
199	set := bitset.New(uint(addr))
200	stack := addrStack{addr}
201
202	stateNumber := 0
203	stack, addr = stack[:len(stack)-1], stack[len(stack)-1]
204	for addr != noneAddr {
205		if set.Test(uint(addr)) {
206			stack, addr = stack.Pop()
207			continue
208		}
209		set.Set(uint(addr))
210		state, err := f.decoder.stateAt(addr, nil)
211		if err != nil {
212			return err
213		}
214		err = callback(stateNumber, state)
215		if err != nil {
216			return err
217		}
218		for i := 0; i < state.NumTransitions(); i++ {
219			tchar := state.TransitionAt(i)
220			_, dest, _ := state.TransitionFor(tchar)
221			stack = append(stack, dest)
222		}
223		stateNumber++
224		stack, addr = stack.Pop()
225	}
226
227	return nil
228}
229
230type addrStack []int
231
232func (a addrStack) Pop() (addrStack, int) {
233	l := len(a)
234	if l < 1 {
235		return a, noneAddr
236	}
237	return a[:l-1], a[l-1]
238}
239
240// Reader() returns a Reader instance that a single thread may use to
241// retrieve data from the FST
242func (f *FST) Reader() (*Reader, error) {
243	return &Reader{f: f}, nil
244}
245
246func (f *FST) GetMinKey() ([]byte, error) {
247	var rv []byte
248
249	curr := f.decoder.getRoot()
250	state, err := f.decoder.stateAt(curr, nil)
251	if err != nil {
252		return nil, err
253	}
254
255	for !state.Final() {
256		nextTrans := state.TransitionAt(0)
257		_, curr, _ = state.TransitionFor(nextTrans)
258		state, err = f.decoder.stateAt(curr, state)
259		if err != nil {
260			return nil, err
261		}
262
263		rv = append(rv, nextTrans)
264	}
265
266	return rv, nil
267}
268
269func (f *FST) GetMaxKey() ([]byte, error) {
270	var rv []byte
271
272	curr := f.decoder.getRoot()
273	state, err := f.decoder.stateAt(curr, nil)
274	if err != nil {
275		return nil, err
276	}
277
278	for state.NumTransitions() > 0 {
279		nextTrans := state.TransitionAt(state.NumTransitions() - 1)
280		_, curr, _ = state.TransitionFor(nextTrans)
281		state, err = f.decoder.stateAt(curr, state)
282		if err != nil {
283			return nil, err
284		}
285
286		rv = append(rv, nextTrans)
287	}
288
289	return rv, nil
290}
291
292// A Reader is meant for a single threaded use
293type Reader struct {
294	f        *FST
295	prealloc fstStateV1
296}
297
298func (r *Reader) Get(input []byte) (uint64, bool, error) {
299	return r.f.get(input, &r.prealloc)
300}
301