1//  Copyright (c) 2017 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package vellum
16
17import (
18	"io"
19	"sync"
20
21	"github.com/willf/bitset"
22)
23
24// FST is an in-memory representation of a finite state transducer,
25// capable of returning the uint64 value associated with
26// each []byte key stored, as well as enumerating all of the keys
27// in order.
28type FST struct {
29	f       io.Closer
30	ver     int
31	len     int
32	typ     int
33	data    []byte
34	decoder decoder
35}
36
37var fstPool = sync.Pool{New: func() interface{} { return &FST{} }}
38
39func new(data []byte, f io.Closer) (*FST, error) {
40	rv := fstPool.Get().(*FST)
41	rv.data = data
42	rv.f = f
43
44	ver, typ, err := decodeHeader(data)
45	if err != nil {
46		return nil, err
47	}
48
49	if rv.ver == 0 || rv.decoder == nil || // first use
50		(rv.ver != ver || rv.typ != typ) {
51		rv.decoder, err = loadDecoder(ver, rv.data)
52		if err != nil {
53			return nil, err
54		}
55		rv.ver = ver
56		rv.typ = typ
57	} else {
58		rv.decoder.reload(rv.data)
59	}
60
61	rv.len = rv.decoder.getLen()
62
63	return rv, nil
64}
65
66// Contains returns true if this FST contains the specified key.
67func (f *FST) Contains(val []byte) (bool, error) {
68	_, exists, err := f.Get(val)
69	return exists, err
70}
71
72// Get returns the value associated with the key.  NOTE: a value of zero
73// does not imply the key does not exist, you must consult the second
74// return value as well.
75func (f *FST) Get(input []byte) (uint64, bool, error) {
76	return f.get(input, nil)
77}
78
79func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
80	var total uint64
81	curr := f.decoder.getRoot()
82	state, err := f.decoder.stateAt(curr, prealloc)
83	if err != nil {
84		return 0, false, err
85	}
86	for _, c := range input {
87		_, curr, output := state.TransitionFor(c)
88		if curr == noneAddr {
89			return 0, false, nil
90		}
91
92		state, err = f.decoder.stateAt(curr, state)
93		if err != nil {
94			return 0, false, err
95		}
96
97		total += output
98	}
99
100	if state.Final() {
101		total += state.FinalOutput()
102		return total, true, nil
103	}
104	return 0, false, nil
105}
106
107// Version returns the encoding version used by this FST instance.
108func (f *FST) Version() int {
109	return f.ver
110}
111
112// Len returns the number of entries in this FST instance.
113func (f *FST) Len() int {
114	return f.len
115}
116
117// Type returns the type of this FST instance.
118func (f *FST) Type() int {
119	return f.typ
120}
121
122// Close will unmap any mmap'd data (if managed by vellum) and it will close
123// the backing file (if managed by vellum).  You MUST call Close() for any
124// FST instance that is created.
125func (f *FST) Close() error {
126	if f.f != nil {
127		err := f.f.Close()
128		if err != nil {
129			return err
130		}
131	}
132	f.data = nil
133	f.decoder.clear()
134	f.f = nil
135	f.len = 0
136	fstPool.Put(f)
137	return nil
138}
139
140// Start returns the start state of this Automaton
141func (f *FST) Start() int {
142	return f.decoder.getRoot()
143}
144
145// IsMatch returns if this state is a matching state in this Automaton
146func (f *FST) IsMatch(addr int) bool {
147	match, _ := f.IsMatchWithVal(addr)
148	return match
149}
150
151// CanMatch returns if this state can ever transition to a matching state
152// in this Automaton
153func (f *FST) CanMatch(addr int) bool {
154	if addr == noneAddr {
155		return false
156	}
157	return true
158}
159
160// WillAlwaysMatch returns if from this state the Automaton will always
161// be in a matching state
162func (f *FST) WillAlwaysMatch(int) bool {
163	return false
164}
165
166// Accept returns the next state for this Automaton on input of byte b
167func (f *FST) Accept(addr int, b byte) int {
168	next, _ := f.AcceptWithVal(addr, b)
169	return next
170}
171
172// IsMatchWithVal returns if this state is a matching state in this Automaton
173// and also returns the final output value for this state
174func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
175	s, err := f.decoder.stateAt(addr, nil)
176	if err != nil {
177		return false, 0
178	}
179	return s.Final(), s.FinalOutput()
180}
181
182// AcceptWithVal returns the next state for this Automaton on input of byte b
183// and also returns the output value for the transition
184func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
185	s, err := f.decoder.stateAt(addr, nil)
186	if err != nil {
187		return noneAddr, 0
188	}
189	_, next, output := s.TransitionFor(b)
190	return next, output
191}
192
193// Iterator returns a new Iterator capable of enumerating the key/value pairs
194// between the provided startKeyInclusive and endKeyExclusive.
195func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
196	return newIterator(f, startKeyInclusive, endKeyExclusive, nil)
197}
198
199// Search returns a new Iterator capable of enumerating the key/value pairs
200// between the provided startKeyInclusive and endKeyExclusive that also
201// satisfy the provided automaton.
202func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
203	return newIterator(f, startKeyInclusive, endKeyExclusive, aut)
204}
205
206// Debug is only intended for debug purposes, it simply asks the underlying
207// decoder visit each state, and pass it to the provided callback.
208func (f *FST) Debug(callback func(int, interface{}) error) error {
209
210	addr := f.decoder.getRoot()
211	set := bitset.New(uint(addr))
212	stack := addrStack{addr}
213
214	stateNumber := 0
215	stack, addr = stack[:len(stack)-1], stack[len(stack)-1]
216	for addr != noneAddr {
217		if set.Test(uint(addr)) {
218			stack, addr = stack.Pop()
219			continue
220		}
221		set.Set(uint(addr))
222		state, err := f.decoder.stateAt(addr, nil)
223		if err != nil {
224			return err
225		}
226		err = callback(stateNumber, state)
227		if err != nil {
228			return err
229		}
230		for i := 0; i < state.NumTransitions(); i++ {
231			tchar := state.TransitionAt(i)
232			_, dest, _ := state.TransitionFor(tchar)
233			stack = append(stack, dest)
234		}
235		stateNumber++
236		stack, addr = stack.Pop()
237	}
238
239	return nil
240}
241
242type addrStack []int
243
244func (a addrStack) Pop() (addrStack, int) {
245	l := len(a)
246	if l < 1 {
247		return a, noneAddr
248	}
249	return a[:l-1], a[l-1]
250}
251
252// A Reader is meant for a single threaded use
253type Reader struct {
254	f        *FST
255	prealloc fstStateV1
256}
257
258var fstReaderPool = sync.Pool{New: func() interface{} { return &Reader{} }}
259
260// Reader() returns a Reader instance that a single thread may use to
261// retrieve data from the FST
262func (f *FST) Reader() (*Reader, error) {
263	rv := fstReaderPool.Get().(*Reader)
264	rv.f = f
265	return rv, nil
266}
267
268func (r *Reader) Get(input []byte) (uint64, bool, error) {
269	return r.f.get(input, &r.prealloc)
270}
271
272func (r *Reader) Close() {
273	*r = Reader{}
274	fstReaderPool.Put(r)
275}
276