1//  Copyright (c) 2017 Couchbase, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// 		http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zap
16
17import (
18	"fmt"
19
20	"github.com/RoaringBitmap/roaring"
21	"github.com/blevesearch/bleve/index"
22	"github.com/blevesearch/bleve/index/scorch/segment"
23	"github.com/couchbase/vellum"
24	"github.com/couchbase/vellum/regexp"
25)
26
27// Dictionary is the zap representation of the term dictionary
28type Dictionary struct {
29	sb      *SegmentBase
30	field   string
31	fieldID uint16
32	fst     *vellum.FST
33}
34
35// PostingsList returns the postings list for the specified term
36func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
37	return d.postingsList([]byte(term), except, nil)
38}
39
40func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
41	if d.fst == nil {
42		return d.postingsListInit(rv, except), nil
43	}
44
45	postingsOffset, exists, err := d.fst.Get(term)
46	if err != nil {
47		return nil, fmt.Errorf("vellum err: %v", err)
48	}
49	if !exists {
50		return d.postingsListInit(rv, except), nil
51	}
52
53	return d.postingsListFromOffset(postingsOffset, except, rv)
54}
55
56func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) {
57	rv = d.postingsListInit(rv, except)
58
59	err := rv.read(postingsOffset, d)
60	if err != nil {
61		return nil, err
62	}
63
64	return rv, nil
65}
66
67func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList {
68	if rv == nil {
69		rv = &PostingsList{}
70	} else {
71		*rv = PostingsList{} // clear the struct
72	}
73	rv.sb = d.sb
74	rv.except = except
75	return rv
76}
77
78// Iterator returns an iterator for this dictionary
79func (d *Dictionary) Iterator() segment.DictionaryIterator {
80	rv := &DictionaryIterator{
81		d: d,
82	}
83
84	if d.fst != nil {
85		itr, err := d.fst.Iterator(nil, nil)
86		if err == nil {
87			rv.itr = itr
88		}
89	}
90
91	return rv
92}
93
94// PrefixIterator returns an iterator which only visits terms having the
95// the specified prefix
96func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
97	rv := &DictionaryIterator{
98		d: d,
99	}
100
101	if d.fst != nil {
102		r, err := regexp.New(prefix + ".*")
103		if err == nil {
104			itr, err := d.fst.Search(r, nil, nil)
105			if err == nil {
106				rv.itr = itr
107			}
108		}
109	}
110
111	return rv
112}
113
114// RangeIterator returns an iterator which only visits terms between the
115// start and end terms.  NOTE: bleve.index API specifies the end is inclusive.
116func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
117	rv := &DictionaryIterator{
118		d: d,
119	}
120
121	// need to increment the end position to be inclusive
122	endBytes := []byte(end)
123	if endBytes[len(endBytes)-1] < 0xff {
124		endBytes[len(endBytes)-1]++
125	} else {
126		endBytes = append(endBytes, 0xff)
127	}
128
129	if d.fst != nil {
130		itr, err := d.fst.Iterator([]byte(start), endBytes)
131		if err == nil {
132			rv.itr = itr
133		}
134	}
135
136	return rv
137}
138
139// DictionaryIterator is an iterator for term dictionary
140type DictionaryIterator struct {
141	d   *Dictionary
142	itr vellum.Iterator
143	err error
144	tmp PostingsList
145}
146
147// Next returns the next entry in the dictionary
148func (i *DictionaryIterator) Next() (*index.DictEntry, error) {
149	if i.itr == nil || i.err == vellum.ErrIteratorDone {
150		return nil, nil
151	} else if i.err != nil {
152		return nil, i.err
153	}
154	term, postingsOffset := i.itr.Current()
155	i.err = i.tmp.read(postingsOffset, i.d)
156	if i.err != nil {
157		return nil, i.err
158	}
159	rv := &index.DictEntry{
160		Term:  string(term),
161		Count: i.tmp.Count(),
162	}
163	i.err = i.itr.Next()
164	return rv, nil
165}
166