1 /* -*- c-basic-offset: 2 -*- */
2 /* Copyright(C) 2011 Brazil
3 
4   This library is free software; you can redistribute it and/or
5   modify it under the terms of the GNU Lesser General Public
6   License version 2.1 as published by the Free Software Foundation.
7 
8   This library is distributed in the hope that it will be useful,
9   but WITHOUT ANY WARRANTY; without even the implied warranty of
10   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11   Lesser General Public License for more details.
12 
13   You should have received a copy of the GNU Lesser General Public
14   License along with this library; if not, write to the Free Software
15   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA
16 */
17 
18 #include "predictive-cursor.hpp"
19 
20 #include <algorithm>
21 #include <cstring>
22 
23 #include "trie.hpp"
24 
25 namespace grn {
26 namespace dat {
27 
PredictiveCursor()28 PredictiveCursor::PredictiveCursor()
29     : trie_(NULL),
30       offset_(0),
31       limit_(MAX_UINT32),
32       flags_(PREDICTIVE_CURSOR),
33       buf_(),
34       cur_(0),
35       end_(0),
36       min_length_(0) {}
37 
~PredictiveCursor()38 PredictiveCursor::~PredictiveCursor() {}
39 
open(const Trie & trie,const String & str,UInt32 offset,UInt32 limit,UInt32 flags)40 void PredictiveCursor::open(const Trie &trie,
41                             const String &str,
42                             UInt32 offset,
43                             UInt32 limit,
44                             UInt32 flags) {
45   GRN_DAT_THROW_IF(PARAM_ERROR, (str.ptr() == NULL) && (str.length() != 0));
46 
47   flags = fix_flags(flags);
48   PredictiveCursor new_cursor(trie, offset, limit, flags);
49   new_cursor.init(str);
50   new_cursor.swap(this);
51 }
52 
close()53 void PredictiveCursor::close() {
54   PredictiveCursor new_cursor;
55   new_cursor.swap(this);
56 }
57 
next()58 const Key &PredictiveCursor::next() {
59   if (cur_ == end_) {
60     return Key::invalid_key();
61   }
62 
63   if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
64     return ascending_next();
65   } else {
66     return descending_next();
67   }
68 }
69 
PredictiveCursor(const Trie & trie,UInt32 offset,UInt32 limit,UInt32 flags)70 PredictiveCursor::PredictiveCursor(const Trie &trie,
71                                    UInt32 offset, UInt32 limit, UInt32 flags)
72     : trie_(&trie),
73       offset_(offset),
74       limit_(limit),
75       flags_(flags),
76       buf_(),
77       cur_(0),
78       end_(0),
79       min_length_(0) {}
80 
fix_flags(UInt32 flags) const81 UInt32 PredictiveCursor::fix_flags(UInt32 flags) const {
82   const UInt32 cursor_type = flags & CURSOR_TYPE_MASK;
83   GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_type != 0) &&
84                                 (cursor_type != PREDICTIVE_CURSOR));
85   flags |= PREDICTIVE_CURSOR;
86 
87   const UInt32 cursor_order = flags & CURSOR_ORDER_MASK;
88   GRN_DAT_THROW_IF(PARAM_ERROR, (cursor_order != 0) &&
89                                 (cursor_order != ASCENDING_CURSOR) &&
90                                 (cursor_order != DESCENDING_CURSOR));
91   if (cursor_order == 0) {
92     flags |= ASCENDING_CURSOR;
93   }
94 
95   const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK;
96   GRN_DAT_THROW_IF(PARAM_ERROR, cursor_options & ~(EXCEPT_EXACT_MATCH));
97 
98   return flags;
99 }
100 
init(const String & str)101 void PredictiveCursor::init(const String &str) {
102   if (limit_ == 0) {
103     return;
104   }
105 
106   min_length_ = str.length();
107   if ((flags_ & EXCEPT_EXACT_MATCH) == EXCEPT_EXACT_MATCH) {
108     ++min_length_;
109   }
110   end_ = (offset_ > (MAX_UINT32 - limit_)) ? MAX_UINT32 : (offset_ + limit_);
111 
112   UInt32 node_id = ROOT_NODE_ID;
113   for (UInt32 i = 0; i < str.length(); ++i) {
114     const Base base = trie_->ith_node(node_id).base();
115     if (base.is_linker()) {
116       if (offset_ == 0) {
117         const Key &key = trie_->get_key(base.key_pos());
118         if ((key.length() >= str.length()) &&
119             (key.str().substr(0, str.length()).compare(str, i) == 0)) {
120           if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
121             node_id |= IS_ROOT_FLAG;
122           }
123           buf_.push_back(node_id);
124         }
125       }
126       return;
127     }
128 
129     node_id = base.offset() ^ str[i];
130     if (trie_->ith_node(node_id).label() != str[i]) {
131       return;
132     }
133   }
134 
135   if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) {
136     node_id |= IS_ROOT_FLAG;
137   }
138   buf_.push_back(node_id);
139 }
140 
swap(PredictiveCursor * cursor)141 void PredictiveCursor::swap(PredictiveCursor *cursor) {
142   std::swap(trie_, cursor->trie_);
143   std::swap(offset_, cursor->offset_);
144   std::swap(limit_, cursor->limit_);
145   std::swap(flags_, cursor->flags_);
146   buf_.swap(&cursor->buf_);
147   std::swap(cur_, cursor->cur_);
148   std::swap(end_, cursor->end_);
149   std::swap(min_length_, cursor->min_length_);
150 }
151 
ascending_next()152 const Key &PredictiveCursor::ascending_next() {
153   while (!buf_.empty()) {
154     const bool is_root = (buf_.back() & IS_ROOT_FLAG) == IS_ROOT_FLAG;
155     const UInt32 node_id = buf_.back() & ~IS_ROOT_FLAG;
156     buf_.pop_back();
157 
158     const Node node = trie_->ith_node(node_id);
159     if (!is_root && (node.sibling() != INVALID_LABEL)) {
160       buf_.push_back(node_id ^ node.label() ^ node.sibling());
161     }
162 
163     if (node.is_linker()) {
164       const Key &key = trie_->get_key(node.key_pos());
165       if (key.length() >= min_length_) {
166         if (cur_++ >= offset_) {
167           return key;
168         }
169       }
170     } else if (node.child() != INVALID_LABEL) {
171       buf_.push_back(node.offset() ^ node.child());
172     }
173   }
174   return Key::invalid_key();
175 }
176 
descending_next()177 const Key &PredictiveCursor::descending_next() {
178   while (!buf_.empty()) {
179     const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG;
180     const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG;
181 
182     const Base base = trie_->ith_node(node_id).base();
183     if (post_order) {
184       buf_.pop_back();
185       if (base.is_linker()) {
186         const Key &key = trie_->get_key(base.key_pos());
187         if (key.length() >= min_length_) {
188           if (cur_++ >= offset_) {
189             return key;
190           }
191         }
192       }
193     } else {
194       buf_.back() |= POST_ORDER_FLAG;
195       UInt16 label = trie_->ith_node(node_id).child();
196       while (label != INVALID_LABEL) {
197         buf_.push_back(base.offset() ^ label);
198         label = trie_->ith_node(base.offset() ^ label).sibling();
199       }
200     }
201   }
202   return Key::invalid_key();
203 }
204 
205 }  // namespace dat
206 }  // namespace grn
207