1 ///////////////////////////////////////////////////////////////////////////////
2 /// \file symbols.hpp
3 ///   Contains the Ternary Search Trie implementation.
4 /// Based on the following papers:
5 /// J. Bentley and R. Sedgewick. (1998) Ternary search trees. Dr. Dobbs Journal
6 /// G. Badr and B.J. Oommen. (2005) Self-Adjusting of Ternary Search Tries Using
7 ///     Conditional Rotations and Randomized Heuristics
8 //
9 //  Copyright 2007 David Jenkins.
10 //  Copyright 2007 Eric Niebler.
11 //
12 //  Distributed under the Boost Software License, Version 1.0. (See
13 //  accompanying file LICENSE_1_0.txt or copy at
14 //  http://www.boost.org/LICENSE_1_0.txt)
15 
16 #ifndef BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007
17 #define BOOST_XPRESSIVE_DETAIL_SYMBOLS_HPP_DRJ_06_11_2007
18 
19 // MS compatible compilers support #pragma once
20 #if defined(_MSC_VER) && (_MSC_VER >= 1020)
21 # pragma once
22 #endif
23 
24 #include <boost/noncopyable.hpp>
25 #include <boost/range/begin.hpp>
26 #include <boost/range/end.hpp>
27 #include <boost/range/value_type.hpp>
28 #include <boost/range/const_iterator.hpp>
29 #include <boost/shared_ptr.hpp>
30 
31 namespace boost { namespace xpressive { namespace detail
32 {
33 
34     ///////////////////////////////////////////////////////////////////////////////
35     // symbols (using a ternary search trie)
36     //
37     template<typename Map>
38     struct symbols
39     {
40         typedef typename range_value<Map>::type::first_type key_type;
41         typedef typename range_value<Map>::type::second_type value_type;
42         typedef typename range_value<key_type>::type char_type;
43         typedef typename range_const_iterator<Map>::type iterator;
44         typedef typename range_const_iterator<key_type>::type key_iterator;
45         typedef value_type const *result_type;
46 
47         // copies of this symbol table share the TST
48 
49         template<typename Trans>
loadboost::xpressive::detail::symbols50         void load(Map const &map, Trans trans)
51         {
52             iterator begin = boost::begin(map);
53             iterator end = boost::end(map);
54             node* root_p = this->root.get();
55             for(; begin != end; ++begin)
56             {
57                 key_iterator kbegin = boost::begin(begin->first);
58                 key_iterator kend = boost::end(begin->first);
59                 root_p = this->insert(root_p, kbegin, kend, &begin->second, trans);
60             }
61             this->root.reset(root_p);
62         }
63 
64         template<typename BidiIter, typename Trans>
operator ()boost::xpressive::detail::symbols65         result_type operator ()(BidiIter &begin, BidiIter end, Trans trans) const
66         {
67             return this->search(begin, end, trans, this->root.get());
68         }
69 
70         template<typename Sink>
peekboost::xpressive::detail::symbols71         void peek(Sink const &sink) const
72         {
73             this->peek_(this->root.get(), sink);
74         }
75 
76     private:
77         ///////////////////////////////////////////////////////////////////////////////
78         // struct node : a node in the TST.
79         //     The "eq" field stores the result pointer when ch is zero.
80         //
81         struct node
82             : boost::noncopyable
83         {
nodeboost::xpressive::detail::symbols::node84             node(char_type c)
85               : ch(c)
86               , lo(0)
87               , eq(0)
88               , hi(0)
89               #ifdef BOOST_DISABLE_THREADS
90               , tau(0)
91               #endif
92             {}
93 
~nodeboost::xpressive::detail::symbols::node94             ~node()
95             {
96                 delete lo;
97                 if (ch)
98                     delete eq;
99                 delete hi;
100             }
101 
swapboost::xpressive::detail::symbols::node102             void swap(node& that)
103             {
104                 std::swap(ch, that.ch);
105                 std::swap(lo, that.lo);
106                 std::swap(eq, that.eq);
107                 std::swap(hi, that.hi);
108                 #ifdef BOOST_DISABLE_THREADS
109                 std::swap(tau, that.tau);
110                 #endif
111             }
112 
113             char_type ch;
114             node* lo;
115             union
116             {
117                 node* eq;
118                 result_type result;
119             };
120             node* hi;
121             #ifdef BOOST_DISABLE_THREADS
122             long tau;
123             #endif
124         };
125 
126         ///////////////////////////////////////////////////////////////////////////////
127         // insert : insert a string into the TST
128         //
129         template<typename Trans>
insertboost::xpressive::detail::symbols130         node* insert(node* p, key_iterator &begin, key_iterator end, result_type r, Trans trans) const
131         {
132             char_type c1 = 0;
133 
134             if(begin != end)
135             {
136                 c1 = trans(*begin);
137             }
138 
139             if(!p)
140             {
141                 p = new node(c1);
142             }
143 
144             if(c1 < p->ch)
145             {
146                 p->lo = this->insert(p->lo, begin, end, r, trans);
147             }
148             else if(c1 == p->ch)
149             {
150                 if(0 == c1)
151                 {
152                     p->result = r;
153                 }
154                 else
155                 {
156                     p->eq = this->insert(p->eq, ++begin, end, r, trans);
157                 }
158             }
159             else
160             {
161                 p->hi = this->insert(p->hi, begin, end, r, trans);
162             }
163 
164             return p;
165         }
166 
167         #ifdef BOOST_DISABLE_THREADS
168         ///////////////////////////////////////////////////////////////////////////////
169         // conditional rotation : the goal is to minimize the overall
170         //     weighted path length of each binary search tree
171         //
cond_rotationboost::xpressive::detail::symbols172         bool const cond_rotation(bool left, node* const i, node* const j) const
173         {
174             // don't rotate top node in binary search tree
175             if (i == j)
176                 return false;
177             // calculate psi (the rotation condition)
178             node* const k = (left ? i->hi : i->lo);
179             long psi = 2*i->tau - j->tau - (k ? k->tau : 0);
180             if (psi <= 0)
181                 return false;
182 
183             // recalculate the tau values
184             j->tau += -i->tau + (k ? k->tau : 0);
185             i->tau +=  j->tau - (k ? k->tau : 0);
186             // fixup links and swap nodes
187             if (left)
188             {
189                 j->lo = k;
190                 i->hi = i;
191             }
192             else
193             {
194                 j->hi = k;
195                 i->lo = i;
196             }
197             (*i).swap(*j);
198             return true;
199         }
200         #endif
201 
202         ///////////////////////////////////////////////////////////////////////////////
203         // search : find a string in the TST
204         //
205         template<typename BidiIter, typename Trans>
searchboost::xpressive::detail::symbols206         result_type search(BidiIter &begin, BidiIter end, Trans trans, node* p) const
207         {
208             result_type r = 0;
209             node* p2 = p;
210             bool left = false;
211             char_type c1 = (begin != end ? trans(*begin) : 0);
212             while(p)
213             {
214                 #ifdef BOOST_DISABLE_THREADS
215                 ++p->tau;
216                 #endif
217                 if(c1 == p->ch)
218                 {
219                     // conditional rotation test
220                     #ifdef BOOST_DISABLE_THREADS
221                     if (this->cond_rotation(left, p, p2))
222                         p = p2;
223                     #endif
224                     if (0 == p->ch)
225                     {
226                         // it's a match!
227                         r = p->result;
228                     }
229                     if(begin == end)
230                         break;
231                     ++begin;
232                     p = p->eq;
233                     // search for the longest match first
234                     r = search(begin,end,trans,p);
235                     if (0 == r)
236                     {
237                         // search for a match ending here
238                         r = search(end,end,trans,p);
239                         if (0 == r)
240                         {
241                             --begin;
242                         }
243                     }
244                     break;
245                 }
246                 else if(c1 < p->ch)
247                 {
248                     left = true;
249                     p2 = p;
250                     p = p->lo;
251                 }
252                 else // (c1 > p->ch)
253                 {
254                     left = false;
255                     p2 = p;
256                     p = p->hi;
257                 }
258             }
259             return r;
260         }
261 
262         template<typename Sink>
peek_boost::xpressive::detail::symbols263         void peek_(node const *const &p, Sink const &sink) const
264         {
265             if(p)
266             {
267                 sink(p->ch);
268                 this->peek_(p->lo, sink);
269                 this->peek_(p->hi, sink);
270             }
271         }
272 
273         boost::shared_ptr<node> root;
274     };
275 
276 }}} // namespace boost::xpressive::detail
277 
278 #endif
279