1 ///////////////////////////////////////////////////////////////////////////////
2 /// \file boyer_moore.hpp
3 ///   Contains the boyer-moore implementation. Note: this is *not* a general-
4 ///   purpose boyer-moore implementation. It truncates the search string at
5 ///   256 characters, but it is sufficient for the needs of xpressive.
6 //
7 //  Copyright 2008 Eric Niebler. Distributed under the Boost
8 //  Software License, Version 1.0. (See accompanying file
9 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
10 
11 #ifndef BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
12 #define BOOST_XPRESSIVE_DETAIL_BOYER_MOORE_HPP_EAN_10_04_2005
13 
14 // MS compatible compilers support #pragma once
15 #if defined(_MSC_VER)
16 # pragma once
17 # pragma warning(push)
18 # pragma warning(disable : 4100) // unreferenced formal parameter
19 #endif
20 
21 #include <climits>  // for UCHAR_MAX
22 #include <cstddef>  // for std::ptrdiff_t
23 #include <utility>  // for std::max
24 #include <vector>
25 #include <boost/mpl/bool.hpp>
26 #include <boost/noncopyable.hpp>
27 #include <boost/iterator/iterator_traits.hpp>
28 #include <boost/type_traits/is_convertible.hpp>
29 #include <boost/xpressive/detail/detail_fwd.hpp>
30 
31 namespace boost { namespace xpressive { namespace detail
32 {
33 
34 ///////////////////////////////////////////////////////////////////////////////
35 // boyer_moore
36 //
37 template<typename BidiIter, typename Traits>
38 struct boyer_moore
39   : noncopyable
40 {
41     typedef typename iterator_value<BidiIter>::type char_type;
42     typedef Traits traits_type;
43     typedef has_fold_case<Traits> case_fold;
44     typedef typename Traits::string_type string_type;
45 
46     // initialize the Boyer-Moore search data structure, using the
47     // search sub-sequence to prime the pump.
boyer_mooreboost::xpressive::detail::boyer_moore48     boyer_moore(char_type const *begin, char_type const *end, Traits const &tr, bool icase)
49       : begin_(begin)
50       , last_(begin)
51       , fold_()
52       , find_fun_(
53               icase
54             ? (case_fold() ? &boyer_moore::find_nocase_fold_ : &boyer_moore::find_nocase_)
55             : &boyer_moore::find_
56         )
57     {
58         std::ptrdiff_t const uchar_max = UCHAR_MAX;
59         std::ptrdiff_t diff = std::distance(begin, end);
60         this->length_  = static_cast<unsigned char>((std::min)(diff, uchar_max));
61         std::fill_n(static_cast<unsigned char *>(this->offsets_), uchar_max + 1, this->length_);
62         --this->length_;
63 
64         icase ? this->init_(tr, case_fold()) : this->init_(tr, mpl::false_());
65     }
66 
findboost::xpressive::detail::boyer_moore67     BidiIter find(BidiIter begin, BidiIter end, Traits const &tr) const
68     {
69         return (this->*this->find_fun_)(begin, end, tr);
70     }
71 
72 private:
73 
init_boost::xpressive::detail::boyer_moore74     void init_(Traits const &tr, mpl::false_)
75     {
76         for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
77         {
78             this->offsets_[tr.hash(*this->last_)] = offset;
79         }
80     }
81 
init_boost::xpressive::detail::boyer_moore82     void init_(Traits const &tr, mpl::true_)
83     {
84         this->fold_.reserve(this->length_ + 1);
85         for(unsigned char offset = this->length_; offset; --offset, ++this->last_)
86         {
87             this->fold_.push_back(tr.fold_case(*this->last_));
88             for(typename string_type::const_iterator beg = this->fold_.back().begin(), end = this->fold_.back().end();
89                 beg != end; ++beg)
90             {
91                 this->offsets_[tr.hash(*beg)] = offset;
92             }
93         }
94         this->fold_.push_back(tr.fold_case(*this->last_));
95     }
96 
97     // case-sensitive Boyer-Moore search
find_boost::xpressive::detail::boyer_moore98     BidiIter find_(BidiIter begin, BidiIter end, Traits const &tr) const
99     {
100         typedef typename boost::iterator_difference<BidiIter>::type diff_type;
101         diff_type const endpos = std::distance(begin, end);
102         diff_type offset = static_cast<diff_type>(this->length_);
103 
104         for(diff_type curpos = offset; curpos < endpos; curpos += offset)
105         {
106             std::advance(begin, offset);
107 
108             char_type const *pat_tmp = this->last_;
109             BidiIter str_tmp = begin;
110 
111             for(; tr.translate(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
112             {
113                 if(pat_tmp == this->begin_)
114                 {
115                     return str_tmp;
116                 }
117             }
118 
119             offset = this->offsets_[tr.hash(tr.translate(*begin))];
120         }
121 
122         return end;
123     }
124 
125     // case-insensitive Boyer-Moore search
find_nocase_boost::xpressive::detail::boyer_moore126     BidiIter find_nocase_(BidiIter begin, BidiIter end, Traits const &tr) const
127     {
128         typedef typename boost::iterator_difference<BidiIter>::type diff_type;
129         diff_type const endpos = std::distance(begin, end);
130         diff_type offset = static_cast<diff_type>(this->length_);
131 
132         for(diff_type curpos = offset; curpos < endpos; curpos += offset)
133         {
134             std::advance(begin, offset);
135 
136             char_type const *pat_tmp = this->last_;
137             BidiIter str_tmp = begin;
138 
139             for(; tr.translate_nocase(*str_tmp) == *pat_tmp; --pat_tmp, --str_tmp)
140             {
141                 if(pat_tmp == this->begin_)
142                 {
143                     return str_tmp;
144                 }
145             }
146 
147             offset = this->offsets_[tr.hash(tr.translate_nocase(*begin))];
148         }
149 
150         return end;
151     }
152 
153     // case-insensitive Boyer-Moore search with case-folding
find_nocase_fold_boost::xpressive::detail::boyer_moore154     BidiIter find_nocase_fold_(BidiIter begin, BidiIter end, Traits const &tr) const
155     {
156         typedef typename boost::iterator_difference<BidiIter>::type diff_type;
157         diff_type const endpos = std::distance(begin, end);
158         diff_type offset = static_cast<diff_type>(this->length_);
159 
160         for(diff_type curpos = offset; curpos < endpos; curpos += offset)
161         {
162             std::advance(begin, offset);
163 
164             string_type const *pat_tmp = &this->fold_.back();
165             BidiIter str_tmp = begin;
166 
167             for(; pat_tmp->end() != std::find(pat_tmp->begin(), pat_tmp->end(), *str_tmp);
168                 --pat_tmp, --str_tmp)
169             {
170                 if(pat_tmp == &this->fold_.front())
171                 {
172                     return str_tmp;
173                 }
174             }
175 
176             offset = this->offsets_[tr.hash(*begin)];
177         }
178 
179         return end;
180     }
181 
182 private:
183 
184     char_type const *begin_;
185     char_type const *last_;
186     std::vector<string_type> fold_;
187     BidiIter (boyer_moore::*const find_fun_)(BidiIter, BidiIter, Traits const &) const;
188     unsigned char length_;
189     unsigned char offsets_[UCHAR_MAX + 1];
190 };
191 
192 }}} // namespace boost::xpressive::detail
193 
194 #if defined(_MSC_VER)
195 # pragma warning(pop)
196 #endif
197 
198 #endif
199