1 ///////////////////////////////////////////////////////////////////////////////
2 /// \file parser.hpp
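/// Contains helper factory functions that turn parsed regex elements
/// (literals, character sets, back-references, assertions) into dynamic
/// matcher sequences, in support of building regex objects from strings.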
5 //
6 //  Copyright 2008 Eric Niebler. Distributed under the Boost
7 //  Software License, Version 1.0. (See accompanying file
8 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 
10 #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_HPP_EAN_10_04_2005
11 #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_HPP_EAN_10_04_2005
12 
13 // MS compatible compilers support #pragma once
14 #if defined(_MSC_VER)
15 # pragma once
16 # pragma warning(push)
17 # pragma warning(disable : 4127) // conditional expression is constant
18 #endif
19 
20 #include <boost/assert.hpp>
21 #include <boost/xpressive/regex_constants.hpp>
22 #include <boost/xpressive/detail/detail_fwd.hpp>
23 #include <boost/xpressive/detail/core/matchers.hpp>
24 #include <boost/xpressive/detail/utility/ignore_unused.hpp>
25 #include <boost/xpressive/detail/dynamic/dynamic.hpp>
26 
27 // The Regular Expression grammar, in pseudo BNF:
28 //
29 // expression   = alternates ;
30 //
31 // alternates   = sequence, *('|', sequence) ;
32 //
33 // sequence     = quant, *(quant) ;
34 //
35 // quant        = atom, [*+?] ;
36 //
37 // atom         = literal             |
38 //                '.'                 |
39 //                '\' any             |
40 //                '(' expression ')' ;
41 //
42 // literal      = not a meta-character ;
43 //
44 
45 namespace boost { namespace xpressive { namespace detail
46 {
47 
48 ///////////////////////////////////////////////////////////////////////////////
49 // make_char_xpression
50 //
51 template<typename BidiIter, typename Char, typename Traits>
make_char_xpression(Char ch,regex_constants::syntax_option_type flags,Traits const & tr)52 inline sequence<BidiIter> make_char_xpression
53 (
54     Char ch
55   , regex_constants::syntax_option_type flags
56   , Traits const &tr
57 )
58 {
59     if(0 != (regex_constants::icase_ & flags))
60     {
61         literal_matcher<Traits, mpl::true_, mpl::false_> matcher(ch, tr);
62         return make_dynamic<BidiIter>(matcher);
63     }
64     else
65     {
66         literal_matcher<Traits, mpl::false_, mpl::false_> matcher(ch, tr);
67         return make_dynamic<BidiIter>(matcher);
68     }
69 }
70 
71 ///////////////////////////////////////////////////////////////////////////////
72 // make_any_xpression
73 //
74 template<typename BidiIter, typename Traits>
make_any_xpression(regex_constants::syntax_option_type flags,Traits const & tr)75 inline sequence<BidiIter> make_any_xpression
76 (
77     regex_constants::syntax_option_type flags
78   , Traits const &tr
79 )
80 {
81     using namespace regex_constants;
82     typedef typename iterator_value<BidiIter>::type char_type;
83     typedef detail::set_matcher<Traits, mpl::int_<2> > set_matcher;
84     typedef literal_matcher<Traits, mpl::false_, mpl::true_> literal_matcher;
85 
86     char_type const newline = tr.widen('\n');
87     set_matcher s;
88     s.set_[0] = newline;
89     s.set_[1] = 0;
90     s.inverse();
91 
92     switch(((int)not_dot_newline | not_dot_null) & flags)
93     {
94     case not_dot_null:
95         return make_dynamic<BidiIter>(literal_matcher(char_type(0), tr));
96 
97     case not_dot_newline:
98         return make_dynamic<BidiIter>(literal_matcher(newline, tr));
99 
100     case (int)not_dot_newline | not_dot_null:
101         return make_dynamic<BidiIter>(s);
102 
103     default:
104         return make_dynamic<BidiIter>(any_matcher());
105     }
106 }
107 
108 ///////////////////////////////////////////////////////////////////////////////
109 // make_literal_xpression
110 //
111 template<typename BidiIter, typename Traits>
make_literal_xpression(typename Traits::string_type const & literal,regex_constants::syntax_option_type flags,Traits const & tr)112 inline sequence<BidiIter> make_literal_xpression
113 (
114     typename Traits::string_type const &literal
115   , regex_constants::syntax_option_type flags
116   , Traits const &tr
117 )
118 {
119     BOOST_ASSERT(0 != literal.size());
120     if(1 == literal.size())
121     {
122         return make_char_xpression<BidiIter>(literal[0], flags, tr);
123     }
124 
125     if(0 != (regex_constants::icase_ & flags))
126     {
127         string_matcher<Traits, mpl::true_> matcher(literal, tr);
128         return make_dynamic<BidiIter>(matcher);
129     }
130     else
131     {
132         string_matcher<Traits, mpl::false_> matcher(literal, tr);
133         return make_dynamic<BidiIter>(matcher);
134     }
135 }
136 
137 ///////////////////////////////////////////////////////////////////////////////
138 // make_backref_xpression
139 //
140 template<typename BidiIter, typename Traits>
make_backref_xpression(int mark_nbr,regex_constants::syntax_option_type flags,Traits const & tr)141 inline sequence<BidiIter> make_backref_xpression
142 (
143     int mark_nbr
144   , regex_constants::syntax_option_type flags
145   , Traits const &tr
146 )
147 {
148     if(0 != (regex_constants::icase_ & flags))
149     {
150         return make_dynamic<BidiIter>
151         (
152             mark_matcher<Traits, mpl::true_>(mark_nbr, tr)
153         );
154     }
155     else
156     {
157         return make_dynamic<BidiIter>
158         (
159             mark_matcher<Traits, mpl::false_>(mark_nbr, tr)
160         );
161     }
162 }
163 
164 ///////////////////////////////////////////////////////////////////////////////
165 // merge_charset
166 //
167 template<typename Char, typename Traits>
merge_charset(basic_chset<Char> & basic,compound_charset<Traits> const & compound,Traits const & tr)168 inline void merge_charset
169 (
170     basic_chset<Char> &basic
171   , compound_charset<Traits> const &compound
172   , Traits const &tr
173 )
174 {
175     detail::ignore_unused(tr);
176     if(0 != compound.posix_yes())
177     {
178         typename Traits::char_class_type mask = compound.posix_yes();
179         for(int i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
180         {
181             if(tr.isctype((Char)i, mask))
182             {
183                 basic.set((Char)i);
184             }
185         }
186     }
187 
188     if(!compound.posix_no().empty())
189     {
190         for(std::size_t j = 0; j < compound.posix_no().size(); ++j)
191         {
192             typename Traits::char_class_type mask = compound.posix_no()[j];
193             for(int i = 0; i <= static_cast<int>(UCHAR_MAX); ++i)
194             {
195                 if(!tr.isctype((Char)i, mask))
196                 {
197                     basic.set((Char)i);
198                 }
199             }
200         }
201     }
202 
203     if(compound.is_inverted())
204     {
205         basic.inverse();
206     }
207 }
208 
209 ///////////////////////////////////////////////////////////////////////////////
210 // make_charset_xpression
211 //
212 template<typename BidiIter, typename Traits>
make_charset_xpression(compound_charset<Traits> & chset,Traits const & tr,regex_constants::syntax_option_type flags)213 inline sequence<BidiIter> make_charset_xpression
214 (
215     compound_charset<Traits> &chset
216   , Traits const &tr
217   , regex_constants::syntax_option_type flags
218 )
219 {
220     typedef typename Traits::char_type char_type;
221     bool const icase = (0 != (regex_constants::icase_ & flags));
222     bool const optimize = is_narrow_char<char_type>::value && 0 != (regex_constants::optimize & flags);
223 
224     // don't care about compile speed -- fold eveything into a bitset<256>
225     if(optimize)
226     {
227         typedef basic_chset<char_type> charset_type;
228         charset_type charset(chset.base());
229         if(icase)
230         {
231             charset_matcher<Traits, mpl::true_, charset_type> matcher(charset);
232             merge_charset(matcher.charset_, chset, tr);
233             return make_dynamic<BidiIter>(matcher);
234         }
235         else
236         {
237             charset_matcher<Traits, mpl::false_, charset_type> matcher(charset);
238             merge_charset(matcher.charset_, chset, tr);
239             return make_dynamic<BidiIter>(matcher);
240         }
241     }
242 
243     // special case to make [[:digit:]] fast
244     else if(chset.base().empty() && chset.posix_no().empty())
245     {
246         BOOST_ASSERT(0 != chset.posix_yes());
247         posix_charset_matcher<Traits> matcher(chset.posix_yes(), chset.is_inverted());
248         return make_dynamic<BidiIter>(matcher);
249     }
250 
251     // default, slow
252     else
253     {
254         if(icase)
255         {
256             charset_matcher<Traits, mpl::true_> matcher(chset);
257             return make_dynamic<BidiIter>(matcher);
258         }
259         else
260         {
261             charset_matcher<Traits, mpl::false_> matcher(chset);
262             return make_dynamic<BidiIter>(matcher);
263         }
264     }
265 }
266 
267 ///////////////////////////////////////////////////////////////////////////////
268 // make_posix_charset_xpression
269 //
270 template<typename BidiIter, typename Traits>
make_posix_charset_xpression(typename Traits::char_class_type m,bool no,regex_constants::syntax_option_type,Traits const &)271 inline sequence<BidiIter> make_posix_charset_xpression
272 (
273     typename Traits::char_class_type m
274   , bool no
275   , regex_constants::syntax_option_type //flags
276   , Traits const & //traits
277 )
278 {
279     posix_charset_matcher<Traits> charset(m, no);
280     return make_dynamic<BidiIter>(charset);
281 }
282 
283 ///////////////////////////////////////////////////////////////////////////////
284 // make_assert_begin_line
285 //
286 template<typename BidiIter, typename Traits>
make_assert_begin_line(regex_constants::syntax_option_type flags,Traits const & tr)287 inline sequence<BidiIter> make_assert_begin_line
288 (
289     regex_constants::syntax_option_type flags
290   , Traits const &tr
291 )
292 {
293     if(0 != (regex_constants::single_line & flags))
294     {
295         return detail::make_dynamic<BidiIter>(detail::assert_bos_matcher());
296     }
297     else
298     {
299         detail::assert_bol_matcher<Traits> matcher(tr);
300         return detail::make_dynamic<BidiIter>(matcher);
301     }
302 }
303 
304 ///////////////////////////////////////////////////////////////////////////////
305 // make_assert_end_line
306 //
307 template<typename BidiIter, typename Traits>
make_assert_end_line(regex_constants::syntax_option_type flags,Traits const & tr)308 inline sequence<BidiIter> make_assert_end_line
309 (
310     regex_constants::syntax_option_type flags
311   , Traits const &tr
312 )
313 {
314     if(0 != (regex_constants::single_line & flags))
315     {
316         return detail::make_dynamic<BidiIter>(detail::assert_eos_matcher());
317     }
318     else
319     {
320         detail::assert_eol_matcher<Traits> matcher(tr);
321         return detail::make_dynamic<BidiIter>(matcher);
322     }
323 }
324 
325 ///////////////////////////////////////////////////////////////////////////////
326 // make_assert_word
327 //
328 template<typename BidiIter, typename Cond, typename Traits>
make_assert_word(Cond,Traits const & tr)329 inline sequence<BidiIter> make_assert_word(Cond, Traits const &tr)
330 {
331     return detail::make_dynamic<BidiIter>
332     (
333         detail::assert_word_matcher<Cond, Traits>(tr)
334     );
335 }
336 
337 ///////////////////////////////////////////////////////////////////////////////
338 // make_independent_end_xpression
339 //
340 template<typename BidiIter>
make_independent_end_xpression(bool pure)341 inline sequence<BidiIter> make_independent_end_xpression(bool pure)
342 {
343     if(pure)
344     {
345         return detail::make_dynamic<BidiIter>(detail::true_matcher());
346     }
347     else
348     {
349         return detail::make_dynamic<BidiIter>(detail::independent_end_matcher());
350     }
351 }
352 
353 }}} // namespace boost::xpressive::detail
354 
355 #if defined(_MSC_VER)
356 # pragma warning(pop)
357 #endif
358 
359 #endif
360