1 ///////////////////////////////////////////////////////////////////////////////
2 /// \file regex_primitives.hpp
3 /// Contains the syntax elements for writing static regular expressions.
4 //
5 //  Copyright 2008 Eric Niebler. Distributed under the Boost
6 //  Software License, Version 1.0. (See accompanying file
7 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8 
9 #ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
10 #define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
11 
12 #include <vector>
13 #include <climits>
14 #include <boost/config.hpp>
15 #include <boost/assert.hpp>
16 #include <boost/mpl/if.hpp>
17 #include <boost/mpl/and.hpp>
18 #include <boost/mpl/assert.hpp>
19 #include <boost/detail/workaround.hpp>
20 #include <boost/preprocessor/cat.hpp>
21 #include <boost/xpressive/detail/detail_fwd.hpp>
22 #include <boost/xpressive/detail/core/matchers.hpp>
23 #include <boost/xpressive/detail/core/regex_domain.hpp>
24 #include <boost/xpressive/detail/utility/ignore_unused.hpp>
25 
26 // Doxygen can't handle proto :-(
27 #ifndef BOOST_XPRESSIVE_DOXYGEN_INVOKED
28 # include <boost/proto/core.hpp>
29 # include <boost/proto/transform/arg.hpp>
30 # include <boost/proto/transform/when.hpp>
31 # include <boost/xpressive/detail/core/icase.hpp>
32 # include <boost/xpressive/detail/static/compile.hpp>
33 # include <boost/xpressive/detail/static/modifier.hpp>
34 #endif
35 
36 namespace boost { namespace xpressive { namespace detail
37 {
38 
39     typedef assert_word_placeholder<word_boundary<mpl::true_> > assert_word_boundary;
40     typedef assert_word_placeholder<word_begin> assert_word_begin;
41     typedef assert_word_placeholder<word_end> assert_word_end;
42 
43     // workaround msvc-7.1 bug with function pointer types
44     // within function types:
45     #if BOOST_WORKAROUND(BOOST_MSVC, == 1310)
46     #define mark_number(x) proto::call<mark_number(x)>
47     #define minus_one() proto::make<minus_one()>
48     #endif
49 
50     struct push_back : proto::callable
51     {
52         typedef int result_type;
53 
54         template<typename Subs>
operator ()boost::xpressive::detail::push_back55         int operator ()(Subs &subs, int i) const
56         {
57             subs.push_back(i);
58             return i;
59         }
60     };
61 
62     struct mark_number : proto::callable
63     {
64         typedef int result_type;
65 
66         template<typename Expr>
operator ()boost::xpressive::detail::mark_number67         int operator ()(Expr const &expr) const
68         {
69             return expr.mark_number_;
70         }
71     };
72 
73     typedef mpl::int_<-1> minus_one;
74 
75     // s1 or -s1
76     struct SubMatch
77       : proto::or_<
78             proto::when<basic_mark_tag,                push_back(proto::_data, mark_number(proto::_value))   >
79           , proto::when<proto::negate<basic_mark_tag>, push_back(proto::_data, minus_one())                  >
80         >
81     {};
82 
83     struct SubMatchList
84       : proto::or_<SubMatch, proto::comma<SubMatchList, SubMatch> >
85     {};
86 
87     template<typename Subs>
88     typename enable_if<
89         mpl::and_<proto::is_expr<Subs>, proto::matches<Subs, SubMatchList> >
90       , std::vector<int>
91     >::type
to_vector(Subs const & subs)92     to_vector(Subs const &subs)
93     {
94         std::vector<int> subs_;
95         SubMatchList()(subs, 0, subs_);
96         return subs_;
97     }
98 
99     #if BOOST_WORKAROUND(BOOST_MSVC, == 1310)
100     #undef mark_number
101     #undef minus_one
102     #endif
103 
104     // replace "Expr" with "keep(*State) >> Expr"
105     struct skip_primitives : proto::transform<skip_primitives>
106     {
107         template<typename Expr, typename State, typename Data>
108         struct impl : proto::transform_impl<Expr, State, Data>
109         {
110             typedef
111                 typename proto::shift_right<
112                     typename proto::unary_expr<
113                         keeper_tag
114                       , typename proto::dereference<State>::type
115                     >::type
116                   , Expr
117                 >::type
118             result_type;
119 
operator ()boost::xpressive::detail::skip_primitives::impl120             result_type operator ()(
121                 typename impl::expr_param expr
122               , typename impl::state_param state
123               , typename impl::data_param
124             ) const
125             {
126                 result_type that = {{{state}}, expr};
127                 return that;
128             }
129         };
130     };
131 
132     struct Primitives
133       : proto::or_<
134             proto::terminal<proto::_>
135           , proto::comma<proto::_, proto::_>
136           , proto::subscript<proto::terminal<set_initializer>, proto::_>
137           , proto::assign<proto::terminal<set_initializer>, proto::_>
138           , proto::assign<proto::terminal<attribute_placeholder<proto::_> >, proto::_>
139           , proto::complement<Primitives>
140         >
141     {};
142 
143     struct SkipGrammar
144       : proto::or_<
145             proto::when<Primitives, skip_primitives>
146           , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar>   // don't "skip" mark tags
147           , proto::subscript<SkipGrammar, proto::_>                         // don't put skips in actions
148           , proto::binary_expr<modifier_tag, proto::_, SkipGrammar>         // don't skip modifiers
149           , proto::unary_expr<lookbehind_tag, proto::_>                     // don't skip lookbehinds
150           , proto::nary_expr<proto::_, proto::vararg<SkipGrammar> >         // everything else is fair game!
151         >
152     {};
153 
154     template<typename Skip>
155     struct skip_directive
156     {
157         typedef typename proto::result_of::as_expr<Skip>::type skip_type;
158 
skip_directiveboost::xpressive::detail::skip_directive159         skip_directive(Skip const &skip)
160           : skip_(proto::as_expr(skip))
161         {}
162 
163         template<typename Sig>
164         struct result {};
165 
166         template<typename This, typename Expr>
167         struct result<This(Expr)>
168         {
169             typedef
170                 SkipGrammar::impl<
171                     typename proto::result_of::as_expr<Expr>::type
172                   , skip_type const &
173                   , mpl::void_ &
174                 >
175             skip_transform;
176 
177             typedef
178                 typename proto::shift_right<
179                     typename skip_transform::result_type
180                   , typename proto::dereference<skip_type>::type
181                 >::type
182             type;
183         };
184 
185         template<typename Expr>
186         typename result<skip_directive(Expr)>::type
operator ()boost::xpressive::detail::skip_directive187         operator ()(Expr const &expr) const
188         {
189             mpl::void_ ignore;
190             typedef result<skip_directive(Expr)> result_fun;
191             typename result_fun::type that = {
192                 typename result_fun::skip_transform()(proto::as_expr(expr), this->skip_, ignore)
193               , {skip_}
194             };
195             return that;
196         }
197 
198     private:
199         skip_type skip_;
200     };
201 
202 /*
203 ///////////////////////////////////////////////////////////////////////////////
204 /// INTERNAL ONLY
205 // BOOST_XPRESSIVE_GLOBAL
206 //  for defining globals that neither violate the One Definition Rule nor
207 //  lead to undefined behavior due to global object initialization order.
208 //#define BOOST_XPRESSIVE_GLOBAL(type, name, init)                                        \
209 //    namespace detail                                                                    \
210 //    {                                                                                   \
211 //        template<int Dummy>                                                             \
212 //        struct BOOST_PP_CAT(global_pod_, name)                                          \
213 //        {                                                                               \
214 //            static type const value;                                                    \
215 //        private:                                                                        \
216 //            union type_must_be_pod                                                      \
217 //            {                                                                           \
218 //                type t;                                                                 \
219 //                char ch;                                                                \
220 //            } u;                                                                        \
221 //        };                                                                              \
222 //        template<int Dummy>                                                             \
223 //        type const BOOST_PP_CAT(global_pod_, name)<Dummy>::value = init;                \
224 //    }                                                                                   \
225 //    type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value
226 */
227 
228 
229 } // namespace detail
230 
231 /// INTERNAL ONLY (for backwards compatibility)
232 unsigned int const repeat_max = UINT_MAX-1;
233 
234 ///////////////////////////////////////////////////////////////////////////////
235 /// \brief For infinite repetition of a sub-expression.
236 ///
237 /// Magic value used with the repeat\<\>() function template
238 /// to specify an unbounded repeat. Use as: repeat<17, inf>('a').
239 /// The equivalent in perl is /a{17,}/.
240 unsigned int const inf = UINT_MAX-1;
241 
242 /// INTERNAL ONLY (for backwards compatibility)
243 proto::terminal<detail::epsilon_matcher>::type const epsilon = {{}};
244 
245 ///////////////////////////////////////////////////////////////////////////////
246 /// \brief Successfully matches nothing.
247 ///
248 /// Successfully matches a zero-width sequence. nil always succeeds and
249 /// never consumes any characters.
250 proto::terminal<detail::epsilon_matcher>::type const nil = {{}};
251 
252 ///////////////////////////////////////////////////////////////////////////////
253 /// \brief Matches an alpha-numeric character.
254 ///
255 /// The regex traits are used to determine which characters are alpha-numeric.
256 /// To match any character that is not alpha-numeric, use ~alnum.
257 ///
258 /// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent
259 /// to /[[:^alnum:]]/ in perl.
260 proto::terminal<detail::posix_charset_placeholder>::type const alnum = {{"alnum", false}};
261 
262 ///////////////////////////////////////////////////////////////////////////////
263 /// \brief Matches an alphabetic character.
264 ///
265 /// The regex traits are used to determine which characters are alphabetic.
266 /// To match any character that is not alphabetic, use ~alpha.
267 ///
268 /// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent
269 /// to /[[:^alpha:]]/ in perl.
270 proto::terminal<detail::posix_charset_placeholder>::type const alpha = {{"alpha", false}};
271 
272 ///////////////////////////////////////////////////////////////////////////////
273 /// \brief Matches a blank (horizonal white-space) character.
274 ///
275 /// The regex traits are used to determine which characters are blank characters.
276 /// To match any character that is not blank, use ~blank.
277 ///
278 /// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent
279 /// to /[[:^blank:]]/ in perl.
280 proto::terminal<detail::posix_charset_placeholder>::type const blank = {{"blank", false}};
281 
282 ///////////////////////////////////////////////////////////////////////////////
283 /// \brief Matches a control character.
284 ///
285 /// The regex traits are used to determine which characters are control characters.
286 /// To match any character that is not a control character, use ~cntrl.
287 ///
288 /// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent
289 /// to /[[:^cntrl:]]/ in perl.
290 proto::terminal<detail::posix_charset_placeholder>::type const cntrl = {{"cntrl", false}};
291 
292 ///////////////////////////////////////////////////////////////////////////////
293 /// \brief Matches a digit character.
294 ///
295 /// The regex traits are used to determine which characters are digits.
296 /// To match any character that is not a digit, use ~digit.
297 ///
298 /// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent
299 /// to /[[:^digit:]]/ in perl.
300 proto::terminal<detail::posix_charset_placeholder>::type const digit = {{"digit", false}};
301 
302 ///////////////////////////////////////////////////////////////////////////////
303 /// \brief Matches a graph character.
304 ///
305 /// The regex traits are used to determine which characters are graphable.
306 /// To match any character that is not graphable, use ~graph.
307 ///
308 /// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent
309 /// to /[[:^graph:]]/ in perl.
310 proto::terminal<detail::posix_charset_placeholder>::type const graph = {{"graph", false}};
311 
312 ///////////////////////////////////////////////////////////////////////////////
313 /// \brief Matches a lower-case character.
314 ///
315 /// The regex traits are used to determine which characters are lower-case.
316 /// To match any character that is not a lower-case character, use ~lower.
317 ///
318 /// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent
319 /// to /[[:^lower:]]/ in perl.
320 proto::terminal<detail::posix_charset_placeholder>::type const lower = {{"lower", false}};
321 
322 ///////////////////////////////////////////////////////////////////////////////
323 /// \brief Matches a printable character.
324 ///
325 /// The regex traits are used to determine which characters are printable.
326 /// To match any character that is not printable, use ~print.
327 ///
328 /// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent
329 /// to /[[:^print:]]/ in perl.
330 proto::terminal<detail::posix_charset_placeholder>::type const print = {{"print", false}};
331 
332 ///////////////////////////////////////////////////////////////////////////////
333 /// \brief Matches a punctuation character.
334 ///
335 /// The regex traits are used to determine which characters are punctuation.
336 /// To match any character that is not punctuation, use ~punct.
337 ///
338 /// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent
339 /// to /[[:^punct:]]/ in perl.
340 proto::terminal<detail::posix_charset_placeholder>::type const punct = {{"punct", false}};
341 
342 ///////////////////////////////////////////////////////////////////////////////
343 /// \brief Matches a space character.
344 ///
345 /// The regex traits are used to determine which characters are space characters.
346 /// To match any character that is not white-space, use ~space.
347 ///
348 /// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent
349 /// to /[[:^space:]]/ in perl.
350 proto::terminal<detail::posix_charset_placeholder>::type const space = {{"space", false}};
351 
352 ///////////////////////////////////////////////////////////////////////////////
353 /// \brief Matches an upper-case character.
354 ///
355 /// The regex traits are used to determine which characters are upper-case.
356 /// To match any character that is not upper-case, use ~upper.
357 ///
358 /// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent
359 /// to /[[:^upper:]]/ in perl.
360 proto::terminal<detail::posix_charset_placeholder>::type const upper = {{"upper", false}};
361 
362 ///////////////////////////////////////////////////////////////////////////////
363 /// \brief Matches a hexadecimal digit character.
364 ///
365 /// The regex traits are used to determine which characters are hex digits.
366 /// To match any character that is not a hex digit, use ~xdigit.
367 ///
368 /// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent
369 /// to /[[:^xdigit:]]/ in perl.
370 proto::terminal<detail::posix_charset_placeholder>::type const xdigit = {{"xdigit", false}};
371 
372 ///////////////////////////////////////////////////////////////////////////////
373 /// \brief Beginning of sequence assertion.
374 ///
375 /// For the character sequence [begin, end), 'bos' matches the
376 /// zero-width sub-sequence [begin, begin).
377 proto::terminal<detail::assert_bos_matcher>::type const bos = {{}};
378 
379 ///////////////////////////////////////////////////////////////////////////////
380 /// \brief End of sequence assertion.
381 ///
382 /// For the character sequence [begin, end),
383 /// 'eos' matches the zero-width sub-sequence [end, end).
384 ///
385 /// \attention Unlike the perl end of sequence assertion \$, 'eos' will
386 /// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To
387 /// get that behavior, use (!_n >> eos).
388 proto::terminal<detail::assert_eos_matcher>::type const eos = {{}};
389 
390 ///////////////////////////////////////////////////////////////////////////////
391 /// \brief Beginning of line assertion.
392 ///
393 /// 'bol' matches the zero-width sub-sequence
394 /// immediately following a logical newline sequence. The regex traits
395 /// is used to determine what constitutes a logical newline sequence.
396 proto::terminal<detail::assert_bol_placeholder>::type const bol = {{}};
397 
398 ///////////////////////////////////////////////////////////////////////////////
399 /// \brief End of line assertion.
400 ///
401 /// 'eol' matches the zero-width sub-sequence
402 /// immediately preceeding a logical newline sequence. The regex traits
403 /// is used to determine what constitutes a logical newline sequence.
404 proto::terminal<detail::assert_eol_placeholder>::type const eol = {{}};
405 
406 ///////////////////////////////////////////////////////////////////////////////
407 /// \brief Beginning of word assertion.
408 ///
409 /// 'bow' matches the zero-width sub-sequence
410 /// immediately following a non-word character and preceeding a word character.
411 /// The regex traits are used to determine what constitutes a word character.
412 proto::terminal<detail::assert_word_begin>::type const bow = {{}};
413 
414 ///////////////////////////////////////////////////////////////////////////////
415 /// \brief End of word assertion.
416 ///
417 /// 'eow' matches the zero-width sub-sequence
418 /// immediately following a word character and preceeding a non-word character.
419 /// The regex traits are used to determine what constitutes a word character.
420 proto::terminal<detail::assert_word_end>::type const eow = {{}};
421 
422 ///////////////////////////////////////////////////////////////////////////////
423 /// \brief Word boundary assertion.
424 ///
425 /// '_b' matches the zero-width sub-sequence at the beginning or the end of a word.
426 /// It is equivalent to (bow | eow). The regex traits are used to determine what
427 /// constitutes a word character. To match a non-word boundary, use ~_b.
428 ///
429 /// \attention _b is like \\b in perl. ~_b is like \\B in perl.
430 proto::terminal<detail::assert_word_boundary>::type const _b = {{}};
431 
432 ///////////////////////////////////////////////////////////////////////////////
433 /// \brief Matches a word character.
434 ///
435 /// '_w' matches a single word character. The regex traits are used to determine which
436 /// characters are word characters. Use ~_w to match a character that is not a word
437 /// character.
438 ///
439 /// \attention _w is like \\w in perl. ~_w is like \\W in perl.
440 proto::terminal<detail::posix_charset_placeholder>::type const _w = {{"w", false}};
441 
442 ///////////////////////////////////////////////////////////////////////////////
443 /// \brief Matches a digit character.
444 ///
445 /// '_d' matches a single digit character. The regex traits are used to determine which
446 /// characters are digits. Use ~_d to match a character that is not a digit
447 /// character.
448 ///
449 /// \attention _d is like \\d in perl. ~_d is like \\D in perl.
450 proto::terminal<detail::posix_charset_placeholder>::type const _d = {{"d", false}};
451 
452 ///////////////////////////////////////////////////////////////////////////////
453 /// \brief Matches a space character.
454 ///
455 /// '_s' matches a single space character. The regex traits are used to determine which
456 /// characters are space characters. Use ~_s to match a character that is not a space
457 /// character.
458 ///
459 /// \attention _s is like \\s in perl. ~_s is like \\S in perl.
460 proto::terminal<detail::posix_charset_placeholder>::type const _s = {{"s", false}};
461 
462 ///////////////////////////////////////////////////////////////////////////////
463 /// \brief Matches a literal newline character, '\\n'.
464 ///
465 /// '_n' matches a single newline character, '\\n'. Use ~_n to match a character
466 /// that is not a newline.
467 ///
468 /// \attention ~_n is like '.' in perl without the /s modifier.
469 proto::terminal<char>::type const _n = {'\n'};
470 
471 ///////////////////////////////////////////////////////////////////////////////
472 /// \brief Matches a logical newline sequence.
473 ///
474 /// '_ln' matches a logical newline sequence. This can be any character in the
475 /// line separator class, as determined by the regex traits, or the '\\r\\n' sequence.
476 /// For the purpose of back-tracking, '\\r\\n' is treated as a unit.
477 /// To match any one character that is not a logical newline, use ~_ln.
478 detail::logical_newline_xpression const _ln = {{}};
479 
480 ///////////////////////////////////////////////////////////////////////////////
481 /// \brief Matches any one character.
482 ///
483 /// Match any character, similar to '.' in perl syntax with the /s modifier.
484 /// '_' matches any one character, including the newline.
485 ///
486 /// \attention To match any character except the newline, use ~_n
487 proto::terminal<detail::any_matcher>::type const _ = {{}};
488 
489 ///////////////////////////////////////////////////////////////////////////////
490 /// \brief Reference to the current regex object
491 ///
492 /// Useful when constructing recursive regular expression objects. The 'self'
493 /// identifier is a short-hand for the current regex object. For instance,
494 /// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that
495 /// matches balanced parens such as "((()))".
496 proto::terminal<detail::self_placeholder>::type const self = {{}};
497 
498 ///////////////////////////////////////////////////////////////////////////////
499 /// \brief Used to create character sets.
500 ///
501 /// There are two ways to create character sets with the 'set' identifier. The
502 /// easiest is to create a comma-separated list of the characters in the set,
503 /// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other
504 /// way is to define the set as an argument to the set subscript operator.
505 /// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b',
506 /// 'c' or a digit character.
507 ///
508 /// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c')
509 /// will match any character that is not an 'a', 'b', or 'c'.
510 ///
511 /// Sets can be composed of other, possibly complemented, sets. For instance,
512 /// set[ ~digit | ~(set= 'a','b','c') ].
513 detail::set_initializer_type const set = {{}};
514 
515 ///////////////////////////////////////////////////////////////////////////////
516 /// \brief Sub-match placeholder type, used to create named captures in
517 /// static regexes.
518 ///
519 /// \c mark_tag is the type of the global sub-match placeholders \c s0, \c s1, etc.. You
520 /// can use the \c mark_tag type to create your own sub-match placeholders with
521 /// more meaningful names. This is roughly equivalent to the "named capture"
522 /// feature of dynamic regular expressions.
523 ///
524 /// To create a named sub-match placeholder, initialize it with a unique integer.
525 /// The integer must only be unique within the regex in which the placeholder
526 /// is used. Then you can use it within static regexes to created sub-matches
527 /// by assigning a sub-expression to it, or to refer back to already created
528 /// sub-matches.
529 ///
530 /// \code
531 /// mark_tag number(1); // "number" is now equivalent to "s1"
532 /// // Match a number, followed by a space and the same number again
533 /// sregex rx = (number = +_d) >> ' ' >> number;
534 /// \endcode
535 ///
536 /// After a successful \c regex_match() or \c regex_search(), the sub-match placeholder
537 /// can be used to index into the <tt>match_results\<\></tt> object to retrieve the
538 /// corresponding sub-match.
539 struct mark_tag
540   : proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain>
541 {
542 private:
543     typedef proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> base_type;
544 
make_tagboost::xpressive::mark_tag545     static detail::basic_mark_tag make_tag(int mark_nbr)
546     {
547         detail::basic_mark_tag mark = {{mark_nbr}};
548         return mark;
549     }
550 
551 public:
552     /// \brief Initialize a mark_tag placeholder
553     /// \param mark_nbr An integer that uniquely identifies this \c mark_tag
554     /// within the static regexes in which this \c mark_tag will be used.
555     /// \pre <tt>mark_nbr \> 0</tt>
mark_tagboost::xpressive::mark_tag556     mark_tag(int mark_nbr)
557       : base_type(mark_tag::make_tag(mark_nbr))
558     {
559         // Marks numbers must be integers greater than 0.
560         BOOST_ASSERT(mark_nbr > 0);
561     }
562 
563     /// INTERNAL ONLY
operator detail::basic_mark_tag const&boost::xpressive::mark_tag564     operator detail::basic_mark_tag const &() const
565     {
566         return this->proto_base();
567     }
568 
569     BOOST_PROTO_EXTENDS_USING_ASSIGN_NON_DEPENDENT(mark_tag)
570 };
571 
// Declares a global sub-match placeholder called NAME with mark number VALUE.
// Global mark_tags are declared through this macro because it guarantees
// that they are statically initialized, which avoids order-of-initialization
// bugs. In user code, the simpler: mark_tag s0(0); would be preferable.
/// INTERNAL ONLY
#define BOOST_XPRESSIVE_GLOBAL_MARK_TAG(NAME, VALUE)                            \
    boost::xpressive::mark_tag::proto_base_expr const NAME = {{VALUE}}          \
    /**/
581 ///////////////////////////////////////////////////////////////////////////////
582 /// \brief Sub-match placeholder, like $& in Perl
583 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s0, 0);
584 
585 ///////////////////////////////////////////////////////////////////////////////
586 /// \brief Sub-match placeholder, like $1 in perl.
587 ///
588 /// To create a sub-match, assign a sub-expression to the sub-match placeholder.
589 /// For instance, (s1= _) will match any one character and remember which
590 /// character was matched in the 1st sub-match. Later in the pattern, you can
591 /// refer back to the sub-match. For instance,  (s1= _) >> s1  will match any
592 /// character, and then match the same character again.
593 ///
594 /// After a successful regex_match() or regex_search(), the sub-match placeholders
595 /// can be used to index into the match_results\<\> object to retrieve the Nth
596 /// sub-match.
597 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s1, 1);
598 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s2, 2);
599 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s3, 3);
600 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s4, 4);
601 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s5, 5);
602 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s6, 6);
603 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s7, 7);
604 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s8, 8);
605 BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s9, 9);
606 
// NOTE: This declaration exists only for xpressive's documentation, where
// icase() is presented as an ordinary function. In reality, it is a function
// object defined in detail/icase.hpp so that it can serve double-duty as
// regex_constants::icase, the syntax_option_type.
#ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
///////////////////////////////////////////////////////////////////////////////
/// \brief Makes a sub-expression case-insensitive.
///
/// Use icase() to make a sub-expression case-insensitive. For instance,
/// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by
/// "bar" irrespective of case.
template<typename Expr>
detail::unspecified icase(Expr const &expr)
{
    return 0;
}
#endif
619 
620 ///////////////////////////////////////////////////////////////////////////////
621 /// \brief Makes a literal into a regular expression.
622 ///
623 /// Use as_xpr() to turn a literal into a regular expression. For instance,
624 /// "foo" >> "bar" will not compile because both operands to the right-shift
625 /// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar"
626 /// instead.
627 ///
628 /// You can use as_xpr() with character literals in addition to string literals.
629 /// For instance, as_xpr('a') will match an 'a'. You can also complement a
630 /// character literal, as with ~as_xpr('a'). This will match any one character
631 /// that is not an 'a'.
632 #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
as_xpr(Literal const & literal)633 template<typename Literal> detail::unspecified as_xpr(Literal const &literal) { return 0; }
634 #else
635 proto::functional::as_expr<> const as_xpr = {};
636 #endif
637 
638 ///////////////////////////////////////////////////////////////////////////////
639 /// \brief Embed a regex object by reference.
640 ///
641 /// \param rex The basic_regex object to embed by reference.
642 template<typename BidiIter>
643 inline typename proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type const
by_ref(basic_regex<BidiIter> const & rex)644 by_ref(basic_regex<BidiIter> const &rex)
645 {
646     reference_wrapper<basic_regex<BidiIter> const> ref(rex);
647     return proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type::make(ref);
648 }
649 
650 ///////////////////////////////////////////////////////////////////////////////
651 /// \brief Match a range of characters.
652 ///
653 /// Match any character in the range [ch_min, ch_max].
654 ///
655 /// \param ch_min The lower end of the range to match.
656 /// \param ch_max The upper end of the range to match.
657 template<typename Char>
658 inline typename proto::terminal<detail::range_placeholder<Char> >::type const
range(Char ch_min,Char ch_max)659 range(Char ch_min, Char ch_max)
660 {
661     detail::range_placeholder<Char> that = {ch_min, ch_max, false};
662     return proto::terminal<detail::range_placeholder<Char> >::type::make(that);
663 }
664 
665 ///////////////////////////////////////////////////////////////////////////////
666 /// \brief Make a sub-expression optional. Equivalent to !as_xpr(expr).
667 ///
668 /// \param expr The sub-expression to make optional.
669 template<typename Expr>
670 typename proto::result_of::make_expr<
671     proto::tag::logical_not
672   , proto::default_domain
673   , Expr const &
674 >::type const
optional(Expr const & expr)675 optional(Expr const &expr)
676 {
677     return proto::make_expr<
678         proto::tag::logical_not
679       , proto::default_domain
680     >(boost::ref(expr));
681 }
682 
683 ///////////////////////////////////////////////////////////////////////////////
684 /// \brief Repeat a sub-expression multiple times.
685 ///
686 /// There are two forms of the repeat\<\>() function template. To match a
687 /// sub-expression N times, use repeat\<N\>(expr). To match a sub-expression
688 /// from M to N times, use repeat\<M,N\>(expr).
689 ///
690 /// The repeat\<\>() function creates a greedy quantifier. To make the quantifier
691 /// non-greedy, apply the unary minus operator, as in -repeat\<M,N\>(expr).
692 ///
693 /// \param expr The sub-expression to repeat.
694 template<unsigned int Min, unsigned int Max, typename Expr>
695 typename proto::result_of::make_expr<
696     detail::generic_quant_tag<Min, Max>
697   , proto::default_domain
698   , Expr const &
699 >::type const
repeat(Expr const & expr)700 repeat(Expr const &expr)
701 {
702     return proto::make_expr<
703         detail::generic_quant_tag<Min, Max>
704       , proto::default_domain
705     >(boost::ref(expr));
706 }
707 
708 /// \overload
709 ///
710 template<unsigned int Count, typename Expr2>
711 typename proto::result_of::make_expr<
712     detail::generic_quant_tag<Count, Count>
713   , proto::default_domain
714   , Expr2 const &
715 >::type const
repeat(Expr2 const & expr2)716 repeat(Expr2 const &expr2)
717 {
718     return proto::make_expr<
719         detail::generic_quant_tag<Count, Count>
720       , proto::default_domain
721     >(boost::ref(expr2));
722 }
723 
724 ///////////////////////////////////////////////////////////////////////////////
725 /// \brief Create an independent sub-expression.
726 ///
727 /// Turn off back-tracking for a sub-expression. Any branches or repeats within
728 /// the sub-expression will match only one way, and no other alternatives are
729 /// tried.
730 ///
731 /// \attention keep(expr) is equivalent to the perl (?>...) extension.
732 ///
733 /// \param expr The sub-expression to modify.
734 template<typename Expr>
735 typename proto::result_of::make_expr<
736     detail::keeper_tag
737   , proto::default_domain
738   , Expr const &
739 >::type const
keep(Expr const & expr)740 keep(Expr const &expr)
741 {
742     return proto::make_expr<
743         detail::keeper_tag
744       , proto::default_domain
745     >(boost::ref(expr));
746 }
747 
748 ///////////////////////////////////////////////////////////////////////////////
749 /// \brief Look-ahead assertion.
750 ///
751 /// before(expr) succeeds if the expr sub-expression would match at the current
752 /// position in the sequence, but expr is not included in the match. For instance,
753 /// before("foo") succeeds if we are before a "foo". Look-ahead assertions can be
754 /// negated with the bit-compliment operator.
755 ///
756 /// \attention before(expr) is equivalent to the perl (?=...) extension.
757 /// ~before(expr) is a negative look-ahead assertion, equivalent to the
758 /// perl (?!...) extension.
759 ///
760 /// \param expr The sub-expression to put in the look-ahead assertion.
761 template<typename Expr>
762 typename proto::result_of::make_expr<
763     detail::lookahead_tag
764   , proto::default_domain
765   , Expr const &
766 >::type const
before(Expr const & expr)767 before(Expr const &expr)
768 {
769     return proto::make_expr<
770         detail::lookahead_tag
771       , proto::default_domain
772     >(boost::ref(expr));
773 }
774 
775 ///////////////////////////////////////////////////////////////////////////////
776 /// \brief Look-behind assertion.
777 ///
778 /// after(expr) succeeds if the expr sub-expression would match at the current
779 /// position minus N in the sequence, where N is the width of expr. expr is not included in
780 /// the match. For instance,  after("foo") succeeds if we are after a "foo". Look-behind
781 /// assertions can be negated with the bit-complement operator.
782 ///
783 /// \attention after(expr) is equivalent to the perl (?<=...) extension.
784 /// ~after(expr) is a negative look-behind assertion, equivalent to the
785 /// perl (?<!...) extension.
786 ///
787 /// \param expr The sub-expression to put in the look-ahead assertion.
788 ///
789 /// \pre expr cannot match a variable number of characters.
790 template<typename Expr>
791 typename proto::result_of::make_expr<
792     detail::lookbehind_tag
793   , proto::default_domain
794   , Expr const &
795 >::type const
after(Expr const & expr)796 after(Expr const &expr)
797 {
798     return proto::make_expr<
799         detail::lookbehind_tag
800       , proto::default_domain
801     >(boost::ref(expr));
802 }
803 
804 ///////////////////////////////////////////////////////////////////////////////
805 /// \brief Specify a regex traits or a std::locale.
806 ///
807 /// imbue() instructs the regex engine to use the specified traits or locale
808 /// when matching the regex. The entire expression must use the same traits/locale.
809 /// For instance, the following specifies a locale for use with a regex:
810 ///   std::locale loc;
811 ///   sregex rx = imbue(loc)(+digit);
812 ///
813 /// \param loc The std::locale or regex traits object.
814 template<typename Locale>
815 inline detail::modifier_op<detail::locale_modifier<Locale> > const
imbue(Locale const & loc)816 imbue(Locale const &loc)
817 {
818     detail::modifier_op<detail::locale_modifier<Locale> > mod =
819     {
820         detail::locale_modifier<Locale>(loc)
821       , regex_constants::ECMAScript
822     };
823     return mod;
824 }
825 
826 proto::terminal<detail::attribute_placeholder<mpl::int_<1> > >::type const a1 = {{}};
827 proto::terminal<detail::attribute_placeholder<mpl::int_<2> > >::type const a2 = {{}};
828 proto::terminal<detail::attribute_placeholder<mpl::int_<3> > >::type const a3 = {{}};
829 proto::terminal<detail::attribute_placeholder<mpl::int_<4> > >::type const a4 = {{}};
830 proto::terminal<detail::attribute_placeholder<mpl::int_<5> > >::type const a5 = {{}};
831 proto::terminal<detail::attribute_placeholder<mpl::int_<6> > >::type const a6 = {{}};
832 proto::terminal<detail::attribute_placeholder<mpl::int_<7> > >::type const a7 = {{}};
833 proto::terminal<detail::attribute_placeholder<mpl::int_<8> > >::type const a8 = {{}};
834 proto::terminal<detail::attribute_placeholder<mpl::int_<9> > >::type const a9 = {{}};
835 
836 ///////////////////////////////////////////////////////////////////////////////
837 /// \brief Specify which characters to skip when matching a regex.
838 ///
839 /// <tt>skip()</tt> instructs the regex engine to skip certain characters when matching
840 /// a regex. It is most useful for writing regexes that ignore whitespace.
841 /// For instance, the following specifies a regex that skips whitespace and
842 /// punctuation:
843 ///
844 /// \code
845 /// // A sentence is one or more words separated by whitespace
846 /// // and punctuation.
847 /// sregex word = +alpha;
848 /// sregex sentence = skip(set[_s | punct])( +word );
849 /// \endcode
850 ///
851 /// The way it works in the above example is to insert
852 /// <tt>keep(*set[_s | punct])</tt> before each primitive within the regex.
853 /// A "primitive" includes terminals like strings, character sets and nested
854 /// regexes. A final <tt>*set[_s | punct]</tt> is added to the end of the
855 /// regex. The regex <tt>sentence</tt> specified above is equivalent to
856 /// the following:
857 ///
858 /// \code
859 /// sregex sentence = +( keep(*set[_s | punct]) >> word )
860 ///                        >> *set[_s | punct];
861 /// \endcode
862 ///
863 /// \attention Skipping does not affect how nested regexes are handled because
864 /// they are treated atomically. String literals are also treated
865 /// atomically; that is, no skipping is done within a string literal. So
866 /// <tt>skip(_s)("this that")</tt> is not the same as
867 /// <tt>skip(_s)("this" >> as_xpr("that"))</tt>. The first will only match
868 /// when there is only one space between "this" and "that". The second will
869 /// skip any and all whitespace between "this" and "that".
870 ///
871 /// \param skip A regex that specifies which characters to skip.
872 template<typename Skip>
skip(Skip const & skip)873 detail::skip_directive<Skip> skip(Skip const &skip)
874 {
875     return detail::skip_directive<Skip>(skip);
876 }
877 
878 namespace detail
879 {
ignore_unused_regex_primitives()880     inline void ignore_unused_regex_primitives()
881     {
882         detail::ignore_unused(repeat_max);
883         detail::ignore_unused(inf);
884         detail::ignore_unused(epsilon);
885         detail::ignore_unused(nil);
886         detail::ignore_unused(alnum);
887         detail::ignore_unused(bos);
888         detail::ignore_unused(eos);
889         detail::ignore_unused(bol);
890         detail::ignore_unused(eol);
891         detail::ignore_unused(bow);
892         detail::ignore_unused(eow);
893         detail::ignore_unused(_b);
894         detail::ignore_unused(_w);
895         detail::ignore_unused(_d);
896         detail::ignore_unused(_s);
897         detail::ignore_unused(_n);
898         detail::ignore_unused(_ln);
899         detail::ignore_unused(_);
900         detail::ignore_unused(self);
901         detail::ignore_unused(set);
902         detail::ignore_unused(s0);
903         detail::ignore_unused(s1);
904         detail::ignore_unused(s2);
905         detail::ignore_unused(s3);
906         detail::ignore_unused(s4);
907         detail::ignore_unused(s5);
908         detail::ignore_unused(s6);
909         detail::ignore_unused(s7);
910         detail::ignore_unused(s8);
911         detail::ignore_unused(s9);
912         detail::ignore_unused(a1);
913         detail::ignore_unused(a2);
914         detail::ignore_unused(a3);
915         detail::ignore_unused(a4);
916         detail::ignore_unused(a5);
917         detail::ignore_unused(a6);
918         detail::ignore_unused(a7);
919         detail::ignore_unused(a8);
920         detail::ignore_unused(a9);
921         detail::ignore_unused(as_xpr);
922     }
923 }
924 
925 }} // namespace boost::xpressive
926 
927 #endif
928