1 ///////////////////////////////////////////////////////////////////////////////
2 /// \file regex_compiler.hpp
3 /// Contains the definition of regex_compiler, a factory for building regex objects
4 /// from strings.
5 //
6 //  Copyright 2008 Eric Niebler. Distributed under the Boost
7 //  Software License, Version 1.0. (See accompanying file
8 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 
10 #ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
11 #define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
12 
13 // MS compatible compilers support #pragma once
14 #if defined(_MSC_VER)
15 # pragma once
16 #endif
17 
18 #include <map>
19 #include <boost/config.hpp>
20 #include <boost/assert.hpp>
21 #include <boost/next_prior.hpp>
22 #include <boost/range/begin.hpp>
23 #include <boost/range/end.hpp>
24 #include <boost/mpl/assert.hpp>
25 #include <boost/throw_exception.hpp>
26 #include <boost/type_traits/is_same.hpp>
27 #include <boost/type_traits/is_pointer.hpp>
28 #include <boost/utility/enable_if.hpp>
29 #include <boost/iterator/iterator_traits.hpp>
30 #include <boost/xpressive/basic_regex.hpp>
31 #include <boost/xpressive/detail/dynamic/parser.hpp>
32 #include <boost/xpressive/detail/dynamic/parse_charset.hpp>
33 #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
34 #include <boost/xpressive/detail/dynamic/parser_traits.hpp>
35 #include <boost/xpressive/detail/core/linker.hpp>
36 #include <boost/xpressive/detail/core/optimize.hpp>
37 
38 namespace boost { namespace xpressive
39 {
40 
41 ///////////////////////////////////////////////////////////////////////////////
42 // regex_compiler
43 //
44 /// \brief Class template regex_compiler is a factory for building basic_regex objects from a string.
45 ///
46 /// Class template regex_compiler is used to construct a basic_regex object from a string. The string
47 /// should contain a valid regular expression. You can imbue a regex_compiler object with a locale,
48 /// after which all basic_regex objects created with that regex_compiler object will use that locale.
49 /// After creating a regex_compiler object, and optionally imbuing it with a locale, you can call the
50 /// compile() method to construct a basic_regex object, passing it the string representing the regular
51 /// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex
52 /// objects compiled from the same string will have different regex_id's.
53 template<typename BidiIter, typename RegexTraits, typename CompilerTraits>
54 struct regex_compiler
55 {
// Public member types. iterator_type is the iterator type of the basic_regex
// objects this compiler produces; char_type is that iterator's value type.
// flag_type is the syntax-option bitmask accepted by compile(). The remaining
// types are forwarded from the RegexTraits template argument.
56     typedef BidiIter iterator_type;
57     typedef typename iterator_value<BidiIter>::type char_type;
58     typedef regex_constants::syntax_option_type flag_type;
59     typedef RegexTraits traits_type;
60     typedef typename traits_type::string_type string_type;
61     typedef typename traits_type::locale_type locale_type;
62     typedef typename traits_type::char_class_type char_class_type;
63 
regex_compilerboost::xpressive::regex_compiler64     explicit regex_compiler(RegexTraits const &traits = RegexTraits())
65       : mark_count_(0)
66       , hidden_mark_count_(0)
67       , traits_(traits)
68       , upper_(0)
69       , self_()
70       , rules_()
71     {
72         this->upper_ = lookup_classname(this->rxtraits(), "upper");
73     }
74 
75     ///////////////////////////////////////////////////////////////////////////
76     // imbue
77     /// Specify the locale to be used by a regex_compiler.
78     ///
79     /// \param loc The locale that this regex_compiler should use.
80     /// \return The previous locale.
imbueboost::xpressive::regex_compiler81     locale_type imbue(locale_type loc)
82     {
83         locale_type oldloc = this->traits_.imbue(loc);
84         this->upper_ = lookup_classname(this->rxtraits(), "upper");
85         return oldloc;
86     }
87 
88     ///////////////////////////////////////////////////////////////////////////
89     // getloc
90     /// Get the locale used by a regex_compiler.
91     ///
92     /// \return The locale used by this regex_compiler.
getlocboost::xpressive::regex_compiler93     locale_type getloc() const
94     {
95         return this->traits_.getloc();
96     }
97 
98     ///////////////////////////////////////////////////////////////////////////
99     // compile
100     /// Builds a basic_regex object from a range of characters.
101     ///
102     /// \param  begin The beginning of a range of characters representing the
103     ///         regular expression to compile.
104     /// \param  end The end of a range of characters representing the
105     ///         regular expression to compile.
106     /// \param  flags Optional bitmask that determines how the pat string is
107     ///         interpreted. (See syntax_option_type.)
108     /// \return A basic_regex object corresponding to the regular expression
109     ///         represented by the character range.
110     /// \pre    InputIter is a model of the InputIterator concept.
111     /// \pre    [begin,end) is a valid range.
112     /// \pre    The range of characters specified by [begin,end) contains a
113     ///         valid string-based representation of a regular expression.
114     /// \throw  regex_error when the range of characters has invalid regular
115     ///         expression syntax.
116     template<typename InputIter>
117     basic_regex<BidiIter>
compileboost::xpressive::regex_compiler118     compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript)
119     {
120         typedef typename iterator_category<InputIter>::type category;
121         return this->compile_(begin, end, flags, category());
122     }
123 
124     /// \overload
125     ///
126     template<typename InputRange>
127     typename disable_if<is_pointer<InputRange>, basic_regex<BidiIter> >::type
compileboost::xpressive::regex_compiler128     compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript)
129     {
130         return this->compile(boost::begin(pat), boost::end(pat), flags);
131     }
132 
133     /// \overload
134     ///
135     basic_regex<BidiIter>
compileboost::xpressive::regex_compiler136     compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript)
137     {
138         BOOST_ASSERT(0 != begin);
139         char_type const *end = begin + std::char_traits<char_type>::length(begin);
140         return this->compile(begin, end, flags);
141     }
142 
143     /// \overload
144     ///
compileboost::xpressive::regex_compiler145     basic_regex<BidiIter> compile(char_type const *begin, std::size_t size, flag_type flags)
146     {
147         BOOST_ASSERT(0 != begin);
148         char_type const *end = begin + size;
149         return this->compile(begin, end, flags);
150     }
151 
152     ///////////////////////////////////////////////////////////////////////////
153     // operator[]
154     /// Return a reference to the named regular expression. If no such named
155     /// regular expression exists, create a new regular expression and return
156     /// a reference to it.
157     ///
158     /// \param  name A std::string containing the name of the regular expression.
159     /// \pre    The string is not empty.
160     /// \throw  bad_alloc on allocation failure.
operator []boost::xpressive::regex_compiler161     basic_regex<BidiIter> &operator [](string_type const &name)
162     {
163         BOOST_ASSERT(!name.empty());
164         return this->rules_[name];
165     }
166 
167     /// \overload
168     ///
operator []boost::xpressive::regex_compiler169     basic_regex<BidiIter> const &operator [](string_type const &name) const
170     {
171         BOOST_ASSERT(!name.empty());
172         return this->rules_[name];
173     }
174 
175 private:
176 
// Private shorthands: escape_value is the result type of parse_escape();
// alternate_matcher is the matcher that parse_alternates() places at the
// front of an alternation.
177     typedef detail::escape_value<char_type, char_class_type> escape_value;
178     typedef detail::alternate_matcher<detail::alternates_vector<BidiIter>, RegexTraits> alternate_matcher;
179 
180     ///////////////////////////////////////////////////////////////////////////
181     // compile_
182     /// INTERNAL ONLY
183     template<typename FwdIter>
compile_boost::xpressive::regex_compiler184     basic_regex<BidiIter> compile_(FwdIter begin, FwdIter end, flag_type flags, std::forward_iterator_tag)
185     {
186         BOOST_MPL_ASSERT((is_same<char_type, typename iterator_value<FwdIter>::type>));
187         using namespace regex_constants;
188         this->reset();
189         this->traits_.flags(flags);
190 
191         basic_regex<BidiIter> rextmp, *prex = &rextmp;
192         FwdIter tmp = begin;
193 
194         // Check if this regex is a named rule:
195         string_type name;
196         if(token_group_begin == this->traits_.get_token(tmp, end) &&
197            BOOST_XPR_ENSURE_(tmp != end, error_paren, "mismatched parenthesis") &&
198            token_rule_assign == this->traits_.get_group_type(tmp, end, name))
199         {
200             begin = tmp;
201             BOOST_XPR_ENSURE_
202             (
203                 begin != end && token_group_end == this->traits_.get_token(begin, end)
204               , error_paren
205               , "mismatched parenthesis"
206             );
207             prex = &this->rules_[name];
208         }
209 
210         this->self_ = detail::core_access<BidiIter>::get_regex_impl(*prex);
211 
212         // at the top level, a regex is a sequence of alternates
213         detail::sequence<BidiIter> seq = this->parse_alternates(begin, end);
214         BOOST_XPR_ENSURE_(begin == end, error_paren, "mismatched parenthesis");
215 
216         // terminate the sequence
217         seq += detail::make_dynamic<BidiIter>(detail::end_matcher());
218 
219         // bundle the regex information into a regex_impl object
220         detail::common_compile(seq.xpr().matchable(), *this->self_, this->rxtraits());
221 
222         this->self_->traits_ = new detail::traits_holder<RegexTraits>(this->rxtraits());
223         this->self_->mark_count_ = this->mark_count_;
224         this->self_->hidden_mark_count_ = this->hidden_mark_count_;
225 
226         // References changed, update dependencies.
227         this->self_->tracking_update();
228         this->self_.reset();
229         return *prex;
230     }
231 
232     ///////////////////////////////////////////////////////////////////////////
233     // compile_
234     /// INTERNAL ONLY
235     template<typename InputIter>
compile_boost::xpressive::regex_compiler236     basic_regex<BidiIter> compile_(InputIter begin, InputIter end, flag_type flags, std::input_iterator_tag)
237     {
238         string_type pat(begin, end);
239         return this->compile_(boost::begin(pat), boost::end(pat), flags, std::forward_iterator_tag());
240     }
241 
242     ///////////////////////////////////////////////////////////////////////////
243     // reset
244     /// INTERNAL ONLY
resetboost::xpressive::regex_compiler245     void reset()
246     {
247         this->mark_count_ = 0;
248         this->hidden_mark_count_ = 0;
249         this->traits_.flags(regex_constants::ECMAScript);
250     }
251 
252     ///////////////////////////////////////////////////////////////////////////
253     // rxtraits
254     /// INTERNAL ONLY
rxtraitsboost::xpressive::regex_compiler255     traits_type &rxtraits()
256     {
257         return this->traits_.traits();
258     }
259 
260     ///////////////////////////////////////////////////////////////////////////
261     // rxtraits (const overload)
262     /// INTERNAL ONLY
rxtraitsboost::xpressive::regex_compiler263     traits_type const &rxtraits() const
264     {
265         return this->traits_.traits();
266     }
267 
268     ///////////////////////////////////////////////////////////////////////////
269     // parse_alternates
270     /// INTERNAL ONLY
271     template<typename FwdIter>
parse_alternatesboost::xpressive::regex_compiler272     detail::sequence<BidiIter> parse_alternates(FwdIter &begin, FwdIter end)
273     {
274         using namespace regex_constants;
275         int count = 0;
276         FwdIter tmp = begin;
277         detail::sequence<BidiIter> seq;
278 
279         do switch(++count)
280         {
281         case 1:
282             seq = this->parse_sequence(tmp, end);
283             break;
284         case 2:
285             seq = detail::make_dynamic<BidiIter>(alternate_matcher()) | seq;
286             BOOST_FALLTHROUGH;
287         default:
288             seq |= this->parse_sequence(tmp, end);
289         }
290         while((begin = tmp) != end && token_alternate == this->traits_.get_token(tmp, end));
291 
292         return seq;
293     }
294 
295     ///////////////////////////////////////////////////////////////////////////
296     // parse_group
297     /// INTERNAL ONLY
298     template<typename FwdIter>
parse_groupboost::xpressive::regex_compiler299     detail::sequence<BidiIter> parse_group(FwdIter &begin, FwdIter end)
300     {
301         using namespace regex_constants;
302         int mark_nbr = 0;
303         bool keeper = false;
304         bool lookahead = false;
305         bool lookbehind = false;
306         bool negative = false;
307         string_type name;
308 
309         detail::sequence<BidiIter> seq, seq_end;
310         FwdIter tmp = FwdIter();
311 
312         syntax_option_type old_flags = this->traits_.flags();
313 
314         switch(this->traits_.get_group_type(begin, end, name))
315         {
316         case token_no_mark:
317             // Don't process empty groups like (?:) or (?i)
318             // BUGBUG this doesn't handle the degenerate (?:)+ correctly
319             if(token_group_end == this->traits_.get_token(tmp = begin, end))
320             {
321                 return this->parse_atom(begin = tmp, end);
322             }
323             break;
324 
325         case token_negative_lookahead:
326             negative = true;
327             BOOST_FALLTHROUGH;
328         case token_positive_lookahead:
329             lookahead = true;
330             break;
331 
332         case token_negative_lookbehind:
333             negative = true;
334             BOOST_FALLTHROUGH;
335         case token_positive_lookbehind:
336             lookbehind = true;
337             break;
338 
339         case token_independent_sub_expression:
340             keeper = true;
341             break;
342 
343         case token_comment:
344             while(BOOST_XPR_ENSURE_(begin != end, error_paren, "mismatched parenthesis"))
345             {
346                 switch(this->traits_.get_token(begin, end))
347                 {
348                 case token_group_end:
349                     return this->parse_atom(begin, end);
350                 case token_escape:
351                     BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
352                     BOOST_FALLTHROUGH;
353                 case token_literal:
354                     ++begin;
355                     break;
356                 default:
357                     break;
358                 }
359             }
360             break;
361 
362         case token_recurse:
363             BOOST_XPR_ENSURE_
364             (
365                 begin != end && token_group_end == this->traits_.get_token(begin, end)
366               , error_paren
367               , "mismatched parenthesis"
368             );
369             return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(this->self_));
370 
371         case token_rule_assign:
372             BOOST_THROW_EXCEPTION(
373                 regex_error(error_badrule, "rule assignments must be at the front of the regex")
374             );
375             break;
376 
377         case token_rule_ref:
378             {
379                 typedef detail::core_access<BidiIter> access;
380                 BOOST_XPR_ENSURE_
381                 (
382                     begin != end && token_group_end == this->traits_.get_token(begin, end)
383                   , error_paren
384                   , "mismatched parenthesis"
385                 );
386                 basic_regex<BidiIter> &rex = this->rules_[name];
387                 shared_ptr<detail::regex_impl<BidiIter> > impl = access::get_regex_impl(rex);
388                 this->self_->track_reference(*impl);
389                 return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(impl));
390             }
391 
392         case token_named_mark:
393             mark_nbr = static_cast<int>(++this->mark_count_);
394             for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
395             {
396                 BOOST_XPR_ENSURE_(this->self_->named_marks_[i].name_ != name, error_badmark, "named mark already exists");
397             }
398             this->self_->named_marks_.push_back(detail::named_mark<char_type>(name, this->mark_count_));
399             seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
400             seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
401             break;
402 
403         case token_named_mark_ref:
404             BOOST_XPR_ENSURE_
405             (
406                 begin != end && token_group_end == this->traits_.get_token(begin, end)
407               , error_paren
408               , "mismatched parenthesis"
409             );
410             for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
411             {
412                 if(this->self_->named_marks_[i].name_ == name)
413                 {
414                     mark_nbr = static_cast<int>(this->self_->named_marks_[i].mark_nbr_);
415                     return detail::make_backref_xpression<BidiIter>
416                     (
417                         mark_nbr, this->traits_.flags(), this->rxtraits()
418                     );
419                 }
420             }
421             BOOST_THROW_EXCEPTION(regex_error(error_badmark, "invalid named back-reference"));
422             break;
423 
424         default:
425             mark_nbr = static_cast<int>(++this->mark_count_);
426             seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
427             seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
428             break;
429         }
430 
431         // alternates
432         seq += this->parse_alternates(begin, end);
433         seq += seq_end;
434         BOOST_XPR_ENSURE_
435         (
436             begin != end && token_group_end == this->traits_.get_token(begin, end)
437           , error_paren
438           , "mismatched parenthesis"
439         );
440 
441         typedef detail::shared_matchable<BidiIter> xpr_type;
442         if(lookahead)
443         {
444             seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
445             detail::lookahead_matcher<xpr_type> lam(seq.xpr(), negative, seq.pure());
446             seq = detail::make_dynamic<BidiIter>(lam);
447         }
448         else if(lookbehind)
449         {
450             seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
451             detail::lookbehind_matcher<xpr_type> lbm(seq.xpr(), seq.width().value(), negative, seq.pure());
452             seq = detail::make_dynamic<BidiIter>(lbm);
453         }
454         else if(keeper) // independent sub-expression
455         {
456             seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
457             detail::keeper_matcher<xpr_type> km(seq.xpr(), seq.pure());
458             seq = detail::make_dynamic<BidiIter>(km);
459         }
460 
461         // restore the modifiers
462         this->traits_.flags(old_flags);
463         return seq;
464     }
465 
466     ///////////////////////////////////////////////////////////////////////////
467     // parse_charset
468     /// INTERNAL ONLY
469     template<typename FwdIter>
parse_charsetboost::xpressive::regex_compiler470     detail::sequence<BidiIter> parse_charset(FwdIter &begin, FwdIter end)
471     {
472         detail::compound_charset<traits_type> chset;
473 
474         // call out to a helper to actually parse the character set
475         detail::parse_charset(begin, end, chset, this->traits_);
476 
477         return detail::make_charset_xpression<BidiIter>
478         (
479             chset
480           , this->rxtraits()
481           , this->traits_.flags()
482         );
483     }
484 
485     ///////////////////////////////////////////////////////////////////////////
486     // parse_atom
487     /// INTERNAL ONLY
488     template<typename FwdIter>
parse_atomboost::xpressive::regex_compiler489     detail::sequence<BidiIter> parse_atom(FwdIter &begin, FwdIter end)
490     {
491         using namespace regex_constants;
492         escape_value esc = { 0, 0, 0, detail::escape_char };
493         FwdIter old_begin = begin;
494 
495         switch(this->traits_.get_token(begin, end))
496         {
497         case token_literal:
498             return detail::make_literal_xpression<BidiIter>
499             (
500                 this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits()
501             );
502 
503         case token_any:
504             return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits());
505 
506         case token_assert_begin_sequence:
507             return detail::make_dynamic<BidiIter>(detail::assert_bos_matcher());
508 
509         case token_assert_end_sequence:
510             return detail::make_dynamic<BidiIter>(detail::assert_eos_matcher());
511 
512         case token_assert_begin_line:
513             return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits());
514 
515         case token_assert_end_line:
516             return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits());
517 
518         case token_assert_word_boundary:
519             return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::true_>(), this->rxtraits());
520 
521         case token_assert_not_word_boundary:
522             return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::false_>(), this->rxtraits());
523 
524         case token_assert_word_begin:
525             return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits());
526 
527         case token_assert_word_end:
528             return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits());
529 
530         case token_escape:
531             esc = this->parse_escape(begin, end);
532             switch(esc.type_)
533             {
534             case detail::escape_mark:
535                 return detail::make_backref_xpression<BidiIter>
536                 (
537                     esc.mark_nbr_, this->traits_.flags(), this->rxtraits()
538                 );
539             case detail::escape_char:
540                 return detail::make_char_xpression<BidiIter>
541                 (
542                     esc.ch_, this->traits_.flags(), this->rxtraits()
543                 );
544             case detail::escape_class:
545                 return detail::make_posix_charset_xpression<BidiIter>
546                 (
547                     esc.class_
548                   , this->is_upper_(*begin++)
549                   , this->traits_.flags()
550                   , this->rxtraits()
551                 );
552             }
553 
554         case token_group_begin:
555             return this->parse_group(begin, end);
556 
557         case token_charset_begin:
558             return this->parse_charset(begin, end);
559 
560         case token_invalid_quantifier:
561             BOOST_THROW_EXCEPTION(regex_error(error_badrepeat, "quantifier not expected"));
562             break;
563 
564         case token_quote_meta_begin:
565             return detail::make_literal_xpression<BidiIter>
566             (
567                 this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits()
568             );
569 
570         case token_quote_meta_end:
571             BOOST_THROW_EXCEPTION(
572                 regex_error(
573                     error_escape
574                   , "found quote-meta end without corresponding quote-meta begin"
575                 )
576             );
577             break;
578 
579         case token_end_of_pattern:
580             break;
581 
582         default:
583             begin = old_begin;
584             break;
585         }
586 
587         return detail::sequence<BidiIter>();
588     }
589 
590     ///////////////////////////////////////////////////////////////////////////
591     // parse_quant
592     /// INTERNAL ONLY
593     template<typename FwdIter>
parse_quantboost::xpressive::regex_compiler594     detail::sequence<BidiIter> parse_quant(FwdIter &begin, FwdIter end)
595     {
596         BOOST_ASSERT(begin != end);
597         detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
598         detail::sequence<BidiIter> seq = this->parse_atom(begin, end);
599 
600         // BUGBUG this doesn't handle the degenerate (?:)+ correctly
601         if(!seq.empty() && begin != end && detail::quant_none != seq.quant())
602         {
603             if(this->traits_.get_quant_spec(begin, end, spec))
604             {
605                 BOOST_ASSERT(spec.min_ <= spec.max_);
606 
607                 if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing.
608                 {
609                     seq = this->parse_quant(begin, end);
610                 }
611                 else
612                 {
613                     seq.repeat(spec);
614                 }
615             }
616         }
617 
618         return seq;
619     }
620 
621     ///////////////////////////////////////////////////////////////////////////
622     // parse_sequence
623     /// INTERNAL ONLY
624     template<typename FwdIter>
parse_sequenceboost::xpressive::regex_compiler625     detail::sequence<BidiIter> parse_sequence(FwdIter &begin, FwdIter end)
626     {
627         detail::sequence<BidiIter> seq;
628 
629         while(begin != end)
630         {
631             detail::sequence<BidiIter> seq_quant = this->parse_quant(begin, end);
632 
633             // did we find a quantified atom?
634             if(seq_quant.empty())
635                 break;
636 
637             // chain it to the end of the xpression sequence
638             seq += seq_quant;
639         }
640 
641         return seq;
642     }
643 
644     ///////////////////////////////////////////////////////////////////////////
645     // parse_literal
646     //  scan ahead looking for char literals to be globbed together into a string literal
647     /// INTERNAL ONLY
648     template<typename FwdIter>
parse_literalboost::xpressive::regex_compiler649     string_type parse_literal(FwdIter &begin, FwdIter end)
650     {
651         using namespace regex_constants;
652         BOOST_ASSERT(begin != end);
653         BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end));
654         escape_value esc = { 0, 0, 0, detail::escape_char };
655         string_type literal(1, *begin);
656 
657         for(FwdIter prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp)
658         {
659             detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
660             if(this->traits_.get_quant_spec(tmp, end, spec))
661             {
662                 if(literal.size() != 1)
663                 {
664                     begin = prev;
665                     literal.erase(boost::prior(literal.end()));
666                 }
667                 return literal;
668             }
669             else switch(this->traits_.get_token(tmp, end))
670             {
671             case token_escape:
672                 esc = this->parse_escape(tmp, end);
673                 if(detail::escape_char != esc.type_) return literal;
674                 literal.insert(literal.end(), esc.ch_);
675                 break;
676             case token_literal:
677                 literal.insert(literal.end(), *tmp++);
678                 break;
679             default:
680                 return literal;
681             }
682         }
683 
684         return literal;
685     }
686 
687     ///////////////////////////////////////////////////////////////////////////
688     // parse_quote_meta
689     //  scan ahead to the quote-meta end token and return the quoted characters as one string literal
690     /// INTERNAL ONLY
691     template<typename FwdIter>
parse_quote_metaboost::xpressive::regex_compiler692     string_type parse_quote_meta(FwdIter &begin, FwdIter end)
693     {
694         using namespace regex_constants;
695         FwdIter old_begin = begin, old_end;
696         while(end != (old_end = begin))
697         {
698             switch(this->traits_.get_token(begin, end))
699             {
700             case token_quote_meta_end:
701                 return string_type(old_begin, old_end);
702             case token_escape:
703                 BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
704                 BOOST_FALLTHROUGH;
705             case token_invalid_quantifier:
706             case token_literal:
707                 ++begin;
708                 break;
709             default:
710                 break;
711             }
712         }
713         return string_type(old_begin, begin);
714     }
715 
716     ///////////////////////////////////////////////////////////////////////////////
717     // parse_escape
718     /// INTERNAL ONLY
719     template<typename FwdIter>
parse_escapeboost::xpressive::regex_compiler720     escape_value parse_escape(FwdIter &begin, FwdIter end)
721     {
722         BOOST_XPR_ENSURE_(begin != end, regex_constants::error_escape, "incomplete escape sequence");
723 
724         // first, check to see if this can be a backreference
725         if(0 < this->rxtraits().value(*begin, 10))
726         {
727             // Parse at most 3 decimal digits.
728             FwdIter tmp = begin;
729             int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999);
730 
731             // If the resulting number could conceivably be a backref, then it is.
732             if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_))
733             {
734                 begin = tmp;
735                 escape_value esc = {0, mark_nbr, 0, detail::escape_mark};
736                 return esc;
737             }
738         }
739 
740         // Not a backreference, defer to the parse_escape helper
741         return detail::parse_escape(begin, end, this->traits_);
742     }
743 
is_upper_boost::xpressive::regex_compiler744     bool is_upper_(char_type ch) const
745     {
746         return 0 != this->upper_ && this->rxtraits().isctype(ch, this->upper_);
747     }
748 
// Compiler state.
// mark_count_: incremented for each numbered or named group parsed; zeroed by reset().
// hidden_mark_count_: passed by address in every quant_spec; zeroed by reset().
// traits_: compiler traits (tokenizer, flags, locale) wrapping the regex traits.
// upper_: class returned by lookup_classname for "upper"; used by is_upper_().
// self_: implementation of the regex being compiled; set and cleared in compile_().
// rules_: named regexes, keyed by rule name.
749     std::size_t mark_count_;
750     std::size_t hidden_mark_count_;
751     CompilerTraits traits_;
752     typename RegexTraits::char_class_type upper_;
753     shared_ptr<detail::regex_impl<BidiIter> > self_;
754     std::map<string_type, basic_regex<BidiIter> > rules_;
755 };
756 
757 }} // namespace boost::xpressive
758 
759 #endif
760