1 /////////////////////////////////////////////////////////////////////////////// 2 /// \file regex_compiler.hpp 3 /// Contains the definition of regex_compiler, a factory for building regex objects 4 /// from strings. 5 // 6 // Copyright 2008 Eric Niebler. Distributed under the Boost 7 // Software License, Version 1.0. (See accompanying file 8 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 9 10 #ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005 11 #define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005 12 13 // MS compatible compilers support #pragma once 14 #if defined(_MSC_VER) 15 # pragma once 16 #endif 17 18 #include <map> 19 #include <boost/config.hpp> 20 #include <boost/assert.hpp> 21 #include <boost/next_prior.hpp> 22 #include <boost/range/begin.hpp> 23 #include <boost/range/end.hpp> 24 #include <boost/mpl/assert.hpp> 25 #include <boost/throw_exception.hpp> 26 #include <boost/type_traits/is_same.hpp> 27 #include <boost/type_traits/is_pointer.hpp> 28 #include <boost/utility/enable_if.hpp> 29 #include <boost/iterator/iterator_traits.hpp> 30 #include <boost/xpressive/basic_regex.hpp> 31 #include <boost/xpressive/detail/dynamic/parser.hpp> 32 #include <boost/xpressive/detail/dynamic/parse_charset.hpp> 33 #include <boost/xpressive/detail/dynamic/parser_enum.hpp> 34 #include <boost/xpressive/detail/dynamic/parser_traits.hpp> 35 #include <boost/xpressive/detail/core/linker.hpp> 36 #include <boost/xpressive/detail/core/optimize.hpp> 37 38 namespace boost { namespace xpressive 39 { 40 41 /////////////////////////////////////////////////////////////////////////////// 42 // regex_compiler 43 // 44 /// \brief Class template regex_compiler is a factory for building basic_regex objects from a string. 45 /// 46 /// Class template regex_compiler is used to construct a basic_regex object from a string. The string 47 /// should contain a valid regular expression. You can imbue a regex_compiler object with a locale, 48 /// after which all basic_regex objects created with that regex_compiler object will use that locale. 49 /// After creating a regex_compiler object, and optionally imbueing it with a locale, you can call the 50 /// compile() method to construct a basic_regex object, passing it the string representing the regular 51 /// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex 52 /// objects compiled from the same string will have different regex_id's. 53 template<typename BidiIter, typename RegexTraits, typename CompilerTraits> 54 struct regex_compiler 55 { 56 typedef BidiIter iterator_type; 57 typedef typename iterator_value<BidiIter>::type char_type; 58 typedef regex_constants::syntax_option_type flag_type; 59 typedef RegexTraits traits_type; 60 typedef typename traits_type::string_type string_type; 61 typedef typename traits_type::locale_type locale_type; 62 typedef typename traits_type::char_class_type char_class_type; 63 regex_compilerboost::xpressive::regex_compiler64 explicit regex_compiler(RegexTraits const &traits = RegexTraits()) 65 : mark_count_(0) 66 , hidden_mark_count_(0) 67 , traits_(traits) 68 , upper_(0) 69 , self_() 70 , rules_() 71 { 72 this->upper_ = lookup_classname(this->rxtraits(), "upper"); 73 } 74 75 /////////////////////////////////////////////////////////////////////////// 76 // imbue 77 /// Specify the locale to be used by a regex_compiler. 78 /// 79 /// \param loc The locale that this regex_compiler should use. 80 /// \return The previous locale. imbueboost::xpressive::regex_compiler81 locale_type imbue(locale_type loc) 82 { 83 locale_type oldloc = this->traits_.imbue(loc); 84 this->upper_ = lookup_classname(this->rxtraits(), "upper"); 85 return oldloc; 86 } 87 88 /////////////////////////////////////////////////////////////////////////// 89 // getloc 90 /// Get the locale used by a regex_compiler. 91 /// 92 /// \return The locale used by this regex_compiler. getlocboost::xpressive::regex_compiler93 locale_type getloc() const 94 { 95 return this->traits_.getloc(); 96 } 97 98 /////////////////////////////////////////////////////////////////////////// 99 // compile 100 /// Builds a basic_regex object from a range of characters. 101 /// 102 /// \param begin The beginning of a range of characters representing the 103 /// regular expression to compile. 104 /// \param end The end of a range of characters representing the 105 /// regular expression to compile. 106 /// \param flags Optional bitmask that determines how the pat string is 107 /// interpreted. (See syntax_option_type.) 108 /// \return A basic_regex object corresponding to the regular expression 109 /// represented by the character range. 110 /// \pre InputIter is a model of the InputIterator concept. 111 /// \pre [begin,end) is a valid range. 112 /// \pre The range of characters specified by [begin,end) contains a 113 /// valid string-based representation of a regular expression. 114 /// \throw regex_error when the range of characters has invalid regular 115 /// expression syntax. 116 template<typename InputIter> 117 basic_regex<BidiIter> compileboost::xpressive::regex_compiler118 compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript) 119 { 120 typedef typename iterator_category<InputIter>::type category; 121 return this->compile_(begin, end, flags, category()); 122 } 123 124 /// \overload 125 /// 126 template<typename InputRange> 127 typename disable_if<is_pointer<InputRange>, basic_regex<BidiIter> >::type compileboost::xpressive::regex_compiler128 compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript) 129 { 130 return this->compile(boost::begin(pat), boost::end(pat), flags); 131 } 132 133 /// \overload 134 /// 135 basic_regex<BidiIter> compileboost::xpressive::regex_compiler136 compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript) 137 { 138 BOOST_ASSERT(0 != begin); 139 char_type const *end = begin + std::char_traits<char_type>::length(begin); 140 return this->compile(begin, end, flags); 141 } 142 143 /// \overload 144 /// compileboost::xpressive::regex_compiler145 basic_regex<BidiIter> compile(char_type const *begin, std::size_t size, flag_type flags) 146 { 147 BOOST_ASSERT(0 != begin); 148 char_type const *end = begin + size; 149 return this->compile(begin, end, flags); 150 } 151 152 /////////////////////////////////////////////////////////////////////////// 153 // operator[] 154 /// Return a reference to the named regular expression. If no such named 155 /// regular expression exists, create a new regular expression and return 156 /// a reference to it. 157 /// 158 /// \param name A std::string containing the name of the regular expression. 159 /// \pre The string is not empty. 160 /// \throw bad_alloc on allocation failure. operator []boost::xpressive::regex_compiler161 basic_regex<BidiIter> &operator [](string_type const &name) 162 { 163 BOOST_ASSERT(!name.empty()); 164 return this->rules_[name]; 165 } 166 167 /// \overload 168 /// operator []boost::xpressive::regex_compiler169 basic_regex<BidiIter> const &operator [](string_type const &name) const 170 { 171 BOOST_ASSERT(!name.empty()); 172 return this->rules_[name]; 173 } 174 175 private: 176 177 typedef detail::escape_value<char_type, char_class_type> escape_value; 178 typedef detail::alternate_matcher<detail::alternates_vector<BidiIter>, RegexTraits> alternate_matcher; 179 180 /////////////////////////////////////////////////////////////////////////// 181 // compile_ 182 /// INTERNAL ONLY 183 template<typename FwdIter> compile_boost::xpressive::regex_compiler184 basic_regex<BidiIter> compile_(FwdIter begin, FwdIter end, flag_type flags, std::forward_iterator_tag) 185 { 186 BOOST_MPL_ASSERT((is_same<char_type, typename iterator_value<FwdIter>::type>)); 187 using namespace regex_constants; 188 this->reset(); 189 this->traits_.flags(flags); 190 191 basic_regex<BidiIter> rextmp, *prex = &rextmp; 192 FwdIter tmp = begin; 193 194 // Check if this regex is a named rule: 195 string_type name; 196 if(token_group_begin == this->traits_.get_token(tmp, end) && 197 BOOST_XPR_ENSURE_(tmp != end, error_paren, "mismatched parenthesis") && 198 token_rule_assign == this->traits_.get_group_type(tmp, end, name)) 199 { 200 begin = tmp; 201 BOOST_XPR_ENSURE_ 202 ( 203 begin != end && token_group_end == this->traits_.get_token(begin, end) 204 , error_paren 205 , "mismatched parenthesis" 206 ); 207 prex = &this->rules_[name]; 208 } 209 210 this->self_ = detail::core_access<BidiIter>::get_regex_impl(*prex); 211 212 // at the top level, a regex is a sequence of alternates 213 detail::sequence<BidiIter> seq = this->parse_alternates(begin, end); 214 BOOST_XPR_ENSURE_(begin == end, error_paren, "mismatched parenthesis"); 215 216 // terminate the sequence 217 seq += detail::make_dynamic<BidiIter>(detail::end_matcher()); 218 219 // bundle the regex information into a regex_impl object 220 detail::common_compile(seq.xpr().matchable(), *this->self_, this->rxtraits()); 221 222 this->self_->traits_ = new detail::traits_holder<RegexTraits>(this->rxtraits()); 223 this->self_->mark_count_ = this->mark_count_; 224 this->self_->hidden_mark_count_ = this->hidden_mark_count_; 225 226 // References changed, update dependencies. 227 this->self_->tracking_update(); 228 this->self_.reset(); 229 return *prex; 230 } 231 232 /////////////////////////////////////////////////////////////////////////// 233 // compile_ 234 /// INTERNAL ONLY 235 template<typename InputIter> compile_boost::xpressive::regex_compiler236 basic_regex<BidiIter> compile_(InputIter begin, InputIter end, flag_type flags, std::input_iterator_tag) 237 { 238 string_type pat(begin, end); 239 return this->compile_(boost::begin(pat), boost::end(pat), flags, std::forward_iterator_tag()); 240 } 241 242 /////////////////////////////////////////////////////////////////////////// 243 // reset 244 /// INTERNAL ONLY resetboost::xpressive::regex_compiler245 void reset() 246 { 247 this->mark_count_ = 0; 248 this->hidden_mark_count_ = 0; 249 this->traits_.flags(regex_constants::ECMAScript); 250 } 251 252 /////////////////////////////////////////////////////////////////////////// 253 // regex_traits 254 /// INTERNAL ONLY rxtraitsboost::xpressive::regex_compiler255 traits_type &rxtraits() 256 { 257 return this->traits_.traits(); 258 } 259 260 /////////////////////////////////////////////////////////////////////////// 261 // regex_traits 262 /// INTERNAL ONLY rxtraitsboost::xpressive::regex_compiler263 traits_type const &rxtraits() const 264 { 265 return this->traits_.traits(); 266 } 267 268 /////////////////////////////////////////////////////////////////////////// 269 // parse_alternates 270 /// INTERNAL ONLY 271 template<typename FwdIter> parse_alternatesboost::xpressive::regex_compiler272 detail::sequence<BidiIter> parse_alternates(FwdIter &begin, FwdIter end) 273 { 274 using namespace regex_constants; 275 int count = 0; 276 FwdIter tmp = begin; 277 detail::sequence<BidiIter> seq; 278 279 do switch(++count) 280 { 281 case 1: 282 seq = this->parse_sequence(tmp, end); 283 break; 284 case 2: 285 seq = detail::make_dynamic<BidiIter>(alternate_matcher()) | seq; 286 BOOST_FALLTHROUGH; 287 default: 288 seq |= this->parse_sequence(tmp, end); 289 } 290 while((begin = tmp) != end && token_alternate == this->traits_.get_token(tmp, end)); 291 292 return seq; 293 } 294 295 /////////////////////////////////////////////////////////////////////////// 296 // parse_group 297 /// INTERNAL ONLY 298 template<typename FwdIter> parse_groupboost::xpressive::regex_compiler299 detail::sequence<BidiIter> parse_group(FwdIter &begin, FwdIter end) 300 { 301 using namespace regex_constants; 302 int mark_nbr = 0; 303 bool keeper = false; 304 bool lookahead = false; 305 bool lookbehind = false; 306 bool negative = false; 307 string_type name; 308 309 detail::sequence<BidiIter> seq, seq_end; 310 FwdIter tmp = FwdIter(); 311 312 syntax_option_type old_flags = this->traits_.flags(); 313 314 switch(this->traits_.get_group_type(begin, end, name)) 315 { 316 case token_no_mark: 317 // Don't process empty groups like (?:) or (?i) 318 // BUGBUG this doesn't handle the degenerate (?:)+ correctly 319 if(token_group_end == this->traits_.get_token(tmp = begin, end)) 320 { 321 return this->parse_atom(begin = tmp, end); 322 } 323 break; 324 325 case token_negative_lookahead: 326 negative = true; 327 BOOST_FALLTHROUGH; 328 case token_positive_lookahead: 329 lookahead = true; 330 break; 331 332 case token_negative_lookbehind: 333 negative = true; 334 BOOST_FALLTHROUGH; 335 case token_positive_lookbehind: 336 lookbehind = true; 337 break; 338 339 case token_independent_sub_expression: 340 keeper = true; 341 break; 342 343 case token_comment: 344 while(BOOST_XPR_ENSURE_(begin != end, error_paren, "mismatched parenthesis")) 345 { 346 switch(this->traits_.get_token(begin, end)) 347 { 348 case token_group_end: 349 return this->parse_atom(begin, end); 350 case token_escape: 351 BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence"); 352 BOOST_FALLTHROUGH; 353 case token_literal: 354 ++begin; 355 break; 356 default: 357 break; 358 } 359 } 360 break; 361 362 case token_recurse: 363 BOOST_XPR_ENSURE_ 364 ( 365 begin != end && token_group_end == this->traits_.get_token(begin, end) 366 , error_paren 367 , "mismatched parenthesis" 368 ); 369 return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(this->self_)); 370 371 case token_rule_assign: 372 BOOST_THROW_EXCEPTION( 373 regex_error(error_badrule, "rule assignments must be at the front of the regex") 374 ); 375 break; 376 377 case token_rule_ref: 378 { 379 typedef detail::core_access<BidiIter> access; 380 BOOST_XPR_ENSURE_ 381 ( 382 begin != end && token_group_end == this->traits_.get_token(begin, end) 383 , error_paren 384 , "mismatched parenthesis" 385 ); 386 basic_regex<BidiIter> &rex = this->rules_[name]; 387 shared_ptr<detail::regex_impl<BidiIter> > impl = access::get_regex_impl(rex); 388 this->self_->track_reference(*impl); 389 return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(impl)); 390 } 391 392 case token_named_mark: 393 mark_nbr = static_cast<int>(++this->mark_count_); 394 for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i) 395 { 396 BOOST_XPR_ENSURE_(this->self_->named_marks_[i].name_ != name, error_badmark, "named mark already exists"); 397 } 398 this->self_->named_marks_.push_back(detail::named_mark<char_type>(name, this->mark_count_)); 399 seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr)); 400 seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr)); 401 break; 402 403 case token_named_mark_ref: 404 BOOST_XPR_ENSURE_ 405 ( 406 begin != end && token_group_end == this->traits_.get_token(begin, end) 407 , error_paren 408 , "mismatched parenthesis" 409 ); 410 for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i) 411 { 412 if(this->self_->named_marks_[i].name_ == name) 413 { 414 mark_nbr = static_cast<int>(this->self_->named_marks_[i].mark_nbr_); 415 return detail::make_backref_xpression<BidiIter> 416 ( 417 mark_nbr, this->traits_.flags(), this->rxtraits() 418 ); 419 } 420 } 421 BOOST_THROW_EXCEPTION(regex_error(error_badmark, "invalid named back-reference")); 422 break; 423 424 default: 425 mark_nbr = static_cast<int>(++this->mark_count_); 426 seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr)); 427 seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr)); 428 break; 429 } 430 431 // alternates 432 seq += this->parse_alternates(begin, end); 433 seq += seq_end; 434 BOOST_XPR_ENSURE_ 435 ( 436 begin != end && token_group_end == this->traits_.get_token(begin, end) 437 , error_paren 438 , "mismatched parenthesis" 439 ); 440 441 typedef detail::shared_matchable<BidiIter> xpr_type; 442 if(lookahead) 443 { 444 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure()); 445 detail::lookahead_matcher<xpr_type> lam(seq.xpr(), negative, seq.pure()); 446 seq = detail::make_dynamic<BidiIter>(lam); 447 } 448 else if(lookbehind) 449 { 450 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure()); 451 detail::lookbehind_matcher<xpr_type> lbm(seq.xpr(), seq.width().value(), negative, seq.pure()); 452 seq = detail::make_dynamic<BidiIter>(lbm); 453 } 454 else if(keeper) // independent sub-expression 455 { 456 seq += detail::make_independent_end_xpression<BidiIter>(seq.pure()); 457 detail::keeper_matcher<xpr_type> km(seq.xpr(), seq.pure()); 458 seq = detail::make_dynamic<BidiIter>(km); 459 } 460 461 // restore the modifiers 462 this->traits_.flags(old_flags); 463 return seq; 464 } 465 466 /////////////////////////////////////////////////////////////////////////// 467 // parse_charset 468 /// INTERNAL ONLY 469 template<typename FwdIter> parse_charsetboost::xpressive::regex_compiler470 detail::sequence<BidiIter> parse_charset(FwdIter &begin, FwdIter end) 471 { 472 detail::compound_charset<traits_type> chset; 473 474 // call out to a helper to actually parse the character set 475 detail::parse_charset(begin, end, chset, this->traits_); 476 477 return detail::make_charset_xpression<BidiIter> 478 ( 479 chset 480 , this->rxtraits() 481 , this->traits_.flags() 482 ); 483 } 484 485 /////////////////////////////////////////////////////////////////////////// 486 // parse_atom 487 /// INTERNAL ONLY 488 template<typename FwdIter> parse_atomboost::xpressive::regex_compiler489 detail::sequence<BidiIter> parse_atom(FwdIter &begin, FwdIter end) 490 { 491 using namespace regex_constants; 492 escape_value esc = { 0, 0, 0, detail::escape_char }; 493 FwdIter old_begin = begin; 494 495 switch(this->traits_.get_token(begin, end)) 496 { 497 case token_literal: 498 return detail::make_literal_xpression<BidiIter> 499 ( 500 this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits() 501 ); 502 503 case token_any: 504 return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits()); 505 506 case token_assert_begin_sequence: 507 return detail::make_dynamic<BidiIter>(detail::assert_bos_matcher()); 508 509 case token_assert_end_sequence: 510 return detail::make_dynamic<BidiIter>(detail::assert_eos_matcher()); 511 512 case token_assert_begin_line: 513 return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits()); 514 515 case token_assert_end_line: 516 return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits()); 517 518 case token_assert_word_boundary: 519 return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::true_>(), this->rxtraits()); 520 521 case token_assert_not_word_boundary: 522 return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::false_>(), this->rxtraits()); 523 524 case token_assert_word_begin: 525 return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits()); 526 527 case token_assert_word_end: 528 return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits()); 529 530 case token_escape: 531 esc = this->parse_escape(begin, end); 532 switch(esc.type_) 533 { 534 case detail::escape_mark: 535 return detail::make_backref_xpression<BidiIter> 536 ( 537 esc.mark_nbr_, this->traits_.flags(), this->rxtraits() 538 ); 539 case detail::escape_char: 540 return detail::make_char_xpression<BidiIter> 541 ( 542 esc.ch_, this->traits_.flags(), this->rxtraits() 543 ); 544 case detail::escape_class: 545 return detail::make_posix_charset_xpression<BidiIter> 546 ( 547 esc.class_ 548 , this->is_upper_(*begin++) 549 , this->traits_.flags() 550 , this->rxtraits() 551 ); 552 } 553 554 case token_group_begin: 555 return this->parse_group(begin, end); 556 557 case token_charset_begin: 558 return this->parse_charset(begin, end); 559 560 case token_invalid_quantifier: 561 BOOST_THROW_EXCEPTION(regex_error(error_badrepeat, "quantifier not expected")); 562 break; 563 564 case token_quote_meta_begin: 565 return detail::make_literal_xpression<BidiIter> 566 ( 567 this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits() 568 ); 569 570 case token_quote_meta_end: 571 BOOST_THROW_EXCEPTION( 572 regex_error( 573 error_escape 574 , "found quote-meta end without corresponding quote-meta begin" 575 ) 576 ); 577 break; 578 579 case token_end_of_pattern: 580 break; 581 582 default: 583 begin = old_begin; 584 break; 585 } 586 587 return detail::sequence<BidiIter>(); 588 } 589 590 /////////////////////////////////////////////////////////////////////////// 591 // parse_quant 592 /// INTERNAL ONLY 593 template<typename FwdIter> parse_quantboost::xpressive::regex_compiler594 detail::sequence<BidiIter> parse_quant(FwdIter &begin, FwdIter end) 595 { 596 BOOST_ASSERT(begin != end); 597 detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ }; 598 detail::sequence<BidiIter> seq = this->parse_atom(begin, end); 599 600 // BUGBUG this doesn't handle the degenerate (?:)+ correctly 601 if(!seq.empty() && begin != end && detail::quant_none != seq.quant()) 602 { 603 if(this->traits_.get_quant_spec(begin, end, spec)) 604 { 605 BOOST_ASSERT(spec.min_ <= spec.max_); 606 607 if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing. 608 { 609 seq = this->parse_quant(begin, end); 610 } 611 else 612 { 613 seq.repeat(spec); 614 } 615 } 616 } 617 618 return seq; 619 } 620 621 /////////////////////////////////////////////////////////////////////////// 622 // parse_sequence 623 /// INTERNAL ONLY 624 template<typename FwdIter> parse_sequenceboost::xpressive::regex_compiler625 detail::sequence<BidiIter> parse_sequence(FwdIter &begin, FwdIter end) 626 { 627 detail::sequence<BidiIter> seq; 628 629 while(begin != end) 630 { 631 detail::sequence<BidiIter> seq_quant = this->parse_quant(begin, end); 632 633 // did we find a quantified atom? 634 if(seq_quant.empty()) 635 break; 636 637 // chain it to the end of the xpression sequence 638 seq += seq_quant; 639 } 640 641 return seq; 642 } 643 644 /////////////////////////////////////////////////////////////////////////// 645 // parse_literal 646 // scan ahead looking for char literals to be globbed together into a string literal 647 /// INTERNAL ONLY 648 template<typename FwdIter> parse_literalboost::xpressive::regex_compiler649 string_type parse_literal(FwdIter &begin, FwdIter end) 650 { 651 using namespace regex_constants; 652 BOOST_ASSERT(begin != end); 653 BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end)); 654 escape_value esc = { 0, 0, 0, detail::escape_char }; 655 string_type literal(1, *begin); 656 657 for(FwdIter prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp) 658 { 659 detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ }; 660 if(this->traits_.get_quant_spec(tmp, end, spec)) 661 { 662 if(literal.size() != 1) 663 { 664 begin = prev; 665 literal.erase(boost::prior(literal.end())); 666 } 667 return literal; 668 } 669 else switch(this->traits_.get_token(tmp, end)) 670 { 671 case token_escape: 672 esc = this->parse_escape(tmp, end); 673 if(detail::escape_char != esc.type_) return literal; 674 literal.insert(literal.end(), esc.ch_); 675 break; 676 case token_literal: 677 literal.insert(literal.end(), *tmp++); 678 break; 679 default: 680 return literal; 681 } 682 } 683 684 return literal; 685 } 686 687 /////////////////////////////////////////////////////////////////////////// 688 // parse_quote_meta 689 // scan ahead looking for char literals to be globbed together into a string literal 690 /// INTERNAL ONLY 691 template<typename FwdIter> parse_quote_metaboost::xpressive::regex_compiler692 string_type parse_quote_meta(FwdIter &begin, FwdIter end) 693 { 694 using namespace regex_constants; 695 FwdIter old_begin = begin, old_end; 696 while(end != (old_end = begin)) 697 { 698 switch(this->traits_.get_token(begin, end)) 699 { 700 case token_quote_meta_end: 701 return string_type(old_begin, old_end); 702 case token_escape: 703 BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence"); 704 BOOST_FALLTHROUGH; 705 case token_invalid_quantifier: 706 case token_literal: 707 ++begin; 708 break; 709 default: 710 break; 711 } 712 } 713 return string_type(old_begin, begin); 714 } 715 716 /////////////////////////////////////////////////////////////////////////////// 717 // parse_escape 718 /// INTERNAL ONLY 719 template<typename FwdIter> parse_escapeboost::xpressive::regex_compiler720 escape_value parse_escape(FwdIter &begin, FwdIter end) 721 { 722 BOOST_XPR_ENSURE_(begin != end, regex_constants::error_escape, "incomplete escape sequence"); 723 724 // first, check to see if this can be a backreference 725 if(0 < this->rxtraits().value(*begin, 10)) 726 { 727 // Parse at most 3 decimal digits. 728 FwdIter tmp = begin; 729 int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999); 730 731 // If the resulting number could conceivably be a backref, then it is. 732 if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_)) 733 { 734 begin = tmp; 735 escape_value esc = {0, mark_nbr, 0, detail::escape_mark}; 736 return esc; 737 } 738 } 739 740 // Not a backreference, defer to the parse_escape helper 741 return detail::parse_escape(begin, end, this->traits_); 742 } 743 is_upper_boost::xpressive::regex_compiler744 bool is_upper_(char_type ch) const 745 { 746 return 0 != this->upper_ && this->rxtraits().isctype(ch, this->upper_); 747 } 748 749 std::size_t mark_count_; 750 std::size_t hidden_mark_count_; 751 CompilerTraits traits_; 752 typename RegexTraits::char_class_type upper_; 753 shared_ptr<detail::regex_impl<BidiIter> > self_; 754 std::map<string_type, basic_regex<BidiIter> > rules_; 755 }; 756 757 }} // namespace boost::xpressive 758 759 #endif 760