1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2013-2018 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex.tcc 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 namespace std _GLIBCXX_VISIBILITY(default) 32 { 33 _GLIBCXX_BEGIN_NAMESPACE_VERSION 34 35 namespace __detail 36 { 37 // Result of merging regex_match and regex_search. 38 // 39 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 40 // the other one if possible, for test purpose). 41 // 42 // That __match_mode is true means regex_match, else regex_search. 43 template<typename _BiIter, typename _Alloc, 44 typename _CharT, typename _TraitsT, 45 _RegexExecutorPolicy __policy, 46 bool __match_mode> 47 bool 48 __regex_algo_impl(_BiIter __s, 49 _BiIter __e, 50 match_results<_BiIter, _Alloc>& __m, 51 const basic_regex<_CharT, _TraitsT>& __re, 52 regex_constants::match_flag_type __flags) 53 { 54 if (__re._M_automaton == nullptr) 55 return false; 56 57 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; 58 __m._M_begin = __s; 59 __m._M_resize(__re._M_automaton->_M_sub_count()); 60 for (auto& __it : __res) 61 __it.matched = false; 62 63 bool __ret; 64 if ((__re.flags() & regex_constants::__polynomial) 65 || (__policy == _RegexExecutorPolicy::_S_alternate 66 && !__re._M_automaton->_M_has_backref)) 67 { 68 _Executor<_BiIter, _Alloc, _TraitsT, false> 69 __executor(__s, __e, __m, __re, __flags); 70 if (__match_mode) 71 __ret = __executor._M_match(); 72 else 73 __ret = __executor._M_search(); 74 } 75 else 76 { 77 _Executor<_BiIter, _Alloc, _TraitsT, true> 78 __executor(__s, __e, __m, __re, __flags); 79 if (__match_mode) 80 __ret = __executor._M_match(); 81 else 82 __ret = __executor._M_search(); 83 } 84 if (__ret) 85 { 86 for (auto& __it : __res) 87 if (!__it.matched) 88 __it.first = __it.second = __e; 89 auto& __pre = __m._M_prefix(); 90 auto& __suf = __m._M_suffix(); 91 if (__match_mode) 92 { 93 __pre.matched = false; 94 __pre.first = __s; 95 __pre.second = __s; 96 __suf.matched = false; 97 __suf.first = __e; 98 __suf.second = __e; 99 } 100 else 101 { 102 __pre.first = __s; 103 __pre.second = __res[0].first; 104 __pre.matched = (__pre.first != __pre.second); 105 __suf.first = __res[0].second; 106 __suf.second = __e; 107 __suf.matched = (__suf.first != __suf.second); 108 } 109 } 110 else 111 { 112 __m._M_resize(0); 113 for (auto& __it : __res) 114 { 115 __it.matched = false; 116 __it.first = __it.second = __e; 117 } 118 } 119 return __ret; 120 } 121 } 122 123 template<typename _Ch_type> 124 template<typename _Fwd_iter> 125 typename regex_traits<_Ch_type>::string_type 126 regex_traits<_Ch_type>:: 127 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 128 { 129 typedef std::ctype<char_type> __ctype_type; 130 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 131 132 static const char* __collatenames[] = 133 { 134 "NUL", 135 "SOH", 136 "STX", 137 "ETX", 138 "EOT", 139 "ENQ", 140 "ACK", 141 "alert", 142 "backspace", 143 "tab", 144 "newline", 145 "vertical-tab", 146 "form-feed", 147 "carriage-return", 148 "SO", 149 "SI", 150 "DLE", 151 "DC1", 152 "DC2", 153 "DC3", 154 "DC4", 155 "NAK", 156 "SYN", 157 "ETB", 158 "CAN", 159 "EM", 160 "SUB", 161 "ESC", 162 "IS4", 163 "IS3", 164 "IS2", 165 "IS1", 166 "space", 167 "exclamation-mark", 168 "quotation-mark", 169 "number-sign", 170 "dollar-sign", 171 "percent-sign", 172 "ampersand", 173 "apostrophe", 174 "left-parenthesis", 175 "right-parenthesis", 176 "asterisk", 177 "plus-sign", 178 "comma", 179 "hyphen", 180 "period", 181 "slash", 182 "zero", 183 "one", 184 "two", 185 "three", 186 "four", 187 "five", 188 "six", 189 "seven", 190 "eight", 191 "nine", 192 "colon", 193 "semicolon", 194 "less-than-sign", 195 "equals-sign", 196 "greater-than-sign", 197 "question-mark", 198 "commercial-at", 199 "A", 200 "B", 201 "C", 202 "D", 203 "E", 204 "F", 205 "G", 206 "H", 207 "I", 208 "J", 209 "K", 210 "L", 211 "M", 212 "N", 213 "O", 214 "P", 215 "Q", 216 "R", 217 "S", 218 "T", 219 "U", 220 "V", 221 "W", 222 "X", 223 "Y", 224 "Z", 225 "left-square-bracket", 226 "backslash", 227 "right-square-bracket", 228 "circumflex", 229 "underscore", 230 "grave-accent", 231 "a", 232 "b", 233 "c", 234 "d", 235 "e", 236 "f", 237 "g", 238 "h", 239 "i", 240 "j", 241 "k", 242 "l", 243 "m", 244 "n", 245 "o", 246 "p", 247 "q", 248 "r", 249 "s", 250 "t", 251 "u", 252 "v", 253 "w", 254 "x", 255 "y", 256 "z", 257 "left-curly-bracket", 258 "vertical-line", 259 "right-curly-bracket", 260 "tilde", 261 "DEL", 262 }; 263 264 string __s; 265 for (; __first != __last; ++__first) 266 __s += __fctyp.narrow(*__first, 0); 267 268 for (const auto& __it : __collatenames) 269 if (__s == __it) 270 return string_type(1, __fctyp.widen( 271 static_cast<char>(&__it - __collatenames))); 272 273 // TODO Add digraph support: 274 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 275 276 return string_type(); 277 } 278 279 template<typename _Ch_type> 280 template<typename _Fwd_iter> 281 typename regex_traits<_Ch_type>::char_class_type 282 regex_traits<_Ch_type>:: 283 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 284 { 285 typedef std::ctype<char_type> __ctype_type; 286 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 287 288 // Mappings from class name to class mask. 289 static const pair<const char*, char_class_type> __classnames[] = 290 { 291 {"d", ctype_base::digit}, 292 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 293 {"s", ctype_base::space}, 294 {"alnum", ctype_base::alnum}, 295 {"alpha", ctype_base::alpha}, 296 {"blank", ctype_base::blank}, 297 {"cntrl", ctype_base::cntrl}, 298 {"digit", ctype_base::digit}, 299 {"graph", ctype_base::graph}, 300 {"lower", ctype_base::lower}, 301 {"print", ctype_base::print}, 302 {"punct", ctype_base::punct}, 303 {"space", ctype_base::space}, 304 {"upper", ctype_base::upper}, 305 {"xdigit", ctype_base::xdigit}, 306 }; 307 308 string __s; 309 for (; __first != __last; ++__first) 310 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 311 312 for (const auto& __it : __classnames) 313 if (__s == __it.first) 314 { 315 if (__icase 316 && ((__it.second 317 & (ctype_base::lower | ctype_base::upper)) != 0)) 318 return ctype_base::alpha; 319 return __it.second; 320 } 321 return 0; 322 } 323 324 template<typename _Ch_type> 325 bool 326 regex_traits<_Ch_type>:: 327 isctype(_Ch_type __c, char_class_type __f) const 328 { 329 typedef std::ctype<char_type> __ctype_type; 330 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 331 332 return __fctyp.is(__f._M_base, __c) 333 // [[:w:]] 334 || ((__f._M_extended & _RegexMask::_S_under) 335 && __c == __fctyp.widen('_')); 336 } 337 338 template<typename _Ch_type> 339 int 340 regex_traits<_Ch_type>:: 341 value(_Ch_type __ch, int __radix) const 342 { 343 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 344 long __v; 345 if (__radix == 8) 346 __is >> std::oct; 347 else if (__radix == 16) 348 __is >> std::hex; 349 __is >> __v; 350 return __is.fail() ? -1 : __v; 351 } 352 353 template<typename _Bi_iter, typename _Alloc> 354 template<typename _Out_iter> 355 _Out_iter match_results<_Bi_iter, _Alloc>:: 356 format(_Out_iter __out, 357 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 358 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 359 match_flag_type __flags) const 360 { 361 __glibcxx_assert( ready() ); 362 regex_traits<char_type> __traits; 363 typedef std::ctype<char_type> __ctype_type; 364 const __ctype_type& 365 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 366 367 auto __output = [&](size_t __idx) 368 { 369 auto& __sub = (*this)[__idx]; 370 if (__sub.matched) 371 __out = std::copy(__sub.first, __sub.second, __out); 372 }; 373 374 if (__flags & regex_constants::format_sed) 375 { 376 bool __escaping = false; 377 for (; __fmt_first != __fmt_last; __fmt_first++) 378 { 379 if (__escaping) 380 { 381 __escaping = false; 382 if (__fctyp.is(__ctype_type::digit, *__fmt_first)) 383 __output(__traits.value(*__fmt_first, 10)); 384 else 385 *__out++ = *__fmt_first; 386 continue; 387 } 388 if (*__fmt_first == '\\') 389 { 390 __escaping = true; 391 continue; 392 } 393 if (*__fmt_first == '&') 394 { 395 __output(0); 396 continue; 397 } 398 *__out++ = *__fmt_first; 399 } 400 if (__escaping) 401 *__out++ = '\\'; 402 } 403 else 404 { 405 while (1) 406 { 407 auto __next = std::find(__fmt_first, __fmt_last, '$'); 408 if (__next == __fmt_last) 409 break; 410 411 __out = std::copy(__fmt_first, __next, __out); 412 413 auto __eat = [&](char __ch) -> bool 414 { 415 if (*__next == __ch) 416 { 417 ++__next; 418 return true; 419 } 420 return false; 421 }; 422 423 if (++__next == __fmt_last) 424 *__out++ = '$'; 425 else if (__eat('$')) 426 *__out++ = '$'; 427 else if (__eat('&')) 428 __output(0); 429 else if (__eat('`')) 430 { 431 auto& __sub = _M_prefix(); 432 if (__sub.matched) 433 __out = std::copy(__sub.first, __sub.second, __out); 434 } 435 else if (__eat('\'')) 436 { 437 auto& __sub = _M_suffix(); 438 if (__sub.matched) 439 __out = std::copy(__sub.first, __sub.second, __out); 440 } 441 else if (__fctyp.is(__ctype_type::digit, *__next)) 442 { 443 long __num = __traits.value(*__next, 10); 444 if (++__next != __fmt_last 445 && __fctyp.is(__ctype_type::digit, *__next)) 446 { 447 __num *= 10; 448 __num += __traits.value(*__next++, 10); 449 } 450 if (0 <= __num && __num < this->size()) 451 __output(__num); 452 } 453 else 454 *__out++ = '$'; 455 __fmt_first = __next; 456 } 457 __out = std::copy(__fmt_first, __fmt_last, __out); 458 } 459 return __out; 460 } 461 462 template<typename _Out_iter, typename _Bi_iter, 463 typename _Rx_traits, typename _Ch_type> 464 _Out_iter 465 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 466 const basic_regex<_Ch_type, _Rx_traits>& __e, 467 const _Ch_type* __fmt, 468 regex_constants::match_flag_type __flags) 469 { 470 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 471 _IterT __i(__first, __last, __e, __flags); 472 _IterT __end; 473 if (__i == __end) 474 { 475 if (!(__flags & regex_constants::format_no_copy)) 476 __out = std::copy(__first, __last, __out); 477 } 478 else 479 { 480 sub_match<_Bi_iter> __last; 481 auto __len = char_traits<_Ch_type>::length(__fmt); 482 for (; __i != __end; ++__i) 483 { 484 if (!(__flags & regex_constants::format_no_copy)) 485 __out = std::copy(__i->prefix().first, __i->prefix().second, 486 __out); 487 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 488 __last = __i->suffix(); 489 if (__flags & regex_constants::format_first_only) 490 break; 491 } 492 if (!(__flags & regex_constants::format_no_copy)) 493 __out = std::copy(__last.first, __last.second, __out); 494 } 495 return __out; 496 } 497 498 template<typename _Bi_iter, 499 typename _Ch_type, 500 typename _Rx_traits> 501 bool 502 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 503 operator==(const regex_iterator& __rhs) const 504 { 505 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr) 506 return true; 507 return _M_pregex == __rhs._M_pregex 508 && _M_begin == __rhs._M_begin 509 && _M_end == __rhs._M_end 510 && _M_flags == __rhs._M_flags 511 && _M_match[0] == __rhs._M_match[0]; 512 } 513 514 template<typename _Bi_iter, 515 typename _Ch_type, 516 typename _Rx_traits> 517 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 518 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 519 operator++() 520 { 521 // In all cases in which the call to regex_search returns true, 522 // match.prefix().first shall be equal to the previous value of 523 // match[0].second, and for each index i in the half-open range 524 // [0, match.size()) for which match[i].matched is true, 525 // match[i].position() shall return distance(begin, match[i].first). 526 // [28.12.1.4.5] 527 if (_M_match[0].matched) 528 { 529 auto __start = _M_match[0].second; 530 auto __prefix_first = _M_match[0].second; 531 if (_M_match[0].first == _M_match[0].second) 532 { 533 if (__start == _M_end) 534 { 535 _M_pregex = nullptr; 536 return *this; 537 } 538 else 539 { 540 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 541 _M_flags 542 | regex_constants::match_not_null 543 | regex_constants::match_continuous)) 544 { 545 __glibcxx_assert(_M_match[0].matched); 546 auto& __prefix = _M_match._M_prefix(); 547 __prefix.first = __prefix_first; 548 __prefix.matched = __prefix.first != __prefix.second; 549 // [28.12.1.4.5] 550 _M_match._M_begin = _M_begin; 551 return *this; 552 } 553 else 554 ++__start; 555 } 556 } 557 _M_flags |= regex_constants::match_prev_avail; 558 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 559 { 560 __glibcxx_assert(_M_match[0].matched); 561 auto& __prefix = _M_match._M_prefix(); 562 __prefix.first = __prefix_first; 563 __prefix.matched = __prefix.first != __prefix.second; 564 // [28.12.1.4.5] 565 _M_match._M_begin = _M_begin; 566 } 567 else 568 _M_pregex = nullptr; 569 } 570 return *this; 571 } 572 573 template<typename _Bi_iter, 574 typename _Ch_type, 575 typename _Rx_traits> 576 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 577 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 578 operator=(const regex_token_iterator& __rhs) 579 { 580 _M_position = __rhs._M_position; 581 _M_subs = __rhs._M_subs; 582 _M_n = __rhs._M_n; 583 _M_suffix = __rhs._M_suffix; 584 _M_has_m1 = __rhs._M_has_m1; 585 _M_normalize_result(); 586 return *this; 587 } 588 589 template<typename _Bi_iter, 590 typename _Ch_type, 591 typename _Rx_traits> 592 bool 593 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 594 operator==(const regex_token_iterator& __rhs) const 595 { 596 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 597 return true; 598 if (_M_suffix.matched && __rhs._M_suffix.matched 599 && _M_suffix == __rhs._M_suffix) 600 return true; 601 if (_M_end_of_seq() || _M_suffix.matched 602 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 603 return false; 604 return _M_position == __rhs._M_position 605 && _M_n == __rhs._M_n 606 && _M_subs == __rhs._M_subs; 607 } 608 609 template<typename _Bi_iter, 610 typename _Ch_type, 611 typename _Rx_traits> 612 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 613 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 614 operator++() 615 { 616 _Position __prev = _M_position; 617 if (_M_suffix.matched) 618 *this = regex_token_iterator(); 619 else if (_M_n + 1 < _M_subs.size()) 620 { 621 _M_n++; 622 _M_result = &_M_current_match(); 623 } 624 else 625 { 626 _M_n = 0; 627 ++_M_position; 628 if (_M_position != _Position()) 629 _M_result = &_M_current_match(); 630 else if (_M_has_m1 && __prev->suffix().length() != 0) 631 { 632 _M_suffix.matched = true; 633 _M_suffix.first = __prev->suffix().first; 634 _M_suffix.second = __prev->suffix().second; 635 _M_result = &_M_suffix; 636 } 637 else 638 *this = regex_token_iterator(); 639 } 640 return *this; 641 } 642 643 template<typename _Bi_iter, 644 typename _Ch_type, 645 typename _Rx_traits> 646 void 647 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 648 _M_init(_Bi_iter __a, _Bi_iter __b) 649 { 650 _M_has_m1 = false; 651 for (auto __it : _M_subs) 652 if (__it == -1) 653 { 654 _M_has_m1 = true; 655 break; 656 } 657 if (_M_position != _Position()) 658 _M_result = &_M_current_match(); 659 else if (_M_has_m1) 660 { 661 _M_suffix.matched = true; 662 _M_suffix.first = __a; 663 _M_suffix.second = __b; 664 _M_result = &_M_suffix; 665 } 666 else 667 _M_result = nullptr; 668 } 669 670 _GLIBCXX_END_NAMESPACE_VERSION 671 } // namespace 672