1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2013-2022 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex.tcc 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 namespace std _GLIBCXX_VISIBILITY(default) 32 { 33 _GLIBCXX_BEGIN_NAMESPACE_VERSION 34 35 namespace __detail 36 { 37 /// @cond undocumented 38 39 // Result of merging regex_match and regex_search. 40 // 41 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 42 // the other one if possible, for test purpose). 43 // 44 // That __match_mode is true means regex_match, else regex_search. 45 template<typename _BiIter, typename _Alloc, 46 typename _CharT, typename _TraitsT> 47 bool __regex_algo_impl(_BiIter __s,_BiIter __e,match_results<_BiIter,_Alloc> & __m,const basic_regex<_CharT,_TraitsT> & __re,regex_constants::match_flag_type __flags,_RegexExecutorPolicy __policy,bool __match_mode)48 __regex_algo_impl(_BiIter __s, 49 _BiIter __e, 50 match_results<_BiIter, _Alloc>& __m, 51 const basic_regex<_CharT, _TraitsT>& __re, 52 regex_constants::match_flag_type __flags, 53 _RegexExecutorPolicy __policy, 54 bool __match_mode) 55 { 56 if (__re._M_automaton == nullptr) 57 return false; 58 59 typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m; 60 __m._M_begin = __s; 61 __m._M_resize(__re._M_automaton->_M_sub_count()); 62 63 bool __ret; 64 if ((__re.flags() & regex_constants::__polynomial) 65 || (__policy == _RegexExecutorPolicy::_S_alternate 66 && !__re._M_automaton->_M_has_backref)) 67 { 68 _Executor<_BiIter, _Alloc, _TraitsT, false> 69 __executor(__s, __e, __res, __re, __flags); 70 if (__match_mode) 71 __ret = __executor._M_match(); 72 else 73 __ret = __executor._M_search(); 74 } 75 else 76 { 77 _Executor<_BiIter, _Alloc, _TraitsT, true> 78 __executor(__s, __e, __res, __re, __flags); 79 if (__match_mode) 80 __ret = __executor._M_match(); 81 else 82 __ret = __executor._M_search(); 83 } 84 if (__ret) 85 { 86 for (auto& __it : __res) 87 if (!__it.matched) 88 __it.first = __it.second = __e; 89 auto& __pre = __m._M_prefix(); 90 auto& __suf = __m._M_suffix(); 91 if (__match_mode) 92 { 93 __pre.matched = false; 94 __pre.first = __s; 95 __pre.second = __s; 96 __suf.matched = false; 97 __suf.first = __e; 98 __suf.second = __e; 99 } 100 else 101 { 102 __pre.first = __s; 103 __pre.second = __res[0].first; 104 __pre.matched = (__pre.first != __pre.second); 105 __suf.first = __res[0].second; 106 __suf.second = __e; 107 __suf.matched = (__suf.first != __suf.second); 108 } 109 } 110 else 111 { 112 __m._M_establish_failed_match(__e); 113 } 114 return __ret; 115 } 116 /// @endcond 117 } // namespace __detail 118 119 /// @cond 120 121 template<typename _Ch_type> 122 template<typename _Fwd_iter> 123 typename regex_traits<_Ch_type>::string_type 124 regex_traits<_Ch_type>:: lookup_collatename(_Fwd_iter __first,_Fwd_iter __last) const125 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 126 { 127 typedef std::ctype<char_type> __ctype_type; 128 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 129 130 static const char* __collatenames[] = 131 { 132 "NUL", 133 "SOH", 134 "STX", 135 "ETX", 136 "EOT", 137 "ENQ", 138 "ACK", 139 "alert", 140 "backspace", 141 "tab", 142 "newline", 143 "vertical-tab", 144 "form-feed", 145 "carriage-return", 146 "SO", 147 "SI", 148 "DLE", 149 "DC1", 150 "DC2", 151 "DC3", 152 "DC4", 153 "NAK", 154 "SYN", 155 "ETB", 156 "CAN", 157 "EM", 158 "SUB", 159 "ESC", 160 "IS4", 161 "IS3", 162 "IS2", 163 "IS1", 164 "space", 165 "exclamation-mark", 166 "quotation-mark", 167 "number-sign", 168 "dollar-sign", 169 "percent-sign", 170 "ampersand", 171 "apostrophe", 172 "left-parenthesis", 173 "right-parenthesis", 174 "asterisk", 175 "plus-sign", 176 "comma", 177 "hyphen", 178 "period", 179 "slash", 180 "zero", 181 "one", 182 "two", 183 "three", 184 "four", 185 "five", 186 "six", 187 "seven", 188 "eight", 189 "nine", 190 "colon", 191 "semicolon", 192 "less-than-sign", 193 "equals-sign", 194 "greater-than-sign", 195 "question-mark", 196 "commercial-at", 197 "A", 198 "B", 199 "C", 200 "D", 201 "E", 202 "F", 203 "G", 204 "H", 205 "I", 206 "J", 207 "K", 208 "L", 209 "M", 210 "N", 211 "O", 212 "P", 213 "Q", 214 "R", 215 "S", 216 "T", 217 "U", 218 "V", 219 "W", 220 "X", 221 "Y", 222 "Z", 223 "left-square-bracket", 224 "backslash", 225 "right-square-bracket", 226 "circumflex", 227 "underscore", 228 "grave-accent", 229 "a", 230 "b", 231 "c", 232 "d", 233 "e", 234 "f", 235 "g", 236 "h", 237 "i", 238 "j", 239 "k", 240 "l", 241 "m", 242 "n", 243 "o", 244 "p", 245 "q", 246 "r", 247 "s", 248 "t", 249 "u", 250 "v", 251 "w", 252 "x", 253 "y", 254 "z", 255 "left-curly-bracket", 256 "vertical-line", 257 "right-curly-bracket", 258 "tilde", 259 "DEL", 260 }; 261 262 string __s; 263 for (; __first != __last; ++__first) 264 __s += __fctyp.narrow(*__first, 0); 265 266 for (const auto& __it : __collatenames) 267 if (__s == __it) 268 return string_type(1, __fctyp.widen( 269 static_cast<char>(&__it - __collatenames))); 270 271 // TODO Add digraph support: 272 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 273 274 return string_type(); 275 } 276 277 template<typename _Ch_type> 278 template<typename _Fwd_iter> 279 typename regex_traits<_Ch_type>::char_class_type 280 regex_traits<_Ch_type>:: lookup_classname(_Fwd_iter __first,_Fwd_iter __last,bool __icase) const281 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 282 { 283 typedef std::ctype<char_type> __ctype_type; 284 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 285 286 // Mappings from class name to class mask. 287 static const pair<const char*, char_class_type> __classnames[] = 288 { 289 {"d", ctype_base::digit}, 290 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 291 {"s", ctype_base::space}, 292 {"alnum", ctype_base::alnum}, 293 {"alpha", ctype_base::alpha}, 294 {"blank", ctype_base::blank}, 295 {"cntrl", ctype_base::cntrl}, 296 {"digit", ctype_base::digit}, 297 {"graph", ctype_base::graph}, 298 {"lower", ctype_base::lower}, 299 {"print", ctype_base::print}, 300 {"punct", ctype_base::punct}, 301 {"space", ctype_base::space}, 302 {"upper", ctype_base::upper}, 303 {"xdigit", ctype_base::xdigit}, 304 }; 305 306 string __s; 307 for (; __first != __last; ++__first) 308 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 309 310 for (const auto& __it : __classnames) 311 if (__s == __it.first) 312 { 313 if (__icase 314 && ((__it.second 315 & (ctype_base::lower | ctype_base::upper)) != 0)) 316 return ctype_base::alpha; 317 return __it.second; 318 } 319 return 0; 320 } 321 322 template<typename _Ch_type> 323 bool 324 regex_traits<_Ch_type>:: isctype(_Ch_type __c,char_class_type __f) const325 isctype(_Ch_type __c, char_class_type __f) const 326 { 327 typedef std::ctype<char_type> __ctype_type; 328 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 329 330 return __fctyp.is(__f._M_base, __c) 331 // [[:w:]] 332 || ((__f._M_extended & _RegexMask::_S_under) 333 && __c == __fctyp.widen('_')); 334 } 335 336 template<typename _Ch_type> 337 int 338 regex_traits<_Ch_type>:: value(_Ch_type __ch,int __radix) const339 value(_Ch_type __ch, int __radix) const 340 { 341 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 342 long __v; 343 if (__radix == 8) 344 __is >> std::oct; 345 else if (__radix == 16) 346 __is >> std::hex; 347 __is >> __v; 348 return __is.fail() ? -1 : __v; 349 } 350 351 template<typename _Bi_iter, typename _Alloc> 352 template<typename _Out_iter> 353 _Out_iter 354 match_results<_Bi_iter, _Alloc>:: format(_Out_iter __out,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_first,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_last,match_flag_type __flags) const355 format(_Out_iter __out, 356 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 357 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 358 match_flag_type __flags) const 359 { 360 __glibcxx_assert( ready() ); 361 regex_traits<char_type> __traits; 362 typedef std::ctype<char_type> __ctype_type; 363 const __ctype_type& 364 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 365 366 auto __output = [&](size_t __idx) 367 { 368 auto& __sub = (*this)[__idx]; 369 if (__sub.matched) 370 __out = std::copy(__sub.first, __sub.second, __out); 371 }; 372 373 if (__flags & regex_constants::format_sed) 374 { 375 bool __escaping = false; 376 for (; __fmt_first != __fmt_last; __fmt_first++) 377 { 378 if (__escaping) 379 { 380 __escaping = false; 381 if (__fctyp.is(__ctype_type::digit, *__fmt_first)) 382 __output(__traits.value(*__fmt_first, 10)); 383 else 384 *__out++ = *__fmt_first; 385 continue; 386 } 387 if (*__fmt_first == '\\') 388 { 389 __escaping = true; 390 continue; 391 } 392 if (*__fmt_first == '&') 393 { 394 __output(0); 395 continue; 396 } 397 *__out++ = *__fmt_first; 398 } 399 if (__escaping) 400 *__out++ = '\\'; 401 } 402 else 403 { 404 while (1) 405 { 406 auto __next = std::find(__fmt_first, __fmt_last, '$'); 407 if (__next == __fmt_last) 408 break; 409 410 __out = std::copy(__fmt_first, __next, __out); 411 412 auto __eat = [&](char __ch) -> bool 413 { 414 if (*__next == __ch) 415 { 416 ++__next; 417 return true; 418 } 419 return false; 420 }; 421 422 if (++__next == __fmt_last) 423 *__out++ = '$'; 424 else if (__eat('$')) 425 *__out++ = '$'; 426 else if (__eat('&')) 427 __output(0); 428 else if (__eat('`')) 429 { 430 auto& __sub = _M_prefix(); 431 if (__sub.matched) 432 __out = std::copy(__sub.first, __sub.second, __out); 433 } 434 else if (__eat('\'')) 435 { 436 auto& __sub = _M_suffix(); 437 if (__sub.matched) 438 __out = std::copy(__sub.first, __sub.second, __out); 439 } 440 else if (__fctyp.is(__ctype_type::digit, *__next)) 441 { 442 long __num = __traits.value(*__next, 10); 443 if (++__next != __fmt_last 444 && __fctyp.is(__ctype_type::digit, *__next)) 445 { 446 __num *= 10; 447 __num += __traits.value(*__next++, 10); 448 } 449 if (0 <= __num && __num < this->size()) 450 __output(__num); 451 } 452 else 453 *__out++ = '$'; 454 __fmt_first = __next; 455 } 456 __out = std::copy(__fmt_first, __fmt_last, __out); 457 } 458 return __out; 459 } 460 461 template<typename _Out_iter, typename _Bi_iter, 462 typename _Rx_traits, typename _Ch_type> 463 _Out_iter 464 __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 465 const basic_regex<_Ch_type, _Rx_traits>& __e, 466 const _Ch_type* __fmt, size_t __len, 467 regex_constants::match_flag_type __flags) 468 { 469 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 470 _IterT __i(__first, __last, __e, __flags); 471 _IterT __end; 472 if (__i == __end) 473 { 474 if (!(__flags & regex_constants::format_no_copy)) 475 __out = std::copy(__first, __last, __out); 476 } 477 else 478 { 479 sub_match<_Bi_iter> __last; 480 for (; __i != __end; ++__i) 481 { 482 if (!(__flags & regex_constants::format_no_copy)) 483 __out = std::copy(__i->prefix().first, __i->prefix().second, 484 __out); 485 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 486 __last = __i->suffix(); 487 if (__flags & regex_constants::format_first_only) 488 break; 489 } 490 if (!(__flags & regex_constants::format_no_copy)) 491 __out = std::copy(__last.first, __last.second, __out); 492 } 493 return __out; 494 } 495 496 template<typename _Bi_iter, 497 typename _Ch_type, 498 typename _Rx_traits> 499 bool 500 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_iterator & __rhs) const501 operator==(const regex_iterator& __rhs) const noexcept 502 { 503 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr) 504 return true; 505 return _M_pregex == __rhs._M_pregex 506 && _M_begin == __rhs._M_begin 507 && _M_end == __rhs._M_end 508 && _M_flags == __rhs._M_flags 509 && _M_match[0] == __rhs._M_match[0]; 510 } 511 512 template<typename _Bi_iter, 513 typename _Ch_type, 514 typename _Rx_traits> 515 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 516 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()517 operator++() 518 { 519 // In all cases in which the call to regex_search returns true, 520 // match.prefix().first shall be equal to the previous value of 521 // match[0].second, and for each index i in the half-open range 522 // [0, match.size()) for which match[i].matched is true, 523 // match[i].position() shall return distance(begin, match[i].first). 524 // [28.12.1.4.5] 525 if (_M_match[0].matched) 526 { 527 auto __start = _M_match[0].second; 528 auto __prefix_first = _M_match[0].second; 529 if (_M_match[0].first == _M_match[0].second) 530 { 531 if (__start == _M_end) 532 { 533 _M_pregex = nullptr; 534 return *this; 535 } 536 else 537 { 538 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 539 _M_flags 540 | regex_constants::match_not_null 541 | regex_constants::match_continuous)) 542 { 543 __glibcxx_assert(_M_match[0].matched); 544 auto& __prefix = _M_match._M_prefix(); 545 __prefix.first = __prefix_first; 546 __prefix.matched = __prefix.first != __prefix.second; 547 // [28.12.1.4.5] 548 _M_match._M_begin = _M_begin; 549 return *this; 550 } 551 else 552 ++__start; 553 } 554 } 555 _M_flags |= regex_constants::match_prev_avail; 556 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 557 { 558 __glibcxx_assert(_M_match[0].matched); 559 auto& __prefix = _M_match._M_prefix(); 560 __prefix.first = __prefix_first; 561 __prefix.matched = __prefix.first != __prefix.second; 562 // [28.12.1.4.5] 563 _M_match._M_begin = _M_begin; 564 } 565 else 566 _M_pregex = nullptr; 567 } 568 return *this; 569 } 570 571 template<typename _Bi_iter, 572 typename _Ch_type, 573 typename _Rx_traits> 574 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 575 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator =(const regex_token_iterator & __rhs)576 operator=(const regex_token_iterator& __rhs) 577 { 578 _M_position = __rhs._M_position; 579 _M_subs = __rhs._M_subs; 580 _M_n = __rhs._M_n; 581 _M_suffix = __rhs._M_suffix; 582 _M_has_m1 = __rhs._M_has_m1; 583 _M_normalize_result(); 584 return *this; 585 } 586 587 template<typename _Bi_iter, 588 typename _Ch_type, 589 typename _Rx_traits> 590 bool 591 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_token_iterator & __rhs) const592 operator==(const regex_token_iterator& __rhs) const 593 { 594 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 595 return true; 596 if (_M_suffix.matched && __rhs._M_suffix.matched 597 && _M_suffix == __rhs._M_suffix) 598 return true; 599 if (_M_end_of_seq() || _M_suffix.matched 600 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 601 return false; 602 return _M_position == __rhs._M_position 603 && _M_n == __rhs._M_n 604 && _M_subs == __rhs._M_subs; 605 } 606 607 template<typename _Bi_iter, 608 typename _Ch_type, 609 typename _Rx_traits> 610 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 611 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()612 operator++() 613 { 614 _Position __prev = _M_position; 615 if (_M_suffix.matched) 616 *this = regex_token_iterator(); 617 else if (_M_n + 1 < _M_subs.size()) 618 { 619 _M_n++; 620 _M_result = &_M_current_match(); 621 } 622 else 623 { 624 _M_n = 0; 625 ++_M_position; 626 if (_M_position != _Position()) 627 _M_result = &_M_current_match(); 628 else if (_M_has_m1 && __prev->suffix().length() != 0) 629 { 630 _M_suffix.matched = true; 631 _M_suffix.first = __prev->suffix().first; 632 _M_suffix.second = __prev->suffix().second; 633 _M_result = &_M_suffix; 634 } 635 else 636 *this = regex_token_iterator(); 637 } 638 return *this; 639 } 640 641 template<typename _Bi_iter, 642 typename _Ch_type, 643 typename _Rx_traits> 644 void 645 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: _M_init(_Bi_iter __a,_Bi_iter __b)646 _M_init(_Bi_iter __a, _Bi_iter __b) 647 { 648 _M_has_m1 = false; 649 for (auto __it : _M_subs) 650 if (__it == -1) 651 { 652 _M_has_m1 = true; 653 break; 654 } 655 if (_M_position != _Position()) 656 _M_result = &_M_current_match(); 657 else if (_M_has_m1) 658 { 659 _M_suffix.matched = true; 660 _M_suffix.first = __a; 661 _M_suffix.second = __b; 662 _M_result = &_M_suffix; 663 } 664 else 665 _M_result = nullptr; 666 } 667 668 /// @endcond 669 670 _GLIBCXX_END_NAMESPACE_VERSION 671 } // namespace 672