1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2022 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex.tcc
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37   /// @cond undocumented
38 
39   // Result of merging regex_match and regex_search.
40   //
41   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42   // the other one if possible, for test purpose).
43   //
44   // That __match_mode is true means regex_match, else regex_search.
45   template<typename _BiIter, typename _Alloc,
46 	   typename _CharT, typename _TraitsT>
47     bool
__regex_algo_impl(_BiIter __s,_BiIter __e,match_results<_BiIter,_Alloc> & __m,const basic_regex<_CharT,_TraitsT> & __re,regex_constants::match_flag_type __flags,_RegexExecutorPolicy __policy,bool __match_mode)48     __regex_algo_impl(_BiIter                              __s,
49 		      _BiIter                              __e,
50 		      match_results<_BiIter, _Alloc>&      __m,
51 		      const basic_regex<_CharT, _TraitsT>& __re,
52 		      regex_constants::match_flag_type     __flags,
53 		      _RegexExecutorPolicy		   __policy,
54 		      bool				   __match_mode)
55     {
56       if (__re._M_automaton == nullptr)
57 	return false;
58 
59       typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
60       __m._M_begin = __s;
61       __m._M_resize(__re._M_automaton->_M_sub_count());
62 
63       bool __ret;
64       if ((__re.flags() & regex_constants::__polynomial)
65 	  || (__policy == _RegexExecutorPolicy::_S_alternate
66 	      && !__re._M_automaton->_M_has_backref))
67 	{
68 	  _Executor<_BiIter, _Alloc, _TraitsT, false>
69 	    __executor(__s, __e, __res, __re, __flags);
70 	  if (__match_mode)
71 	    __ret = __executor._M_match();
72 	  else
73 	    __ret = __executor._M_search();
74 	}
75       else
76 	{
77 	  _Executor<_BiIter, _Alloc, _TraitsT, true>
78 	    __executor(__s, __e, __res, __re, __flags);
79 	  if (__match_mode)
80 	    __ret = __executor._M_match();
81 	  else
82 	    __ret = __executor._M_search();
83 	}
84       if (__ret)
85 	{
86 	  for (auto& __it : __res)
87 	    if (!__it.matched)
88 	      __it.first = __it.second = __e;
89 	  auto& __pre = __m._M_prefix();
90 	  auto& __suf = __m._M_suffix();
91 	  if (__match_mode)
92 	    {
93 	      __pre.matched = false;
94 	      __pre.first = __s;
95 	      __pre.second = __s;
96 	      __suf.matched = false;
97 	      __suf.first = __e;
98 	      __suf.second = __e;
99 	    }
100 	  else
101 	    {
102 	      __pre.first = __s;
103 	      __pre.second = __res[0].first;
104 	      __pre.matched = (__pre.first != __pre.second);
105 	      __suf.first = __res[0].second;
106 	      __suf.second = __e;
107 	      __suf.matched = (__suf.first != __suf.second);
108 	    }
109 	}
110       else
111 	{
112 	  __m._M_establish_failed_match(__e);
113 	}
114       return __ret;
115     }
116   /// @endcond
117 } // namespace __detail
118 
119   /// @cond
120 
121   template<typename _Ch_type>
122   template<typename _Fwd_iter>
123     typename regex_traits<_Ch_type>::string_type
124     regex_traits<_Ch_type>::
lookup_collatename(_Fwd_iter __first,_Fwd_iter __last) const125     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
126     {
127       typedef std::ctype<char_type> __ctype_type;
128       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
129 
130       static const char* __collatenames[] =
131 	{
132 	  "NUL",
133 	  "SOH",
134 	  "STX",
135 	  "ETX",
136 	  "EOT",
137 	  "ENQ",
138 	  "ACK",
139 	  "alert",
140 	  "backspace",
141 	  "tab",
142 	  "newline",
143 	  "vertical-tab",
144 	  "form-feed",
145 	  "carriage-return",
146 	  "SO",
147 	  "SI",
148 	  "DLE",
149 	  "DC1",
150 	  "DC2",
151 	  "DC3",
152 	  "DC4",
153 	  "NAK",
154 	  "SYN",
155 	  "ETB",
156 	  "CAN",
157 	  "EM",
158 	  "SUB",
159 	  "ESC",
160 	  "IS4",
161 	  "IS3",
162 	  "IS2",
163 	  "IS1",
164 	  "space",
165 	  "exclamation-mark",
166 	  "quotation-mark",
167 	  "number-sign",
168 	  "dollar-sign",
169 	  "percent-sign",
170 	  "ampersand",
171 	  "apostrophe",
172 	  "left-parenthesis",
173 	  "right-parenthesis",
174 	  "asterisk",
175 	  "plus-sign",
176 	  "comma",
177 	  "hyphen",
178 	  "period",
179 	  "slash",
180 	  "zero",
181 	  "one",
182 	  "two",
183 	  "three",
184 	  "four",
185 	  "five",
186 	  "six",
187 	  "seven",
188 	  "eight",
189 	  "nine",
190 	  "colon",
191 	  "semicolon",
192 	  "less-than-sign",
193 	  "equals-sign",
194 	  "greater-than-sign",
195 	  "question-mark",
196 	  "commercial-at",
197 	  "A",
198 	  "B",
199 	  "C",
200 	  "D",
201 	  "E",
202 	  "F",
203 	  "G",
204 	  "H",
205 	  "I",
206 	  "J",
207 	  "K",
208 	  "L",
209 	  "M",
210 	  "N",
211 	  "O",
212 	  "P",
213 	  "Q",
214 	  "R",
215 	  "S",
216 	  "T",
217 	  "U",
218 	  "V",
219 	  "W",
220 	  "X",
221 	  "Y",
222 	  "Z",
223 	  "left-square-bracket",
224 	  "backslash",
225 	  "right-square-bracket",
226 	  "circumflex",
227 	  "underscore",
228 	  "grave-accent",
229 	  "a",
230 	  "b",
231 	  "c",
232 	  "d",
233 	  "e",
234 	  "f",
235 	  "g",
236 	  "h",
237 	  "i",
238 	  "j",
239 	  "k",
240 	  "l",
241 	  "m",
242 	  "n",
243 	  "o",
244 	  "p",
245 	  "q",
246 	  "r",
247 	  "s",
248 	  "t",
249 	  "u",
250 	  "v",
251 	  "w",
252 	  "x",
253 	  "y",
254 	  "z",
255 	  "left-curly-bracket",
256 	  "vertical-line",
257 	  "right-curly-bracket",
258 	  "tilde",
259 	  "DEL",
260 	};
261 
262       string __s;
263       for (; __first != __last; ++__first)
264 	__s += __fctyp.narrow(*__first, 0);
265 
266       for (const auto& __it : __collatenames)
267 	if (__s == __it)
268 	  return string_type(1, __fctyp.widen(
269 	    static_cast<char>(&__it - __collatenames)));
270 
271       // TODO Add digraph support:
272       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
273 
274       return string_type();
275     }
276 
277   template<typename _Ch_type>
278   template<typename _Fwd_iter>
279     typename regex_traits<_Ch_type>::char_class_type
280     regex_traits<_Ch_type>::
lookup_classname(_Fwd_iter __first,_Fwd_iter __last,bool __icase) const281     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
282     {
283       typedef std::ctype<char_type> __ctype_type;
284       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
285 
286       // Mappings from class name to class mask.
287       static const pair<const char*, char_class_type> __classnames[] =
288       {
289 	{"d", ctype_base::digit},
290 	{"w", {ctype_base::alnum, _RegexMask::_S_under}},
291 	{"s", ctype_base::space},
292 	{"alnum", ctype_base::alnum},
293 	{"alpha", ctype_base::alpha},
294 	{"blank", ctype_base::blank},
295 	{"cntrl", ctype_base::cntrl},
296 	{"digit", ctype_base::digit},
297 	{"graph", ctype_base::graph},
298 	{"lower", ctype_base::lower},
299 	{"print", ctype_base::print},
300 	{"punct", ctype_base::punct},
301 	{"space", ctype_base::space},
302 	{"upper", ctype_base::upper},
303 	{"xdigit", ctype_base::xdigit},
304       };
305 
306       string __s;
307       for (; __first != __last; ++__first)
308 	__s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
309 
310       for (const auto& __it : __classnames)
311 	if (__s == __it.first)
312 	  {
313 	    if (__icase
314 		&& ((__it.second
315 		     & (ctype_base::lower | ctype_base::upper)) != 0))
316 	      return ctype_base::alpha;
317 	    return __it.second;
318 	  }
319       return 0;
320     }
321 
322   template<typename _Ch_type>
323     bool
324     regex_traits<_Ch_type>::
isctype(_Ch_type __c,char_class_type __f) const325     isctype(_Ch_type __c, char_class_type __f) const
326     {
327       typedef std::ctype<char_type> __ctype_type;
328       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
329 
330       return __fctyp.is(__f._M_base, __c)
331 	// [[:w:]]
332 	|| ((__f._M_extended & _RegexMask::_S_under)
333 	    && __c == __fctyp.widen('_'));
334     }
335 
336   template<typename _Ch_type>
337     int
338     regex_traits<_Ch_type>::
value(_Ch_type __ch,int __radix) const339     value(_Ch_type __ch, int __radix) const
340     {
341       std::basic_istringstream<char_type> __is(string_type(1, __ch));
342       long __v;
343       if (__radix == 8)
344 	__is >> std::oct;
345       else if (__radix == 16)
346 	__is >> std::hex;
347       __is >> __v;
348       return __is.fail() ? -1 : __v;
349     }
350 
351   template<typename _Bi_iter, typename _Alloc>
352   template<typename _Out_iter>
353     _Out_iter
354     match_results<_Bi_iter, _Alloc>::
format(_Out_iter __out,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_first,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_last,match_flag_type __flags) const355     format(_Out_iter __out,
356 	   const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
357 	   const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
358 	   match_flag_type __flags) const
359     {
360       __glibcxx_assert( ready() );
361       regex_traits<char_type> __traits;
362       typedef std::ctype<char_type> __ctype_type;
363       const __ctype_type&
364 	__fctyp(use_facet<__ctype_type>(__traits.getloc()));
365 
366       auto __output = [&](size_t __idx)
367 	{
368 	  auto& __sub = (*this)[__idx];
369 	  if (__sub.matched)
370 	    __out = std::copy(__sub.first, __sub.second, __out);
371 	};
372 
373       if (__flags & regex_constants::format_sed)
374 	{
375 	  bool __escaping = false;
376 	  for (; __fmt_first != __fmt_last; __fmt_first++)
377 	    {
378 	      if (__escaping)
379 		{
380 		  __escaping = false;
381 		  if (__fctyp.is(__ctype_type::digit, *__fmt_first))
382 		    __output(__traits.value(*__fmt_first, 10));
383 		  else
384 		    *__out++ = *__fmt_first;
385 		  continue;
386 		}
387 	      if (*__fmt_first == '\\')
388 		{
389 		  __escaping = true;
390 		  continue;
391 		}
392 	      if (*__fmt_first == '&')
393 		{
394 		  __output(0);
395 		  continue;
396 		}
397 	      *__out++ = *__fmt_first;
398 	    }
399 	  if (__escaping)
400 	    *__out++ = '\\';
401 	}
402       else
403 	{
404 	  while (1)
405 	    {
406 	      auto __next = std::find(__fmt_first, __fmt_last, '$');
407 	      if (__next == __fmt_last)
408 		break;
409 
410 	      __out = std::copy(__fmt_first, __next, __out);
411 
412 	      auto __eat = [&](char __ch) -> bool
413 		{
414 		  if (*__next == __ch)
415 		    {
416 		      ++__next;
417 		      return true;
418 		    }
419 		  return false;
420 		};
421 
422 	      if (++__next == __fmt_last)
423 		*__out++ = '$';
424 	      else if (__eat('$'))
425 		*__out++ = '$';
426 	      else if (__eat('&'))
427 		__output(0);
428 	      else if (__eat('`'))
429 		{
430 		  auto& __sub = _M_prefix();
431 		  if (__sub.matched)
432 		    __out = std::copy(__sub.first, __sub.second, __out);
433 		}
434 	      else if (__eat('\''))
435 		{
436 		  auto& __sub = _M_suffix();
437 		  if (__sub.matched)
438 		    __out = std::copy(__sub.first, __sub.second, __out);
439 		}
440 	      else if (__fctyp.is(__ctype_type::digit, *__next))
441 		{
442 		  long __num = __traits.value(*__next, 10);
443 		  if (++__next != __fmt_last
444 		      && __fctyp.is(__ctype_type::digit, *__next))
445 		    {
446 		      __num *= 10;
447 		      __num += __traits.value(*__next++, 10);
448 		    }
449 		  if (0 <= __num && __num < this->size())
450 		    __output(__num);
451 		}
452 	      else
453 		*__out++ = '$';
454 	      __fmt_first = __next;
455 	    }
456 	  __out = std::copy(__fmt_first, __fmt_last, __out);
457 	}
458       return __out;
459     }
460 
461   template<typename _Out_iter, typename _Bi_iter,
462 	   typename _Rx_traits, typename _Ch_type>
463     _Out_iter
464     __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
465 		    const basic_regex<_Ch_type, _Rx_traits>& __e,
466 		    const _Ch_type* __fmt, size_t __len,
467 		    regex_constants::match_flag_type __flags)
468     {
469       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
470       _IterT __i(__first, __last, __e, __flags);
471       _IterT __end;
472       if (__i == __end)
473 	{
474 	  if (!(__flags & regex_constants::format_no_copy))
475 	    __out = std::copy(__first, __last, __out);
476 	}
477       else
478 	{
479 	  sub_match<_Bi_iter> __last;
480 	  for (; __i != __end; ++__i)
481 	    {
482 	      if (!(__flags & regex_constants::format_no_copy))
483 		__out = std::copy(__i->prefix().first, __i->prefix().second,
484 				  __out);
485 	      __out = __i->format(__out, __fmt, __fmt + __len, __flags);
486 	      __last = __i->suffix();
487 	      if (__flags & regex_constants::format_first_only)
488 		break;
489 	    }
490 	  if (!(__flags & regex_constants::format_no_copy))
491 	    __out = std::copy(__last.first, __last.second, __out);
492 	}
493       return __out;
494     }
495 
496   template<typename _Bi_iter,
497 	   typename _Ch_type,
498 	   typename _Rx_traits>
499     bool
500     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ==(const regex_iterator & __rhs) const501     operator==(const regex_iterator& __rhs) const noexcept
502     {
503       if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
504 	return true;
505       return _M_pregex == __rhs._M_pregex
506 	  && _M_begin == __rhs._M_begin
507 	  && _M_end == __rhs._M_end
508 	  && _M_flags == __rhs._M_flags
509 	  && _M_match[0] == __rhs._M_match[0];
510     }
511 
512   template<typename _Bi_iter,
513 	   typename _Ch_type,
514 	   typename _Rx_traits>
515     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
516     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ++()517     operator++()
518     {
519       // In all cases in which the call to regex_search returns true,
520       // match.prefix().first shall be equal to the previous value of
521       // match[0].second, and for each index i in the half-open range
522       // [0, match.size()) for which match[i].matched is true,
523       // match[i].position() shall return distance(begin, match[i].first).
524       // [28.12.1.4.5]
525       if (_M_match[0].matched)
526 	{
527 	  auto __start = _M_match[0].second;
528 	  auto __prefix_first = _M_match[0].second;
529 	  if (_M_match[0].first == _M_match[0].second)
530 	    {
531 	      if (__start == _M_end)
532 		{
533 		  _M_pregex = nullptr;
534 		  return *this;
535 		}
536 	      else
537 		{
538 		  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
539 				   _M_flags
540 				   | regex_constants::match_not_null
541 				   | regex_constants::match_continuous))
542 		    {
543 		      __glibcxx_assert(_M_match[0].matched);
544 		      auto& __prefix = _M_match._M_prefix();
545 		      __prefix.first = __prefix_first;
546 		      __prefix.matched = __prefix.first != __prefix.second;
547 		      // [28.12.1.4.5]
548 		      _M_match._M_begin = _M_begin;
549 		      return *this;
550 		    }
551 		  else
552 		    ++__start;
553 		}
554 	    }
555 	  _M_flags |= regex_constants::match_prev_avail;
556 	  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
557 	    {
558 	      __glibcxx_assert(_M_match[0].matched);
559 	      auto& __prefix = _M_match._M_prefix();
560 	      __prefix.first = __prefix_first;
561 	      __prefix.matched = __prefix.first != __prefix.second;
562 	      // [28.12.1.4.5]
563 	      _M_match._M_begin = _M_begin;
564 	    }
565 	  else
566 	    _M_pregex = nullptr;
567 	}
568       return *this;
569     }
570 
571   template<typename _Bi_iter,
572 	   typename _Ch_type,
573 	   typename _Rx_traits>
574     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
575     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator =(const regex_token_iterator & __rhs)576     operator=(const regex_token_iterator& __rhs)
577     {
578       _M_position = __rhs._M_position;
579       _M_subs = __rhs._M_subs;
580       _M_n = __rhs._M_n;
581       _M_suffix = __rhs._M_suffix;
582       _M_has_m1 = __rhs._M_has_m1;
583       _M_normalize_result();
584       return *this;
585     }
586 
587   template<typename _Bi_iter,
588 	   typename _Ch_type,
589 	   typename _Rx_traits>
590     bool
591     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ==(const regex_token_iterator & __rhs) const592     operator==(const regex_token_iterator& __rhs) const
593     {
594       if (_M_end_of_seq() && __rhs._M_end_of_seq())
595 	return true;
596       if (_M_suffix.matched && __rhs._M_suffix.matched
597 	  && _M_suffix == __rhs._M_suffix)
598 	return true;
599       if (_M_end_of_seq() || _M_suffix.matched
600 	  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
601 	return false;
602       return _M_position == __rhs._M_position
603 	&& _M_n == __rhs._M_n
604 	&& _M_subs == __rhs._M_subs;
605     }
606 
607   template<typename _Bi_iter,
608 	   typename _Ch_type,
609 	   typename _Rx_traits>
610     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
611     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ++()612     operator++()
613     {
614       _Position __prev = _M_position;
615       if (_M_suffix.matched)
616 	*this = regex_token_iterator();
617       else if (_M_n + 1 < _M_subs.size())
618 	{
619 	  _M_n++;
620 	  _M_result = &_M_current_match();
621 	}
622       else
623 	{
624 	  _M_n = 0;
625 	  ++_M_position;
626 	  if (_M_position != _Position())
627 	    _M_result = &_M_current_match();
628 	  else if (_M_has_m1 && __prev->suffix().length() != 0)
629 	    {
630 	      _M_suffix.matched = true;
631 	      _M_suffix.first = __prev->suffix().first;
632 	      _M_suffix.second = __prev->suffix().second;
633 	      _M_result = &_M_suffix;
634 	    }
635 	  else
636 	    *this = regex_token_iterator();
637 	}
638       return *this;
639     }
640 
641   template<typename _Bi_iter,
642 	   typename _Ch_type,
643 	   typename _Rx_traits>
644     void
645     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
_M_init(_Bi_iter __a,_Bi_iter __b)646     _M_init(_Bi_iter __a, _Bi_iter __b)
647     {
648       _M_has_m1 = false;
649       for (auto __it : _M_subs)
650 	if (__it == -1)
651 	  {
652 	    _M_has_m1 = true;
653 	    break;
654 	  }
655       if (_M_position != _Position())
656 	_M_result = &_M_current_match();
657       else if (_M_has_m1)
658 	{
659 	  _M_suffix.matched = true;
660 	  _M_suffix.first = __a;
661 	  _M_suffix.second = __b;
662 	  _M_result = &_M_suffix;
663 	}
664       else
665 	_M_result = nullptr;
666     }
667 
668   /// @endcond
669 
670 _GLIBCXX_END_NAMESPACE_VERSION
671 } // namespace
672