1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex.tcc
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37   // Result of merging regex_match and regex_search.
38   //
39   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40   // the other one if possible, for test purpose).
41   //
42   // That __match_mode is true means regex_match, else regex_search.
43   template<typename _BiIter, typename _Alloc,
44 	   typename _CharT, typename _TraitsT,
45 	   _RegexExecutorPolicy __policy,
46 	   bool __match_mode>
47     bool
48     __regex_algo_impl(_BiIter                              __s,
49 		      _BiIter                              __e,
50 		      match_results<_BiIter, _Alloc>&      __m,
51 		      const basic_regex<_CharT, _TraitsT>& __re,
52 		      regex_constants::match_flag_type     __flags)
53     {
54       if (__re._M_automaton == nullptr)
55 	return false;
56 
57       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58       __m._M_begin = __s;
59       __m._M_resize(__re._M_automaton->_M_sub_count());
60       for (auto& __it : __res)
61 	__it.matched = false;
62 
63       bool __ret;
64       if ((__re.flags() & regex_constants::__polynomial)
65 	  || (__policy == _RegexExecutorPolicy::_S_alternate
66 	      && !__re._M_automaton->_M_has_backref))
67 	{
68 	  _Executor<_BiIter, _Alloc, _TraitsT, false>
69 	    __executor(__s, __e, __m, __re, __flags);
70 	  if (__match_mode)
71 	    __ret = __executor._M_match();
72 	  else
73 	    __ret = __executor._M_search();
74 	}
75       else
76 	{
77 	  _Executor<_BiIter, _Alloc, _TraitsT, true>
78 	    __executor(__s, __e, __m, __re, __flags);
79 	  if (__match_mode)
80 	    __ret = __executor._M_match();
81 	  else
82 	    __ret = __executor._M_search();
83 	}
84       if (__ret)
85 	{
86 	  for (auto& __it : __res)
87 	    if (!__it.matched)
88 	      __it.first = __it.second = __e;
89 	  auto& __pre = __m._M_prefix();
90 	  auto& __suf = __m._M_suffix();
91 	  if (__match_mode)
92 	    {
93 	      __pre.matched = false;
94 	      __pre.first = __s;
95 	      __pre.second = __s;
96 	      __suf.matched = false;
97 	      __suf.first = __e;
98 	      __suf.second = __e;
99 	    }
100 	  else
101 	    {
102 	      __pre.first = __s;
103 	      __pre.second = __res[0].first;
104 	      __pre.matched = (__pre.first != __pre.second);
105 	      __suf.first = __res[0].second;
106 	      __suf.second = __e;
107 	      __suf.matched = (__suf.first != __suf.second);
108 	    }
109 	}
110       else
111 	{
112 	  __m._M_resize(0);
113 	  for (auto& __it : __res)
114 	    {
115 	      __it.matched = false;
116 	      __it.first = __it.second = __e;
117 	    }
118 	}
119       return __ret;
120     }
121 }
122 
123   template<typename _Ch_type>
124   template<typename _Fwd_iter>
125     typename regex_traits<_Ch_type>::string_type
126     regex_traits<_Ch_type>::
127     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
128     {
129       typedef std::ctype<char_type> __ctype_type;
130       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
131 
132       static const char* __collatenames[] =
133 	{
134 	  "NUL",
135 	  "SOH",
136 	  "STX",
137 	  "ETX",
138 	  "EOT",
139 	  "ENQ",
140 	  "ACK",
141 	  "alert",
142 	  "backspace",
143 	  "tab",
144 	  "newline",
145 	  "vertical-tab",
146 	  "form-feed",
147 	  "carriage-return",
148 	  "SO",
149 	  "SI",
150 	  "DLE",
151 	  "DC1",
152 	  "DC2",
153 	  "DC3",
154 	  "DC4",
155 	  "NAK",
156 	  "SYN",
157 	  "ETB",
158 	  "CAN",
159 	  "EM",
160 	  "SUB",
161 	  "ESC",
162 	  "IS4",
163 	  "IS3",
164 	  "IS2",
165 	  "IS1",
166 	  "space",
167 	  "exclamation-mark",
168 	  "quotation-mark",
169 	  "number-sign",
170 	  "dollar-sign",
171 	  "percent-sign",
172 	  "ampersand",
173 	  "apostrophe",
174 	  "left-parenthesis",
175 	  "right-parenthesis",
176 	  "asterisk",
177 	  "plus-sign",
178 	  "comma",
179 	  "hyphen",
180 	  "period",
181 	  "slash",
182 	  "zero",
183 	  "one",
184 	  "two",
185 	  "three",
186 	  "four",
187 	  "five",
188 	  "six",
189 	  "seven",
190 	  "eight",
191 	  "nine",
192 	  "colon",
193 	  "semicolon",
194 	  "less-than-sign",
195 	  "equals-sign",
196 	  "greater-than-sign",
197 	  "question-mark",
198 	  "commercial-at",
199 	  "A",
200 	  "B",
201 	  "C",
202 	  "D",
203 	  "E",
204 	  "F",
205 	  "G",
206 	  "H",
207 	  "I",
208 	  "J",
209 	  "K",
210 	  "L",
211 	  "M",
212 	  "N",
213 	  "O",
214 	  "P",
215 	  "Q",
216 	  "R",
217 	  "S",
218 	  "T",
219 	  "U",
220 	  "V",
221 	  "W",
222 	  "X",
223 	  "Y",
224 	  "Z",
225 	  "left-square-bracket",
226 	  "backslash",
227 	  "right-square-bracket",
228 	  "circumflex",
229 	  "underscore",
230 	  "grave-accent",
231 	  "a",
232 	  "b",
233 	  "c",
234 	  "d",
235 	  "e",
236 	  "f",
237 	  "g",
238 	  "h",
239 	  "i",
240 	  "j",
241 	  "k",
242 	  "l",
243 	  "m",
244 	  "n",
245 	  "o",
246 	  "p",
247 	  "q",
248 	  "r",
249 	  "s",
250 	  "t",
251 	  "u",
252 	  "v",
253 	  "w",
254 	  "x",
255 	  "y",
256 	  "z",
257 	  "left-curly-bracket",
258 	  "vertical-line",
259 	  "right-curly-bracket",
260 	  "tilde",
261 	  "DEL",
262 	};
263 
264       string __s;
265       for (; __first != __last; ++__first)
266 	__s += __fctyp.narrow(*__first, 0);
267 
268       for (const auto& __it : __collatenames)
269 	if (__s == __it)
270 	  return string_type(1, __fctyp.widen(
271 	    static_cast<char>(&__it - __collatenames)));
272 
273       // TODO Add digraph support:
274       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
275 
276       return string_type();
277     }
278 
279   template<typename _Ch_type>
280   template<typename _Fwd_iter>
281     typename regex_traits<_Ch_type>::char_class_type
282     regex_traits<_Ch_type>::
283     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
284     {
285       typedef std::ctype<char_type> __ctype_type;
286       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
287 
288       // Mappings from class name to class mask.
289       static const pair<const char*, char_class_type> __classnames[] =
290       {
291 	{"d", ctype_base::digit},
292 	{"w", {ctype_base::alnum, _RegexMask::_S_under}},
293 	{"s", ctype_base::space},
294 	{"alnum", ctype_base::alnum},
295 	{"alpha", ctype_base::alpha},
296 	{"blank", ctype_base::blank},
297 	{"cntrl", ctype_base::cntrl},
298 	{"digit", ctype_base::digit},
299 	{"graph", ctype_base::graph},
300 	{"lower", ctype_base::lower},
301 	{"print", ctype_base::print},
302 	{"punct", ctype_base::punct},
303 	{"space", ctype_base::space},
304 	{"upper", ctype_base::upper},
305 	{"xdigit", ctype_base::xdigit},
306       };
307 
308       string __s;
309       for (; __first != __last; ++__first)
310 	__s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
311 
312       for (const auto& __it : __classnames)
313 	if (__s == __it.first)
314 	  {
315 	    if (__icase
316 		&& ((__it.second
317 		     & (ctype_base::lower | ctype_base::upper)) != 0))
318 	      return ctype_base::alpha;
319 	    return __it.second;
320 	  }
321       return 0;
322     }
323 
324   template<typename _Ch_type>
325     bool
326     regex_traits<_Ch_type>::
327     isctype(_Ch_type __c, char_class_type __f) const
328     {
329       typedef std::ctype<char_type> __ctype_type;
330       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
331 
332       return __fctyp.is(__f._M_base, __c)
333 	// [[:w:]]
334 	|| ((__f._M_extended & _RegexMask::_S_under)
335 	    && __c == __fctyp.widen('_'));
336     }
337 
338   template<typename _Ch_type>
339     int
340     regex_traits<_Ch_type>::
341     value(_Ch_type __ch, int __radix) const
342     {
343       std::basic_istringstream<char_type> __is(string_type(1, __ch));
344       long __v;
345       if (__radix == 8)
346 	__is >> std::oct;
347       else if (__radix == 16)
348 	__is >> std::hex;
349       __is >> __v;
350       return __is.fail() ? -1 : __v;
351     }
352 
353   template<typename _Bi_iter, typename _Alloc>
354   template<typename _Out_iter>
355     _Out_iter match_results<_Bi_iter, _Alloc>::
356     format(_Out_iter __out,
357 	   const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
358 	   const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
359 	   match_flag_type __flags) const
360     {
361       __glibcxx_assert( ready() );
362       regex_traits<char_type> __traits;
363       typedef std::ctype<char_type> __ctype_type;
364       const __ctype_type&
365 	__fctyp(use_facet<__ctype_type>(__traits.getloc()));
366 
367       auto __output = [&](size_t __idx)
368 	{
369 	  auto& __sub = (*this)[__idx];
370 	  if (__sub.matched)
371 	    __out = std::copy(__sub.first, __sub.second, __out);
372 	};
373 
374       if (__flags & regex_constants::format_sed)
375 	{
376 	  bool __escaping = false;
377 	  for (; __fmt_first != __fmt_last; __fmt_first++)
378 	    {
379 	      if (__escaping)
380 		{
381 		  __escaping = false;
382 		  if (__fctyp.is(__ctype_type::digit, *__fmt_first))
383 		    __output(__traits.value(*__fmt_first, 10));
384 		  else
385 		    *__out++ = *__fmt_first;
386 		  continue;
387 		}
388 	      if (*__fmt_first == '\\')
389 		{
390 		  __escaping = true;
391 		  continue;
392 		}
393 	      if (*__fmt_first == '&')
394 		{
395 		  __output(0);
396 		  continue;
397 		}
398 	      *__out++ = *__fmt_first;
399 	    }
400 	  if (__escaping)
401 	    *__out++ = '\\';
402 	}
403       else
404 	{
405 	  while (1)
406 	    {
407 	      auto __next = std::find(__fmt_first, __fmt_last, '$');
408 	      if (__next == __fmt_last)
409 		break;
410 
411 	      __out = std::copy(__fmt_first, __next, __out);
412 
413 	      auto __eat = [&](char __ch) -> bool
414 		{
415 		  if (*__next == __ch)
416 		    {
417 		      ++__next;
418 		      return true;
419 		    }
420 		  return false;
421 		};
422 
423 	      if (++__next == __fmt_last)
424 		*__out++ = '$';
425 	      else if (__eat('$'))
426 		*__out++ = '$';
427 	      else if (__eat('&'))
428 		__output(0);
429 	      else if (__eat('`'))
430 		{
431 		  auto& __sub = _M_prefix();
432 		  if (__sub.matched)
433 		    __out = std::copy(__sub.first, __sub.second, __out);
434 		}
435 	      else if (__eat('\''))
436 		{
437 		  auto& __sub = _M_suffix();
438 		  if (__sub.matched)
439 		    __out = std::copy(__sub.first, __sub.second, __out);
440 		}
441 	      else if (__fctyp.is(__ctype_type::digit, *__next))
442 		{
443 		  long __num = __traits.value(*__next, 10);
444 		  if (++__next != __fmt_last
445 		      && __fctyp.is(__ctype_type::digit, *__next))
446 		    {
447 		      __num *= 10;
448 		      __num += __traits.value(*__next++, 10);
449 		    }
450 		  if (0 <= __num && __num < this->size())
451 		    __output(__num);
452 		}
453 	      else
454 		*__out++ = '$';
455 	      __fmt_first = __next;
456 	    }
457 	  __out = std::copy(__fmt_first, __fmt_last, __out);
458 	}
459       return __out;
460     }
461 
462   template<typename _Out_iter, typename _Bi_iter,
463 	   typename _Rx_traits, typename _Ch_type>
464     _Out_iter
465     regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
466 		  const basic_regex<_Ch_type, _Rx_traits>& __e,
467 		  const _Ch_type* __fmt,
468 		  regex_constants::match_flag_type __flags)
469     {
470       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
471       _IterT __i(__first, __last, __e, __flags);
472       _IterT __end;
473       if (__i == __end)
474 	{
475 	  if (!(__flags & regex_constants::format_no_copy))
476 	    __out = std::copy(__first, __last, __out);
477 	}
478       else
479 	{
480 	  sub_match<_Bi_iter> __last;
481 	  auto __len = char_traits<_Ch_type>::length(__fmt);
482 	  for (; __i != __end; ++__i)
483 	    {
484 	      if (!(__flags & regex_constants::format_no_copy))
485 		__out = std::copy(__i->prefix().first, __i->prefix().second,
486 				  __out);
487 	      __out = __i->format(__out, __fmt, __fmt + __len, __flags);
488 	      __last = __i->suffix();
489 	      if (__flags & regex_constants::format_first_only)
490 		break;
491 	    }
492 	  if (!(__flags & regex_constants::format_no_copy))
493 	    __out = std::copy(__last.first, __last.second, __out);
494 	}
495       return __out;
496     }
497 
498   template<typename _Bi_iter,
499 	   typename _Ch_type,
500 	   typename _Rx_traits>
501     bool
502     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
503     operator==(const regex_iterator& __rhs) const
504     {
505       if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
506 	return true;
507       return _M_pregex == __rhs._M_pregex
508 	  && _M_begin == __rhs._M_begin
509 	  && _M_end == __rhs._M_end
510 	  && _M_flags == __rhs._M_flags
511 	  && _M_match[0] == __rhs._M_match[0];
512     }
513 
514   template<typename _Bi_iter,
515 	   typename _Ch_type,
516 	   typename _Rx_traits>
517     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
518     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
519     operator++()
520     {
521       // In all cases in which the call to regex_search returns true,
522       // match.prefix().first shall be equal to the previous value of
523       // match[0].second, and for each index i in the half-open range
524       // [0, match.size()) for which match[i].matched is true,
525       // match[i].position() shall return distance(begin, match[i].first).
526       // [28.12.1.4.5]
527       if (_M_match[0].matched)
528 	{
529 	  auto __start = _M_match[0].second;
530 	  auto __prefix_first = _M_match[0].second;
531 	  if (_M_match[0].first == _M_match[0].second)
532 	    {
533 	      if (__start == _M_end)
534 		{
535 		  _M_pregex = nullptr;
536 		  return *this;
537 		}
538 	      else
539 		{
540 		  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
541 				   _M_flags
542 				   | regex_constants::match_not_null
543 				   | regex_constants::match_continuous))
544 		    {
545 		      __glibcxx_assert(_M_match[0].matched);
546 		      auto& __prefix = _M_match._M_prefix();
547 		      __prefix.first = __prefix_first;
548 		      __prefix.matched = __prefix.first != __prefix.second;
549 		      // [28.12.1.4.5]
550 		      _M_match._M_begin = _M_begin;
551 		      return *this;
552 		    }
553 		  else
554 		    ++__start;
555 		}
556 	    }
557 	  _M_flags |= regex_constants::match_prev_avail;
558 	  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
559 	    {
560 	      __glibcxx_assert(_M_match[0].matched);
561 	      auto& __prefix = _M_match._M_prefix();
562 	      __prefix.first = __prefix_first;
563 	      __prefix.matched = __prefix.first != __prefix.second;
564 	      // [28.12.1.4.5]
565 	      _M_match._M_begin = _M_begin;
566 	    }
567 	  else
568 	    _M_pregex = nullptr;
569 	}
570       return *this;
571     }
572 
573   template<typename _Bi_iter,
574 	   typename _Ch_type,
575 	   typename _Rx_traits>
576     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
577     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
578     operator=(const regex_token_iterator& __rhs)
579     {
580       _M_position = __rhs._M_position;
581       _M_subs = __rhs._M_subs;
582       _M_n = __rhs._M_n;
583       _M_suffix = __rhs._M_suffix;
584       _M_has_m1 = __rhs._M_has_m1;
585       _M_normalize_result();
586       return *this;
587     }
588 
589   template<typename _Bi_iter,
590 	   typename _Ch_type,
591 	   typename _Rx_traits>
592     bool
593     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
594     operator==(const regex_token_iterator& __rhs) const
595     {
596       if (_M_end_of_seq() && __rhs._M_end_of_seq())
597 	return true;
598       if (_M_suffix.matched && __rhs._M_suffix.matched
599 	  && _M_suffix == __rhs._M_suffix)
600 	return true;
601       if (_M_end_of_seq() || _M_suffix.matched
602 	  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
603 	return false;
604       return _M_position == __rhs._M_position
605 	&& _M_n == __rhs._M_n
606 	&& _M_subs == __rhs._M_subs;
607     }
608 
609   template<typename _Bi_iter,
610 	   typename _Ch_type,
611 	   typename _Rx_traits>
612     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
613     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
614     operator++()
615     {
616       _Position __prev = _M_position;
617       if (_M_suffix.matched)
618 	*this = regex_token_iterator();
619       else if (_M_n + 1 < _M_subs.size())
620 	{
621 	  _M_n++;
622 	  _M_result = &_M_current_match();
623 	}
624       else
625 	{
626 	  _M_n = 0;
627 	  ++_M_position;
628 	  if (_M_position != _Position())
629 	    _M_result = &_M_current_match();
630 	  else if (_M_has_m1 && __prev->suffix().length() != 0)
631 	    {
632 	      _M_suffix.matched = true;
633 	      _M_suffix.first = __prev->suffix().first;
634 	      _M_suffix.second = __prev->suffix().second;
635 	      _M_result = &_M_suffix;
636 	    }
637 	  else
638 	    *this = regex_token_iterator();
639 	}
640       return *this;
641     }
642 
643   template<typename _Bi_iter,
644 	   typename _Ch_type,
645 	   typename _Rx_traits>
646     void
647     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
648     _M_init(_Bi_iter __a, _Bi_iter __b)
649     {
650       _M_has_m1 = false;
651       for (auto __it : _M_subs)
652 	if (__it == -1)
653 	  {
654 	    _M_has_m1 = true;
655 	    break;
656 	  }
657       if (_M_position != _Position())
658 	_M_result = &_M_current_match();
659       else if (_M_has_m1)
660 	{
661 	  _M_suffix.matched = true;
662 	  _M_suffix.first = __a;
663 	  _M_suffix.second = __b;
664 	  _M_result = &_M_suffix;
665 	}
666       else
667 	_M_result = nullptr;
668     }
669 
670 _GLIBCXX_END_NAMESPACE_VERSION
671 } // namespace
672