// class template regex -*- C++ -*-

// Copyright (C) 2013-2018 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.
24*38fd1498Szrj 
/**
 *  @file bits/regex_scanner.tcc
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */
30*38fd1498Szrj 
// FIXME make comments doxygen format.

// N3376 specifies 6 regex styles: ECMAScript, basic, extended, grep, egrep
// and awk.
// 1) grep is basic, except that '\n' is treated as '|'.
// 2) egrep is extended, except that '\n' is treated as '|'.
// 3) awk is extended, except that it has special escaping rules and no
//    back-references.
//
// References:
//
// ECMAScript: ECMA-262 15.10
//
// basic, extended:
// http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html
//
// awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html
48*38fd1498Szrj 
49*38fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default)
50*38fd1498Szrj {
51*38fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION
52*38fd1498Szrj 
53*38fd1498Szrj namespace __detail
54*38fd1498Szrj {
55*38fd1498Szrj   template<typename _CharT>
56*38fd1498Szrj     _Scanner<_CharT>::
_Scanner(typename _Scanner::_IterT __begin,typename _Scanner::_IterT __end,_FlagT __flags,std::locale __loc)57*38fd1498Szrj     _Scanner(typename _Scanner::_IterT __begin,
58*38fd1498Szrj 	     typename _Scanner::_IterT __end,
59*38fd1498Szrj 	     _FlagT __flags, std::locale __loc)
60*38fd1498Szrj     : _ScannerBase(__flags),
61*38fd1498Szrj       _M_current(__begin), _M_end(__end),
62*38fd1498Szrj       _M_ctype(std::use_facet<_CtypeT>(__loc)),
63*38fd1498Szrj       _M_eat_escape(_M_is_ecma()
64*38fd1498Szrj 		    ? &_Scanner::_M_eat_escape_ecma
65*38fd1498Szrj 		    : &_Scanner::_M_eat_escape_posix)
66*38fd1498Szrj     { _M_advance(); }
67*38fd1498Szrj 
68*38fd1498Szrj   template<typename _CharT>
69*38fd1498Szrj     void
70*38fd1498Szrj     _Scanner<_CharT>::
_M_advance()71*38fd1498Szrj     _M_advance()
72*38fd1498Szrj     {
73*38fd1498Szrj       if (_M_current == _M_end)
74*38fd1498Szrj 	{
75*38fd1498Szrj 	  _M_token = _S_token_eof;
76*38fd1498Szrj 	  return;
77*38fd1498Szrj 	}
78*38fd1498Szrj 
79*38fd1498Szrj       if (_M_state == _S_state_normal)
80*38fd1498Szrj 	_M_scan_normal();
81*38fd1498Szrj       else if (_M_state == _S_state_in_bracket)
82*38fd1498Szrj 	_M_scan_in_bracket();
83*38fd1498Szrj       else if (_M_state == _S_state_in_brace)
84*38fd1498Szrj 	_M_scan_in_brace();
85*38fd1498Szrj       else
86*38fd1498Szrj 	{
87*38fd1498Szrj 	  __glibcxx_assert(false);
88*38fd1498Szrj 	}
89*38fd1498Szrj     }
90*38fd1498Szrj 
91*38fd1498Szrj   // Differences between styles:
92*38fd1498Szrj   // 1) "\(", "\)", "\{" in basic. It's not escaping.
93*38fd1498Szrj   // 2) "(?:", "(?=", "(?!" in ECMAScript.
94*38fd1498Szrj   template<typename _CharT>
95*38fd1498Szrj     void
96*38fd1498Szrj     _Scanner<_CharT>::
_M_scan_normal()97*38fd1498Szrj     _M_scan_normal()
98*38fd1498Szrj     {
99*38fd1498Szrj       auto __c = *_M_current++;
100*38fd1498Szrj 
101*38fd1498Szrj       if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr)
102*38fd1498Szrj 	{
103*38fd1498Szrj 	  _M_token = _S_token_ord_char;
104*38fd1498Szrj 	  _M_value.assign(1, __c);
105*38fd1498Szrj 	  return;
106*38fd1498Szrj 	}
107*38fd1498Szrj       if (__c == '\\')
108*38fd1498Szrj 	{
109*38fd1498Szrj 	  if (_M_current == _M_end)
110*38fd1498Szrj 	    __throw_regex_error(
111*38fd1498Szrj 	      regex_constants::error_escape,
112*38fd1498Szrj 	      "Unexpected end of regex when escaping.");
113*38fd1498Szrj 
114*38fd1498Szrj 	  if (!_M_is_basic()
115*38fd1498Szrj 	      || (*_M_current != '('
116*38fd1498Szrj 		  && *_M_current != ')'
117*38fd1498Szrj 		  && *_M_current != '{'))
118*38fd1498Szrj 	    {
119*38fd1498Szrj 	      (this->*_M_eat_escape)();
120*38fd1498Szrj 	      return;
121*38fd1498Szrj 	    }
122*38fd1498Szrj 	  __c = *_M_current++;
123*38fd1498Szrj 	}
124*38fd1498Szrj       if (__c == '(')
125*38fd1498Szrj 	{
126*38fd1498Szrj 	  if (_M_is_ecma() && *_M_current == '?')
127*38fd1498Szrj 	    {
128*38fd1498Szrj 	      if (++_M_current == _M_end)
129*38fd1498Szrj 		__throw_regex_error(
130*38fd1498Szrj 		  regex_constants::error_paren,
131*38fd1498Szrj 		  "Unexpected end of regex when in an open parenthesis.");
132*38fd1498Szrj 
133*38fd1498Szrj 	      if (*_M_current == ':')
134*38fd1498Szrj 		{
135*38fd1498Szrj 		  ++_M_current;
136*38fd1498Szrj 		  _M_token = _S_token_subexpr_no_group_begin;
137*38fd1498Szrj 		}
138*38fd1498Szrj 	      else if (*_M_current == '=')
139*38fd1498Szrj 		{
140*38fd1498Szrj 		  ++_M_current;
141*38fd1498Szrj 		  _M_token = _S_token_subexpr_lookahead_begin;
142*38fd1498Szrj 		  _M_value.assign(1, 'p');
143*38fd1498Szrj 		}
144*38fd1498Szrj 	      else if (*_M_current == '!')
145*38fd1498Szrj 		{
146*38fd1498Szrj 		  ++_M_current;
147*38fd1498Szrj 		  _M_token = _S_token_subexpr_lookahead_begin;
148*38fd1498Szrj 		  _M_value.assign(1, 'n');
149*38fd1498Szrj 		}
150*38fd1498Szrj 	      else
151*38fd1498Szrj 		__throw_regex_error(
152*38fd1498Szrj 		  regex_constants::error_paren,
153*38fd1498Szrj 		  "Invalid special open parenthesis.");
154*38fd1498Szrj 	    }
155*38fd1498Szrj 	  else if (_M_flags & regex_constants::nosubs)
156*38fd1498Szrj 	    _M_token = _S_token_subexpr_no_group_begin;
157*38fd1498Szrj 	  else
158*38fd1498Szrj 	    _M_token = _S_token_subexpr_begin;
159*38fd1498Szrj 	}
160*38fd1498Szrj       else if (__c == ')')
161*38fd1498Szrj 	_M_token = _S_token_subexpr_end;
162*38fd1498Szrj       else if (__c == '[')
163*38fd1498Szrj 	{
164*38fd1498Szrj 	  _M_state = _S_state_in_bracket;
165*38fd1498Szrj 	  _M_at_bracket_start = true;
166*38fd1498Szrj 	  if (_M_current != _M_end && *_M_current == '^')
167*38fd1498Szrj 	    {
168*38fd1498Szrj 	      _M_token = _S_token_bracket_neg_begin;
169*38fd1498Szrj 	      ++_M_current;
170*38fd1498Szrj 	    }
171*38fd1498Szrj 	  else
172*38fd1498Szrj 	    _M_token = _S_token_bracket_begin;
173*38fd1498Szrj 	}
174*38fd1498Szrj       else if (__c == '{')
175*38fd1498Szrj 	{
176*38fd1498Szrj 	  _M_state = _S_state_in_brace;
177*38fd1498Szrj 	  _M_token = _S_token_interval_begin;
178*38fd1498Szrj 	}
179*38fd1498Szrj       else if (__c != ']' && __c != '}')
180*38fd1498Szrj 	{
181*38fd1498Szrj 	  auto __it = _M_token_tbl;
182*38fd1498Szrj 	  auto __narrowc = _M_ctype.narrow(__c, '\0');
183*38fd1498Szrj 	  for (; __it->first != '\0'; ++__it)
184*38fd1498Szrj 	    if (__it->first == __narrowc)
185*38fd1498Szrj 	      {
186*38fd1498Szrj 		_M_token = __it->second;
187*38fd1498Szrj 		return;
188*38fd1498Szrj 	      }
189*38fd1498Szrj 	  __glibcxx_assert(false);
190*38fd1498Szrj 	}
191*38fd1498Szrj       else
192*38fd1498Szrj 	{
193*38fd1498Szrj 	  _M_token = _S_token_ord_char;
194*38fd1498Szrj 	  _M_value.assign(1, __c);
195*38fd1498Szrj 	}
196*38fd1498Szrj     }
197*38fd1498Szrj 
198*38fd1498Szrj   // Differences between styles:
199*38fd1498Szrj   // 1) different semantics of "[]" and "[^]".
200*38fd1498Szrj   // 2) Escaping in bracket expr.
201*38fd1498Szrj   template<typename _CharT>
202*38fd1498Szrj     void
203*38fd1498Szrj     _Scanner<_CharT>::
_M_scan_in_bracket()204*38fd1498Szrj     _M_scan_in_bracket()
205*38fd1498Szrj     {
206*38fd1498Szrj       if (_M_current == _M_end)
207*38fd1498Szrj 	__throw_regex_error(
208*38fd1498Szrj 	  regex_constants::error_brack,
209*38fd1498Szrj 	  "Unexpected end of regex when in bracket expression.");
210*38fd1498Szrj 
211*38fd1498Szrj       auto __c = *_M_current++;
212*38fd1498Szrj 
213*38fd1498Szrj       if (__c == '-')
214*38fd1498Szrj 	_M_token = _S_token_bracket_dash;
215*38fd1498Szrj       else if (__c == '[')
216*38fd1498Szrj 	{
217*38fd1498Szrj 	  if (_M_current == _M_end)
218*38fd1498Szrj 	    __throw_regex_error(regex_constants::error_brack,
219*38fd1498Szrj 				"Unexpected character class open bracket.");
220*38fd1498Szrj 
221*38fd1498Szrj 	  if (*_M_current == '.')
222*38fd1498Szrj 	    {
223*38fd1498Szrj 	      _M_token = _S_token_collsymbol;
224*38fd1498Szrj 	      _M_eat_class(*_M_current++);
225*38fd1498Szrj 	    }
226*38fd1498Szrj 	  else if (*_M_current == ':')
227*38fd1498Szrj 	    {
228*38fd1498Szrj 	      _M_token = _S_token_char_class_name;
229*38fd1498Szrj 	      _M_eat_class(*_M_current++);
230*38fd1498Szrj 	    }
231*38fd1498Szrj 	  else if (*_M_current == '=')
232*38fd1498Szrj 	    {
233*38fd1498Szrj 	      _M_token = _S_token_equiv_class_name;
234*38fd1498Szrj 	      _M_eat_class(*_M_current++);
235*38fd1498Szrj 	    }
236*38fd1498Szrj 	  else
237*38fd1498Szrj 	    {
238*38fd1498Szrj 	      _M_token = _S_token_ord_char;
239*38fd1498Szrj 	      _M_value.assign(1, __c);
240*38fd1498Szrj 	    }
241*38fd1498Szrj 	}
242*38fd1498Szrj       // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted
243*38fd1498Szrj       // literally. So "[]]" and "[^]]" are valid regexes. See the testcases
244*38fd1498Szrj       // `*/empty_range.cc`.
245*38fd1498Szrj       else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start))
246*38fd1498Szrj 	{
247*38fd1498Szrj 	  _M_token = _S_token_bracket_end;
248*38fd1498Szrj 	  _M_state = _S_state_normal;
249*38fd1498Szrj 	}
250*38fd1498Szrj       // ECMAScript and awk permits escaping in bracket.
251*38fd1498Szrj       else if (__c == '\\' && (_M_is_ecma() || _M_is_awk()))
252*38fd1498Szrj 	(this->*_M_eat_escape)();
253*38fd1498Szrj       else
254*38fd1498Szrj 	{
255*38fd1498Szrj 	  _M_token = _S_token_ord_char;
256*38fd1498Szrj 	  _M_value.assign(1, __c);
257*38fd1498Szrj 	}
258*38fd1498Szrj       _M_at_bracket_start = false;
259*38fd1498Szrj     }
260*38fd1498Szrj 
261*38fd1498Szrj   // Differences between styles:
262*38fd1498Szrj   // 1) "\}" in basic style.
263*38fd1498Szrj   template<typename _CharT>
264*38fd1498Szrj     void
265*38fd1498Szrj     _Scanner<_CharT>::
_M_scan_in_brace()266*38fd1498Szrj     _M_scan_in_brace()
267*38fd1498Szrj     {
268*38fd1498Szrj       if (_M_current == _M_end)
269*38fd1498Szrj 	__throw_regex_error(
270*38fd1498Szrj 	  regex_constants::error_brace,
271*38fd1498Szrj 	  "Unexpected end of regex when in brace expression.");
272*38fd1498Szrj 
273*38fd1498Szrj       auto __c = *_M_current++;
274*38fd1498Szrj 
275*38fd1498Szrj       if (_M_ctype.is(_CtypeT::digit, __c))
276*38fd1498Szrj 	{
277*38fd1498Szrj 	  _M_token = _S_token_dup_count;
278*38fd1498Szrj 	  _M_value.assign(1, __c);
279*38fd1498Szrj 	  while (_M_current != _M_end
280*38fd1498Szrj 		 && _M_ctype.is(_CtypeT::digit, *_M_current))
281*38fd1498Szrj 	    _M_value += *_M_current++;
282*38fd1498Szrj 	}
283*38fd1498Szrj       else if (__c == ',')
284*38fd1498Szrj 	_M_token = _S_token_comma;
285*38fd1498Szrj       // basic use \}.
286*38fd1498Szrj       else if (_M_is_basic())
287*38fd1498Szrj 	{
288*38fd1498Szrj 	  if (__c == '\\' && _M_current != _M_end && *_M_current == '}')
289*38fd1498Szrj 	    {
290*38fd1498Szrj 	      _M_state = _S_state_normal;
291*38fd1498Szrj 	      _M_token = _S_token_interval_end;
292*38fd1498Szrj 	      ++_M_current;
293*38fd1498Szrj 	    }
294*38fd1498Szrj 	  else
295*38fd1498Szrj 	    __throw_regex_error(regex_constants::error_badbrace,
296*38fd1498Szrj 				"Unexpected character in brace expression.");
297*38fd1498Szrj 	}
298*38fd1498Szrj       else if (__c == '}')
299*38fd1498Szrj 	{
300*38fd1498Szrj 	  _M_state = _S_state_normal;
301*38fd1498Szrj 	  _M_token = _S_token_interval_end;
302*38fd1498Szrj 	}
303*38fd1498Szrj       else
304*38fd1498Szrj 	__throw_regex_error(regex_constants::error_badbrace,
305*38fd1498Szrj 			    "Unexpected character in brace expression.");
306*38fd1498Szrj     }
307*38fd1498Szrj 
308*38fd1498Szrj   template<typename _CharT>
309*38fd1498Szrj     void
310*38fd1498Szrj     _Scanner<_CharT>::
_M_eat_escape_ecma()311*38fd1498Szrj     _M_eat_escape_ecma()
312*38fd1498Szrj     {
313*38fd1498Szrj       if (_M_current == _M_end)
314*38fd1498Szrj 	__throw_regex_error(regex_constants::error_escape,
315*38fd1498Szrj 			    "Unexpected end of regex when escaping.");
316*38fd1498Szrj 
317*38fd1498Szrj       auto __c = *_M_current++;
318*38fd1498Szrj       auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
319*38fd1498Szrj 
320*38fd1498Szrj       if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket))
321*38fd1498Szrj 	{
322*38fd1498Szrj 	  _M_token = _S_token_ord_char;
323*38fd1498Szrj 	  _M_value.assign(1, *__pos);
324*38fd1498Szrj 	}
325*38fd1498Szrj       else if (__c == 'b')
326*38fd1498Szrj 	{
327*38fd1498Szrj 	  _M_token = _S_token_word_bound;
328*38fd1498Szrj 	  _M_value.assign(1, 'p');
329*38fd1498Szrj 	}
330*38fd1498Szrj       else if (__c == 'B')
331*38fd1498Szrj 	{
332*38fd1498Szrj 	  _M_token = _S_token_word_bound;
333*38fd1498Szrj 	  _M_value.assign(1, 'n');
334*38fd1498Szrj 	}
335*38fd1498Szrj       // N3376 28.13
336*38fd1498Szrj       else if (__c == 'd'
337*38fd1498Szrj 	       || __c == 'D'
338*38fd1498Szrj 	       || __c == 's'
339*38fd1498Szrj 	       || __c == 'S'
340*38fd1498Szrj 	       || __c == 'w'
341*38fd1498Szrj 	       || __c == 'W')
342*38fd1498Szrj 	{
343*38fd1498Szrj 	  _M_token = _S_token_quoted_class;
344*38fd1498Szrj 	  _M_value.assign(1, __c);
345*38fd1498Szrj 	}
346*38fd1498Szrj       else if (__c == 'c')
347*38fd1498Szrj 	{
348*38fd1498Szrj 	  if (_M_current == _M_end)
349*38fd1498Szrj 	    __throw_regex_error(
350*38fd1498Szrj 	      regex_constants::error_escape,
351*38fd1498Szrj 	      "Unexpected end of regex when reading control code.");
352*38fd1498Szrj 	  _M_token = _S_token_ord_char;
353*38fd1498Szrj 	  _M_value.assign(1, *_M_current++);
354*38fd1498Szrj 	}
355*38fd1498Szrj       else if (__c == 'x' || __c == 'u')
356*38fd1498Szrj 	{
357*38fd1498Szrj 	  _M_value.erase();
358*38fd1498Szrj 	  for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++)
359*38fd1498Szrj 	    {
360*38fd1498Szrj 	      if (_M_current == _M_end
361*38fd1498Szrj 		  || !_M_ctype.is(_CtypeT::xdigit, *_M_current))
362*38fd1498Szrj 		__throw_regex_error(
363*38fd1498Szrj 		  regex_constants::error_escape,
364*38fd1498Szrj 		  "Unexpected end of regex when ascii character.");
365*38fd1498Szrj 	      _M_value += *_M_current++;
366*38fd1498Szrj 	    }
367*38fd1498Szrj 	  _M_token = _S_token_hex_num;
368*38fd1498Szrj 	}
369*38fd1498Szrj       // ECMAScript recognizes multi-digit back-references.
370*38fd1498Szrj       else if (_M_ctype.is(_CtypeT::digit, __c))
371*38fd1498Szrj 	{
372*38fd1498Szrj 	  _M_value.assign(1, __c);
373*38fd1498Szrj 	  while (_M_current != _M_end
374*38fd1498Szrj 		 && _M_ctype.is(_CtypeT::digit, *_M_current))
375*38fd1498Szrj 	    _M_value += *_M_current++;
376*38fd1498Szrj 	  _M_token = _S_token_backref;
377*38fd1498Szrj 	}
378*38fd1498Szrj       else
379*38fd1498Szrj 	{
380*38fd1498Szrj 	  _M_token = _S_token_ord_char;
381*38fd1498Szrj 	  _M_value.assign(1, __c);
382*38fd1498Szrj 	}
383*38fd1498Szrj     }
384*38fd1498Szrj 
385*38fd1498Szrj   // Differences between styles:
386*38fd1498Szrj   // 1) Extended doesn't support backref, but basic does.
387*38fd1498Szrj   template<typename _CharT>
388*38fd1498Szrj     void
389*38fd1498Szrj     _Scanner<_CharT>::
_M_eat_escape_posix()390*38fd1498Szrj     _M_eat_escape_posix()
391*38fd1498Szrj     {
392*38fd1498Szrj       if (_M_current == _M_end)
393*38fd1498Szrj 	__throw_regex_error(regex_constants::error_escape,
394*38fd1498Szrj 			    "Unexpected end of regex when escaping.");
395*38fd1498Szrj 
396*38fd1498Szrj       auto __c = *_M_current;
397*38fd1498Szrj       auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
398*38fd1498Szrj 
399*38fd1498Szrj       if (__pos != nullptr && *__pos != '\0')
400*38fd1498Szrj 	{
401*38fd1498Szrj 	  _M_token = _S_token_ord_char;
402*38fd1498Szrj 	  _M_value.assign(1, __c);
403*38fd1498Szrj 	}
404*38fd1498Szrj       // We MUST judge awk before handling backrefs. There's no backref in awk.
405*38fd1498Szrj       else if (_M_is_awk())
406*38fd1498Szrj 	{
407*38fd1498Szrj 	  _M_eat_escape_awk();
408*38fd1498Szrj 	  return;
409*38fd1498Szrj 	}
410*38fd1498Szrj       else if (_M_is_basic() && _M_ctype.is(_CtypeT::digit, __c) && __c != '0')
411*38fd1498Szrj 	{
412*38fd1498Szrj 	  _M_token = _S_token_backref;
413*38fd1498Szrj 	  _M_value.assign(1, __c);
414*38fd1498Szrj 	}
415*38fd1498Szrj       else
416*38fd1498Szrj 	{
417*38fd1498Szrj #ifdef __STRICT_ANSI__
418*38fd1498Szrj 	  // POSIX says it is undefined to escape ordinary characters
419*38fd1498Szrj 	  __throw_regex_error(regex_constants::error_escape,
420*38fd1498Szrj 			      "Unexpected escape character.");
421*38fd1498Szrj #else
422*38fd1498Szrj 	  _M_token = _S_token_ord_char;
423*38fd1498Szrj 	  _M_value.assign(1, __c);
424*38fd1498Szrj #endif
425*38fd1498Szrj 	}
426*38fd1498Szrj       ++_M_current;
427*38fd1498Szrj     }
428*38fd1498Szrj 
429*38fd1498Szrj   template<typename _CharT>
430*38fd1498Szrj     void
431*38fd1498Szrj     _Scanner<_CharT>::
_M_eat_escape_awk()432*38fd1498Szrj     _M_eat_escape_awk()
433*38fd1498Szrj     {
434*38fd1498Szrj       auto __c = *_M_current++;
435*38fd1498Szrj       auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
436*38fd1498Szrj 
437*38fd1498Szrj       if (__pos != nullptr)
438*38fd1498Szrj 	{
439*38fd1498Szrj 	  _M_token = _S_token_ord_char;
440*38fd1498Szrj 	  _M_value.assign(1, *__pos);
441*38fd1498Szrj 	}
442*38fd1498Szrj       // \ddd for oct representation
443*38fd1498Szrj       else if (_M_ctype.is(_CtypeT::digit, __c)
444*38fd1498Szrj 	       && __c != '8'
445*38fd1498Szrj 	       && __c != '9')
446*38fd1498Szrj 	{
447*38fd1498Szrj 	  _M_value.assign(1,  __c);
448*38fd1498Szrj 	  for (int __i = 0;
449*38fd1498Szrj 	       __i < 2
450*38fd1498Szrj 	       && _M_current != _M_end
451*38fd1498Szrj 	       && _M_ctype.is(_CtypeT::digit, *_M_current)
452*38fd1498Szrj 	       && *_M_current != '8'
453*38fd1498Szrj 	       && *_M_current != '9';
454*38fd1498Szrj 	       __i++)
455*38fd1498Szrj 	    _M_value += *_M_current++;
456*38fd1498Szrj 	  _M_token = _S_token_oct_num;
457*38fd1498Szrj 	  return;
458*38fd1498Szrj 	}
459*38fd1498Szrj       else
460*38fd1498Szrj 	__throw_regex_error(regex_constants::error_escape,
461*38fd1498Szrj 			    "Unexpected escape character.");
462*38fd1498Szrj     }
463*38fd1498Szrj 
464*38fd1498Szrj   // Eats a character class or throws an exception.
465*38fd1498Szrj   // __ch could be ':', '.' or '=', _M_current is the char after ']' when
466*38fd1498Szrj   // returning.
467*38fd1498Szrj   template<typename _CharT>
468*38fd1498Szrj     void
469*38fd1498Szrj     _Scanner<_CharT>::
_M_eat_class(char __ch)470*38fd1498Szrj     _M_eat_class(char __ch)
471*38fd1498Szrj     {
472*38fd1498Szrj       for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
473*38fd1498Szrj 	_M_value += *_M_current++;
474*38fd1498Szrj       if (_M_current == _M_end
475*38fd1498Szrj 	  || *_M_current++ != __ch
476*38fd1498Szrj 	  || _M_current == _M_end // skip __ch
477*38fd1498Szrj 	  || *_M_current++ != ']') // skip ']'
478*38fd1498Szrj 	{
479*38fd1498Szrj 	  if (__ch == ':')
480*38fd1498Szrj 	    __throw_regex_error(regex_constants::error_ctype,
481*38fd1498Szrj 				"Unexpected end of character class.");
482*38fd1498Szrj 	  else
483*38fd1498Szrj 	    __throw_regex_error(regex_constants::error_collate,
484*38fd1498Szrj 				"Unexpected end of character class.");
485*38fd1498Szrj 	}
486*38fd1498Szrj     }
487*38fd1498Szrj 
#ifdef _GLIBCXX_DEBUG
  // Debug-only helper: writes a one-line description of the current token
  // (including _M_value for the tokens that carry text) to ostr and
  // returns ostr.  A token kind not listed below trips
  // _GLIBCXX_DEBUG_ASSERT and writes nothing.
  template<typename _CharT>
    std::ostream&
    _Scanner<_CharT>::
    _M_print(std::ostream& ostr)
    {
      switch (_M_token)
        {
        case _S_token_anychar:
          return ostr << "any-character\n";
        case _S_token_backref:
          return ostr << "backref\n";
        case _S_token_bracket_begin:
          return ostr << "bracket-begin\n";
        case _S_token_bracket_neg_begin:
          return ostr << "bracket-neg-begin\n";
        case _S_token_bracket_end:
          return ostr << "bracket-end\n";
        case _S_token_char_class_name:
          return ostr << "char-class-name \"" << _M_value << "\"\n";
        case _S_token_closure0:
          return ostr << "closure0\n";
        case _S_token_closure1:
          return ostr << "closure1\n";
        case _S_token_collsymbol:
          return ostr << "collsymbol \"" << _M_value << "\"\n";
        case _S_token_comma:
          return ostr << "comma\n";
        case _S_token_dup_count:
          return ostr << "dup count: " << _M_value << "\n";
        case _S_token_eof:
          return ostr << "EOF\n";
        case _S_token_equiv_class_name:
          return ostr << "equiv-class-name \"" << _M_value << "\"\n";
        case _S_token_interval_begin:
          return ostr << "interval begin\n";
        case _S_token_interval_end:
          return ostr << "interval end\n";
        case _S_token_line_begin:
          return ostr << "line begin\n";
        case _S_token_line_end:
          return ostr << "line end\n";
        case _S_token_opt:
          return ostr << "opt\n";
        case _S_token_or:
          return ostr << "or\n";
        case _S_token_ord_char:
          return ostr << "ordinary character: \"" << _M_value << "\"\n";
        case _S_token_subexpr_begin:
          return ostr << "subexpr begin\n";
        case _S_token_subexpr_no_group_begin:
          return ostr << "no grouping subexpr begin\n";
        case _S_token_subexpr_lookahead_begin:
          return ostr << "lookahead subexpr begin\n";
        case _S_token_subexpr_end:
          return ostr << "subexpr end\n";
        case _S_token_unknown:
          return ostr << "-- unknown token --\n";
        case _S_token_oct_num:
          return ostr << "oct number " << _M_value << "\n";
        case _S_token_hex_num:
          return ostr << "hex number " << _M_value << "\n";
        case _S_token_quoted_class:
          return ostr << "quoted class " << "\\" << _M_value << "\n";
        default:
          _GLIBCXX_DEBUG_ASSERT(false);
          break;
        }
      return ostr;
    }
#endif
586*38fd1498Szrj 
587*38fd1498Szrj } // namespace __detail
588*38fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION
589*38fd1498Szrj } // namespace
590