1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex_compiler.h
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __regex
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37   struct _Scanner_base
38   {
39     typedef unsigned int _StateT;
40 
41     static constexpr _StateT _S_state_at_start    = 1 << 0;
42     static constexpr _StateT _S_state_in_brace    = 1 << 2;
43     static constexpr _StateT _S_state_in_bracket  = 1 << 3;
44 
45     virtual ~_Scanner_base() { };
46   };
47 
48   //
49   // @brief Scans an input range for regex tokens.
50   //
51   // The %_Scanner class interprets the regular expression pattern in the input
52   // range passed to its constructor as a sequence of parse tokens passed to
53   // the regular expression compiler.  The sequence of tokens provided depends
54   // on the flag settings passed to the constructor:  different regular
55   // expression grammars will interpret the same input pattern in
56   // syntactically different ways.
57   //
58   template<typename _InputIterator>
59     class _Scanner: public _Scanner_base
60     {
61     public:
62       typedef _InputIterator                                        _IteratorT;
63       typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
64       typedef std::basic_string<_CharT>                             _StringT;
65       typedef regex_constants::syntax_option_type                   _FlagT;
66       typedef const std::ctype<_CharT>                              _CtypeT;
67 
68       // Token types returned from the scanner.
69       enum _TokenT
70       {
71 	_S_token_anychar,
72 	_S_token_backref,
73 	_S_token_bracket_begin,
74 	_S_token_bracket_end,
75 	_S_token_inverse_class,
76 	_S_token_char_class_name,
77 	_S_token_closure0,
78 	_S_token_closure1,
79 	_S_token_collelem_multi,
80 	_S_token_collelem_single,
81 	_S_token_collsymbol,
82 	_S_token_comma,
83 	_S_token_dash,
84 	_S_token_dup_count,
85 	_S_token_eof,
86 	_S_token_equiv_class_name,
87 	_S_token_interval_begin,
88 	_S_token_interval_end,
89 	_S_token_line_begin,
90 	_S_token_line_end,
91 	_S_token_opt,
92 	_S_token_or,
93 	_S_token_ord_char,
94 	_S_token_quoted_char,
95 	_S_token_subexpr_begin,
96 	_S_token_subexpr_end,
97 	_S_token_word_begin,
98 	_S_token_word_end,
99 	_S_token_unknown
100       };
101 
102     public:
103       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
104 	       std::locale __loc)
105       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
106         _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
107       { _M_advance(); }
108 
109       void
110       _M_advance();
111 
112       _TokenT
113       _M_token() const
114       { return _M_curToken; }
115 
116       const _StringT&
117       _M_value() const
118       { return _M_curValue; }
119 
120 #ifdef _GLIBCXX_DEBUG
121       std::ostream&
122       _M_print(std::ostream&);
123 #endif
124 
125     private:
126       void
127       _M_eat_escape();
128 
129       void
130       _M_scan_in_brace();
131 
132       void
133       _M_scan_in_bracket();
134 
135       void
136       _M_eat_charclass();
137 
138       void
139       _M_eat_equivclass();
140 
141       void
142       _M_eat_collsymbol();
143 
144     private:
145       _IteratorT  _M_current;
146       _IteratorT  _M_end;
147       _FlagT      _M_flags;
148       _CtypeT&    _M_ctype;
149       _TokenT     _M_curToken;
150       _StringT    _M_curValue;
151       _StateT     _M_state;
152     };
153 
154   template<typename _InputIterator>
155     void
156     _Scanner<_InputIterator>::
157     _M_advance()
158     {
159       if (_M_current == _M_end)
160 	{
161 	  _M_curToken = _S_token_eof;
162 	  return;
163 	}
164 
165       _CharT __c = *_M_current;
166       if (_M_state & _S_state_in_bracket)
167 	{
168 	  _M_scan_in_bracket();
169 	  return;
170 	}
171       if (_M_state & _S_state_in_brace)
172 	{
173 	  _M_scan_in_brace();
174 	  return;
175 	}
176 #if 0
177       // TODO: re-enable line anchors when _M_assertion is implemented.
178       // See PR libstdc++/47724
179       else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
180 	{
181 	  _M_curToken = _S_token_line_begin;
182 	  ++_M_current;
183 	  return;
184 	}
185       else if (__c == _M_ctype.widen('$'))
186 	{
187 	  _M_curToken = _S_token_line_end;
188 	  ++_M_current;
189 	  return;
190 	}
191 #endif
192       else if (__c == _M_ctype.widen('.'))
193 	{
194 	  _M_curToken = _S_token_anychar;
195 	  ++_M_current;
196 	  return;
197 	}
198       else if (__c == _M_ctype.widen('*'))
199 	{
200 	  _M_curToken = _S_token_closure0;
201 	  ++_M_current;
202 	  return;
203 	}
204       else if (__c == _M_ctype.widen('+'))
205 	{
206 	  _M_curToken = _S_token_closure1;
207 	  ++_M_current;
208 	  return;
209 	}
210       else if (__c == _M_ctype.widen('|'))
211 	{
212 	  _M_curToken = _S_token_or;
213 	  ++_M_current;
214 	  return;
215 	}
216       else if (__c == _M_ctype.widen('['))
217 	{
218 	  _M_curToken = _S_token_bracket_begin;
219 	  _M_state |= (_S_state_in_bracket | _S_state_at_start);
220 	  ++_M_current;
221 	  return;
222 	}
223       else if (__c == _M_ctype.widen('\\'))
224 	{
225 	  _M_eat_escape();
226 	  return;
227 	}
228       else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
229 	{
230 	  if (__c == _M_ctype.widen('('))
231 	    {
232 	      _M_curToken = _S_token_subexpr_begin;
233 	      ++_M_current;
234 	      return;
235 	    }
236 	  else if (__c == _M_ctype.widen(')'))
237 	    {
238 	      _M_curToken = _S_token_subexpr_end;
239 	      ++_M_current;
240 	      return;
241 	    }
242 	  else if (__c == _M_ctype.widen('{'))
243 	    {
244 	      _M_curToken = _S_token_interval_begin;
245 	      _M_state |= _S_state_in_brace;
246 	      ++_M_current;
247 	      return;
248 	    }
249 	}
250 
251       _M_curToken = _S_token_ord_char;
252       _M_curValue.assign(1, __c);
253       ++_M_current;
254     }
255 
256 
257   template<typename _InputIterator>
258     void
259     _Scanner<_InputIterator>::
260     _M_scan_in_brace()
261     {
262       if (_M_ctype.is(_CtypeT::digit, *_M_current))
263 	{
264 	  _M_curToken = _S_token_dup_count;
265 	  _M_curValue.assign(1, *_M_current);
266 	  ++_M_current;
267 	  while (_M_current != _M_end
268 		 && _M_ctype.is(_CtypeT::digit, *_M_current))
269 	    {
270 	      _M_curValue += *_M_current;
271 	      ++_M_current;
272 	    }
273 	  return;
274 	}
275       else if (*_M_current == _M_ctype.widen(','))
276 	{
277 	  _M_curToken = _S_token_comma;
278 	  ++_M_current;
279 	  return;
280 	}
281       if (_M_flags & (regex_constants::basic | regex_constants::grep))
282 	{
283 	  if (*_M_current == _M_ctype.widen('\\'))
284 	    _M_eat_escape();
285 	}
286       else
287 	{
288 	  if (*_M_current == _M_ctype.widen('}'))
289 	    {
290 	      _M_curToken = _S_token_interval_end;
291 	      _M_state &= ~_S_state_in_brace;
292 	      ++_M_current;
293 	      return;
294 	    }
295 	}
296     }
297 
298   template<typename _InputIterator>
299     void
300     _Scanner<_InputIterator>::
301     _M_scan_in_bracket()
302     {
303       if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
304 	{
305 	  _M_curToken = _S_token_inverse_class;
306 	  _M_state &= ~_S_state_at_start;
307 	  ++_M_current;
308 	  return;
309 	}
310       else if (*_M_current == _M_ctype.widen('['))
311 	{
312 	  ++_M_current;
313 	  if (_M_current == _M_end)
314 	    {
315 	      _M_curToken = _S_token_eof;
316 	      return;
317 	    }
318 
319 	  if (*_M_current == _M_ctype.widen('.'))
320 	    {
321 	      _M_curToken = _S_token_collsymbol;
322 	      _M_eat_collsymbol();
323 	      return;
324 	    }
325 	  else if (*_M_current == _M_ctype.widen(':'))
326 	    {
327 	      _M_curToken = _S_token_char_class_name;
328 	      _M_eat_charclass();
329 	      return;
330 	    }
331 	  else if (*_M_current == _M_ctype.widen('='))
332 	    {
333 	      _M_curToken = _S_token_equiv_class_name;
334 	      _M_eat_equivclass();
335 	      return;
336 	    }
337 	}
338       else if (*_M_current == _M_ctype.widen('-'))
339 	{
340 	  _M_curToken = _S_token_dash;
341 	  ++_M_current;
342 	  return;
343 	}
344       else if (*_M_current == _M_ctype.widen(']'))
345 	{
346 	  if (!(_M_flags & regex_constants::ECMAScript)
347 	      || !(_M_state & _S_state_at_start))
348 	    {
349 	      // special case: only if  _not_ chr first after
350 	      // '[' or '[^' and if not ECMAscript
351 	      _M_curToken = _S_token_bracket_end;
352 	      ++_M_current;
353 	      return;
354 	    }
355 	}
356       _M_curToken = _S_token_collelem_single;
357       _M_curValue.assign(1, *_M_current);
358       ++_M_current;
359     }
360 
361   template<typename _InputIterator>
362     void
363     _Scanner<_InputIterator>::
364     _M_eat_escape()
365     {
366       ++_M_current;
367       if (_M_current == _M_end)
368 	{
369 	  _M_curToken = _S_token_eof;
370 	  return;
371 	}
372       _CharT __c = *_M_current;
373       ++_M_current;
374 
375       if (__c == _M_ctype.widen('('))
376 	{
377 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
378 	    {
379 	      _M_curToken = _S_token_ord_char;
380 	      _M_curValue.assign(1, __c);
381 	    }
382 	  else
383 	    _M_curToken = _S_token_subexpr_begin;
384 	}
385       else if (__c == _M_ctype.widen(')'))
386 	{
387 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
388 	    {
389 	      _M_curToken = _S_token_ord_char;
390 	      _M_curValue.assign(1, __c);
391 	    }
392 	  else
393 	    _M_curToken = _S_token_subexpr_end;
394 	}
395       else if (__c == _M_ctype.widen('{'))
396 	{
397 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
398 	    {
399 	      _M_curToken = _S_token_ord_char;
400 	      _M_curValue.assign(1, __c);
401 	    }
402 	  else
403 	    {
404 	      _M_curToken = _S_token_interval_begin;
405 	      _M_state |= _S_state_in_brace;
406 	    }
407 	}
408       else if (__c == _M_ctype.widen('}'))
409 	{
410 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
411 	    {
412 	      _M_curToken = _S_token_ord_char;
413 	      _M_curValue.assign(1, __c);
414 	    }
415 	  else
416 	    {
417 	      if (!(_M_state && _S_state_in_brace))
418 		__throw_regex_error(regex_constants::error_badbrace);
419 	      _M_state &= ~_S_state_in_brace;
420 	      _M_curToken = _S_token_interval_end;
421 	    }
422 	}
423       else if (__c == _M_ctype.widen('x'))
424 	{
425 	  ++_M_current;
426 	  if (_M_current == _M_end)
427 	    {
428 	      _M_curToken = _S_token_eof;
429 	      return;
430 	    }
431 	  if (_M_ctype.is(_CtypeT::digit, *_M_current))
432 	    {
433 	      _M_curValue.assign(1, *_M_current);
434 	      ++_M_current;
435 	      if (_M_current == _M_end)
436 		{
437 		  _M_curToken = _S_token_eof;
438 		  return;
439 		}
440 	      if (_M_ctype.is(_CtypeT::digit, *_M_current))
441 		{
442 		  _M_curValue += *_M_current;
443 		  ++_M_current;
444 		  return;
445 		}
446 	    }
447 	}
448       else if (__c == _M_ctype.widen('^')
449 	       || __c == _M_ctype.widen('.')
450 	       || __c == _M_ctype.widen('*')
451 	       || __c == _M_ctype.widen('$')
452 	       || __c == _M_ctype.widen('\\'))
453 	{
454 	  _M_curToken = _S_token_ord_char;
455 	  _M_curValue.assign(1, __c);
456 	}
457       else if (_M_ctype.is(_CtypeT::digit, __c))
458 	{
459 	  _M_curToken = _S_token_backref;
460 	  _M_curValue.assign(1, __c);
461 	}
462       else
463 	__throw_regex_error(regex_constants::error_escape);
464     }
465 
466 
467   // Eats a character class or throwns an exception.
468   // current point to ':' delimiter on entry, char after ']' on return
469   template<typename _InputIterator>
470     void
471     _Scanner<_InputIterator>::
472     _M_eat_charclass()
473     {
474       ++_M_current; // skip ':'
475       if (_M_current == _M_end)
476 	__throw_regex_error(regex_constants::error_ctype);
477       for (_M_curValue.clear();
478 	   _M_current != _M_end && *_M_current != _M_ctype.widen(':');
479 	   ++_M_current)
480 	_M_curValue += *_M_current;
481       if (_M_current == _M_end)
482 	__throw_regex_error(regex_constants::error_ctype);
483       ++_M_current; // skip ':'
484       if (*_M_current != _M_ctype.widen(']'))
485 	__throw_regex_error(regex_constants::error_ctype);
486       ++_M_current; // skip ']'
487     }
488 
489 
490   template<typename _InputIterator>
491     void
492     _Scanner<_InputIterator>::
493     _M_eat_equivclass()
494     {
495       ++_M_current; // skip '='
496       if (_M_current == _M_end)
497 	__throw_regex_error(regex_constants::error_collate);
498       for (_M_curValue.clear();
499 	   _M_current != _M_end && *_M_current != _M_ctype.widen('=');
500 	   ++_M_current)
501 	_M_curValue += *_M_current;
502       if (_M_current == _M_end)
503 	__throw_regex_error(regex_constants::error_collate);
504       ++_M_current; // skip '='
505       if (*_M_current != _M_ctype.widen(']'))
506 	__throw_regex_error(regex_constants::error_collate);
507       ++_M_current; // skip ']'
508     }
509 
510 
511   template<typename _InputIterator>
512     void
513     _Scanner<_InputIterator>::
514     _M_eat_collsymbol()
515     {
516       ++_M_current; // skip '.'
517       if (_M_current == _M_end)
518 	__throw_regex_error(regex_constants::error_collate);
519       for (_M_curValue.clear();
520 	   _M_current != _M_end && *_M_current != _M_ctype.widen('.');
521 	   ++_M_current)
522 	_M_curValue += *_M_current;
523       if (_M_current == _M_end)
524 	__throw_regex_error(regex_constants::error_collate);
525       ++_M_current; // skip '.'
526       if (*_M_current != _M_ctype.widen(']'))
527 	__throw_regex_error(regex_constants::error_collate);
528       ++_M_current; // skip ']'
529     }
530 
531 #ifdef _GLIBCXX_DEBUG
532   template<typename _InputIterator>
533     std::ostream&
534     _Scanner<_InputIterator>::
535     _M_print(std::ostream& ostr)
536     {
537       switch (_M_curToken)
538       {
539 	case _S_token_anychar:
540 	  ostr << "any-character\n";
541 	  break;
542 	case _S_token_backref:
543 	  ostr << "backref\n";
544 	  break;
545 	case _S_token_bracket_begin:
546 	  ostr << "bracket-begin\n";
547 	  break;
548 	case _S_token_bracket_end:
549 	  ostr << "bracket-end\n";
550 	  break;
551 	case _S_token_char_class_name:
552 	  ostr << "char-class-name \"" << _M_curValue << "\"\n";
553 	  break;
554 	case _S_token_closure0:
555 	  ostr << "closure0\n";
556 	  break;
557 	case _S_token_closure1:
558 	  ostr << "closure1\n";
559 	  break;
560 	case _S_token_collelem_multi:
561 	  ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
562 	  break;
563 	case _S_token_collelem_single:
564 	  ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
565 	  break;
566 	case _S_token_collsymbol:
567 	  ostr << "collsymbol \"" << _M_curValue << "\"\n";
568 	  break;
569 	case _S_token_comma:
570 	  ostr << "comma\n";
571 	  break;
572 	case _S_token_dash:
573 	  ostr << "dash\n";
574 	  break;
575 	case _S_token_dup_count:
576 	  ostr << "dup count: " << _M_curValue << "\n";
577 	  break;
578 	case _S_token_eof:
579 	  ostr << "EOF\n";
580 	  break;
581 	case _S_token_equiv_class_name:
582 	  ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
583 	  break;
584 	case _S_token_interval_begin:
585 	  ostr << "interval begin\n";
586 	  break;
587 	case _S_token_interval_end:
588 	  ostr << "interval end\n";
589 	  break;
590 	case _S_token_line_begin:
591 	  ostr << "line begin\n";
592 	  break;
593 	case _S_token_line_end:
594 	  ostr << "line end\n";
595 	  break;
596 	case _S_token_opt:
597 	  ostr << "opt\n";
598 	  break;
599 	case _S_token_or:
600 	  ostr << "or\n";
601 	  break;
602 	case _S_token_ord_char:
603 	  ostr << "ordinary character: \"" << _M_value() << "\"\n";
604 	  break;
605 	case _S_token_quoted_char:
606 	  ostr << "quoted char\n";
607 	  break;
608 	case _S_token_subexpr_begin:
609 	  ostr << "subexpr begin\n";
610 	  break;
611 	case _S_token_subexpr_end:
612 	  ostr << "subexpr end\n";
613 	  break;
614 	case _S_token_word_begin:
615 	  ostr << "word begin\n";
616 	  break;
617 	case _S_token_word_end:
618 	  ostr << "word end\n";
619 	  break;
620 	case _S_token_unknown:
621 	  ostr << "-- unknown token --\n";
622 	  break;
623       }
624       return ostr;
625     }
626 #endif
627 
628   // Builds an NFA from an input iterator interval.
629   template<typename _InIter, typename _TraitsT>
630     class _Compiler
631     {
632     public:
633       typedef _InIter                                            _IterT;
634       typedef typename std::iterator_traits<_InIter>::value_type _CharT;
635       typedef std::basic_string<_CharT>                          _StringT;
636       typedef regex_constants::syntax_option_type                _FlagT;
637 
638     public:
639       _Compiler(const _InIter& __b, const _InIter& __e,
640 		_TraitsT& __traits, _FlagT __flags);
641 
642       const _Nfa&
643       _M_nfa() const
644       { return _M_state_store; }
645 
646     private:
647       typedef _Scanner<_InIter>                              _ScannerT;
648       typedef typename _ScannerT::_TokenT                    _TokenT;
649       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
650       typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
651 
652       // accepts a specific token or returns false.
653       bool
654       _M_match_token(_TokenT __token);
655 
656       void
657       _M_disjunction();
658 
659       bool
660       _M_alternative();
661 
662       bool
663       _M_term();
664 
665       bool
666       _M_assertion();
667 
668       bool
669       _M_quantifier();
670 
671       bool
672       _M_atom();
673 
674       bool
675       _M_bracket_expression();
676 
677       bool
678       _M_bracket_list(_RMatcherT& __matcher);
679 
680       bool
681       _M_follow_list(_RMatcherT& __matcher);
682 
683       bool
684       _M_follow_list2(_RMatcherT& __matcher);
685 
686       bool
687       _M_expression_term(_RMatcherT& __matcher);
688 
689       bool
690       _M_range_expression(_RMatcherT& __matcher);
691 
692       bool
693       _M_start_range(_RMatcherT& __matcher);
694 
695       bool
696       _M_collating_symbol(_RMatcherT& __matcher);
697 
698       bool
699       _M_equivalence_class(_RMatcherT& __matcher);
700 
701       bool
702       _M_character_class(_RMatcherT& __matcher);
703 
704       int
705       _M_cur_int_value(int __radix);
706 
707     private:
708       _TraitsT&      _M_traits;
709       _ScannerT      _M_scanner;
710       _StringT       _M_cur_value;
711       _Nfa           _M_state_store;
712       _StackT        _M_stack;
713     };
714 
715   template<typename _InIter, typename _TraitsT>
716     _Compiler<_InIter, _TraitsT>::
717     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
718 	      _Compiler<_InIter, _TraitsT>::_FlagT __flags)
719     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
720       _M_state_store(__flags)
721     {
722       typedef _StartTagger<_InIter, _TraitsT> _Start;
723       typedef _EndTagger<_InIter, _TraitsT> _End;
724 
725       _StateSeq __r(_M_state_store,
726       		    _M_state_store._M_insert_subexpr_begin(_Start(0)));
727       _M_disjunction();
728       if (!_M_stack.empty())
729 	{
730 	  __r._M_append(_M_stack.top());
731 	  _M_stack.pop();
732 	}
733       __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
734       __r._M_append(_M_state_store._M_insert_accept());
735     }
736 
737   template<typename _InIter, typename _TraitsT>
738     bool
739     _Compiler<_InIter, _TraitsT>::
740     _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
741     {
742       if (token == _M_scanner._M_token())
743 	{
744 	  _M_cur_value = _M_scanner._M_value();
745 	  _M_scanner._M_advance();
746 	  return true;
747 	}
748       return false;
749     }
750 
751   template<typename _InIter, typename _TraitsT>
752     void
753     _Compiler<_InIter, _TraitsT>::
754     _M_disjunction()
755     {
756       this->_M_alternative();
757       if (_M_match_token(_ScannerT::_S_token_or))
758 	{
759 	  _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
760 	  this->_M_disjunction();
761 	  _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
762 	  _M_stack.push(_StateSeq(__alt1, __alt2));
763 	}
764     }
765 
766   template<typename _InIter, typename _TraitsT>
767     bool
768     _Compiler<_InIter, _TraitsT>::
769     _M_alternative()
770     {
771       if (this->_M_term())
772 	{
773 	  _StateSeq __re = _M_stack.top(); _M_stack.pop();
774 	  this->_M_alternative();
775 	  if (!_M_stack.empty())
776 	    {
777 	      __re._M_append(_M_stack.top());
778 	      _M_stack.pop();
779 	    }
780 	  _M_stack.push(__re);
781 	  return true;
782 	}
783       return false;
784     }
785 
786   template<typename _InIter, typename _TraitsT>
787     bool
788     _Compiler<_InIter, _TraitsT>::
789     _M_term()
790     {
791       if (this->_M_assertion())
792 	return true;
793       if (this->_M_atom())
794 	{
795 	  this->_M_quantifier();
796 	  return true;
797 	}
798       return false;
799     }
800 
801   template<typename _InIter, typename _TraitsT>
802     bool
803     _Compiler<_InIter, _TraitsT>::
804     _M_assertion()
805     {
806       if (_M_match_token(_ScannerT::_S_token_line_begin))
807 	{
808 	  // __m.push(_Matcher::_S_opcode_line_begin);
809 	  return true;
810 	}
811       if (_M_match_token(_ScannerT::_S_token_line_end))
812 	{
813 	  // __m.push(_Matcher::_S_opcode_line_end);
814 	  return true;
815 	}
816       if (_M_match_token(_ScannerT::_S_token_word_begin))
817 	{
818 	  // __m.push(_Matcher::_S_opcode_word_begin);
819 	  return true;
820 	}
821       if (_M_match_token(_ScannerT::_S_token_word_end))
822 	{
823 	  // __m.push(_Matcher::_S_opcode_word_end);
824 	  return true;
825 	}
826       return false;
827     }
828 
829   template<typename _InIter, typename _TraitsT>
830     bool
831     _Compiler<_InIter, _TraitsT>::
832     _M_quantifier()
833     {
834       if (_M_match_token(_ScannerT::_S_token_closure0))
835 	{
836 	  if (_M_stack.empty())
837 	    __throw_regex_error(regex_constants::error_badrepeat);
838 	  _StateSeq __r(_M_stack.top(), -1);
839 	  __r._M_append(__r._M_front());
840 	  _M_stack.pop();
841 	  _M_stack.push(__r);
842 	  return true;
843 	}
844       if (_M_match_token(_ScannerT::_S_token_closure1))
845 	{
846 	  if (_M_stack.empty())
847 	    __throw_regex_error(regex_constants::error_badrepeat);
848 	  _StateSeq __r(_M_state_store,
849 			_M_state_store.
850 			_M_insert_alt(_S_invalid_state_id,
851 				      _M_stack.top()._M_front()));
852 	  _M_stack.top()._M_append(__r);
853 	  return true;
854 	}
855       if (_M_match_token(_ScannerT::_S_token_opt))
856 	{
857 	  if (_M_stack.empty())
858 	  __throw_regex_error(regex_constants::error_badrepeat);
859 	  _StateSeq __r(_M_stack.top(), -1);
860 	  _M_stack.pop();
861 	  _M_stack.push(__r);
862 	  return true;
863 	}
864       if (_M_match_token(_ScannerT::_S_token_interval_begin))
865 	{
866 	  if (_M_stack.empty())
867 	    __throw_regex_error(regex_constants::error_badrepeat);
868 	  if (!_M_match_token(_ScannerT::_S_token_dup_count))
869 	    __throw_regex_error(regex_constants::error_badbrace);
870 	  _StateSeq __r(_M_stack.top());
871 	  int __min_rep = _M_cur_int_value(10);
872 	  for (int __i = 1; __i < __min_rep; ++__i)
873 	    _M_stack.top()._M_append(__r._M_clone());
874 	  if (_M_match_token(_ScannerT::_S_token_comma))
875 	    if (_M_match_token(_ScannerT::_S_token_dup_count))
876 	      {
877 		int __n = _M_cur_int_value(10) - __min_rep;
878 		if (__n < 0)
879 		  __throw_regex_error(regex_constants::error_badbrace);
880 		for (int __i = 0; __i < __n; ++__i)
881 		  {
882 		    _StateSeq __r(_M_state_store,
883 				  _M_state_store.
884 				  _M_insert_alt(_S_invalid_state_id,
885 						_M_stack.top()._M_front()));
886 		    _M_stack.top()._M_append(__r);
887 		  }
888 	      }
889 	    else
890 	      {
891 		_StateSeq __r(_M_stack.top(), -1);
892 		__r._M_push_back(__r._M_front());
893 		_M_stack.pop();
894 		_M_stack.push(__r);
895 	      }
896 	  if (!_M_match_token(_ScannerT::_S_token_interval_end))
897 	    __throw_regex_error(regex_constants::error_brace);
898 	  return true;
899 	}
900       return false;
901     }
902 
903   template<typename _InIter, typename _TraitsT>
904     bool
905     _Compiler<_InIter, _TraitsT>::
906     _M_atom()
907     {
908       typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
909       typedef _StartTagger<_InIter, _TraitsT> _Start;
910       typedef _EndTagger<_InIter, _TraitsT> _End;
911 
912       if (_M_match_token(_ScannerT::_S_token_anychar))
913 	{
914 	  _M_stack.push(_StateSeq(_M_state_store,
915                                   _M_state_store._M_insert_matcher
916                                   (_AnyMatcher)));
917 	  return true;
918 	}
919       if (_M_match_token(_ScannerT::_S_token_ord_char))
920 	{
921 	  _M_stack.push(_StateSeq(_M_state_store,
922                                   _M_state_store._M_insert_matcher
923                                   (_CMatcher(_M_cur_value[0], _M_traits))));
924 	  return true;
925 	}
926       if (_M_match_token(_ScannerT::_S_token_quoted_char))
927 	{
928 	  // note that in the ECMA grammar, this case covers backrefs.
929 	  _M_stack.push(_StateSeq(_M_state_store,
930 				  _M_state_store._M_insert_matcher
931 				  (_CMatcher(_M_cur_value[0], _M_traits))));
932 	  return true;
933 	}
934       if (_M_match_token(_ScannerT::_S_token_backref))
935 	{
936 	  // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
937 	  return true;
938 	}
939       if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
940 	{
941 	  int __mark = _M_state_store._M_sub_count();
942 	  _StateSeq __r(_M_state_store,
943 			_M_state_store.
944 			_M_insert_subexpr_begin(_Start(__mark)));
945 	  this->_M_disjunction();
946 	  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
947 	    __throw_regex_error(regex_constants::error_paren);
948 	  if (!_M_stack.empty())
949 	    {
950 	      __r._M_append(_M_stack.top());
951 	      _M_stack.pop();
952 	    }
953 	  __r._M_append(_M_state_store._M_insert_subexpr_end
954 			(__mark, _End(__mark)));
955 	  _M_stack.push(__r);
956 	  return true;
957 	}
958       return _M_bracket_expression();
959     }
960 
961   template<typename _InIter, typename _TraitsT>
962     bool
963     _Compiler<_InIter, _TraitsT>::
964     _M_bracket_expression()
965     {
966       if (_M_match_token(_ScannerT::_S_token_bracket_begin))
967 	{
968 	  _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
969 			       _M_traits);
970 	  if (!_M_bracket_list(__matcher)
971 	      || !_M_match_token(_ScannerT::_S_token_bracket_end))
972 	    __throw_regex_error(regex_constants::error_brack);
973 	  _M_stack.push(_StateSeq(_M_state_store,
974 				  _M_state_store._M_insert_matcher(__matcher)));
975 	  return true;
976 	}
977       return false;
978     }
979 
980   // If the dash is the last character in the bracket expression, it is not
981   // special.
982   template<typename _InIter, typename _TraitsT>
983     bool
984     _Compiler<_InIter, _TraitsT>::
985     _M_bracket_list(_RMatcherT& __matcher)
986     {
987       if (_M_follow_list(__matcher))
988 	{
989 	  if (_M_match_token(_ScannerT::_S_token_dash))
990 	    __matcher._M_add_char(_M_cur_value[0]);
991 	  return true;
992 	}
993       return false;
994     }
995 
996   template<typename _InIter, typename _TraitsT>
997     bool
998     _Compiler<_InIter, _TraitsT>::
999     _M_follow_list(_RMatcherT& __matcher)
1000     { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
1001 
1002   template<typename _InIter, typename _TraitsT>
1003     bool
1004     _Compiler<_InIter, _TraitsT>::
1005     _M_follow_list2(_RMatcherT& __matcher)
1006     {
1007       if (_M_expression_term(__matcher))
1008 	return _M_follow_list2(__matcher);
1009       return true;
1010     }
1011 
1012   template<typename _InIter, typename _TraitsT>
1013     bool
1014     _Compiler<_InIter, _TraitsT>::
1015     _M_expression_term(_RMatcherT& __matcher)
1016     {
1017       return (_M_collating_symbol(__matcher)
1018 	      || _M_character_class(__matcher)
1019 	      || _M_equivalence_class(__matcher)
1020 	      || (_M_start_range(__matcher)
1021 		  && _M_range_expression(__matcher)));
1022     }
1023 
1024   template<typename _InIter, typename _TraitsT>
1025     bool
1026     _Compiler<_InIter, _TraitsT>::
1027     _M_range_expression(_RMatcherT& __matcher)
1028     {
1029       if (!_M_collating_symbol(__matcher))
1030 	if (!_M_match_token(_ScannerT::_S_token_dash))
1031 	  __throw_regex_error(regex_constants::error_range);
1032       __matcher._M_make_range();
1033       return true;
1034     }
1035 
1036   template<typename _InIter, typename _TraitsT>
1037     bool
1038     _Compiler<_InIter, _TraitsT>::
1039     _M_start_range(_RMatcherT& __matcher)
1040     { return _M_match_token(_ScannerT::_S_token_dash); }
1041 
1042   template<typename _InIter, typename _TraitsT>
1043     bool
1044     _Compiler<_InIter, _TraitsT>::
1045     _M_collating_symbol(_RMatcherT& __matcher)
1046     {
1047       if (_M_match_token(_ScannerT::_S_token_collelem_single))
1048 	{
1049 	  __matcher._M_add_char(_M_cur_value[0]);
1050 	  return true;
1051 	}
1052       if (_M_match_token(_ScannerT::_S_token_collsymbol))
1053 	{
1054 	  __matcher._M_add_collating_element(_M_cur_value);
1055 	  return true;
1056 	}
1057       return false;
1058     }
1059 
1060   template<typename _InIter, typename _TraitsT>
1061     bool
1062     _Compiler<_InIter, _TraitsT>::
1063     _M_equivalence_class(_RMatcherT& __matcher)
1064     {
1065       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1066 	{
1067 	  __matcher._M_add_equivalence_class(_M_cur_value);
1068 	  return true;
1069 	}
1070       return false;
1071     }
1072 
1073   template<typename _InIter, typename _TraitsT>
1074     bool
1075     _Compiler<_InIter, _TraitsT>::
1076     _M_character_class(_RMatcherT& __matcher)
1077     {
1078       if (_M_match_token(_ScannerT::_S_token_char_class_name))
1079 	{
1080 	  __matcher._M_add_character_class(_M_cur_value);
1081 	  return true;
1082 	}
1083       return false;
1084     }
1085 
1086   template<typename _InIter, typename _TraitsT>
1087     int
1088     _Compiler<_InIter, _TraitsT>::
1089     _M_cur_int_value(int __radix)
1090     {
1091       int __v = 0;
1092       for (typename _StringT::size_type __i = 0;
1093 	   __i < _M_cur_value.length(); ++__i)
1094 	__v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1095       return __v;
1096     }
1097 
1098   template<typename _InIter, typename _TraitsT>
1099     _AutomatonPtr
1100     __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1101 	      regex_constants::syntax_option_type __f)
1102     { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1103                                         __f)._M_nfa())); }
1104 
1105 _GLIBCXX_END_NAMESPACE_VERSION
1106 } // namespace __regex
1107 } // namespace std
1108 
1109 /* vim: set ts=8 sw=2 sts=2: */
1110