1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2010-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex_compiler.h
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
_GLIBCXX_VISIBILITY(default)31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37   /**
38    * @addtogroup regex-detail
39    * @{
40    */
41 
42   template<typename, bool, bool>
43     struct _BracketMatcher;
44 
45   /**
46    * @brief Builds an NFA from an input iterator interval.
47    *
48    * The %_TraitsT type should fulfill requirements [28.3].
49    */
50   template<typename _TraitsT>
51     class _Compiler
52     {
53     public:
54       typedef typename _TraitsT::char_type        _CharT;
55       typedef const _CharT*                       _IterT;
56       typedef _NFA<_TraitsT>              	  _RegexT;
57       typedef regex_constants::syntax_option_type _FlagT;
58 
59       _Compiler(_IterT __b, _IterT __e,
60 		const _TraitsT& __traits, _FlagT __flags);
61 
62       std::shared_ptr<_RegexT>
63       _M_get_nfa()
64       { return make_shared<_RegexT>(std::move(_M_nfa)); }
65 
66     private:
67       typedef _Scanner<_CharT>               _ScannerT;
68       typedef typename _TraitsT::string_type _StringT;
69       typedef typename _ScannerT::_TokenT    _TokenT;
70       typedef _StateSeq<_TraitsT>            _StateSeqT;
71       typedef std::stack<_StateSeqT>         _StackT;
72       typedef std::ctype<_CharT>             _CtypeT;
73 
74       // accepts a specific token or returns false.
75       bool
76       _M_match_token(_TokenT __token);
77 
78       void
79       _M_disjunction();
80 
81       void
82       _M_alternative();
83 
84       bool
85       _M_term();
86 
87       bool
88       _M_assertion();
89 
90       bool
91       _M_quantifier();
92 
93       bool
94       _M_atom();
95 
96       bool
97       _M_bracket_expression();
98 
99       template<bool __icase, bool __collate>
100 	void
101 	_M_insert_any_matcher_ecma();
102 
103       template<bool __icase, bool __collate>
104 	void
105 	_M_insert_any_matcher_posix();
106 
107       template<bool __icase, bool __collate>
108 	void
109 	_M_insert_char_matcher();
110 
111       template<bool __icase, bool __collate>
112 	void
113 	_M_insert_character_class_matcher();
114 
115       template<bool __icase, bool __collate>
116 	void
117 	_M_insert_bracket_matcher(bool __neg);
118 
119       template<bool __icase, bool __collate>
120 	void
121 	_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
122 			   __matcher);
123 
124       int
125       _M_cur_int_value(int __radix);
126 
127       bool
128       _M_try_char();
129 
130       _StateSeqT
131       _M_pop()
132       {
133 	auto ret = _M_stack.top();
134 	_M_stack.pop();
135 	return ret;
136       }
137 
138       _FlagT          _M_flags;
139       const _TraitsT& _M_traits;
140       const _CtypeT&  _M_ctype;
141       _ScannerT       _M_scanner;
142       _RegexT         _M_nfa;
143       _StringT        _M_value;
144       _StackT         _M_stack;
145     };
146 
147   template<typename _TraitsT>
148     inline std::shared_ptr<_NFA<_TraitsT>>
149     __compile_nfa(const typename _TraitsT::char_type* __first,
150 		  const typename _TraitsT::char_type* __last,
151 		  const _TraitsT& __traits,
152 		  regex_constants::syntax_option_type __flags)
153     {
154       using _Cmplr = _Compiler<_TraitsT>;
155       return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa();
156     }
157 
158   // [28.13.14]
159   template<typename _TraitsT, bool __icase, bool __collate>
160     class _RegexTranslator
161     {
162     public:
163       typedef typename _TraitsT::char_type	      _CharT;
164       typedef typename _TraitsT::string_type	      _StringT;
165       typedef typename std::conditional<__collate,
166 					_StringT,
167 					_CharT>::type _StrTransT;
168 
169       explicit
170       _RegexTranslator(const _TraitsT& __traits)
171       : _M_traits(__traits)
172       { }
173 
174       _CharT
175       _M_translate(_CharT __ch) const
176       {
177 	if (__icase)
178 	  return _M_traits.translate_nocase(__ch);
179 	else if (__collate)
180 	  return _M_traits.translate(__ch);
181 	else
182 	  return __ch;
183       }
184 
185       _StrTransT
186       _M_transform(_CharT __ch) const
187       {
188 	return _M_transform_impl(__ch, typename integral_constant<bool,
189 				 __collate>::type());
190       }
191 
192     private:
193       _StrTransT
194       _M_transform_impl(_CharT __ch, false_type) const
195       { return __ch; }
196 
197       _StrTransT
198       _M_transform_impl(_CharT __ch, true_type) const
199       {
200 	_StrTransT __str = _StrTransT(1, _M_translate(__ch));
201 	return _M_traits.transform(__str.begin(), __str.end());
202       }
203 
204       const _TraitsT& _M_traits;
205     };
206 
207   template<typename _TraitsT>
208     class _RegexTranslator<_TraitsT, false, false>
209     {
210     public:
211       typedef typename _TraitsT::char_type _CharT;
212       typedef _CharT                       _StrTransT;
213 
214       explicit
215       _RegexTranslator(const _TraitsT& __traits)
216       { }
217 
218       _CharT
219       _M_translate(_CharT __ch) const
220       { return __ch; }
221 
222       _StrTransT
223       _M_transform(_CharT __ch) const
224       { return __ch; }
225     };
226 
227   template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
228     struct _AnyMatcher;
229 
230   template<typename _TraitsT, bool __icase, bool __collate>
231     struct _AnyMatcher<_TraitsT, false, __icase, __collate>
232     {
233       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
234       typedef typename _TransT::_CharT                       _CharT;
235 
236       explicit
237       _AnyMatcher(const _TraitsT& __traits)
238       : _M_translator(__traits)
239       { }
240 
241       bool
242       operator()(_CharT __ch) const
243       {
244 	static auto __nul = _M_translator._M_translate('\0');
245 	return _M_translator._M_translate(__ch) != __nul;
246       }
247 
248       _TransT _M_translator;
249     };
250 
251   template<typename _TraitsT, bool __icase, bool __collate>
252     struct _AnyMatcher<_TraitsT, true, __icase, __collate>
253     {
254       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
255       typedef typename _TransT::_CharT                       _CharT;
256 
257       explicit
258       _AnyMatcher(const _TraitsT& __traits)
259       : _M_translator(__traits)
260       { }
261 
262       bool
263       operator()(_CharT __ch) const
264       { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
265 
266       bool
267       _M_apply(_CharT __ch, true_type) const
268       {
269 	auto __c = _M_translator._M_translate(__ch);
270 	auto __n = _M_translator._M_translate('\n');
271 	auto __r = _M_translator._M_translate('\r');
272 	return __c != __n && __c != __r;
273       }
274 
275       bool
276       _M_apply(_CharT __ch, false_type) const
277       {
278 	auto __c = _M_translator._M_translate(__ch);
279 	auto __n = _M_translator._M_translate('\n');
280 	auto __r = _M_translator._M_translate('\r');
281 	auto __u2028 = _M_translator._M_translate(u'\u2028');
282 	auto __u2029 = _M_translator._M_translate(u'\u2029');
283 	return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
284       }
285 
286       _TransT _M_translator;
287     };
288 
289   template<typename _TraitsT, bool __icase, bool __collate>
290     struct _CharMatcher
291     {
292       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
293       typedef typename _TransT::_CharT                       _CharT;
294 
295       _CharMatcher(_CharT __ch, const _TraitsT& __traits)
296       : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
297       { }
298 
299       bool
300       operator()(_CharT __ch) const
301       { return _M_ch == _M_translator._M_translate(__ch); }
302 
303       _TransT _M_translator;
304       _CharT  _M_ch;
305     };
306 
307   /// Matches a character range (bracket expression)
308   template<typename _TraitsT, bool __icase, bool __collate>
309     struct _BracketMatcher
310     {
311     public:
312       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
313       typedef typename _TransT::_CharT                       _CharT;
314       typedef typename _TransT::_StrTransT                   _StrTransT;
315       typedef typename _TraitsT::string_type                 _StringT;
316       typedef typename _TraitsT::char_class_type             _CharClassT;
317 
318     public:
319       _BracketMatcher(bool __is_non_matching,
320 		      const _TraitsT& __traits)
321       : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
322       _M_is_non_matching(__is_non_matching)
323 #ifdef _GLIBCXX_DEBUG
324       , _M_is_ready(false)
325 #endif
326       { }
327 
328       bool
329       operator()(_CharT __ch) const
330       {
331 	_GLIBCXX_DEBUG_ASSERT(_M_is_ready);
332 	return _M_apply(__ch, _IsChar());
333       }
334 
335       void
336       _M_add_char(_CharT __c)
337       {
338 	_M_char_set.push_back(_M_translator._M_translate(__c));
339 #ifdef _GLIBCXX_DEBUG
340 	_M_is_ready = false;
341 #endif
342       }
343 
344       void
345       _M_add_collating_element(const _StringT& __s)
346       {
347 	auto __st = _M_traits.lookup_collatename(__s.data(),
348 						 __s.data() + __s.size());
349 	if (__st.empty())
350 	  __throw_regex_error(regex_constants::error_collate);
351 	_M_char_set.push_back(_M_translator._M_translate(__st[0]));
352 #ifdef _GLIBCXX_DEBUG
353 	_M_is_ready = false;
354 #endif
355       }
356 
357       void
358       _M_add_equivalence_class(const _StringT& __s)
359       {
360 	auto __st = _M_traits.lookup_collatename(__s.data(),
361 						 __s.data() + __s.size());
362 	if (__st.empty())
363 	  __throw_regex_error(regex_constants::error_collate);
364 	__st = _M_traits.transform_primary(__st.data(),
365 					   __st.data() + __st.size());
366 	_M_equiv_set.push_back(__st);
367 #ifdef _GLIBCXX_DEBUG
368 	_M_is_ready = false;
369 #endif
370       }
371 
372       // __neg should be true for \D, \S and \W only.
373       void
374       _M_add_character_class(const _StringT& __s, bool __neg)
375       {
376 	auto __mask = _M_traits.lookup_classname(__s.data(),
377 						 __s.data() + __s.size(),
378 						 __icase);
379 	if (__mask == 0)
380 	  __throw_regex_error(regex_constants::error_ctype);
381 	if (!__neg)
382 	  _M_class_set |= __mask;
383 	else
384 	  _M_neg_class_set.push_back(__mask);
385 #ifdef _GLIBCXX_DEBUG
386 	_M_is_ready = false;
387 #endif
388       }
389 
390       void
391       _M_make_range(_CharT __l, _CharT __r)
392       {
393 	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
394 					 _M_translator._M_transform(__r)));
395 #ifdef _GLIBCXX_DEBUG
396 	_M_is_ready = false;
397 #endif
398       }
399 
400       void
401       _M_ready()
402       {
403 	_M_make_cache(_IsChar());
404 #ifdef _GLIBCXX_DEBUG
405 	_M_is_ready = true;
406 #endif
407       }
408 
409     private:
410       typedef typename is_same<_CharT, char>::type _IsChar;
411       struct _Dummy { };
412       typedef typename conditional<_IsChar::value,
413 				   std::bitset<1 << (8 * sizeof(_CharT))>,
414 				   _Dummy>::type _CacheT;
415       typedef typename make_unsigned<_CharT>::type _UnsignedCharT;
416 
417     private:
418       bool
419       _M_apply(_CharT __ch, false_type) const;
420 
421       bool
422       _M_apply(_CharT __ch, true_type) const
423       { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
424 
425       void
426       _M_make_cache(true_type)
427       {
428 	for (int __i = 0; __i < _M_cache.size(); __i++)
429 	  _M_cache[static_cast<_UnsignedCharT>(__i)] =
430 	    _M_apply(__i, false_type());
431       }
432 
433       void
434       _M_make_cache(false_type)
435       { }
436 
437     private:
438       _CacheT                                   _M_cache;
439       std::vector<_CharT>                       _M_char_set;
440       std::vector<_StringT>                     _M_equiv_set;
441       std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
442       std::vector<_CharClassT>                  _M_neg_class_set;
443       _CharClassT                               _M_class_set;
444       _TransT                                   _M_translator;
445       const _TraitsT&                           _M_traits;
446       bool                                      _M_is_non_matching;
447 #ifdef _GLIBCXX_DEBUG
448       bool                                      _M_is_ready;
449 #endif
450     };
451 
452  //@} regex-detail
453 _GLIBCXX_END_NAMESPACE_VERSION
454 } // namespace __detail
455 } // namespace std
456 
457 #include <bits/regex_compiler.tcc>
458