1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex_automaton.h
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 // This macro defines the maximal state number a NFA can have.
32 #ifndef _GLIBCXX_REGEX_STATE_LIMIT
33 #define _GLIBCXX_REGEX_STATE_LIMIT 100000
34 #endif
35 
_GLIBCXX_VISIBILITY(default)36 namespace std _GLIBCXX_VISIBILITY(default)
37 {
38 namespace __detail
39 {
40 _GLIBCXX_BEGIN_NAMESPACE_VERSION
41 
42   /**
43    *  @defgroup regex-detail Base and Implementation Classes
44    *  @ingroup regex
45    *  @{
46    */
47 
48   typedef long _StateIdT;
49   static const _StateIdT _S_invalid_state_id  = -1;
50 
51   template<typename _CharT>
52     using _Matcher = std::function<bool (_CharT)>;
53 
54   /// Operation codes that define the type of transitions within the base NFA
55   /// that represents the regular expression.
56   enum _Opcode : int
57   {
58       _S_opcode_unknown,
59       _S_opcode_alternative,
60       _S_opcode_repeat,
61       _S_opcode_backref,
62       _S_opcode_line_begin_assertion,
63       _S_opcode_line_end_assertion,
64       _S_opcode_word_boundary,
65       _S_opcode_subexpr_lookahead,
66       _S_opcode_subexpr_begin,
67       _S_opcode_subexpr_end,
68       _S_opcode_dummy,
69       _S_opcode_match,
70       _S_opcode_accept,
71   };
72 
73   struct _State_base
74   {
75     _Opcode      _M_opcode;           // type of outgoing transition
76     _StateIdT    _M_next;             // outgoing transition
77     union // Since they are mutually exclusive.
78     {
79       size_t _M_subexpr;        // for _S_opcode_subexpr_*
80       size_t _M_backref_index;  // for _S_opcode_backref
81       struct
82       {
83 	// for _S_opcode_alternative, _S_opcode_repeat and
84 	// _S_opcode_subexpr_lookahead
85 	_StateIdT  _M_alt;
86 	// for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
87 	// quantifiers (ungreedy if set true)
88 	bool       _M_neg;
89       };
90     };
91 
92     explicit _State_base(_Opcode __opcode)
93     : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
94     { }
95 
96   protected:
97     ~_State_base() = default;
98 
99   public:
100 #ifdef _GLIBCXX_DEBUG
101     std::ostream&
102     _M_print(std::ostream& ostr) const;
103 
104     // Prints graphviz dot commands for state.
105     std::ostream&
106     _M_dot(std::ostream& __ostr, _StateIdT __id) const;
107 #endif
108   };
109 
110   template<typename _TraitsT>
111     struct _State : _State_base
112     {
113       typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
114 
115       _MatcherT      _M_matches;        // for _S_opcode_match
116 
117       explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
118     };
119 
120   struct _NFA_base
121   {
122     typedef size_t                              _SizeT;
123     typedef regex_constants::syntax_option_type _FlagT;
124 
125     explicit
126     _NFA_base(_FlagT __f)
127     : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
128     _M_has_backref(false)
129     { }
130 
131     _NFA_base(_NFA_base&&) = default;
132 
133   protected:
134     ~_NFA_base() = default;
135 
136   public:
137     _FlagT
138     _M_options() const
139     { return _M_flags; }
140 
141     _StateIdT
142     _M_start() const
143     { return _M_start_state; }
144 
145     _SizeT
146     _M_sub_count() const
147     { return _M_subexpr_count; }
148 
149     std::vector<size_t>       _M_paren_stack;
150     _FlagT                    _M_flags;
151     _StateIdT                 _M_start_state;
152     _SizeT                    _M_subexpr_count;
153     bool                      _M_has_backref;
154   };
155 
156   template<typename _TraitsT>
157     struct _NFA
158     : _NFA_base, std::vector<_State<_TraitsT>>
159     {
160       typedef _State<_TraitsT>				_StateT;
161       typedef _Matcher<typename _TraitsT::char_type>	_MatcherT;
162 
163       _NFA(const typename _TraitsT::locale_type& __loc, _FlagT __flags)
164       : _NFA_base(__flags)
165       { _M_traits.imbue(__loc); }
166 
167       // for performance reasons _NFA objects should only be moved not copied
168       _NFA(const _NFA&) = delete;
169       _NFA(_NFA&&) = default;
170 
171       _StateIdT
172       _M_insert_accept()
173       {
174 	auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
175 	return __ret;
176       }
177 
178       _StateIdT
179       _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
180       {
181 	_StateT __tmp(_S_opcode_alternative);
182 	// It labels every quantifier to make greedy comparison easier in BFS
183 	// approach.
184 	__tmp._M_next = __next;
185 	__tmp._M_alt = __alt;
186 	return _M_insert_state(std::move(__tmp));
187       }
188 
189       _StateIdT
190       _M_insert_repeat(_StateIdT __next, _StateIdT __alt, bool __neg)
191       {
192 	_StateT __tmp(_S_opcode_repeat);
193 	// It labels every quantifier to make greedy comparison easier in BFS
194 	// approach.
195 	__tmp._M_next = __next;
196 	__tmp._M_alt = __alt;
197 	__tmp._M_neg = __neg;
198 	return _M_insert_state(std::move(__tmp));
199       }
200 
201       _StateIdT
202       _M_insert_matcher(_MatcherT __m)
203       {
204 	_StateT __tmp(_S_opcode_match);
205 	__tmp._M_matches = std::move(__m);
206 	return _M_insert_state(std::move(__tmp));
207       }
208 
209       _StateIdT
210       _M_insert_subexpr_begin()
211       {
212 	auto __id = this->_M_subexpr_count++;
213 	this->_M_paren_stack.push_back(__id);
214 	_StateT __tmp(_S_opcode_subexpr_begin);
215 	__tmp._M_subexpr = __id;
216 	return _M_insert_state(std::move(__tmp));
217       }
218 
219       _StateIdT
220       _M_insert_subexpr_end()
221       {
222 	_StateT __tmp(_S_opcode_subexpr_end);
223 	__tmp._M_subexpr = this->_M_paren_stack.back();
224 	this->_M_paren_stack.pop_back();
225 	return _M_insert_state(std::move(__tmp));
226       }
227 
228       _StateIdT
229       _M_insert_backref(size_t __index);
230 
231       _StateIdT
232       _M_insert_line_begin()
233       { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
234 
235       _StateIdT
236       _M_insert_line_end()
237       { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
238 
239       _StateIdT
240       _M_insert_word_bound(bool __neg)
241       {
242 	_StateT __tmp(_S_opcode_word_boundary);
243 	__tmp._M_neg = __neg;
244 	return _M_insert_state(std::move(__tmp));
245       }
246 
247       _StateIdT
248       _M_insert_lookahead(_StateIdT __alt, bool __neg)
249       {
250 	_StateT __tmp(_S_opcode_subexpr_lookahead);
251 	__tmp._M_alt = __alt;
252 	__tmp._M_neg = __neg;
253 	return _M_insert_state(std::move(__tmp));
254       }
255 
256       _StateIdT
257       _M_insert_dummy()
258       { return _M_insert_state(_StateT(_S_opcode_dummy)); }
259 
260       _StateIdT
261       _M_insert_state(_StateT __s)
262       {
263 	this->push_back(std::move(__s));
264 	if (this->size() > _GLIBCXX_REGEX_STATE_LIMIT)
265 	  __throw_regex_error(regex_constants::error_space);
266 	return this->size()-1;
267       }
268 
269       // Eliminate dummy node in this NFA to make it compact.
270       void
271       _M_eliminate_dummy();
272 
273 #ifdef _GLIBCXX_DEBUG
274       std::ostream&
275       _M_dot(std::ostream& __ostr) const;
276 #endif
277     public:
278       _TraitsT                  _M_traits;
279     };
280 
281   /// Describes a sequence of one or more %_State, its current start
282   /// and end(s).  This structure contains fragments of an NFA during
283   /// construction.
284   template<typename _TraitsT>
285     class _StateSeq
286     {
287     public:
288       typedef _NFA<_TraitsT> _RegexT;
289 
290     public:
291       _StateSeq(_RegexT& __nfa, _StateIdT __s)
292       : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
293       { }
294 
295       _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
296       : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
297       { }
298 
299       // Append a state on *this and change *this to the new sequence.
300       void
301       _M_append(_StateIdT __id)
302       {
303 	_M_nfa[_M_end]._M_next = __id;
304 	_M_end = __id;
305       }
306 
307       // Append a sequence on *this and change *this to the new sequence.
308       void
309       _M_append(const _StateSeq& __s)
310       {
311 	_M_nfa[_M_end]._M_next = __s._M_start;
312 	_M_end = __s._M_end;
313       }
314 
315       // Clones an entire sequence.
316       _StateSeq
317       _M_clone();
318 
319     public:
320       _RegexT&  _M_nfa;
321       _StateIdT _M_start;
322       _StateIdT _M_end;
323     };
324 
325  //@} regex-detail
326 _GLIBCXX_END_NAMESPACE_VERSION
327 } // namespace __detail
328 } // namespace std
329 
330 #include <bits/regex_automaton.tcc>
331