1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex_executor.h
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 // FIXME convert comments to doxygen format.
32 
33 namespace std _GLIBCXX_VISIBILITY(default)
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37 namespace __detail
38 {
39   /**
40    * @addtogroup regex-detail
41    * @{
42    */
43 
44   /**
45    * @brief Takes a regex and an input string and does the matching.
46    *
47    * The %_Executor class has two modes: DFS mode and BFS mode, controlled
48    * by the template parameter %__dfs_mode.
49    */
50   template<typename _BiIter, typename _Alloc, typename _TraitsT,
51 	   bool __dfs_mode>
52     class _Executor
53     {
54       using __search_mode = integral_constant<bool, __dfs_mode>;
55       using __dfs = true_type;
56       using __bfs = false_type;
57 
58       enum class _Match_mode : unsigned char { _Exact, _Prefix };
59 
60     public:
61       typedef typename iterator_traits<_BiIter>::value_type _CharT;
62       typedef basic_regex<_CharT, _TraitsT>                 _RegexT;
63       typedef std::vector<sub_match<_BiIter>, _Alloc>       _ResultsVec;
64       typedef regex_constants::match_flag_type              _FlagT;
65       typedef typename _TraitsT::char_class_type            _ClassT;
66       typedef _NFA<_TraitsT>                                _NFAT;
67 
68     public:
69       _Executor(_BiIter         __begin,
70 		_BiIter         __end,
71 		_ResultsVec&    __results,
72 		const _RegexT&  __re,
73 		_FlagT          __flags)
74       : _M_begin(__begin),
75       _M_end(__end),
76       _M_re(__re),
77       _M_nfa(*__re._M_automaton),
78       _M_results(__results),
79       _M_rep_count(_M_nfa.size()),
80       _M_states(_M_nfa._M_start(), _M_nfa.size()),
81       _M_flags((__flags & regex_constants::match_prev_avail)
82 	       ? (__flags
83 		  & ~regex_constants::match_not_bol
84 		  & ~regex_constants::match_not_bow)
85 	       : __flags)
86       { }
87 
88       // Set matched when string exactly matches the pattern.
89       bool
90       _M_match()
91       {
92 	_M_current = _M_begin;
93 	return _M_main(_Match_mode::_Exact);
94       }
95 
96       // Set matched when some prefix of the string matches the pattern.
97       bool
98       _M_search_from_first()
99       {
100 	_M_current = _M_begin;
101 	return _M_main(_Match_mode::_Prefix);
102       }
103 
104       bool
105       _M_search();
106 
107     private:
108       void
109       _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
110 
111       void
112       _M_handle_repeat(_Match_mode, _StateIdT);
113 
114       void
115       _M_handle_subexpr_begin(_Match_mode, _StateIdT);
116 
117       void
118       _M_handle_subexpr_end(_Match_mode, _StateIdT);
119 
120       void
121       _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
122 
123       void
124       _M_handle_line_end_assertion(_Match_mode, _StateIdT);
125 
126       void
127       _M_handle_word_boundary(_Match_mode, _StateIdT);
128 
129       void
130       _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
131 
132       void
133       _M_handle_match(_Match_mode, _StateIdT);
134 
135       void
136       _M_handle_backref(_Match_mode, _StateIdT);
137 
138       void
139       _M_handle_accept(_Match_mode, _StateIdT);
140 
141       void
142       _M_handle_alternative(_Match_mode, _StateIdT);
143 
144       void
145       _M_dfs(_Match_mode __match_mode, _StateIdT __start);
146 
147       bool
148       _M_main(_Match_mode __match_mode)
149       { return _M_main_dispatch(__match_mode, __search_mode{}); }
150 
151       bool
152       _M_main_dispatch(_Match_mode __match_mode, __dfs);
153 
154       bool
155       _M_main_dispatch(_Match_mode __match_mode, __bfs);
156 
157       bool
158       _M_is_word(_CharT __ch) const
159       {
160 	static const _CharT __s[2] = { 'w' };
161 	return _M_re._M_automaton->_M_traits.isctype
162 	  (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
163       }
164 
165       bool
166       _M_at_begin() const
167       {
168 	return _M_current == _M_begin
169 	  && !(_M_flags & (regex_constants::match_not_bol
170 			   | regex_constants::match_prev_avail));
171       }
172 
173       bool
174       _M_at_end() const
175       {
176 	return _M_current == _M_end
177 	  && !(_M_flags & regex_constants::match_not_eol);
178       }
179 
180       bool
181       _M_word_boundary() const;
182 
183       bool
184       _M_lookahead(_StateIdT __next);
185 
186        // Holds additional information used in BFS-mode.
187       template<typename _SearchMode, typename _ResultsVec>
188 	struct _State_info;
189 
190       template<typename _ResultsVec>
191 	struct _State_info<__bfs, _ResultsVec>
192 	{
193 	  explicit
194 	  _State_info(_StateIdT __start, size_t __n)
195 	  : _M_visited_states(new bool[__n]()), _M_start(__start)
196 	  { }
197 
198 	  bool _M_visited(_StateIdT __i)
199 	  {
200 	    if (_M_visited_states[__i])
201 	      return true;
202 	    _M_visited_states[__i] = true;
203 	    return false;
204 	  }
205 
206 	  void _M_queue(_StateIdT __i, const _ResultsVec& __res)
207 	  { _M_match_queue.emplace_back(__i, __res); }
208 
209 	  // Dummy implementations for BFS mode.
210 	  _BiIter* _M_get_sol_pos() { return nullptr; }
211 
212 	  // Saves states that need to be considered for the next character.
213 	  vector<pair<_StateIdT, _ResultsVec>>	_M_match_queue;
214 	  // Indicates which states are already visited.
215 	  unique_ptr<bool[]>			_M_visited_states;
216 	  // To record current solution.
217 	  _StateIdT _M_start;
218 	};
219 
220       template<typename _ResultsVec>
221 	struct _State_info<__dfs, _ResultsVec>
222 	{
223 	  explicit
224 	  _State_info(_StateIdT __start, size_t) : _M_start(__start)
225 	  { }
226 
227 	  // Dummy implementations for DFS mode.
228 	  bool _M_visited(_StateIdT) const { return false; }
229 	  void _M_queue(_StateIdT, const _ResultsVec&) { }
230 
231 	  _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
232 
233 	  // To record current solution.
234 	  _StateIdT _M_start;
235 	  _BiIter   _M_sol_pos;
236 	};
237 
238     public:
239       _ResultsVec                                           _M_cur_results;
240       _BiIter                                               _M_current;
241       _BiIter                                               _M_begin;
242       const _BiIter                                         _M_end;
243       const _RegexT&                                        _M_re;
244       const _NFAT&                                          _M_nfa;
245       _ResultsVec&                                          _M_results;
246       vector<pair<_BiIter, int>>                            _M_rep_count;
247       _State_info<__search_mode, _ResultsVec>		    _M_states;
248       _FlagT                                                _M_flags;
249       // Do we have a solution so far?
250       bool                                                  _M_has_sol;
251     };
252 
253  //@} regex-detail
254 } // namespace __detail
255 _GLIBCXX_END_NAMESPACE_VERSION
256 } // namespace std
257 
258 #include <bits/regex_executor.tcc>
259