1*38fd1498Szrj // class template regex -*- C++ -*-
2*38fd1498Szrj 
3*38fd1498Szrj // Copyright (C) 2013-2018 Free Software Foundation, Inc.
4*38fd1498Szrj //
5*38fd1498Szrj // This file is part of the GNU ISO C++ Library.  This library is free
6*38fd1498Szrj // software; you can redistribute it and/or modify it under the
7*38fd1498Szrj // terms of the GNU General Public License as published by the
8*38fd1498Szrj // Free Software Foundation; either version 3, or (at your option)
9*38fd1498Szrj // any later version.
10*38fd1498Szrj 
11*38fd1498Szrj // This library is distributed in the hope that it will be useful,
12*38fd1498Szrj // but WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14*38fd1498Szrj // GNU General Public License for more details.
15*38fd1498Szrj 
16*38fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional
17*38fd1498Szrj // permissions described in the GCC Runtime Library Exception, version
18*38fd1498Szrj // 3.1, as published by the Free Software Foundation.
19*38fd1498Szrj 
20*38fd1498Szrj // You should have received a copy of the GNU General Public License and
21*38fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program;
22*38fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23*38fd1498Szrj // <http://www.gnu.org/licenses/>.
24*38fd1498Szrj 
25*38fd1498Szrj /**
26*38fd1498Szrj  *  @file bits/regex_executor.h
27*38fd1498Szrj  *  This is an internal header file, included by other library headers.
28*38fd1498Szrj  *  Do not attempt to use it directly. @headername{regex}
29*38fd1498Szrj  */
30*38fd1498Szrj 
31*38fd1498Szrj // FIXME convert comments to doxygen format.
32*38fd1498Szrj 
_GLIBCXX_VISIBILITY(default)33*38fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default)
34*38fd1498Szrj {
35*38fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION
36*38fd1498Szrj 
37*38fd1498Szrj namespace __detail
38*38fd1498Szrj {
39*38fd1498Szrj   /**
40*38fd1498Szrj    * @addtogroup regex-detail
41*38fd1498Szrj    * @{
42*38fd1498Szrj    */
43*38fd1498Szrj 
44*38fd1498Szrj   /**
45*38fd1498Szrj    * @brief Takes a regex and an input string and does the matching.
46*38fd1498Szrj    *
47*38fd1498Szrj    * The %_Executor class has two modes: DFS mode and BFS mode, controlled
48*38fd1498Szrj    * by the template parameter %__dfs_mode.
49*38fd1498Szrj    */
50*38fd1498Szrj   template<typename _BiIter, typename _Alloc, typename _TraitsT,
51*38fd1498Szrj 	   bool __dfs_mode>
52*38fd1498Szrj     class _Executor
53*38fd1498Szrj     {
54*38fd1498Szrj       using __search_mode = integral_constant<bool, __dfs_mode>;
55*38fd1498Szrj       using __dfs = true_type;
56*38fd1498Szrj       using __bfs = false_type;
57*38fd1498Szrj 
58*38fd1498Szrj       enum class _Match_mode : unsigned char { _Exact, _Prefix };
59*38fd1498Szrj 
60*38fd1498Szrj     public:
61*38fd1498Szrj       typedef typename iterator_traits<_BiIter>::value_type _CharT;
62*38fd1498Szrj       typedef basic_regex<_CharT, _TraitsT>                 _RegexT;
63*38fd1498Szrj       typedef std::vector<sub_match<_BiIter>, _Alloc>       _ResultsVec;
64*38fd1498Szrj       typedef regex_constants::match_flag_type              _FlagT;
65*38fd1498Szrj       typedef typename _TraitsT::char_class_type            _ClassT;
66*38fd1498Szrj       typedef _NFA<_TraitsT>                                _NFAT;
67*38fd1498Szrj 
68*38fd1498Szrj     public:
69*38fd1498Szrj       _Executor(_BiIter         __begin,
70*38fd1498Szrj 		_BiIter         __end,
71*38fd1498Szrj 		_ResultsVec&    __results,
72*38fd1498Szrj 		const _RegexT&  __re,
73*38fd1498Szrj 		_FlagT          __flags)
74*38fd1498Szrj       : _M_begin(__begin),
75*38fd1498Szrj       _M_end(__end),
76*38fd1498Szrj       _M_re(__re),
77*38fd1498Szrj       _M_nfa(*__re._M_automaton),
78*38fd1498Szrj       _M_results(__results),
79*38fd1498Szrj       _M_rep_count(_M_nfa.size()),
80*38fd1498Szrj       _M_states(_M_nfa._M_start(), _M_nfa.size()),
81*38fd1498Szrj       _M_flags((__flags & regex_constants::match_prev_avail)
82*38fd1498Szrj 	       ? (__flags
83*38fd1498Szrj 		  & ~regex_constants::match_not_bol
84*38fd1498Szrj 		  & ~regex_constants::match_not_bow)
85*38fd1498Szrj 	       : __flags)
86*38fd1498Szrj       { }
87*38fd1498Szrj 
88*38fd1498Szrj       // Set matched when string exactly matches the pattern.
89*38fd1498Szrj       bool
90*38fd1498Szrj       _M_match()
91*38fd1498Szrj       {
92*38fd1498Szrj 	_M_current = _M_begin;
93*38fd1498Szrj 	return _M_main(_Match_mode::_Exact);
94*38fd1498Szrj       }
95*38fd1498Szrj 
96*38fd1498Szrj       // Set matched when some prefix of the string matches the pattern.
97*38fd1498Szrj       bool
98*38fd1498Szrj       _M_search_from_first()
99*38fd1498Szrj       {
100*38fd1498Szrj 	_M_current = _M_begin;
101*38fd1498Szrj 	return _M_main(_Match_mode::_Prefix);
102*38fd1498Szrj       }
103*38fd1498Szrj 
104*38fd1498Szrj       bool
105*38fd1498Szrj       _M_search();
106*38fd1498Szrj 
107*38fd1498Szrj     private:
108*38fd1498Szrj       void
109*38fd1498Szrj       _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
110*38fd1498Szrj 
111*38fd1498Szrj       void
112*38fd1498Szrj       _M_handle_repeat(_Match_mode, _StateIdT);
113*38fd1498Szrj 
114*38fd1498Szrj       void
115*38fd1498Szrj       _M_handle_subexpr_begin(_Match_mode, _StateIdT);
116*38fd1498Szrj 
117*38fd1498Szrj       void
118*38fd1498Szrj       _M_handle_subexpr_end(_Match_mode, _StateIdT);
119*38fd1498Szrj 
120*38fd1498Szrj       void
121*38fd1498Szrj       _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
122*38fd1498Szrj 
123*38fd1498Szrj       void
124*38fd1498Szrj       _M_handle_line_end_assertion(_Match_mode, _StateIdT);
125*38fd1498Szrj 
126*38fd1498Szrj       void
127*38fd1498Szrj       _M_handle_word_boundary(_Match_mode, _StateIdT);
128*38fd1498Szrj 
129*38fd1498Szrj       void
130*38fd1498Szrj       _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
131*38fd1498Szrj 
132*38fd1498Szrj       void
133*38fd1498Szrj       _M_handle_match(_Match_mode, _StateIdT);
134*38fd1498Szrj 
135*38fd1498Szrj       void
136*38fd1498Szrj       _M_handle_backref(_Match_mode, _StateIdT);
137*38fd1498Szrj 
138*38fd1498Szrj       void
139*38fd1498Szrj       _M_handle_accept(_Match_mode, _StateIdT);
140*38fd1498Szrj 
141*38fd1498Szrj       void
142*38fd1498Szrj       _M_handle_alternative(_Match_mode, _StateIdT);
143*38fd1498Szrj 
144*38fd1498Szrj       void
145*38fd1498Szrj       _M_dfs(_Match_mode __match_mode, _StateIdT __start);
146*38fd1498Szrj 
147*38fd1498Szrj       bool
148*38fd1498Szrj       _M_main(_Match_mode __match_mode)
149*38fd1498Szrj       { return _M_main_dispatch(__match_mode, __search_mode{}); }
150*38fd1498Szrj 
151*38fd1498Szrj       bool
152*38fd1498Szrj       _M_main_dispatch(_Match_mode __match_mode, __dfs);
153*38fd1498Szrj 
154*38fd1498Szrj       bool
155*38fd1498Szrj       _M_main_dispatch(_Match_mode __match_mode, __bfs);
156*38fd1498Szrj 
157*38fd1498Szrj       bool
158*38fd1498Szrj       _M_is_word(_CharT __ch) const
159*38fd1498Szrj       {
160*38fd1498Szrj 	static const _CharT __s[2] = { 'w' };
161*38fd1498Szrj 	return _M_re._M_automaton->_M_traits.isctype
162*38fd1498Szrj 	  (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
163*38fd1498Szrj       }
164*38fd1498Szrj 
165*38fd1498Szrj       bool
166*38fd1498Szrj       _M_at_begin() const
167*38fd1498Szrj       {
168*38fd1498Szrj 	return _M_current == _M_begin
169*38fd1498Szrj 	  && !(_M_flags & (regex_constants::match_not_bol
170*38fd1498Szrj 			   | regex_constants::match_prev_avail));
171*38fd1498Szrj       }
172*38fd1498Szrj 
173*38fd1498Szrj       bool
174*38fd1498Szrj       _M_at_end() const
175*38fd1498Szrj       {
176*38fd1498Szrj 	return _M_current == _M_end
177*38fd1498Szrj 	  && !(_M_flags & regex_constants::match_not_eol);
178*38fd1498Szrj       }
179*38fd1498Szrj 
180*38fd1498Szrj       bool
181*38fd1498Szrj       _M_word_boundary() const;
182*38fd1498Szrj 
183*38fd1498Szrj       bool
184*38fd1498Szrj       _M_lookahead(_StateIdT __next);
185*38fd1498Szrj 
186*38fd1498Szrj        // Holds additional information used in BFS-mode.
187*38fd1498Szrj       template<typename _SearchMode, typename _ResultsVec>
188*38fd1498Szrj 	struct _State_info;
189*38fd1498Szrj 
190*38fd1498Szrj       template<typename _ResultsVec>
191*38fd1498Szrj 	struct _State_info<__bfs, _ResultsVec>
192*38fd1498Szrj 	{
193*38fd1498Szrj 	  explicit
194*38fd1498Szrj 	  _State_info(_StateIdT __start, size_t __n)
195*38fd1498Szrj 	  : _M_visited_states(new bool[__n]()), _M_start(__start)
196*38fd1498Szrj 	  { }
197*38fd1498Szrj 
198*38fd1498Szrj 	  bool _M_visited(_StateIdT __i)
199*38fd1498Szrj 	  {
200*38fd1498Szrj 	    if (_M_visited_states[__i])
201*38fd1498Szrj 	      return true;
202*38fd1498Szrj 	    _M_visited_states[__i] = true;
203*38fd1498Szrj 	    return false;
204*38fd1498Szrj 	  }
205*38fd1498Szrj 
206*38fd1498Szrj 	  void _M_queue(_StateIdT __i, const _ResultsVec& __res)
207*38fd1498Szrj 	  { _M_match_queue.emplace_back(__i, __res); }
208*38fd1498Szrj 
209*38fd1498Szrj 	  // Dummy implementations for BFS mode.
210*38fd1498Szrj 	  _BiIter* _M_get_sol_pos() { return nullptr; }
211*38fd1498Szrj 
212*38fd1498Szrj 	  // Saves states that need to be considered for the next character.
213*38fd1498Szrj 	  vector<pair<_StateIdT, _ResultsVec>>	_M_match_queue;
214*38fd1498Szrj 	  // Indicates which states are already visited.
215*38fd1498Szrj 	  unique_ptr<bool[]>			_M_visited_states;
216*38fd1498Szrj 	  // To record current solution.
217*38fd1498Szrj 	  _StateIdT _M_start;
218*38fd1498Szrj 	};
219*38fd1498Szrj 
220*38fd1498Szrj       template<typename _ResultsVec>
221*38fd1498Szrj 	struct _State_info<__dfs, _ResultsVec>
222*38fd1498Szrj 	{
223*38fd1498Szrj 	  explicit
224*38fd1498Szrj 	  _State_info(_StateIdT __start, size_t) : _M_start(__start)
225*38fd1498Szrj 	  { }
226*38fd1498Szrj 
227*38fd1498Szrj 	  // Dummy implementations for DFS mode.
228*38fd1498Szrj 	  bool _M_visited(_StateIdT) const { return false; }
229*38fd1498Szrj 	  void _M_queue(_StateIdT, const _ResultsVec&) { }
230*38fd1498Szrj 
231*38fd1498Szrj 	  _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
232*38fd1498Szrj 
233*38fd1498Szrj 	  // To record current solution.
234*38fd1498Szrj 	  _StateIdT _M_start;
235*38fd1498Szrj 	  _BiIter   _M_sol_pos;
236*38fd1498Szrj 	};
237*38fd1498Szrj 
238*38fd1498Szrj     public:
239*38fd1498Szrj       _ResultsVec                                           _M_cur_results;
240*38fd1498Szrj       _BiIter                                               _M_current;
241*38fd1498Szrj       _BiIter                                               _M_begin;
242*38fd1498Szrj       const _BiIter                                         _M_end;
243*38fd1498Szrj       const _RegexT&                                        _M_re;
244*38fd1498Szrj       const _NFAT&                                          _M_nfa;
245*38fd1498Szrj       _ResultsVec&                                          _M_results;
246*38fd1498Szrj       vector<pair<_BiIter, int>>                            _M_rep_count;
247*38fd1498Szrj       _State_info<__search_mode, _ResultsVec>		    _M_states;
248*38fd1498Szrj       _FlagT                                                _M_flags;
249*38fd1498Szrj       // Do we have a solution so far?
250*38fd1498Szrj       bool                                                  _M_has_sol;
251*38fd1498Szrj     };
252*38fd1498Szrj 
253*38fd1498Szrj  //@} regex-detail
254*38fd1498Szrj } // namespace __detail
255*38fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION
256*38fd1498Szrj } // namespace std
257*38fd1498Szrj 
258*38fd1498Szrj #include <bits/regex_executor.tcc>
259