1*38fd1498Szrj // class template regex -*- C++ -*-
2*38fd1498Szrj
3*38fd1498Szrj // Copyright (C) 2013-2018 Free Software Foundation, Inc.
4*38fd1498Szrj //
5*38fd1498Szrj // This file is part of the GNU ISO C++ Library. This library is free
6*38fd1498Szrj // software; you can redistribute it and/or modify it under the
7*38fd1498Szrj // terms of the GNU General Public License as published by the
8*38fd1498Szrj // Free Software Foundation; either version 3, or (at your option)
9*38fd1498Szrj // any later version.
10*38fd1498Szrj
11*38fd1498Szrj // This library is distributed in the hope that it will be useful,
12*38fd1498Szrj // but WITHOUT ANY WARRANTY; without even the implied warranty of
13*38fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14*38fd1498Szrj // GNU General Public License for more details.
15*38fd1498Szrj
16*38fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional
17*38fd1498Szrj // permissions described in the GCC Runtime Library Exception, version
18*38fd1498Szrj // 3.1, as published by the Free Software Foundation.
19*38fd1498Szrj
20*38fd1498Szrj // You should have received a copy of the GNU General Public License and
21*38fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program;
22*38fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23*38fd1498Szrj // <http://www.gnu.org/licenses/>.
24*38fd1498Szrj
25*38fd1498Szrj /**
26*38fd1498Szrj * @file bits/regex_executor.h
27*38fd1498Szrj * This is an internal header file, included by other library headers.
28*38fd1498Szrj * Do not attempt to use it directly. @headername{regex}
29*38fd1498Szrj */
30*38fd1498Szrj
31*38fd1498Szrj // FIXME convert comments to doxygen format.
32*38fd1498Szrj
_GLIBCXX_VISIBILITY(default)33*38fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default)
34*38fd1498Szrj {
35*38fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION
36*38fd1498Szrj
37*38fd1498Szrj namespace __detail
38*38fd1498Szrj {
39*38fd1498Szrj /**
40*38fd1498Szrj * @addtogroup regex-detail
41*38fd1498Szrj * @{
42*38fd1498Szrj */
43*38fd1498Szrj
44*38fd1498Szrj /**
45*38fd1498Szrj * @brief Takes a regex and an input string and does the matching.
46*38fd1498Szrj *
47*38fd1498Szrj * The %_Executor class has two modes: DFS mode and BFS mode, controlled
48*38fd1498Szrj * by the template parameter %__dfs_mode.
49*38fd1498Szrj */
50*38fd1498Szrj template<typename _BiIter, typename _Alloc, typename _TraitsT,
51*38fd1498Szrj bool __dfs_mode>
52*38fd1498Szrj class _Executor
53*38fd1498Szrj {
54*38fd1498Szrj using __search_mode = integral_constant<bool, __dfs_mode>;
55*38fd1498Szrj using __dfs = true_type;
56*38fd1498Szrj using __bfs = false_type;
57*38fd1498Szrj
58*38fd1498Szrj enum class _Match_mode : unsigned char { _Exact, _Prefix };
59*38fd1498Szrj
60*38fd1498Szrj public:
61*38fd1498Szrj typedef typename iterator_traits<_BiIter>::value_type _CharT;
62*38fd1498Szrj typedef basic_regex<_CharT, _TraitsT> _RegexT;
63*38fd1498Szrj typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
64*38fd1498Szrj typedef regex_constants::match_flag_type _FlagT;
65*38fd1498Szrj typedef typename _TraitsT::char_class_type _ClassT;
66*38fd1498Szrj typedef _NFA<_TraitsT> _NFAT;
67*38fd1498Szrj
68*38fd1498Szrj public:
69*38fd1498Szrj _Executor(_BiIter __begin,
70*38fd1498Szrj _BiIter __end,
71*38fd1498Szrj _ResultsVec& __results,
72*38fd1498Szrj const _RegexT& __re,
73*38fd1498Szrj _FlagT __flags)
74*38fd1498Szrj : _M_begin(__begin),
75*38fd1498Szrj _M_end(__end),
76*38fd1498Szrj _M_re(__re),
77*38fd1498Szrj _M_nfa(*__re._M_automaton),
78*38fd1498Szrj _M_results(__results),
79*38fd1498Szrj _M_rep_count(_M_nfa.size()),
80*38fd1498Szrj _M_states(_M_nfa._M_start(), _M_nfa.size()),
81*38fd1498Szrj _M_flags((__flags & regex_constants::match_prev_avail)
82*38fd1498Szrj ? (__flags
83*38fd1498Szrj & ~regex_constants::match_not_bol
84*38fd1498Szrj & ~regex_constants::match_not_bow)
85*38fd1498Szrj : __flags)
86*38fd1498Szrj { }
87*38fd1498Szrj
88*38fd1498Szrj // Set matched when string exactly matches the pattern.
89*38fd1498Szrj bool
90*38fd1498Szrj _M_match()
91*38fd1498Szrj {
92*38fd1498Szrj _M_current = _M_begin;
93*38fd1498Szrj return _M_main(_Match_mode::_Exact);
94*38fd1498Szrj }
95*38fd1498Szrj
96*38fd1498Szrj // Set matched when some prefix of the string matches the pattern.
97*38fd1498Szrj bool
98*38fd1498Szrj _M_search_from_first()
99*38fd1498Szrj {
100*38fd1498Szrj _M_current = _M_begin;
101*38fd1498Szrj return _M_main(_Match_mode::_Prefix);
102*38fd1498Szrj }
103*38fd1498Szrj
104*38fd1498Szrj bool
105*38fd1498Szrj _M_search();
106*38fd1498Szrj
107*38fd1498Szrj private:
108*38fd1498Szrj void
109*38fd1498Szrj _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
110*38fd1498Szrj
111*38fd1498Szrj void
112*38fd1498Szrj _M_handle_repeat(_Match_mode, _StateIdT);
113*38fd1498Szrj
114*38fd1498Szrj void
115*38fd1498Szrj _M_handle_subexpr_begin(_Match_mode, _StateIdT);
116*38fd1498Szrj
117*38fd1498Szrj void
118*38fd1498Szrj _M_handle_subexpr_end(_Match_mode, _StateIdT);
119*38fd1498Szrj
120*38fd1498Szrj void
121*38fd1498Szrj _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
122*38fd1498Szrj
123*38fd1498Szrj void
124*38fd1498Szrj _M_handle_line_end_assertion(_Match_mode, _StateIdT);
125*38fd1498Szrj
126*38fd1498Szrj void
127*38fd1498Szrj _M_handle_word_boundary(_Match_mode, _StateIdT);
128*38fd1498Szrj
129*38fd1498Szrj void
130*38fd1498Szrj _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
131*38fd1498Szrj
132*38fd1498Szrj void
133*38fd1498Szrj _M_handle_match(_Match_mode, _StateIdT);
134*38fd1498Szrj
135*38fd1498Szrj void
136*38fd1498Szrj _M_handle_backref(_Match_mode, _StateIdT);
137*38fd1498Szrj
138*38fd1498Szrj void
139*38fd1498Szrj _M_handle_accept(_Match_mode, _StateIdT);
140*38fd1498Szrj
141*38fd1498Szrj void
142*38fd1498Szrj _M_handle_alternative(_Match_mode, _StateIdT);
143*38fd1498Szrj
144*38fd1498Szrj void
145*38fd1498Szrj _M_dfs(_Match_mode __match_mode, _StateIdT __start);
146*38fd1498Szrj
147*38fd1498Szrj bool
148*38fd1498Szrj _M_main(_Match_mode __match_mode)
149*38fd1498Szrj { return _M_main_dispatch(__match_mode, __search_mode{}); }
150*38fd1498Szrj
151*38fd1498Szrj bool
152*38fd1498Szrj _M_main_dispatch(_Match_mode __match_mode, __dfs);
153*38fd1498Szrj
154*38fd1498Szrj bool
155*38fd1498Szrj _M_main_dispatch(_Match_mode __match_mode, __bfs);
156*38fd1498Szrj
157*38fd1498Szrj bool
158*38fd1498Szrj _M_is_word(_CharT __ch) const
159*38fd1498Szrj {
160*38fd1498Szrj static const _CharT __s[2] = { 'w' };
161*38fd1498Szrj return _M_re._M_automaton->_M_traits.isctype
162*38fd1498Szrj (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
163*38fd1498Szrj }
164*38fd1498Szrj
165*38fd1498Szrj bool
166*38fd1498Szrj _M_at_begin() const
167*38fd1498Szrj {
168*38fd1498Szrj return _M_current == _M_begin
169*38fd1498Szrj && !(_M_flags & (regex_constants::match_not_bol
170*38fd1498Szrj | regex_constants::match_prev_avail));
171*38fd1498Szrj }
172*38fd1498Szrj
173*38fd1498Szrj bool
174*38fd1498Szrj _M_at_end() const
175*38fd1498Szrj {
176*38fd1498Szrj return _M_current == _M_end
177*38fd1498Szrj && !(_M_flags & regex_constants::match_not_eol);
178*38fd1498Szrj }
179*38fd1498Szrj
180*38fd1498Szrj bool
181*38fd1498Szrj _M_word_boundary() const;
182*38fd1498Szrj
183*38fd1498Szrj bool
184*38fd1498Szrj _M_lookahead(_StateIdT __next);
185*38fd1498Szrj
186*38fd1498Szrj // Holds additional information used in BFS-mode.
187*38fd1498Szrj template<typename _SearchMode, typename _ResultsVec>
188*38fd1498Szrj struct _State_info;
189*38fd1498Szrj
190*38fd1498Szrj template<typename _ResultsVec>
191*38fd1498Szrj struct _State_info<__bfs, _ResultsVec>
192*38fd1498Szrj {
193*38fd1498Szrj explicit
194*38fd1498Szrj _State_info(_StateIdT __start, size_t __n)
195*38fd1498Szrj : _M_visited_states(new bool[__n]()), _M_start(__start)
196*38fd1498Szrj { }
197*38fd1498Szrj
198*38fd1498Szrj bool _M_visited(_StateIdT __i)
199*38fd1498Szrj {
200*38fd1498Szrj if (_M_visited_states[__i])
201*38fd1498Szrj return true;
202*38fd1498Szrj _M_visited_states[__i] = true;
203*38fd1498Szrj return false;
204*38fd1498Szrj }
205*38fd1498Szrj
206*38fd1498Szrj void _M_queue(_StateIdT __i, const _ResultsVec& __res)
207*38fd1498Szrj { _M_match_queue.emplace_back(__i, __res); }
208*38fd1498Szrj
209*38fd1498Szrj // Dummy implementations for BFS mode.
210*38fd1498Szrj _BiIter* _M_get_sol_pos() { return nullptr; }
211*38fd1498Szrj
212*38fd1498Szrj // Saves states that need to be considered for the next character.
213*38fd1498Szrj vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
214*38fd1498Szrj // Indicates which states are already visited.
215*38fd1498Szrj unique_ptr<bool[]> _M_visited_states;
216*38fd1498Szrj // To record current solution.
217*38fd1498Szrj _StateIdT _M_start;
218*38fd1498Szrj };
219*38fd1498Szrj
220*38fd1498Szrj template<typename _ResultsVec>
221*38fd1498Szrj struct _State_info<__dfs, _ResultsVec>
222*38fd1498Szrj {
223*38fd1498Szrj explicit
224*38fd1498Szrj _State_info(_StateIdT __start, size_t) : _M_start(__start)
225*38fd1498Szrj { }
226*38fd1498Szrj
227*38fd1498Szrj // Dummy implementations for DFS mode.
228*38fd1498Szrj bool _M_visited(_StateIdT) const { return false; }
229*38fd1498Szrj void _M_queue(_StateIdT, const _ResultsVec&) { }
230*38fd1498Szrj
231*38fd1498Szrj _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
232*38fd1498Szrj
233*38fd1498Szrj // To record current solution.
234*38fd1498Szrj _StateIdT _M_start;
235*38fd1498Szrj _BiIter _M_sol_pos;
236*38fd1498Szrj };
237*38fd1498Szrj
238*38fd1498Szrj public:
239*38fd1498Szrj _ResultsVec _M_cur_results;
240*38fd1498Szrj _BiIter _M_current;
241*38fd1498Szrj _BiIter _M_begin;
242*38fd1498Szrj const _BiIter _M_end;
243*38fd1498Szrj const _RegexT& _M_re;
244*38fd1498Szrj const _NFAT& _M_nfa;
245*38fd1498Szrj _ResultsVec& _M_results;
246*38fd1498Szrj vector<pair<_BiIter, int>> _M_rep_count;
247*38fd1498Szrj _State_info<__search_mode, _ResultsVec> _M_states;
248*38fd1498Szrj _FlagT _M_flags;
249*38fd1498Szrj // Do we have a solution so far?
250*38fd1498Szrj bool _M_has_sol;
251*38fd1498Szrj };
252*38fd1498Szrj
253*38fd1498Szrj //@} regex-detail
254*38fd1498Szrj } // namespace __detail
255*38fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION
256*38fd1498Szrj } // namespace std
257*38fd1498Szrj
258*38fd1498Szrj #include <bits/regex_executor.tcc>
259