1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2013-2022 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex_automaton.tcc 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 namespace std _GLIBCXX_VISIBILITY(default) 32 { 33 _GLIBCXX_BEGIN_NAMESPACE_VERSION 34 35 namespace __detail 36 { 37 #ifdef _GLIBCXX_DEBUG 38 inline std::ostream& _M_print(std::ostream & ostr) const39 _State_base::_M_print(std::ostream& ostr) const 40 { 41 switch (_M_opcode) 42 { 43 case _S_opcode_alternative: 44 case _S_opcode_repeat: 45 ostr << "alt next=" << _M_next << " alt=" << _M_alt; 46 break; 47 case _S_opcode_subexpr_begin: 48 ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr; 49 break; 50 case _S_opcode_subexpr_end: 51 ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr; 52 break; 53 case _S_opcode_backref: 54 ostr << "backref next=" << _M_next << " index=" << _M_backref_index; 55 break; 56 case _S_opcode_match: 57 ostr << "match next=" << _M_next; 58 break; 59 case _S_opcode_accept: 60 ostr << "accept next=" << _M_next; 61 break; 62 default: 63 ostr << "unknown next=" << _M_next; 64 break; 65 } 66 return ostr; 67 } 68 69 // Prints graphviz dot commands for state. 70 inline std::ostream& _M_dot(std::ostream & __ostr,_StateIdT __id) const71 _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const 72 { 73 switch (_M_opcode) 74 { 75 case _S_opcode_alternative: 76 case _S_opcode_repeat: 77 __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n" 78 << __id << " -> " << _M_next 79 << " [label=\"next\", tailport=\"s\"];\n" 80 << __id << " -> " << _M_alt 81 << " [label=\"alt\", tailport=\"n\"];\n"; 82 break; 83 case _S_opcode_backref: 84 __ostr << __id << " [label=\"" << __id << "\\nBACKREF " 85 << _M_subexpr << "\"];\n" 86 << __id << " -> " << _M_next << " [label=\"<match>\"];\n"; 87 break; 88 case _S_opcode_line_begin_assertion: 89 __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n" 90 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 91 break; 92 case _S_opcode_line_end_assertion: 93 __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n" 94 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 95 break; 96 case _S_opcode_word_boundary: 97 __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY " 98 << _M_neg << "\"];\n" 99 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 100 break; 101 case _S_opcode_subexpr_lookahead: 102 __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n" 103 << __id << " -> " << _M_next 104 << " [label=\"epsilon\", tailport=\"s\"];\n" 105 << __id << " -> " << _M_alt 106 << " [label=\"<assert>\", tailport=\"n\"];\n"; 107 break; 108 case _S_opcode_subexpr_begin: 109 __ostr << __id << " [label=\"" << __id << "\\nSBEGIN " 110 << _M_subexpr << "\"];\n" 111 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 112 break; 113 case _S_opcode_subexpr_end: 114 __ostr << __id << " [label=\"" << __id << "\\nSEND " 115 << _M_subexpr << "\"];\n" 116 << __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; 117 break; 118 case _S_opcode_dummy: 119 break; 120 case _S_opcode_match: 121 __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n" 122 << __id << " -> " << _M_next << " [label=\"<match>\"];\n"; 123 break; 124 case _S_opcode_accept: 125 __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ; 126 break; 127 default: 128 _GLIBCXX_DEBUG_ASSERT(false); 129 break; 130 } 131 return __ostr; 132 } 133 134 template<typename _TraitsT> 135 std::ostream& _M_dot(std::ostream & __ostr) const136 _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const 137 { 138 __ostr << "digraph _Nfa {\n" 139 " rankdir=LR;\n"; 140 for (size_t __i = 0; __i < this->size(); ++__i) 141 (*this)[__i]._M_dot(__ostr, __i); 142 __ostr << "}\n"; 143 return __ostr; 144 } 145 #endif 146 147 template<typename _TraitsT> 148 _StateIdT _M_insert_backref(size_t __index)149 _NFA<_TraitsT>::_M_insert_backref(size_t __index) 150 { 151 if (this->_M_flags & regex_constants::__polynomial) 152 __throw_regex_error(regex_constants::error_complexity, 153 "Unexpected back-reference in polynomial mode."); 154 // To figure out whether a backref is valid, a stack is used to store 155 // unfinished sub-expressions. For example, when parsing 156 // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3 157 // sub expressions are parsed or partially parsed(in the stack), aka, 158 // "(a..", "(b)" and "(c.."). 159 // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this 160 // time, "\\2" is valid, but "\\1" and "\\3" are not. 161 if (__index >= _M_subexpr_count) 162 __throw_regex_error( 163 regex_constants::error_backref, 164 "Back-reference index exceeds current sub-expression count."); 165 for (auto __it : this->_M_paren_stack) 166 if (__index == __it) 167 __throw_regex_error( 168 regex_constants::error_backref, 169 "Back-reference referred to an opened sub-expression."); 170 this->_M_has_backref = true; 171 _StateT __tmp(_S_opcode_backref); 172 __tmp._M_backref_index = __index; 173 return _M_insert_state(std::move(__tmp)); 174 } 175 176 template<typename _TraitsT> 177 void _M_eliminate_dummy()178 _NFA<_TraitsT>::_M_eliminate_dummy() 179 { 180 for (auto& __it : *this) 181 { 182 while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode() 183 == _S_opcode_dummy) 184 __it._M_next = (*this)[__it._M_next]._M_next; 185 if (__it._M_has_alt()) 186 while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode() 187 == _S_opcode_dummy) 188 __it._M_alt = (*this)[__it._M_alt]._M_next; 189 } 190 } 191 192 // Just apply DFS on the sequence and re-link their links. 193 template<typename _TraitsT> 194 _StateSeq<_TraitsT> _M_clone()195 _StateSeq<_TraitsT>::_M_clone() 196 { 197 _GLIBCXX_STD_C::map<_StateIdT, _StateIdT> __m; 198 std::stack<_StateIdT, _GLIBCXX_STD_C::deque<_StateIdT>> __stack; 199 __stack.push(_M_start); 200 while (!__stack.empty()) 201 { 202 auto __u = __stack.top(); 203 __stack.pop(); 204 auto __dup = _M_nfa[__u]; 205 // _M_insert_state() never return -1 206 auto __id = _M_nfa._M_insert_state(std::move(__dup)); 207 __m[__u] = __id; 208 if (__dup._M_has_alt()) 209 if (__dup._M_alt != _S_invalid_state_id 210 && __m.count(__dup._M_alt) == 0) 211 __stack.push(__dup._M_alt); 212 if (__u == _M_end) 213 continue; 214 if (__dup._M_next != _S_invalid_state_id 215 && __m.count(__dup._M_next) == 0) 216 __stack.push(__dup._M_next); 217 } 218 for (auto __it : __m) 219 { 220 auto __v = __it.second; 221 auto& __ref = _M_nfa[__v]; 222 if (__ref._M_next != _S_invalid_state_id) 223 __ref._M_next = __m.find(__ref._M_next)->second; 224 if (__ref._M_has_alt() && __ref._M_alt != _S_invalid_state_id) 225 __ref._M_alt = __m.find(__ref._M_alt)->second; 226 } 227 return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]); 228 } 229 } // namespace __detail 230 231 _GLIBCXX_END_NAMESPACE_VERSION 232 } // namespace 233