1*38fd1498Szrj // class template regex -*- C++ -*- 2*38fd1498Szrj 3*38fd1498Szrj // Copyright (C) 2013-2018 Free Software Foundation, Inc. 4*38fd1498Szrj // 5*38fd1498Szrj // This file is part of the GNU ISO C++ Library. This library is free 6*38fd1498Szrj // software; you can redistribute it and/or modify it under the 7*38fd1498Szrj // terms of the GNU General Public License as published by the 8*38fd1498Szrj // Free Software Foundation; either version 3, or (at your option) 9*38fd1498Szrj // any later version. 10*38fd1498Szrj 11*38fd1498Szrj // This library is distributed in the hope that it will be useful, 12*38fd1498Szrj // but WITHOUT ANY WARRANTY; without even the implied warranty of 13*38fd1498Szrj // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14*38fd1498Szrj // GNU General Public License for more details. 15*38fd1498Szrj 16*38fd1498Szrj // Under Section 7 of GPL version 3, you are granted additional 17*38fd1498Szrj // permissions described in the GCC Runtime Library Exception, version 18*38fd1498Szrj // 3.1, as published by the Free Software Foundation. 19*38fd1498Szrj 20*38fd1498Szrj // You should have received a copy of the GNU General Public License and 21*38fd1498Szrj // a copy of the GCC Runtime Library Exception along with this program; 22*38fd1498Szrj // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23*38fd1498Szrj // <http://www.gnu.org/licenses/>. 24*38fd1498Szrj 25*38fd1498Szrj /** 26*38fd1498Szrj * @file bits/regex_scanner.tcc 27*38fd1498Szrj * This is an internal header file, included by other library headers. 28*38fd1498Szrj * Do not attempt to use it directly. @headername{regex} 29*38fd1498Szrj */ 30*38fd1498Szrj 31*38fd1498Szrj // FIXME make comments doxygen format. 32*38fd1498Szrj 33*38fd1498Szrj // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep 34*38fd1498Szrj // and awk 35*38fd1498Szrj // 1) grep is basic except '\n' is treated as '|' 36*38fd1498Szrj // 2) egrep is extended except '\n' is treated as '|' 37*38fd1498Szrj // 3) awk is extended except special escaping rules, and there's no 38*38fd1498Szrj // back-reference. 39*38fd1498Szrj // 40*38fd1498Szrj // References: 41*38fd1498Szrj // 42*38fd1498Szrj // ECMAScript: ECMA-262 15.10 43*38fd1498Szrj // 44*38fd1498Szrj // basic, extended: 45*38fd1498Szrj // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap09.html 46*38fd1498Szrj // 47*38fd1498Szrj // awk: http://pubs.opengroup.org/onlinepubs/000095399/utilities/awk.html 48*38fd1498Szrj 49*38fd1498Szrj namespace std _GLIBCXX_VISIBILITY(default) 50*38fd1498Szrj { 51*38fd1498Szrj _GLIBCXX_BEGIN_NAMESPACE_VERSION 52*38fd1498Szrj 53*38fd1498Szrj namespace __detail 54*38fd1498Szrj { 55*38fd1498Szrj template<typename _CharT> 56*38fd1498Szrj _Scanner<_CharT>:: _Scanner(typename _Scanner::_IterT __begin,typename _Scanner::_IterT __end,_FlagT __flags,std::locale __loc)57*38fd1498Szrj _Scanner(typename _Scanner::_IterT __begin, 58*38fd1498Szrj typename _Scanner::_IterT __end, 59*38fd1498Szrj _FlagT __flags, std::locale __loc) 60*38fd1498Szrj : _ScannerBase(__flags), 61*38fd1498Szrj _M_current(__begin), _M_end(__end), 62*38fd1498Szrj _M_ctype(std::use_facet<_CtypeT>(__loc)), 63*38fd1498Szrj _M_eat_escape(_M_is_ecma() 64*38fd1498Szrj ? &_Scanner::_M_eat_escape_ecma 65*38fd1498Szrj : &_Scanner::_M_eat_escape_posix) 66*38fd1498Szrj { _M_advance(); } 67*38fd1498Szrj 68*38fd1498Szrj template<typename _CharT> 69*38fd1498Szrj void 70*38fd1498Szrj _Scanner<_CharT>:: _M_advance()71*38fd1498Szrj _M_advance() 72*38fd1498Szrj { 73*38fd1498Szrj if (_M_current == _M_end) 74*38fd1498Szrj { 75*38fd1498Szrj _M_token = _S_token_eof; 76*38fd1498Szrj return; 77*38fd1498Szrj } 78*38fd1498Szrj 79*38fd1498Szrj if (_M_state == _S_state_normal) 80*38fd1498Szrj _M_scan_normal(); 81*38fd1498Szrj else if (_M_state == _S_state_in_bracket) 82*38fd1498Szrj _M_scan_in_bracket(); 83*38fd1498Szrj else if (_M_state == _S_state_in_brace) 84*38fd1498Szrj _M_scan_in_brace(); 85*38fd1498Szrj else 86*38fd1498Szrj { 87*38fd1498Szrj __glibcxx_assert(false); 88*38fd1498Szrj } 89*38fd1498Szrj } 90*38fd1498Szrj 91*38fd1498Szrj // Differences between styles: 92*38fd1498Szrj // 1) "\(", "\)", "\{" in basic. It's not escaping. 93*38fd1498Szrj // 2) "(?:", "(?=", "(?!" in ECMAScript. 94*38fd1498Szrj template<typename _CharT> 95*38fd1498Szrj void 96*38fd1498Szrj _Scanner<_CharT>:: _M_scan_normal()97*38fd1498Szrj _M_scan_normal() 98*38fd1498Szrj { 99*38fd1498Szrj auto __c = *_M_current++; 100*38fd1498Szrj 101*38fd1498Szrj if (std::strchr(_M_spec_char, _M_ctype.narrow(__c, ' ')) == nullptr) 102*38fd1498Szrj { 103*38fd1498Szrj _M_token = _S_token_ord_char; 104*38fd1498Szrj _M_value.assign(1, __c); 105*38fd1498Szrj return; 106*38fd1498Szrj } 107*38fd1498Szrj if (__c == '\\') 108*38fd1498Szrj { 109*38fd1498Szrj if (_M_current == _M_end) 110*38fd1498Szrj __throw_regex_error( 111*38fd1498Szrj regex_constants::error_escape, 112*38fd1498Szrj "Unexpected end of regex when escaping."); 113*38fd1498Szrj 114*38fd1498Szrj if (!_M_is_basic() 115*38fd1498Szrj || (*_M_current != '(' 116*38fd1498Szrj && *_M_current != ')' 117*38fd1498Szrj && *_M_current != '{')) 118*38fd1498Szrj { 119*38fd1498Szrj (this->*_M_eat_escape)(); 120*38fd1498Szrj return; 121*38fd1498Szrj } 122*38fd1498Szrj __c = *_M_current++; 123*38fd1498Szrj } 124*38fd1498Szrj if (__c == '(') 125*38fd1498Szrj { 126*38fd1498Szrj if (_M_is_ecma() && *_M_current == '?') 127*38fd1498Szrj { 128*38fd1498Szrj if (++_M_current == _M_end) 129*38fd1498Szrj __throw_regex_error( 130*38fd1498Szrj regex_constants::error_paren, 131*38fd1498Szrj "Unexpected end of regex when in an open parenthesis."); 132*38fd1498Szrj 133*38fd1498Szrj if (*_M_current == ':') 134*38fd1498Szrj { 135*38fd1498Szrj ++_M_current; 136*38fd1498Szrj _M_token = _S_token_subexpr_no_group_begin; 137*38fd1498Szrj } 138*38fd1498Szrj else if (*_M_current == '=') 139*38fd1498Szrj { 140*38fd1498Szrj ++_M_current; 141*38fd1498Szrj _M_token = _S_token_subexpr_lookahead_begin; 142*38fd1498Szrj _M_value.assign(1, 'p'); 143*38fd1498Szrj } 144*38fd1498Szrj else if (*_M_current == '!') 145*38fd1498Szrj { 146*38fd1498Szrj ++_M_current; 147*38fd1498Szrj _M_token = _S_token_subexpr_lookahead_begin; 148*38fd1498Szrj _M_value.assign(1, 'n'); 149*38fd1498Szrj } 150*38fd1498Szrj else 151*38fd1498Szrj __throw_regex_error( 152*38fd1498Szrj regex_constants::error_paren, 153*38fd1498Szrj "Invalid special open parenthesis."); 154*38fd1498Szrj } 155*38fd1498Szrj else if (_M_flags & regex_constants::nosubs) 156*38fd1498Szrj _M_token = _S_token_subexpr_no_group_begin; 157*38fd1498Szrj else 158*38fd1498Szrj _M_token = _S_token_subexpr_begin; 159*38fd1498Szrj } 160*38fd1498Szrj else if (__c == ')') 161*38fd1498Szrj _M_token = _S_token_subexpr_end; 162*38fd1498Szrj else if (__c == '[') 163*38fd1498Szrj { 164*38fd1498Szrj _M_state = _S_state_in_bracket; 165*38fd1498Szrj _M_at_bracket_start = true; 166*38fd1498Szrj if (_M_current != _M_end && *_M_current == '^') 167*38fd1498Szrj { 168*38fd1498Szrj _M_token = _S_token_bracket_neg_begin; 169*38fd1498Szrj ++_M_current; 170*38fd1498Szrj } 171*38fd1498Szrj else 172*38fd1498Szrj _M_token = _S_token_bracket_begin; 173*38fd1498Szrj } 174*38fd1498Szrj else if (__c == '{') 175*38fd1498Szrj { 176*38fd1498Szrj _M_state = _S_state_in_brace; 177*38fd1498Szrj _M_token = _S_token_interval_begin; 178*38fd1498Szrj } 179*38fd1498Szrj else if (__c != ']' && __c != '}') 180*38fd1498Szrj { 181*38fd1498Szrj auto __it = _M_token_tbl; 182*38fd1498Szrj auto __narrowc = _M_ctype.narrow(__c, '\0'); 183*38fd1498Szrj for (; __it->first != '\0'; ++__it) 184*38fd1498Szrj if (__it->first == __narrowc) 185*38fd1498Szrj { 186*38fd1498Szrj _M_token = __it->second; 187*38fd1498Szrj return; 188*38fd1498Szrj } 189*38fd1498Szrj __glibcxx_assert(false); 190*38fd1498Szrj } 191*38fd1498Szrj else 192*38fd1498Szrj { 193*38fd1498Szrj _M_token = _S_token_ord_char; 194*38fd1498Szrj _M_value.assign(1, __c); 195*38fd1498Szrj } 196*38fd1498Szrj } 197*38fd1498Szrj 198*38fd1498Szrj // Differences between styles: 199*38fd1498Szrj // 1) different semantics of "[]" and "[^]". 200*38fd1498Szrj // 2) Escaping in bracket expr. 201*38fd1498Szrj template<typename _CharT> 202*38fd1498Szrj void 203*38fd1498Szrj _Scanner<_CharT>:: _M_scan_in_bracket()204*38fd1498Szrj _M_scan_in_bracket() 205*38fd1498Szrj { 206*38fd1498Szrj if (_M_current == _M_end) 207*38fd1498Szrj __throw_regex_error( 208*38fd1498Szrj regex_constants::error_brack, 209*38fd1498Szrj "Unexpected end of regex when in bracket expression."); 210*38fd1498Szrj 211*38fd1498Szrj auto __c = *_M_current++; 212*38fd1498Szrj 213*38fd1498Szrj if (__c == '-') 214*38fd1498Szrj _M_token = _S_token_bracket_dash; 215*38fd1498Szrj else if (__c == '[') 216*38fd1498Szrj { 217*38fd1498Szrj if (_M_current == _M_end) 218*38fd1498Szrj __throw_regex_error(regex_constants::error_brack, 219*38fd1498Szrj "Unexpected character class open bracket."); 220*38fd1498Szrj 221*38fd1498Szrj if (*_M_current == '.') 222*38fd1498Szrj { 223*38fd1498Szrj _M_token = _S_token_collsymbol; 224*38fd1498Szrj _M_eat_class(*_M_current++); 225*38fd1498Szrj } 226*38fd1498Szrj else if (*_M_current == ':') 227*38fd1498Szrj { 228*38fd1498Szrj _M_token = _S_token_char_class_name; 229*38fd1498Szrj _M_eat_class(*_M_current++); 230*38fd1498Szrj } 231*38fd1498Szrj else if (*_M_current == '=') 232*38fd1498Szrj { 233*38fd1498Szrj _M_token = _S_token_equiv_class_name; 234*38fd1498Szrj _M_eat_class(*_M_current++); 235*38fd1498Szrj } 236*38fd1498Szrj else 237*38fd1498Szrj { 238*38fd1498Szrj _M_token = _S_token_ord_char; 239*38fd1498Szrj _M_value.assign(1, __c); 240*38fd1498Szrj } 241*38fd1498Szrj } 242*38fd1498Szrj // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted 243*38fd1498Szrj // literally. So "[]]" and "[^]]" are valid regexes. See the testcases 244*38fd1498Szrj // `*/empty_range.cc`. 245*38fd1498Szrj else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start)) 246*38fd1498Szrj { 247*38fd1498Szrj _M_token = _S_token_bracket_end; 248*38fd1498Szrj _M_state = _S_state_normal; 249*38fd1498Szrj } 250*38fd1498Szrj // ECMAScript and awk permits escaping in bracket. 251*38fd1498Szrj else if (__c == '\\' && (_M_is_ecma() || _M_is_awk())) 252*38fd1498Szrj (this->*_M_eat_escape)(); 253*38fd1498Szrj else 254*38fd1498Szrj { 255*38fd1498Szrj _M_token = _S_token_ord_char; 256*38fd1498Szrj _M_value.assign(1, __c); 257*38fd1498Szrj } 258*38fd1498Szrj _M_at_bracket_start = false; 259*38fd1498Szrj } 260*38fd1498Szrj 261*38fd1498Szrj // Differences between styles: 262*38fd1498Szrj // 1) "\}" in basic style. 263*38fd1498Szrj template<typename _CharT> 264*38fd1498Szrj void 265*38fd1498Szrj _Scanner<_CharT>:: _M_scan_in_brace()266*38fd1498Szrj _M_scan_in_brace() 267*38fd1498Szrj { 268*38fd1498Szrj if (_M_current == _M_end) 269*38fd1498Szrj __throw_regex_error( 270*38fd1498Szrj regex_constants::error_brace, 271*38fd1498Szrj "Unexpected end of regex when in brace expression."); 272*38fd1498Szrj 273*38fd1498Szrj auto __c = *_M_current++; 274*38fd1498Szrj 275*38fd1498Szrj if (_M_ctype.is(_CtypeT::digit, __c)) 276*38fd1498Szrj { 277*38fd1498Szrj _M_token = _S_token_dup_count; 278*38fd1498Szrj _M_value.assign(1, __c); 279*38fd1498Szrj while (_M_current != _M_end 280*38fd1498Szrj && _M_ctype.is(_CtypeT::digit, *_M_current)) 281*38fd1498Szrj _M_value += *_M_current++; 282*38fd1498Szrj } 283*38fd1498Szrj else if (__c == ',') 284*38fd1498Szrj _M_token = _S_token_comma; 285*38fd1498Szrj // basic use \}. 286*38fd1498Szrj else if (_M_is_basic()) 287*38fd1498Szrj { 288*38fd1498Szrj if (__c == '\\' && _M_current != _M_end && *_M_current == '}') 289*38fd1498Szrj { 290*38fd1498Szrj _M_state = _S_state_normal; 291*38fd1498Szrj _M_token = _S_token_interval_end; 292*38fd1498Szrj ++_M_current; 293*38fd1498Szrj } 294*38fd1498Szrj else 295*38fd1498Szrj __throw_regex_error(regex_constants::error_badbrace, 296*38fd1498Szrj "Unexpected character in brace expression."); 297*38fd1498Szrj } 298*38fd1498Szrj else if (__c == '}') 299*38fd1498Szrj { 300*38fd1498Szrj _M_state = _S_state_normal; 301*38fd1498Szrj _M_token = _S_token_interval_end; 302*38fd1498Szrj } 303*38fd1498Szrj else 304*38fd1498Szrj __throw_regex_error(regex_constants::error_badbrace, 305*38fd1498Szrj "Unexpected character in brace expression."); 306*38fd1498Szrj } 307*38fd1498Szrj 308*38fd1498Szrj template<typename _CharT> 309*38fd1498Szrj void 310*38fd1498Szrj _Scanner<_CharT>:: _M_eat_escape_ecma()311*38fd1498Szrj _M_eat_escape_ecma() 312*38fd1498Szrj { 313*38fd1498Szrj if (_M_current == _M_end) 314*38fd1498Szrj __throw_regex_error(regex_constants::error_escape, 315*38fd1498Szrj "Unexpected end of regex when escaping."); 316*38fd1498Szrj 317*38fd1498Szrj auto __c = *_M_current++; 318*38fd1498Szrj auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); 319*38fd1498Szrj 320*38fd1498Szrj if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket)) 321*38fd1498Szrj { 322*38fd1498Szrj _M_token = _S_token_ord_char; 323*38fd1498Szrj _M_value.assign(1, *__pos); 324*38fd1498Szrj } 325*38fd1498Szrj else if (__c == 'b') 326*38fd1498Szrj { 327*38fd1498Szrj _M_token = _S_token_word_bound; 328*38fd1498Szrj _M_value.assign(1, 'p'); 329*38fd1498Szrj } 330*38fd1498Szrj else if (__c == 'B') 331*38fd1498Szrj { 332*38fd1498Szrj _M_token = _S_token_word_bound; 333*38fd1498Szrj _M_value.assign(1, 'n'); 334*38fd1498Szrj } 335*38fd1498Szrj // N3376 28.13 336*38fd1498Szrj else if (__c == 'd' 337*38fd1498Szrj || __c == 'D' 338*38fd1498Szrj || __c == 's' 339*38fd1498Szrj || __c == 'S' 340*38fd1498Szrj || __c == 'w' 341*38fd1498Szrj || __c == 'W') 342*38fd1498Szrj { 343*38fd1498Szrj _M_token = _S_token_quoted_class; 344*38fd1498Szrj _M_value.assign(1, __c); 345*38fd1498Szrj } 346*38fd1498Szrj else if (__c == 'c') 347*38fd1498Szrj { 348*38fd1498Szrj if (_M_current == _M_end) 349*38fd1498Szrj __throw_regex_error( 350*38fd1498Szrj regex_constants::error_escape, 351*38fd1498Szrj "Unexpected end of regex when reading control code."); 352*38fd1498Szrj _M_token = _S_token_ord_char; 353*38fd1498Szrj _M_value.assign(1, *_M_current++); 354*38fd1498Szrj } 355*38fd1498Szrj else if (__c == 'x' || __c == 'u') 356*38fd1498Szrj { 357*38fd1498Szrj _M_value.erase(); 358*38fd1498Szrj for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++) 359*38fd1498Szrj { 360*38fd1498Szrj if (_M_current == _M_end 361*38fd1498Szrj || !_M_ctype.is(_CtypeT::xdigit, *_M_current)) 362*38fd1498Szrj __throw_regex_error( 363*38fd1498Szrj regex_constants::error_escape, 364*38fd1498Szrj "Unexpected end of regex when ascii character."); 365*38fd1498Szrj _M_value += *_M_current++; 366*38fd1498Szrj } 367*38fd1498Szrj _M_token = _S_token_hex_num; 368*38fd1498Szrj } 369*38fd1498Szrj // ECMAScript recognizes multi-digit back-references. 370*38fd1498Szrj else if (_M_ctype.is(_CtypeT::digit, __c)) 371*38fd1498Szrj { 372*38fd1498Szrj _M_value.assign(1, __c); 373*38fd1498Szrj while (_M_current != _M_end 374*38fd1498Szrj && _M_ctype.is(_CtypeT::digit, *_M_current)) 375*38fd1498Szrj _M_value += *_M_current++; 376*38fd1498Szrj _M_token = _S_token_backref; 377*38fd1498Szrj } 378*38fd1498Szrj else 379*38fd1498Szrj { 380*38fd1498Szrj _M_token = _S_token_ord_char; 381*38fd1498Szrj _M_value.assign(1, __c); 382*38fd1498Szrj } 383*38fd1498Szrj } 384*38fd1498Szrj 385*38fd1498Szrj // Differences between styles: 386*38fd1498Szrj // 1) Extended doesn't support backref, but basic does. 387*38fd1498Szrj template<typename _CharT> 388*38fd1498Szrj void 389*38fd1498Szrj _Scanner<_CharT>:: _M_eat_escape_posix()390*38fd1498Szrj _M_eat_escape_posix() 391*38fd1498Szrj { 392*38fd1498Szrj if (_M_current == _M_end) 393*38fd1498Szrj __throw_regex_error(regex_constants::error_escape, 394*38fd1498Szrj "Unexpected end of regex when escaping."); 395*38fd1498Szrj 396*38fd1498Szrj auto __c = *_M_current; 397*38fd1498Szrj auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')); 398*38fd1498Szrj 399*38fd1498Szrj if (__pos != nullptr && *__pos != '\0') 400*38fd1498Szrj { 401*38fd1498Szrj _M_token = _S_token_ord_char; 402*38fd1498Szrj _M_value.assign(1, __c); 403*38fd1498Szrj } 404*38fd1498Szrj // We MUST judge awk before handling backrefs. There's no backref in awk. 405*38fd1498Szrj else if (_M_is_awk()) 406*38fd1498Szrj { 407*38fd1498Szrj _M_eat_escape_awk(); 408*38fd1498Szrj return; 409*38fd1498Szrj } 410*38fd1498Szrj else if (_M_is_basic() && _M_ctype.is(_CtypeT::digit, __c) && __c != '0') 411*38fd1498Szrj { 412*38fd1498Szrj _M_token = _S_token_backref; 413*38fd1498Szrj _M_value.assign(1, __c); 414*38fd1498Szrj } 415*38fd1498Szrj else 416*38fd1498Szrj { 417*38fd1498Szrj #ifdef __STRICT_ANSI__ 418*38fd1498Szrj // POSIX says it is undefined to escape ordinary characters 419*38fd1498Szrj __throw_regex_error(regex_constants::error_escape, 420*38fd1498Szrj "Unexpected escape character."); 421*38fd1498Szrj #else 422*38fd1498Szrj _M_token = _S_token_ord_char; 423*38fd1498Szrj _M_value.assign(1, __c); 424*38fd1498Szrj #endif 425*38fd1498Szrj } 426*38fd1498Szrj ++_M_current; 427*38fd1498Szrj } 428*38fd1498Szrj 429*38fd1498Szrj template<typename _CharT> 430*38fd1498Szrj void 431*38fd1498Szrj _Scanner<_CharT>:: _M_eat_escape_awk()432*38fd1498Szrj _M_eat_escape_awk() 433*38fd1498Szrj { 434*38fd1498Szrj auto __c = *_M_current++; 435*38fd1498Szrj auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0')); 436*38fd1498Szrj 437*38fd1498Szrj if (__pos != nullptr) 438*38fd1498Szrj { 439*38fd1498Szrj _M_token = _S_token_ord_char; 440*38fd1498Szrj _M_value.assign(1, *__pos); 441*38fd1498Szrj } 442*38fd1498Szrj // \ddd for oct representation 443*38fd1498Szrj else if (_M_ctype.is(_CtypeT::digit, __c) 444*38fd1498Szrj && __c != '8' 445*38fd1498Szrj && __c != '9') 446*38fd1498Szrj { 447*38fd1498Szrj _M_value.assign(1, __c); 448*38fd1498Szrj for (int __i = 0; 449*38fd1498Szrj __i < 2 450*38fd1498Szrj && _M_current != _M_end 451*38fd1498Szrj && _M_ctype.is(_CtypeT::digit, *_M_current) 452*38fd1498Szrj && *_M_current != '8' 453*38fd1498Szrj && *_M_current != '9'; 454*38fd1498Szrj __i++) 455*38fd1498Szrj _M_value += *_M_current++; 456*38fd1498Szrj _M_token = _S_token_oct_num; 457*38fd1498Szrj return; 458*38fd1498Szrj } 459*38fd1498Szrj else 460*38fd1498Szrj __throw_regex_error(regex_constants::error_escape, 461*38fd1498Szrj "Unexpected escape character."); 462*38fd1498Szrj } 463*38fd1498Szrj 464*38fd1498Szrj // Eats a character class or throws an exception. 465*38fd1498Szrj // __ch could be ':', '.' or '=', _M_current is the char after ']' when 466*38fd1498Szrj // returning. 467*38fd1498Szrj template<typename _CharT> 468*38fd1498Szrj void 469*38fd1498Szrj _Scanner<_CharT>:: _M_eat_class(char __ch)470*38fd1498Szrj _M_eat_class(char __ch) 471*38fd1498Szrj { 472*38fd1498Szrj for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;) 473*38fd1498Szrj _M_value += *_M_current++; 474*38fd1498Szrj if (_M_current == _M_end 475*38fd1498Szrj || *_M_current++ != __ch 476*38fd1498Szrj || _M_current == _M_end // skip __ch 477*38fd1498Szrj || *_M_current++ != ']') // skip ']' 478*38fd1498Szrj { 479*38fd1498Szrj if (__ch == ':') 480*38fd1498Szrj __throw_regex_error(regex_constants::error_ctype, 481*38fd1498Szrj "Unexpected end of character class."); 482*38fd1498Szrj else 483*38fd1498Szrj __throw_regex_error(regex_constants::error_collate, 484*38fd1498Szrj "Unexpected end of character class."); 485*38fd1498Szrj } 486*38fd1498Szrj } 487*38fd1498Szrj 488*38fd1498Szrj #ifdef _GLIBCXX_DEBUG 489*38fd1498Szrj template<typename _CharT> 490*38fd1498Szrj std::ostream& 491*38fd1498Szrj _Scanner<_CharT>:: _M_print(std::ostream & ostr)492*38fd1498Szrj _M_print(std::ostream& ostr) 493*38fd1498Szrj { 494*38fd1498Szrj switch (_M_token) 495*38fd1498Szrj { 496*38fd1498Szrj case _S_token_anychar: 497*38fd1498Szrj ostr << "any-character\n"; 498*38fd1498Szrj break; 499*38fd1498Szrj case _S_token_backref: 500*38fd1498Szrj ostr << "backref\n"; 501*38fd1498Szrj break; 502*38fd1498Szrj case _S_token_bracket_begin: 503*38fd1498Szrj ostr << "bracket-begin\n"; 504*38fd1498Szrj break; 505*38fd1498Szrj case _S_token_bracket_neg_begin: 506*38fd1498Szrj ostr << "bracket-neg-begin\n"; 507*38fd1498Szrj break; 508*38fd1498Szrj case _S_token_bracket_end: 509*38fd1498Szrj ostr << "bracket-end\n"; 510*38fd1498Szrj break; 511*38fd1498Szrj case _S_token_char_class_name: 512*38fd1498Szrj ostr << "char-class-name \"" << _M_value << "\"\n"; 513*38fd1498Szrj break; 514*38fd1498Szrj case _S_token_closure0: 515*38fd1498Szrj ostr << "closure0\n"; 516*38fd1498Szrj break; 517*38fd1498Szrj case _S_token_closure1: 518*38fd1498Szrj ostr << "closure1\n"; 519*38fd1498Szrj break; 520*38fd1498Szrj case _S_token_collsymbol: 521*38fd1498Szrj ostr << "collsymbol \"" << _M_value << "\"\n"; 522*38fd1498Szrj break; 523*38fd1498Szrj case _S_token_comma: 524*38fd1498Szrj ostr << "comma\n"; 525*38fd1498Szrj break; 526*38fd1498Szrj case _S_token_dup_count: 527*38fd1498Szrj ostr << "dup count: " << _M_value << "\n"; 528*38fd1498Szrj break; 529*38fd1498Szrj case _S_token_eof: 530*38fd1498Szrj ostr << "EOF\n"; 531*38fd1498Szrj break; 532*38fd1498Szrj case _S_token_equiv_class_name: 533*38fd1498Szrj ostr << "equiv-class-name \"" << _M_value << "\"\n"; 534*38fd1498Szrj break; 535*38fd1498Szrj case _S_token_interval_begin: 536*38fd1498Szrj ostr << "interval begin\n"; 537*38fd1498Szrj break; 538*38fd1498Szrj case _S_token_interval_end: 539*38fd1498Szrj ostr << "interval end\n"; 540*38fd1498Szrj break; 541*38fd1498Szrj case _S_token_line_begin: 542*38fd1498Szrj ostr << "line begin\n"; 543*38fd1498Szrj break; 544*38fd1498Szrj case _S_token_line_end: 545*38fd1498Szrj ostr << "line end\n"; 546*38fd1498Szrj break; 547*38fd1498Szrj case _S_token_opt: 548*38fd1498Szrj ostr << "opt\n"; 549*38fd1498Szrj break; 550*38fd1498Szrj case _S_token_or: 551*38fd1498Szrj ostr << "or\n"; 552*38fd1498Szrj break; 553*38fd1498Szrj case _S_token_ord_char: 554*38fd1498Szrj ostr << "ordinary character: \"" << _M_value << "\"\n"; 555*38fd1498Szrj break; 556*38fd1498Szrj case _S_token_subexpr_begin: 557*38fd1498Szrj ostr << "subexpr begin\n"; 558*38fd1498Szrj break; 559*38fd1498Szrj case _S_token_subexpr_no_group_begin: 560*38fd1498Szrj ostr << "no grouping subexpr begin\n"; 561*38fd1498Szrj break; 562*38fd1498Szrj case _S_token_subexpr_lookahead_begin: 563*38fd1498Szrj ostr << "lookahead subexpr begin\n"; 564*38fd1498Szrj break; 565*38fd1498Szrj case _S_token_subexpr_end: 566*38fd1498Szrj ostr << "subexpr end\n"; 567*38fd1498Szrj break; 568*38fd1498Szrj case _S_token_unknown: 569*38fd1498Szrj ostr << "-- unknown token --\n"; 570*38fd1498Szrj break; 571*38fd1498Szrj case _S_token_oct_num: 572*38fd1498Szrj ostr << "oct number " << _M_value << "\n"; 573*38fd1498Szrj break; 574*38fd1498Szrj case _S_token_hex_num: 575*38fd1498Szrj ostr << "hex number " << _M_value << "\n"; 576*38fd1498Szrj break; 577*38fd1498Szrj case _S_token_quoted_class: 578*38fd1498Szrj ostr << "quoted class " << "\\" << _M_value << "\n"; 579*38fd1498Szrj break; 580*38fd1498Szrj default: 581*38fd1498Szrj _GLIBCXX_DEBUG_ASSERT(false); 582*38fd1498Szrj } 583*38fd1498Szrj return ostr; 584*38fd1498Szrj } 585*38fd1498Szrj #endif 586*38fd1498Szrj 587*38fd1498Szrj } // namespace __detail 588*38fd1498Szrj _GLIBCXX_END_NAMESPACE_VERSION 589*38fd1498Szrj } // namespace 590