1 // Copyright (c) 2008-2009 Ben Hanson 2 // Copyright (c) 2008-2011 Hartmut Kaiser 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 7 #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM) 8 #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM 9 10 #if defined(_MSC_VER) 11 #pragma once 12 #endif 13 14 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp> 15 #include <boost/spirit/home/support/detail/lexer/consts.hpp> 16 #include <boost/spirit/home/support/detail/lexer/rules.hpp> 17 #include <boost/spirit/home/support/detail/lexer/size_t.hpp> 18 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> 19 #include <boost/spirit/home/support/detail/lexer/debug.hpp> 20 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp> 21 #include <boost/algorithm/string.hpp> 22 #include <boost/scoped_array.hpp> 23 24 /////////////////////////////////////////////////////////////////////////////// 25 namespace boost { namespace spirit { namespace lex { namespace lexertl 26 { 27 namespace detail 28 { 29 30 /////////////////////////////////////////////////////////////////////////// 31 template <typename CharT> 32 struct string_lit; 33 34 template <> 35 struct string_lit<char> 36 { getboost::spirit::lex::lexertl::detail::string_lit37 static char get(char c) { return c; } getboost::spirit::lex::lexertl::detail::string_lit38 static std::string get(char const* str = "") { return str; } 39 }; 40 41 template <> 42 struct string_lit<wchar_t> 43 { getboost::spirit::lex::lexertl::detail::string_lit44 static wchar_t get(char c) 45 { 46 typedef std::ctype<wchar_t> ctype_t; 47 return std::use_facet<ctype_t>(std::locale()).widen(c); 48 } getboost::spirit::lex::lexertl::detail::string_lit49 static std::basic_string<wchar_t> get(char const* source = "") 50 { 51 using namespace std; // some systems have size_t in ns std 52 size_t len = strlen(source); 53 boost::scoped_array<wchar_t> result (new wchar_t[len+1]); 54 result.get()[len] = '\0'; 55 56 // working with wide character streams is supported only if the 57 // platform provides the std::ctype<wchar_t> facet 58 BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale())); 59 60 std::use_facet<std::ctype<wchar_t> >(std::locale()) 61 .widen(source, source + len, result.get()); 62 return result.get(); 63 } 64 }; 65 66 template <typename Char> L(char c)67 inline Char L(char c) 68 { 69 return string_lit<Char>::get(c); 70 } 71 72 template <typename Char> L(char const * c="")73 inline std::basic_string<Char> L(char const* c = "") 74 { 75 return string_lit<Char>::get(c); 76 } 77 78 /////////////////////////////////////////////////////////////////////////// 79 template <typename Char> 80 inline bool generate_delimiter(std::basic_ostream<Char> & os_)81 generate_delimiter(std::basic_ostream<Char> &os_) 82 { 83 os_ << std::basic_string<Char>(80, '/') << "\n"; 84 return os_.good(); 85 } 86 87 /////////////////////////////////////////////////////////////////////////// 88 // Generate a table of the names of the used lexer states, which is a bit 89 // tricky, because the table stored with the rules is sorted based on the 90 // names, but we need it sorted using the state ids. 91 template <typename Char> 92 inline bool generate_cpp_state_info(boost::lexer::basic_rules<Char> const & rules_,std::basic_ostream<Char> & os_,Char const * name_suffix)93 generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_ 94 , std::basic_ostream<Char> &os_, Char const* name_suffix) 95 { 96 // we need to re-sort the state names in ascending order of the state 97 // ids, filling possible gaps in between later 98 typedef typename 99 boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator 100 state_iterator; 101 typedef std::map<std::size_t, Char const*> reverse_state_map_type; 102 103 reverse_state_map_type reverse_state_map; 104 state_iterator send = rules_.statemap().end(); 105 for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit) 106 { 107 typedef typename reverse_state_map_type::value_type value_type; 108 reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str())); 109 } 110 111 generate_delimiter(os_); 112 os_ << "// this table defines the names of the lexer states\n"; 113 os_ << boost::lexer::detail::strings<Char>::char_name() 114 << " const* const lexer_state_names" 115 << (name_suffix[0] ? "_" : "") << name_suffix 116 << "[" << rules_.statemap().size() << "] = \n{\n"; 117 118 typedef typename reverse_state_map_type::iterator iterator; 119 iterator rend = reverse_state_map.end(); 120 std::size_t last_id = 0; 121 for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id) 122 { 123 for (/**/; last_id < (*rit).first; ++last_id) 124 { 125 os_ << " 0, // \"<undefined state>\"\n"; 126 } 127 os_ << " " 128 << boost::lexer::detail::strings<Char>::char_prefix() 129 << "\"" << (*rit).second << "\""; 130 if (++rit != rend) 131 os_ << ",\n"; 132 else 133 os_ << "\n"; // don't generate the final comma 134 } 135 os_ << "};\n\n"; 136 137 generate_delimiter(os_); 138 os_ << "// this variable defines the number of lexer states\n"; 139 os_ << "std::size_t const lexer_state_count" 140 << (name_suffix[0] ? "_" : "") << name_suffix 141 << " = " << rules_.statemap().size() << ";\n\n"; 142 return os_.good(); 143 } 144 145 template <typename Char> 146 inline bool generate_cpp_state_table(std::basic_ostream<Char> & os_,Char const * name_suffix,bool bol,bool eol)147 generate_cpp_state_table (std::basic_ostream<Char> &os_ 148 , Char const* name_suffix, bool bol, bool eol) 149 { 150 std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : "")); 151 suffix += name_suffix; 152 153 generate_delimiter(os_); 154 os_ << "// this defines a generic accessors for the information above\n"; 155 os_ << "struct lexer" << suffix << "\n{\n"; 156 os_ << " // version number and feature-set of compatible static lexer engine\n"; 157 os_ << " enum\n"; 158 os_ << " {\n static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n"; 159 os_ << " supports_bol = " << std::boolalpha << bol << ",\n"; 160 os_ << " supports_eol = " << std::boolalpha << eol << "\n"; 161 os_ << " };\n\n"; 162 os_ << " // return the number of lexer states\n"; 163 os_ << " static std::size_t state_count()\n"; 164 os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n"; 165 os_ << " // return the name of the lexer state as given by 'idx'\n"; 166 os_ << " static " << boost::lexer::detail::strings<Char>::char_name() 167 << " const* state_name(std::size_t idx)\n"; 168 os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n"; 169 os_ << " // return the next matched token\n"; 170 os_ << " template<typename Iterator>\n"; 171 os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n"; 172 os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n"; 173 os_ << " {\n return next_token" << suffix 174 << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n"; 175 os_ << "};\n\n"; 176 return os_.good(); 177 } 178 179 /////////////////////////////////////////////////////////////////////////// 180 // generate function body based on traversing the DFA tables 181 template <typename Char> generate_function_body_dfa(std::basic_ostream<Char> & os_,boost::lexer::basic_state_machine<Char> const & sm_)182 bool generate_function_body_dfa(std::basic_ostream<Char>& os_ 183 , boost::lexer::basic_state_machine<Char> const &sm_) 184 { 185 std::size_t const dfas_ = sm_.data()._dfa->size(); 186 std::size_t const lookups_ = sm_.data()._lookup->front()->size(); 187 188 os_ << " enum {end_state_index, id_index, unique_id_index, " 189 "state_index, bol_index,\n"; 190 os_ << " eol_index, dead_state_index, dfa_offset};\n\n"; 191 os_ << " static std::size_t const npos = " 192 "static_cast<std::size_t>(~0);\n"; 193 194 if (dfas_ > 1) 195 { 196 for (std::size_t state_ = 0; state_ < dfas_; ++state_) 197 { 198 std::size_t i_ = 0; 199 std::size_t j_ = 1; 200 std::size_t count_ = lookups_ / 8; 201 std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front(); 202 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front(); 203 204 os_ << " static std::size_t const lookup" << state_ 205 << "_[" << lookups_ << "] = {\n "; 206 for (/**/; i_ < count_; ++i_) 207 { 208 std::size_t const index_ = i_ * 8; 209 os_ << lookup_[index_]; 210 for (/**/; j_ < 8; ++j_) 211 { 212 os_ << ", " << lookup_[index_ + j_]; 213 } 214 if (i_ < count_ - 1) 215 { 216 os_ << ",\n "; 217 } 218 j_ = 1; 219 } 220 os_ << " };\n"; 221 222 count_ = sm_.data()._dfa[state_]->size (); 223 os_ << " static const std::size_t dfa" << state_ << "_[" 224 << count_ << "] = {\n "; 225 count_ /= 8; 226 for (i_ = 0; i_ < count_; ++i_) 227 { 228 std::size_t const index_ = i_ * 8; 229 os_ << dfa_[index_]; 230 for (j_ = 1; j_ < 8; ++j_) 231 { 232 os_ << ", " << dfa_[index_ + j_]; 233 } 234 if (i_ < count_ - 1) 235 { 236 os_ << ",\n "; 237 } 238 } 239 240 std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8; 241 if (mod_) 242 { 243 std::size_t const index_ = count_ * 8; 244 if (count_) 245 { 246 os_ << ",\n "; 247 } 248 os_ << dfa_[index_]; 249 for (j_ = 1; j_ < mod_; ++j_) 250 { 251 os_ << ", " << dfa_[index_ + j_]; 252 } 253 } 254 os_ << " };\n"; 255 } 256 257 std::size_t count_ = sm_.data()._dfa_alphabet.size(); 258 std::size_t i_ = 1; 259 260 os_ << " static std::size_t const* lookup_arr_[" << count_ 261 << "] = { lookup0_"; 262 for (i_ = 1; i_ < count_; ++i_) 263 { 264 os_ << ", " << "lookup" << i_ << "_"; 265 } 266 os_ << " };\n"; 267 268 os_ << " static std::size_t const dfa_alphabet_arr_[" 269 << count_ << "] = { "; 270 os_ << sm_.data()._dfa_alphabet.front (); 271 for (i_ = 1; i_ < count_; ++i_) 272 { 273 os_ << ", " << sm_.data()._dfa_alphabet[i_]; 274 } 275 os_ << " };\n"; 276 277 os_ << " static std::size_t const* dfa_arr_[" << count_ 278 << "] = { "; 279 os_ << "dfa0_"; 280 for (i_ = 1; i_ < count_; ++i_) 281 { 282 os_ << ", " << "dfa" << i_ << "_"; 283 } 284 os_ << " };\n"; 285 } 286 else 287 { 288 std::size_t const* lookup_ = &sm_.data()._lookup[0]->front(); 289 std::size_t const* dfa_ = &sm_.data()._dfa[0]->front(); 290 std::size_t i_ = 0; 291 std::size_t j_ = 1; 292 std::size_t count_ = lookups_ / 8; 293 294 os_ << " static std::size_t const lookup_["; 295 os_ << sm_.data()._lookup[0]->size() << "] = {\n "; 296 for (/**/; i_ < count_; ++i_) 297 { 298 const std::size_t index_ = i_ * 8; 299 os_ << lookup_[index_]; 300 for (/**/; j_ < 8; ++j_) 301 { 302 os_ << ", " << lookup_[index_ + j_]; 303 } 304 if (i_ < count_ - 1) 305 { 306 os_ << ",\n "; 307 } 308 j_ = 1; 309 } 310 os_ << " };\n"; 311 312 os_ << " static std::size_t const dfa_alphabet_ = " 313 << sm_.data()._dfa_alphabet.front () << ";\n"; 314 os_ << " static std::size_t const dfa_[" 315 << sm_.data()._dfa[0]->size () << "] = {\n "; 316 count_ = sm_.data()._dfa[0]->size () / 8; 317 for (i_ = 0; i_ < count_; ++i_) 318 { 319 const std::size_t index_ = i_ * 8; 320 os_ << dfa_[index_]; 321 for (j_ = 1; j_ < 8; ++j_) 322 { 323 os_ << ", " << dfa_[index_ + j_]; 324 } 325 if (i_ < count_ - 1) 326 { 327 os_ << ",\n "; 328 } 329 } 330 331 const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8; 332 if (mod_) 333 { 334 const std::size_t index_ = count_ * 8; 335 if (count_) 336 { 337 os_ << ",\n "; 338 } 339 os_ << dfa_[index_]; 340 for (j_ = 1; j_ < mod_; ++j_) 341 { 342 os_ << ", " << dfa_[index_ + j_]; 343 } 344 } 345 os_ << " };\n"; 346 } 347 348 os_ << "\n if (start_token_ == end_)\n"; 349 os_ << " {\n"; 350 os_ << " unique_id_ = npos;\n"; 351 os_ << " return 0;\n"; 352 os_ << " }\n\n"; 353 if (sm_.data()._seen_BOL_assertion) 354 { 355 os_ << " bool bol = bol_;\n\n"; 356 } 357 358 if (dfas_ > 1) 359 { 360 os_ << "again:\n"; 361 os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n"; 362 os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n"; 363 os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n"; 364 } 365 366 os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n"; 367 os_ << " Iterator curr_ = start_token_;\n"; 368 os_ << " bool end_state_ = *ptr_ != 0;\n"; 369 os_ << " std::size_t id_ = *(ptr_ + id_index);\n"; 370 os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n"; 371 if (dfas_ > 1) 372 { 373 os_ << " std::size_t end_start_state_ = start_state_;\n"; 374 } 375 if (sm_.data()._seen_BOL_assertion) 376 { 377 os_ << " bool end_bol_ = bol_;\n"; 378 } 379 os_ << " Iterator end_token_ = start_token_;\n\n"; 380 381 os_ << " while (curr_ != end_)\n"; 382 os_ << " {\n"; 383 384 if (sm_.data()._seen_BOL_assertion) 385 { 386 os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n"; 387 } 388 389 if (sm_.data()._seen_EOL_assertion) 390 { 391 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; 392 } 393 394 if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion) 395 { 396 os_ << " if (BOL_state_ && bol)\n"; 397 os_ << " {\n"; 398 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; 399 os_ << " }\n"; 400 os_ << " else if (EOL_state_ && *curr_ == '\\n')\n"; 401 os_ << " {\n"; 402 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; 403 os_ << " }\n"; 404 os_ << " else\n"; 405 os_ << " {\n"; 406 if (lookups_ == 256) 407 { 408 os_ << " unsigned char index = \n"; 409 os_ << " static_cast<unsigned char>(*curr_++);\n"; 410 } 411 else 412 { 413 os_ << " std::size_t index = *curr_++\n"; 414 } 415 os_ << " bol = (index == '\\n') ? true : false;\n"; 416 os_ << " std::size_t const state_ = ptr_[\n"; 417 os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; 418 419 os_ << '\n'; 420 os_ << " if (state_ == 0) break;\n"; 421 os_ << '\n'; 422 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; 423 os_ << " }\n\n"; 424 } 425 else if (sm_.data()._seen_BOL_assertion) 426 { 427 os_ << " if (BOL_state_ && bol)\n"; 428 os_ << " {\n"; 429 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; 430 os_ << " }\n"; 431 os_ << " else\n"; 432 os_ << " {\n"; 433 if (lookups_ == 256) 434 { 435 os_ << " unsigned char index = \n"; 436 os_ << " static_cast<unsigned char>(*curr_++);\n"; 437 } 438 else 439 { 440 os_ << " std::size_t index = *curr_++\n"; 441 } 442 os_ << " bol = (index == '\\n') ? true : false;\n"; 443 os_ << " std::size_t const state_ = ptr_[\n"; 444 os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; 445 446 os_ << '\n'; 447 os_ << " if (state_ == 0) break;\n"; 448 os_ << '\n'; 449 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; 450 os_ << " }\n\n"; 451 } 452 else if (sm_.data()._seen_EOL_assertion) 453 { 454 os_ << " if (EOL_state_ && *curr_ == '\\n')\n"; 455 os_ << " {\n"; 456 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; 457 os_ << " }\n"; 458 os_ << " else\n"; 459 os_ << " {\n"; 460 if (lookups_ == 256) 461 { 462 os_ << " unsigned char index = \n"; 463 os_ << " static_cast<unsigned char>(*curr_++);\n"; 464 } 465 else 466 { 467 os_ << " std::size_t index = *curr_++\n"; 468 } 469 os_ << " bol = (index == '\\n') ? true : false;\n"; 470 os_ << " std::size_t const state_ = ptr_[\n"; 471 os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; 472 473 os_ << '\n'; 474 os_ << " if (state_ == 0) break;\n"; 475 os_ << '\n'; 476 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; 477 os_ << " }\n\n"; 478 } 479 else 480 { 481 os_ << " std::size_t const state_ =\n"; 482 483 if (lookups_ == 256) 484 { 485 os_ << " ptr_[lookup_[" 486 "static_cast<unsigned char>(*curr_++)]];\n"; 487 } 488 else 489 { 490 os_ << " ptr_[lookup_[*curr_++]];\n"; 491 } 492 493 os_ << '\n'; 494 os_ << " if (state_ == 0) break;\n"; 495 os_ << '\n'; 496 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n"; 497 } 498 499 os_ << " if (*ptr_)\n"; 500 os_ << " {\n"; 501 os_ << " end_state_ = true;\n"; 502 os_ << " id_ = *(ptr_ + id_index);\n"; 503 os_ << " uid_ = *(ptr_ + unique_id_index);\n"; 504 if (dfas_ > 1) 505 { 506 os_ << " end_start_state_ = *(ptr_ + state_index);\n"; 507 } 508 if (sm_.data()._seen_BOL_assertion) 509 { 510 os_ << " end_bol_ = bol;\n"; 511 } 512 os_ << " end_token_ = curr_;\n"; 513 os_ << " }\n"; 514 os_ << " }\n\n"; 515 516 if (sm_.data()._seen_EOL_assertion) 517 { 518 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; 519 520 os_ << " if (EOL_state_ && curr_ == end_)\n"; 521 os_ << " {\n"; 522 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n"; 523 524 os_ << " if (*ptr_)\n"; 525 os_ << " {\n"; 526 os_ << " end_state_ = true;\n"; 527 os_ << " id_ = *(ptr_ + id_index);\n"; 528 os_ << " uid_ = *(ptr_ + unique_id_index);\n"; 529 if (dfas_ > 1) 530 { 531 os_ << " end_start_state_ = *(ptr_ + state_index);\n"; 532 } 533 if (sm_.data()._seen_BOL_assertion) 534 { 535 os_ << " end_bol_ = bol;\n"; 536 } 537 os_ << " end_token_ = curr_;\n"; 538 os_ << " }\n"; 539 os_ << " }\n\n"; 540 } 541 542 os_ << " if (end_state_)\n"; 543 os_ << " {\n"; 544 os_ << " // return longest match\n"; 545 os_ << " start_token_ = end_token_;\n"; 546 547 if (dfas_ > 1) 548 { 549 os_ << " start_state_ = end_start_state_;\n"; 550 os_ << " if (id_ == 0)\n"; 551 os_ << " {\n"; 552 if (sm_.data()._seen_BOL_assertion) 553 { 554 os_ << " bol = end_bol_;\n"; 555 } 556 os_ << " goto again;\n"; 557 os_ << " }\n"; 558 if (sm_.data()._seen_BOL_assertion) 559 { 560 os_ << " else\n"; 561 os_ << " {\n"; 562 os_ << " bol_ = end_bol_;\n"; 563 os_ << " }\n"; 564 } 565 } 566 else if (sm_.data()._seen_BOL_assertion) 567 { 568 os_ << " bol_ = end_bol_;\n"; 569 } 570 571 os_ << " }\n"; 572 os_ << " else\n"; 573 os_ << " {\n"; 574 575 if (sm_.data()._seen_BOL_assertion) 576 { 577 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n"; 578 } 579 580 os_ << " id_ = npos;\n"; 581 os_ << " uid_ = npos;\n"; 582 os_ << " }\n\n"; 583 584 os_ << " unique_id_ = uid_;\n"; 585 os_ << " return id_;\n"; 586 return os_.good(); 587 } 588 589 /////////////////////////////////////////////////////////////////////////// 590 template <typename Char> get_charlit(Char ch)591 inline std::basic_string<Char> get_charlit(Char ch) 592 { 593 std::basic_string<Char> result; 594 boost::lexer::basic_string_token<Char>::escape_char(ch, result); 595 return result; 596 } 597 598 // check whether state0_0 is referenced from any of the other states 599 template <typename Char> need_label0_0(boost::lexer::basic_state_machine<Char> const & sm_)600 bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_) 601 { 602 typedef typename boost::lexer::basic_state_machine<Char>::iterator 603 iterator_type; 604 iterator_type iter_ = sm_.begin(); 605 std::size_t const states_ = iter_->states; 606 607 for (std::size_t state_ = 0; state_ < states_; ++state_) 608 { 609 if (0 == iter_->bol_index || 0 == iter_->eol_index) 610 { 611 return true; 612 } 613 614 std::size_t const transitions_ = iter_->transitions; 615 for (std::size_t t_ = 0; t_ < transitions_; ++t_) 616 { 617 if (0 == iter_->goto_state) 618 { 619 return true; 620 } 621 ++iter_; 622 } 623 if (transitions_ == 0) ++iter_; 624 } 625 return false; 626 } 627 628 /////////////////////////////////////////////////////////////////////////// 629 template <typename Char> generate_function_body_switch(std::basic_ostream<Char> & os_,boost::lexer::basic_state_machine<Char> const & sm_)630 bool generate_function_body_switch(std::basic_ostream<Char> & os_ 631 , boost::lexer::basic_state_machine<Char> const &sm_) 632 { 633 typedef typename boost::lexer::basic_state_machine<Char>::iterator 634 iterator_type; 635 636 std::size_t const lookups_ = sm_.data()._lookup->front ()->size (); 637 iterator_type iter_ = sm_.begin(); 638 iterator_type labeliter_ = iter_; 639 iterator_type end_ = sm_.end(); 640 std::size_t const dfas_ = sm_.data()._dfa->size (); 641 642 os_ << " static std::size_t const npos = " 643 "static_cast<std::size_t>(~0);\n"; 644 645 os_ << "\n if (start_token_ == end_)\n"; 646 os_ << " {\n"; 647 os_ << " unique_id_ = npos;\n"; 648 os_ << " return 0;\n"; 649 os_ << " }\n\n"; 650 651 if (sm_.data()._seen_BOL_assertion) 652 { 653 os_ << " bool bol = bol_;\n"; 654 } 655 656 if (dfas_ > 1) 657 { 658 os_ << "again:\n"; 659 } 660 661 os_ << " Iterator curr_ = start_token_;\n"; 662 os_ << " bool end_state_ = false;\n"; 663 os_ << " std::size_t id_ = npos;\n"; 664 os_ << " std::size_t uid_ = npos;\n"; 665 666 if (dfas_ > 1) 667 { 668 os_ << " std::size_t end_start_state_ = start_state_;\n"; 669 } 670 671 if (sm_.data()._seen_BOL_assertion) 672 { 673 os_ << " bool end_bol_ = bol_;\n"; 674 } 675 676 os_ << " Iterator end_token_ = start_token_;\n"; 677 os_ << '\n'; 678 679 os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t") 680 << " ch_ = 0;\n\n"; 681 682 if (dfas_ > 1) 683 { 684 os_ << " switch (start_state_)\n"; 685 os_ << " {\n"; 686 687 for (std::size_t i_ = 0; i_ < dfas_; ++i_) 688 { 689 os_ << " case " << i_ << ":\n"; 690 os_ << " goto state" << i_ << "_0;\n"; 691 os_ << " break;\n"; 692 } 693 694 os_ << " default:\n"; 695 os_ << " goto end;\n"; 696 os_ << " break;\n"; 697 os_ << " }\n"; 698 } 699 700 bool need_state0_0_label = need_label0_0(sm_); 701 702 for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_) 703 { 704 std::size_t const states_ = iter_->states; 705 for (std::size_t state_ = 0; state_ < states_; ++state_) 706 { 707 std::size_t const transitions_ = iter_->transitions; 708 std::size_t t_ = 0; 709 710 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label) 711 { 712 os_ << "\nstate" << dfa_ << '_' << state_ << ":\n"; 713 } 714 715 if (iter_->end_state) 716 { 717 os_ << " end_state_ = true;\n"; 718 os_ << " id_ = " << iter_->id << ";\n"; 719 os_ << " uid_ = " << iter_->unique_id << ";\n"; 720 os_ << " end_token_ = curr_;\n"; 721 722 if (dfas_ > 1) 723 { 724 os_ << " end_start_state_ = " << iter_->goto_dfa << 725 ";\n"; 726 } 727 728 if (sm_.data()._seen_BOL_assertion) 729 { 730 os_ << " end_bol_ = bol;\n"; 731 } 732 733 if (transitions_) os_ << '\n'; 734 } 735 736 if (t_ < transitions_ || 737 iter_->bol_index != boost::lexer::npos || 738 iter_->eol_index != boost::lexer::npos) 739 { 740 os_ << " if (curr_ == end_) goto end;\n"; 741 os_ << " ch_ = *curr_;\n"; 742 if (iter_->bol_index != boost::lexer::npos) 743 { 744 os_ << "\n if (bol) goto state" << dfa_ << '_' 745 << iter_->bol_index << ";\n"; 746 } 747 if (iter_->eol_index != boost::lexer::npos) 748 { 749 os_ << "\n if (ch_ == '\\n') goto state" << dfa_ 750 << '_' << iter_->eol_index << ";\n"; 751 } 752 os_ << " ++curr_;\n"; 753 } 754 755 for (/**/; t_ < transitions_; ++t_) 756 { 757 Char const *ptr_ = iter_->token._charset.c_str(); 758 Char const *end_ = ptr_ + iter_->token._charset.size(); 759 Char start_char_ = 0; 760 Char curr_char_ = 0; 761 bool range_ = false; 762 bool first_char_ = true; 763 764 os_ << "\n if ("; 765 766 while (ptr_ != end_) 767 { 768 curr_char_ = *ptr_++; 769 770 if (*ptr_ == curr_char_ + 1) 771 { 772 if (!range_) 773 { 774 start_char_ = curr_char_; 775 } 776 range_ = true; 777 } 778 else 779 { 780 if (!first_char_) 781 { 782 os_ << ((iter_->token._negated) ? " && " : " || "); 783 } 784 else 785 { 786 first_char_ = false; 787 } 788 if (range_) 789 { 790 if (iter_->token._negated) 791 { 792 os_ << "!"; 793 } 794 os_ << "(ch_ >= '" << get_charlit(start_char_) 795 << "' && ch_ <= '" 796 << get_charlit(curr_char_) << "')"; 797 range_ = false; 798 } 799 else 800 { 801 os_ << "ch_ " 802 << ((iter_->token._negated) ? "!=" : "==") 803 << " '" << get_charlit(curr_char_) << "'"; 804 } 805 } 806 } 807 808 os_ << ") goto state" << dfa_ << '_' << iter_->goto_state 809 << ";\n"; 810 ++iter_; 811 } 812 813 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1)) 814 { 815 os_ << " goto end;\n"; 816 } 817 818 if (transitions_ == 0) ++iter_; 819 } 820 } 821 822 os_ << "\nend:\n"; 823 os_ << " if (end_state_)\n"; 824 os_ << " {\n"; 825 os_ << " // return longest match\n"; 826 os_ << " start_token_ = end_token_;\n"; 827 828 if (dfas_ > 1) 829 { 830 os_ << " start_state_ = end_start_state_;\n"; 831 os_ << "\n if (id_ == 0)\n"; 832 os_ << " {\n"; 833 834 if (sm_.data()._seen_BOL_assertion) 835 { 836 os_ << " bol = end_bol_;\n"; 837 } 838 839 os_ << " goto again;\n"; 840 os_ << " }\n"; 841 842 if (sm_.data()._seen_BOL_assertion) 843 { 844 os_ << " else\n"; 845 os_ << " {\n"; 846 os_ << " bol_ = end_bol_;\n"; 847 os_ << " }\n"; 848 } 849 } 850 else if (sm_.data()._seen_BOL_assertion) 851 { 852 os_ << " bol_ = end_bol_;\n"; 853 } 854 855 os_ << " }\n"; 856 os_ << " else\n"; 857 os_ << " {\n"; 858 859 if (sm_.data()._seen_BOL_assertion) 860 { 861 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n"; 862 } 863 os_ << " id_ = npos;\n"; 864 os_ << " uid_ = npos;\n"; 865 os_ << " }\n\n"; 866 867 os_ << " unique_id_ = uid_;\n"; 868 os_ << " return id_;\n"; 869 return os_.good(); 870 } 871 872 /////////////////////////////////////////////////////////////////////////// 873 // Generate a tokenizer for the given state machine. 874 template <typename Char, typename F> 875 inline bool generate_cpp(boost::lexer::basic_state_machine<Char> const & sm_,boost::lexer::basic_rules<Char> const & rules_,std::basic_ostream<Char> & os_,Char const * name_suffix,F generate_function_body)876 generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_ 877 , boost::lexer::basic_rules<Char> const& rules_ 878 , std::basic_ostream<Char> &os_, Char const* name_suffix 879 , F generate_function_body) 880 { 881 if (sm_.data()._lookup->empty()) 882 return false; 883 884 std::size_t const dfas_ = sm_.data()._dfa->size(); 885 // std::size_t const lookups_ = sm_.data()._lookup->front()->size(); 886 887 os_ << "// Copyright (c) 2008-2009 Ben Hanson\n"; 888 os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n"; 889 os_ << "//\n"; 890 os_ << "// Distributed under the Boost Software License, " 891 "Version 1.0. (See accompanying\n"; 892 os_ << "// file licence_1_0.txt or copy at " 893 "http://www.boost.org/LICENSE_1_0.txt)\n\n"; 894 os_ << "// Auto-generated by boost::lexer, do not edit\n\n"; 895 896 std::basic_string<Char> guard(name_suffix); 897 guard += L<Char>(name_suffix[0] ? "_" : ""); 898 guard += L<Char>(__DATE__ "_" __TIME__); 899 typename std::basic_string<Char>::size_type p = 900 guard.find_first_of(L<Char>(": ")); 901 while (std::string::npos != p) 902 { 903 guard.replace(p, 1, L<Char>("_")); 904 p = guard.find_first_of(L<Char>(": "), p); 905 } 906 boost::to_upper(guard); 907 908 os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n"; 909 os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n"; 910 911 os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n"; 912 913 generate_delimiter(os_); 914 os_ << "// the generated table of state names and the tokenizer have to be\n" 915 "// defined in the boost::spirit::lex::lexertl::static_ namespace\n"; 916 os_ << "namespace boost { namespace spirit { namespace lex { " 917 "namespace lexertl { namespace static_ {\n\n"; 918 919 // generate the lexer state information variables 920 if (!generate_cpp_state_info(rules_, os_, name_suffix)) 921 return false; 922 923 generate_delimiter(os_); 924 os_ << "// this function returns the next matched token\n"; 925 os_ << "template<typename Iterator>\n"; 926 os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "") 927 << name_suffix << " ("; 928 929 if (dfas_ > 1) 930 { 931 os_ << "std::size_t& start_state_, "; 932 } 933 else 934 { 935 os_ << "std::size_t& /*start_state_*/, "; 936 } 937 if (sm_.data()._seen_BOL_assertion) 938 { 939 os_ << "bool& bol_, "; 940 } 941 else 942 { 943 os_ << "bool& /*bol_*/, "; 944 } 945 os_ << "\n "; 946 947 os_ << "Iterator &start_token_, Iterator const& end_, "; 948 os_ << "std::size_t& unique_id_)\n"; 949 os_ << "{\n"; 950 if (!generate_function_body(os_, sm_)) 951 return false; 952 os_ << "}\n\n"; 953 954 if (!generate_cpp_state_table<Char>(os_, name_suffix 955 , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion)) 956 { 957 return false; 958 } 959 960 os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n"; 961 962 os_ << "#endif\n"; 963 964 return os_.good(); 965 } 966 967 } // namespace detail 968 969 /////////////////////////////////////////////////////////////////////////// 970 template <typename Lexer, typename F> 971 inline bool generate_static(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix,F f)972 generate_static(Lexer const& lexer 973 , std::basic_ostream<typename Lexer::char_type>& os 974 , typename Lexer::char_type const* name_suffix, F f) 975 { 976 if (!lexer.init_dfa(true)) // always minimize DFA for static lexers 977 return false; 978 return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os 979 , name_suffix, f); 980 } 981 982 /////////////////////////////////////////////////////////////////////////// 983 // deprecated function, will be removed in the future (this has been 984 // replaced by the function generate_static_dfa - see below). 985 template <typename Lexer> 986 inline bool generate_static(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())987 generate_static(Lexer const& lexer 988 , std::basic_ostream<typename Lexer::char_type>& os 989 , typename Lexer::char_type const* name_suffix = 990 detail::L<typename Lexer::char_type>()) 991 { 992 return generate_static(lexer, os, name_suffix 993 , &detail::generate_function_body_dfa<typename Lexer::char_type>); 994 } 995 996 /////////////////////////////////////////////////////////////////////////// 997 template <typename Lexer> 998 inline bool generate_static_dfa(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())999 generate_static_dfa(Lexer const& lexer 1000 , std::basic_ostream<typename Lexer::char_type>& os 1001 , typename Lexer::char_type const* name_suffix = 1002 detail::L<typename Lexer::char_type>()) 1003 { 1004 return generate_static(lexer, os, name_suffix 1005 , &detail::generate_function_body_dfa<typename Lexer::char_type>); 1006 } 1007 1008 /////////////////////////////////////////////////////////////////////////// 1009 template <typename Lexer> 1010 inline bool generate_static_switch(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())1011 generate_static_switch(Lexer const& lexer 1012 , std::basic_ostream<typename Lexer::char_type>& os 1013 , typename Lexer::char_type const* name_suffix = 1014 detail::L<typename Lexer::char_type>()) 1015 { 1016 return generate_static(lexer, os, name_suffix 1017 , &detail::generate_function_body_switch<typename Lexer::char_type>); 1018 } 1019 1020 /////////////////////////////////////////////////////////////////////////////// 1021 }}}} 1022 1023 #endif 1024