1 // Copyright (c) 2008-2009 Ben Hanson 2 // Copyright (c) 2008-2011 Hartmut Kaiser 3 // 4 // Distributed under the Boost Software License, Version 1.0. (See accompanying 5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 6 7 #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM) 8 #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM 9 10 #if defined(_MSC_VER) 11 #pragma once 12 #endif 13 14 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp> 15 #include <boost/spirit/home/support/detail/lexer/consts.hpp> 16 #include <boost/spirit/home/support/detail/lexer/rules.hpp> 17 #include <boost/spirit/home/support/detail/lexer/size_t.hpp> 18 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> 19 #include <boost/spirit/home/support/detail/lexer/debug.hpp> 20 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp> 21 #include <boost/algorithm/string.hpp> 22 #include <boost/lexical_cast.hpp> 23 #include <boost/scoped_array.hpp> 24 25 /////////////////////////////////////////////////////////////////////////////// 26 namespace boost { namespace spirit { namespace lex { namespace lexertl 27 { 28 namespace detail 29 { 30 31 /////////////////////////////////////////////////////////////////////////// 32 template <typename CharT> 33 struct string_lit; 34 35 template <> 36 struct string_lit<char> 37 { getboost::spirit::lex::lexertl::detail::string_lit38 static char get(char c) { return c; } getboost::spirit::lex::lexertl::detail::string_lit39 static std::string get(char const* str = "") { return str; } 40 }; 41 42 template <> 43 struct string_lit<wchar_t> 44 { getboost::spirit::lex::lexertl::detail::string_lit45 static wchar_t get(char c) 46 { 47 typedef std::ctype<wchar_t> ctype_t; 48 return std::use_facet<ctype_t>(std::locale()).widen(c); 49 } getboost::spirit::lex::lexertl::detail::string_lit50 static std::basic_string<wchar_t> get(char const* source = "") 51 { 52 using namespace std; // some systems have size_t in ns std 53 size_t len = strlen(source); 54 boost::scoped_array<wchar_t> result (new wchar_t[len+1]); 55 result.get()[len] = '\0'; 56 57 // working with wide character streams is supported only if the 58 // platform provides the std::ctype<wchar_t> facet 59 BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale())); 60 61 std::use_facet<std::ctype<wchar_t> >(std::locale()) 62 .widen(source, source + len, result.get()); 63 return result.get(); 64 } 65 }; 66 67 template <typename Char> L(char c)68 inline Char L(char c) 69 { 70 return string_lit<Char>::get(c); 71 } 72 73 template <typename Char> L(char const * c="")74 inline std::basic_string<Char> L(char const* c = "") 75 { 76 return string_lit<Char>::get(c); 77 } 78 79 /////////////////////////////////////////////////////////////////////////// 80 template <typename Char> 81 inline bool generate_delimiter(std::basic_ostream<Char> & os_)82 generate_delimiter(std::basic_ostream<Char> &os_) 83 { 84 os_ << std::basic_string<Char>(80, '/') << "\n"; 85 return os_.good(); 86 } 87 88 /////////////////////////////////////////////////////////////////////////// 89 // Generate a table of the names of the used lexer states, which is a bit 90 // tricky, because the table stored with the rules is sorted based on the 91 // names, but we need it sorted using the state ids. 92 template <typename Char> 93 inline bool generate_cpp_state_info(boost::lexer::basic_rules<Char> const & rules_,std::basic_ostream<Char> & os_,Char const * name_suffix)94 generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_ 95 , std::basic_ostream<Char> &os_, Char const* name_suffix) 96 { 97 // we need to re-sort the state names in ascending order of the state 98 // ids, filling possible gaps in between later 99 typedef typename 100 boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator 101 state_iterator; 102 typedef std::map<std::size_t, Char const*> reverse_state_map_type; 103 104 reverse_state_map_type reverse_state_map; 105 state_iterator send = rules_.statemap().end(); 106 for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit) 107 { 108 typedef typename reverse_state_map_type::value_type value_type; 109 reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str())); 110 } 111 112 generate_delimiter(os_); 113 os_ << "// this table defines the names of the lexer states\n"; 114 os_ << boost::lexer::detail::strings<Char>::char_name() 115 << " const* const lexer_state_names" 116 << (name_suffix[0] ? "_" : "") << name_suffix 117 << "[" << rules_.statemap().size() << "] = \n{\n"; 118 119 typedef typename reverse_state_map_type::iterator iterator; 120 iterator rend = reverse_state_map.end(); 121 std::size_t last_id = 0; 122 for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id) 123 { 124 for (/**/; last_id < (*rit).first; ++last_id) 125 { 126 os_ << " 0, // \"<undefined state>\"\n"; 127 } 128 os_ << " " 129 << boost::lexer::detail::strings<Char>::char_prefix() 130 << "\"" << (*rit).second << "\""; 131 if (++rit != rend) 132 os_ << ",\n"; 133 else 134 os_ << "\n"; // don't generate the final comma 135 } 136 os_ << "};\n\n"; 137 138 generate_delimiter(os_); 139 os_ << "// this variable defines the number of lexer states\n"; 140 os_ << "std::size_t const lexer_state_count" 141 << (name_suffix[0] ? "_" : "") << name_suffix 142 << " = " << rules_.statemap().size() << ";\n\n"; 143 return os_.good(); 144 } 145 146 template <typename Char> 147 inline bool generate_cpp_state_table(std::basic_ostream<Char> & os_,Char const * name_suffix,bool bol,bool eol)148 generate_cpp_state_table (std::basic_ostream<Char> &os_ 149 , Char const* name_suffix, bool bol, bool eol) 150 { 151 std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : "")); 152 suffix += name_suffix; 153 154 generate_delimiter(os_); 155 os_ << "// this defines a generic accessors for the information above\n"; 156 os_ << "struct lexer" << suffix << "\n{\n"; 157 os_ << " // version number and feature-set of compatible static lexer engine\n"; 158 os_ << " enum\n"; 159 os_ << " {\n static_version = " 160 << boost::lexical_cast<std::basic_string<Char> >(SPIRIT_STATIC_LEXER_VERSION) 161 << ",\n"; 162 os_ << " supports_bol = " << std::boolalpha << bol << ",\n"; 163 os_ << " supports_eol = " << std::boolalpha << eol << "\n"; 164 os_ << " };\n\n"; 165 os_ << " // return the number of lexer states\n"; 166 os_ << " static std::size_t state_count()\n"; 167 os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n"; 168 os_ << " // return the name of the lexer state as given by 'idx'\n"; 169 os_ << " static " << boost::lexer::detail::strings<Char>::char_name() 170 << " const* state_name(std::size_t idx)\n"; 171 os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n"; 172 os_ << " // return the next matched token\n"; 173 os_ << " template<typename Iterator>\n"; 174 os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n"; 175 os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n"; 176 os_ << " {\n return next_token" << suffix 177 << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n"; 178 os_ << "};\n\n"; 179 return os_.good(); 180 } 181 182 /////////////////////////////////////////////////////////////////////////// 183 // generate function body based on traversing the DFA tables 184 template <typename Char> generate_function_body_dfa(std::basic_ostream<Char> & os_,boost::lexer::basic_state_machine<Char> const & sm_)185 bool generate_function_body_dfa(std::basic_ostream<Char>& os_ 186 , boost::lexer::basic_state_machine<Char> const &sm_) 187 { 188 std::size_t const dfas_ = sm_.data()._dfa->size(); 189 std::size_t const lookups_ = sm_.data()._lookup->front()->size(); 190 191 os_ << " enum {end_state_index, id_index, unique_id_index, " 192 "state_index, bol_index,\n"; 193 os_ << " eol_index, dead_state_index, dfa_offset};\n\n"; 194 os_ << " static std::size_t const npos = " 195 "static_cast<std::size_t>(~0);\n"; 196 197 if (dfas_ > 1) 198 { 199 for (std::size_t state_ = 0; state_ < dfas_; ++state_) 200 { 201 std::size_t i_ = 0; 202 std::size_t j_ = 1; 203 std::size_t count_ = lookups_ / 8; 204 std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front(); 205 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front(); 206 207 os_ << " static std::size_t const lookup" << state_ 208 << "_[" << lookups_ << "] = {\n "; 209 for (/**/; i_ < count_; ++i_) 210 { 211 std::size_t const index_ = i_ * 8; 212 os_ << lookup_[index_]; 213 for (/**/; j_ < 8; ++j_) 214 { 215 os_ << ", " << lookup_[index_ + j_]; 216 } 217 if (i_ < count_ - 1) 218 { 219 os_ << ",\n "; 220 } 221 j_ = 1; 222 } 223 os_ << " };\n"; 224 225 count_ = sm_.data()._dfa[state_]->size (); 226 os_ << " static const std::size_t dfa" << state_ << "_[" 227 << count_ << "] = {\n "; 228 count_ /= 8; 229 for (i_ = 0; i_ < count_; ++i_) 230 { 231 std::size_t const index_ = i_ * 8; 232 os_ << dfa_[index_]; 233 for (j_ = 1; j_ < 8; ++j_) 234 { 235 os_ << ", " << dfa_[index_ + j_]; 236 } 237 if (i_ < count_ - 1) 238 { 239 os_ << ",\n "; 240 } 241 } 242 243 std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8; 244 if (mod_) 245 { 246 std::size_t const index_ = count_ * 8; 247 if (count_) 248 { 249 os_ << ",\n "; 250 } 251 os_ << dfa_[index_]; 252 for (j_ = 1; j_ < mod_; ++j_) 253 { 254 os_ << ", " << dfa_[index_ + j_]; 255 } 256 } 257 os_ << " };\n"; 258 } 259 260 std::size_t count_ = sm_.data()._dfa_alphabet.size(); 261 std::size_t i_ = 1; 262 263 os_ << " static std::size_t const* lookup_arr_[" << count_ 264 << "] = { lookup0_"; 265 for (i_ = 1; i_ < count_; ++i_) 266 { 267 os_ << ", " << "lookup" << i_ << "_"; 268 } 269 os_ << " };\n"; 270 271 os_ << " static std::size_t const dfa_alphabet_arr_[" 272 << count_ << "] = { "; 273 os_ << sm_.data()._dfa_alphabet.front (); 274 for (i_ = 1; i_ < count_; ++i_) 275 { 276 os_ << ", " << sm_.data()._dfa_alphabet[i_]; 277 } 278 os_ << " };\n"; 279 280 os_ << " static std::size_t const* dfa_arr_[" << count_ 281 << "] = { "; 282 os_ << "dfa0_"; 283 for (i_ = 1; i_ < count_; ++i_) 284 { 285 os_ << ", " << "dfa" << i_ << "_"; 286 } 287 os_ << " };\n"; 288 } 289 else 290 { 291 std::size_t const* lookup_ = &sm_.data()._lookup[0]->front(); 292 std::size_t const* dfa_ = &sm_.data()._dfa[0]->front(); 293 std::size_t i_ = 0; 294 std::size_t j_ = 1; 295 std::size_t count_ = lookups_ / 8; 296 297 os_ << " static std::size_t const lookup_["; 298 os_ << sm_.data()._lookup[0]->size() << "] = {\n "; 299 for (/**/; i_ < count_; ++i_) 300 { 301 const std::size_t index_ = i_ * 8; 302 os_ << lookup_[index_]; 303 for (/**/; j_ < 8; ++j_) 304 { 305 os_ << ", " << lookup_[index_ + j_]; 306 } 307 if (i_ < count_ - 1) 308 { 309 os_ << ",\n "; 310 } 311 j_ = 1; 312 } 313 os_ << " };\n"; 314 315 os_ << " static std::size_t const dfa_alphabet_ = " 316 << sm_.data()._dfa_alphabet.front () << ";\n"; 317 os_ << " static std::size_t const dfa_[" 318 << sm_.data()._dfa[0]->size () << "] = {\n "; 319 count_ = sm_.data()._dfa[0]->size () / 8; 320 for (i_ = 0; i_ < count_; ++i_) 321 { 322 const std::size_t index_ = i_ * 8; 323 os_ << dfa_[index_]; 324 for (j_ = 1; j_ < 8; ++j_) 325 { 326 os_ << ", " << dfa_[index_ + j_]; 327 } 328 if (i_ < count_ - 1) 329 { 330 os_ << ",\n "; 331 } 332 } 333 334 const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8; 335 if (mod_) 336 { 337 const std::size_t index_ = count_ * 8; 338 if (count_) 339 { 340 os_ << ",\n "; 341 } 342 os_ << dfa_[index_]; 343 for (j_ = 1; j_ < mod_; ++j_) 344 { 345 os_ << ", " << dfa_[index_ + j_]; 346 } 347 } 348 os_ << " };\n"; 349 } 350 351 os_ << "\n if (start_token_ == end_)\n"; 352 os_ << " {\n"; 353 os_ << " unique_id_ = npos;\n"; 354 os_ << " return 0;\n"; 355 os_ << " }\n\n"; 356 if (sm_.data()._seen_BOL_assertion) 357 { 358 os_ << " bool bol = bol_;\n\n"; 359 } 360 361 if (dfas_ > 1) 362 { 363 os_ << "again:\n"; 364 os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n"; 365 os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n"; 366 os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n"; 367 } 368 369 os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n"; 370 os_ << " Iterator curr_ = start_token_;\n"; 371 os_ << " bool end_state_ = *ptr_ != 0;\n"; 372 os_ << " std::size_t id_ = *(ptr_ + id_index);\n"; 373 os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n"; 374 if (dfas_ > 1) 375 { 376 os_ << " std::size_t end_start_state_ = start_state_;\n"; 377 } 378 if (sm_.data()._seen_BOL_assertion) 379 { 380 os_ << " bool end_bol_ = bol_;\n"; 381 } 382 os_ << " Iterator end_token_ = start_token_;\n\n"; 383 384 os_ << " while (curr_ != end_)\n"; 385 os_ << " {\n"; 386 387 if (sm_.data()._seen_BOL_assertion) 388 { 389 os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n"; 390 } 391 392 if (sm_.data()._seen_EOL_assertion) 393 { 394 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; 395 } 396 397 if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion) 398 { 399 os_ << " if (BOL_state_ && bol)\n"; 400 os_ << " {\n"; 401 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; 402 os_ << " }\n"; 403 os_ << " else if (EOL_state_ && *curr_ == '\\n')\n"; 404 os_ << " {\n"; 405 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; 406 os_ << " }\n"; 407 os_ << " else\n"; 408 os_ << " {\n"; 409 if (lookups_ == 256) 410 { 411 os_ << " unsigned char index = \n"; 412 os_ << " static_cast<unsigned char>(*curr_++);\n"; 413 } 414 else 415 { 416 os_ << " std::size_t index = *curr_++\n"; 417 } 418 os_ << " bol = (index == '\n') ? true : false;\n"; 419 os_ << " std::size_t const state_ = ptr_[\n"; 420 os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; 421 422 os_ << '\n'; 423 os_ << " if (state_ == 0) break;\n"; 424 os_ << '\n'; 425 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; 426 os_ << " }\n\n"; 427 } 428 else if (sm_.data()._seen_BOL_assertion) 429 { 430 os_ << " if (BOL_state_ && bol)\n"; 431 os_ << " {\n"; 432 os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; 433 os_ << " }\n"; 434 os_ << " else\n"; 435 os_ << " {\n"; 436 if (lookups_ == 256) 437 { 438 os_ << " unsigned char index = \n"; 439 os_ << " static_cast<unsigned char>(*curr_++);\n"; 440 } 441 else 442 { 443 os_ << " std::size_t index = *curr_++\n"; 444 } 445 os_ << " bol = (index == '\n') ? true : false;\n"; 446 os_ << " std::size_t const state_ = ptr_[\n"; 447 os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; 448 449 os_ << '\n'; 450 os_ << " if (state_ == 0) break;\n"; 451 os_ << '\n'; 452 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; 453 os_ << " }\n\n"; 454 } 455 else if (sm_.data()._seen_EOL_assertion) 456 { 457 os_ << " if (EOL_state_ && *curr_ == '\\n')\n"; 458 os_ << " {\n"; 459 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; 460 os_ << " }\n"; 461 os_ << " else\n"; 462 os_ << " {\n"; 463 if (lookups_ == 256) 464 { 465 os_ << " unsigned char index = \n"; 466 os_ << " static_cast<unsigned char>(*curr_++);\n"; 467 } 468 else 469 { 470 os_ << " std::size_t index = *curr_++\n"; 471 } 472 os_ << " bol = (index == '\n') ? true : false;\n"; 473 os_ << " std::size_t const state_ = ptr_[\n"; 474 os_ << " lookup_[static_cast<std::size_t>(index)]];\n"; 475 476 os_ << '\n'; 477 os_ << " if (state_ == 0) break;\n"; 478 os_ << '\n'; 479 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; 480 os_ << " }\n\n"; 481 } 482 else 483 { 484 os_ << " std::size_t const state_ =\n"; 485 486 if (lookups_ == 256) 487 { 488 os_ << " ptr_[lookup_[" 489 "static_cast<unsigned char>(*curr_++)]];\n"; 490 } 491 else 492 { 493 os_ << " ptr_[lookup_[*curr_++]];\n"; 494 } 495 496 os_ << '\n'; 497 os_ << " if (state_ == 0) break;\n"; 498 os_ << '\n'; 499 os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n"; 500 } 501 502 os_ << " if (*ptr_)\n"; 503 os_ << " {\n"; 504 os_ << " end_state_ = true;\n"; 505 os_ << " id_ = *(ptr_ + id_index);\n"; 506 os_ << " uid_ = *(ptr_ + unique_id_index);\n"; 507 if (dfas_ > 1) 508 { 509 os_ << " end_start_state_ = *(ptr_ + state_index);\n"; 510 } 511 if (sm_.data()._seen_BOL_assertion) 512 { 513 os_ << " end_bol_ = bol;\n"; 514 } 515 os_ << " end_token_ = curr_;\n"; 516 os_ << " }\n"; 517 os_ << " }\n\n"; 518 519 if (sm_.data()._seen_EOL_assertion) 520 { 521 os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n"; 522 523 os_ << " if (EOL_state_ && curr_ == end_)\n"; 524 os_ << " {\n"; 525 os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n"; 526 527 os_ << " if (*ptr_)\n"; 528 os_ << " {\n"; 529 os_ << " end_state_ = true;\n"; 530 os_ << " id_ = *(ptr_ + id_index);\n"; 531 os_ << " uid_ = *(ptr_ + unique_id_index);\n"; 532 if (dfas_ > 1) 533 { 534 os_ << " end_start_state_ = *(ptr_ + state_index);\n"; 535 } 536 if (sm_.data()._seen_BOL_assertion) 537 { 538 os_ << " end_bol_ = bol;\n"; 539 } 540 os_ << " end_token_ = curr_;\n"; 541 os_ << " }\n"; 542 os_ << " }\n\n"; 543 } 544 545 os_ << " if (end_state_)\n"; 546 os_ << " {\n"; 547 os_ << " // return longest match\n"; 548 os_ << " start_token_ = end_token_;\n"; 549 550 if (dfas_ > 1) 551 { 552 os_ << " start_state_ = end_start_state_;\n"; 553 os_ << " if (id_ == 0)\n"; 554 os_ << " {\n"; 555 if (sm_.data()._seen_BOL_assertion) 556 { 557 os_ << " bol = end_bol_;\n"; 558 } 559 os_ << " goto again;\n"; 560 os_ << " }\n"; 561 if (sm_.data()._seen_BOL_assertion) 562 { 563 os_ << " else\n"; 564 os_ << " {\n"; 565 os_ << " bol_ = end_bol_;\n"; 566 os_ << " }\n"; 567 } 568 } 569 else if (sm_.data()._seen_BOL_assertion) 570 { 571 os_ << " bol_ = end_bol_;\n"; 572 } 573 574 os_ << " }\n"; 575 os_ << " else\n"; 576 os_ << " {\n"; 577 578 if (sm_.data()._seen_BOL_assertion) 579 { 580 os_ << " bol_ = (*start_token_ == '\n') ? true : false;\n"; 581 } 582 583 os_ << " id_ = npos;\n"; 584 os_ << " uid_ = npos;\n"; 585 os_ << " }\n\n"; 586 587 os_ << " unique_id_ = uid_;\n"; 588 os_ << " return id_;\n"; 589 return os_.good(); 590 } 591 592 /////////////////////////////////////////////////////////////////////////// 593 template <typename Char> get_charlit(Char ch)594 inline std::basic_string<Char> get_charlit(Char ch) 595 { 596 std::basic_string<Char> result; 597 boost::lexer::basic_string_token<Char>::escape_char(ch, result); 598 return result; 599 } 600 601 // check whether state0_0 is referenced from any of the other states 602 template <typename Char> need_label0_0(boost::lexer::basic_state_machine<Char> const & sm_)603 bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_) 604 { 605 typedef typename boost::lexer::basic_state_machine<Char>::iterator 606 iterator_type; 607 iterator_type iter_ = sm_.begin(); 608 std::size_t const states_ = iter_->states; 609 610 for (std::size_t state_ = 0; state_ < states_; ++state_) 611 { 612 if (0 == iter_->bol_index || 0 == iter_->eol_index) 613 { 614 return true; 615 } 616 617 std::size_t const transitions_ = iter_->transitions; 618 for (std::size_t t_ = 0; t_ < transitions_; ++t_) 619 { 620 if (0 == iter_->goto_state) 621 { 622 return true; 623 } 624 ++iter_; 625 } 626 if (transitions_ == 0) ++iter_; 627 } 628 return false; 629 } 630 631 /////////////////////////////////////////////////////////////////////////// 632 template <typename Char> generate_function_body_switch(std::basic_ostream<Char> & os_,boost::lexer::basic_state_machine<Char> const & sm_)633 bool generate_function_body_switch(std::basic_ostream<Char> & os_ 634 , boost::lexer::basic_state_machine<Char> const &sm_) 635 { 636 typedef typename boost::lexer::basic_state_machine<Char>::iterator 637 iterator_type; 638 639 std::size_t const lookups_ = sm_.data()._lookup->front ()->size (); 640 iterator_type iter_ = sm_.begin(); 641 iterator_type labeliter_ = iter_; 642 iterator_type end_ = sm_.end(); 643 std::size_t const dfas_ = sm_.data()._dfa->size (); 644 645 os_ << " static std::size_t const npos = " 646 "static_cast<std::size_t>(~0);\n"; 647 648 os_ << "\n if (start_token_ == end_)\n"; 649 os_ << " {\n"; 650 os_ << " unique_id_ = npos;\n"; 651 os_ << " return 0;\n"; 652 os_ << " }\n\n"; 653 654 if (sm_.data()._seen_BOL_assertion) 655 { 656 os_ << " bool bol = bol_;\n"; 657 } 658 659 if (dfas_ > 1) 660 { 661 os_ << "again:\n"; 662 } 663 664 os_ << " Iterator curr_ = start_token_;\n"; 665 os_ << " bool end_state_ = false;\n"; 666 os_ << " std::size_t id_ = npos;\n"; 667 os_ << " std::size_t uid_ = npos;\n"; 668 669 if (dfas_ > 1) 670 { 671 os_ << " std::size_t end_start_state_ = start_state_;\n"; 672 } 673 674 if (sm_.data()._seen_BOL_assertion) 675 { 676 os_ << " bool end_bol_ = bol_;\n"; 677 } 678 679 os_ << " Iterator end_token_ = start_token_;\n"; 680 os_ << '\n'; 681 682 os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t") 683 << " ch_ = 0;\n\n"; 684 685 if (dfas_ > 1) 686 { 687 os_ << " switch (start_state_)\n"; 688 os_ << " {\n"; 689 690 for (std::size_t i_ = 0; i_ < dfas_; ++i_) 691 { 692 os_ << " case " << i_ << ":\n"; 693 os_ << " goto state" << i_ << "_0;\n"; 694 os_ << " break;\n"; 695 } 696 697 os_ << " default:\n"; 698 os_ << " goto end;\n"; 699 os_ << " break;\n"; 700 os_ << " }\n"; 701 } 702 703 bool need_state0_0_label = need_label0_0(sm_); 704 705 for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_) 706 { 707 std::size_t const states_ = iter_->states; 708 for (std::size_t state_ = 0; state_ < states_; ++state_) 709 { 710 std::size_t const transitions_ = iter_->transitions; 711 std::size_t t_ = 0; 712 713 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label) 714 { 715 os_ << "\nstate" << dfa_ << '_' << state_ << ":\n"; 716 } 717 718 if (iter_->end_state) 719 { 720 os_ << " end_state_ = true;\n"; 721 os_ << " id_ = " << iter_->id << ";\n"; 722 os_ << " uid_ = " << iter_->unique_id << ";\n"; 723 os_ << " end_token_ = curr_;\n"; 724 725 if (dfas_ > 1) 726 { 727 os_ << " end_start_state_ = " << iter_->goto_dfa << 728 ";\n"; 729 } 730 731 if (sm_.data()._seen_BOL_assertion) 732 { 733 os_ << " end_bol_ = bol;\n"; 734 } 735 736 if (transitions_) os_ << '\n'; 737 } 738 739 if (t_ < transitions_ || 740 iter_->bol_index != boost::lexer::npos || 741 iter_->eol_index != boost::lexer::npos) 742 { 743 os_ << " if (curr_ == end_) goto end;\n"; 744 os_ << " ch_ = *curr_;\n"; 745 if (iter_->bol_index != boost::lexer::npos) 746 { 747 os_ << "\n if (bol) goto state" << dfa_ << '_' 748 << iter_->bol_index << ";\n"; 749 } 750 if (iter_->eol_index != boost::lexer::npos) 751 { 752 os_ << "\n if (ch_ == '\n') goto state" << dfa_ 753 << '_' << iter_->eol_index << ";\n"; 754 } 755 os_ << " ++curr_;\n"; 756 } 757 758 for (/**/; t_ < transitions_; ++t_) 759 { 760 Char const *ptr_ = iter_->token._charset.c_str(); 761 Char const *end_ = ptr_ + iter_->token._charset.size(); 762 Char start_char_ = 0; 763 Char curr_char_ = 0; 764 bool range_ = false; 765 bool first_char_ = true; 766 767 os_ << "\n if ("; 768 769 while (ptr_ != end_) 770 { 771 curr_char_ = *ptr_++; 772 773 if (*ptr_ == curr_char_ + 1) 774 { 775 if (!range_) 776 { 777 start_char_ = curr_char_; 778 } 779 range_ = true; 780 } 781 else 782 { 783 if (!first_char_) 784 { 785 os_ << ((iter_->token._negated) ? " && " : " || "); 786 } 787 else 788 { 789 first_char_ = false; 790 } 791 if (range_) 792 { 793 if (iter_->token._negated) 794 { 795 os_ << "!"; 796 } 797 os_ << "(ch_ >= '" << get_charlit(start_char_) 798 << "' && ch_ <= '" 799 << get_charlit(curr_char_) << "')"; 800 range_ = false; 801 } 802 else 803 { 804 os_ << "ch_ " 805 << ((iter_->token._negated) ? "!=" : "==") 806 << " '" << get_charlit(curr_char_) << "'"; 807 } 808 } 809 } 810 811 os_ << ") goto state" << dfa_ << '_' << iter_->goto_state 812 << ";\n"; 813 ++iter_; 814 } 815 816 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1)) 817 { 818 os_ << " goto end;\n"; 819 } 820 821 if (transitions_ == 0) ++iter_; 822 } 823 } 824 825 os_ << "\nend:\n"; 826 os_ << " if (end_state_)\n"; 827 os_ << " {\n"; 828 os_ << " // return longest match\n"; 829 os_ << " start_token_ = end_token_;\n"; 830 831 if (dfas_ > 1) 832 { 833 os_ << " start_state_ = end_start_state_;\n"; 834 os_ << "\n if (id_ == 0)\n"; 835 os_ << " {\n"; 836 837 if (sm_.data()._seen_BOL_assertion) 838 { 839 os_ << " bol = end_bol_;\n"; 840 } 841 842 os_ << " goto again;\n"; 843 os_ << " }\n"; 844 845 if (sm_.data()._seen_BOL_assertion) 846 { 847 os_ << " else\n"; 848 os_ << " {\n"; 849 os_ << " bol_ = end_bol_;\n"; 850 os_ << " }\n"; 851 } 852 } 853 else if (sm_.data()._seen_BOL_assertion) 854 { 855 os_ << " bol_ = end_bol_;\n"; 856 } 857 858 os_ << " }\n"; 859 os_ << " else\n"; 860 os_ << " {\n"; 861 862 if (sm_.data()._seen_BOL_assertion) 863 { 864 os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n"; 865 } 866 os_ << " id_ = npos;\n"; 867 os_ << " uid_ = npos;\n"; 868 os_ << " }\n\n"; 869 870 os_ << " unique_id_ = uid_;\n"; 871 os_ << " return id_;\n"; 872 return os_.good(); 873 } 874 875 /////////////////////////////////////////////////////////////////////////// 876 // Generate a tokenizer for the given state machine. 877 template <typename Char, typename F> 878 inline bool generate_cpp(boost::lexer::basic_state_machine<Char> const & sm_,boost::lexer::basic_rules<Char> const & rules_,std::basic_ostream<Char> & os_,Char const * name_suffix,F generate_function_body)879 generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_ 880 , boost::lexer::basic_rules<Char> const& rules_ 881 , std::basic_ostream<Char> &os_, Char const* name_suffix 882 , F generate_function_body) 883 { 884 if (sm_.data()._lookup->empty()) 885 return false; 886 887 std::size_t const dfas_ = sm_.data()._dfa->size(); 888 // std::size_t const lookups_ = sm_.data()._lookup->front()->size(); 889 890 os_ << "// Copyright (c) 2008-2009 Ben Hanson\n"; 891 os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n"; 892 os_ << "//\n"; 893 os_ << "// Distributed under the Boost Software License, " 894 "Version 1.0. (See accompanying\n"; 895 os_ << "// file licence_1_0.txt or copy at " 896 "http://www.boost.org/LICENSE_1_0.txt)\n\n"; 897 os_ << "// Auto-generated by boost::lexer, do not edit\n\n"; 898 899 std::basic_string<Char> guard(name_suffix); 900 guard += L<Char>(name_suffix[0] ? "_" : ""); 901 guard += L<Char>(__DATE__ "_" __TIME__); 902 typename std::basic_string<Char>::size_type p = 903 guard.find_first_of(L<Char>(": ")); 904 while (std::string::npos != p) 905 { 906 guard.replace(p, 1, L<Char>("_")); 907 p = guard.find_first_of(L<Char>(": "), p); 908 } 909 boost::to_upper(guard); 910 911 os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n"; 912 os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n"; 913 914 os_ << "#include <boost/detail/iterator.hpp>\n"; 915 os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n"; 916 917 generate_delimiter(os_); 918 os_ << "// the generated table of state names and the tokenizer have to be\n" 919 "// defined in the boost::spirit::lex::lexertl::static_ namespace\n"; 920 os_ << "namespace boost { namespace spirit { namespace lex { " 921 "namespace lexertl { namespace static_ {\n\n"; 922 923 // generate the lexer state information variables 924 if (!generate_cpp_state_info(rules_, os_, name_suffix)) 925 return false; 926 927 generate_delimiter(os_); 928 os_ << "// this function returns the next matched token\n"; 929 os_ << "template<typename Iterator>\n"; 930 os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "") 931 << name_suffix << " ("; 932 933 if (dfas_ > 1) 934 { 935 os_ << "std::size_t& start_state_, "; 936 } 937 else 938 { 939 os_ << "std::size_t& /*start_state_*/, "; 940 } 941 if (sm_.data()._seen_BOL_assertion) 942 { 943 os_ << "bool& bol_, "; 944 } 945 else 946 { 947 os_ << "bool& /*bol_*/, "; 948 } 949 os_ << "\n "; 950 951 os_ << "Iterator &start_token_, Iterator const& end_, "; 952 os_ << "std::size_t& unique_id_)\n"; 953 os_ << "{\n"; 954 if (!generate_function_body(os_, sm_)) 955 return false; 956 os_ << "}\n\n"; 957 958 if (!generate_cpp_state_table<Char>(os_, name_suffix 959 , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion)) 960 { 961 return false; 962 } 963 964 os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n"; 965 966 os_ << "#endif\n"; 967 968 return os_.good(); 969 } 970 971 } // namespace detail 972 973 /////////////////////////////////////////////////////////////////////////// 974 template <typename Lexer, typename F> 975 inline bool generate_static(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix,F f)976 generate_static(Lexer const& lexer 977 , std::basic_ostream<typename Lexer::char_type>& os 978 , typename Lexer::char_type const* name_suffix, F f) 979 { 980 if (!lexer.init_dfa(true)) // always minimize DFA for static lexers 981 return false; 982 return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os 983 , name_suffix, f); 984 } 985 986 /////////////////////////////////////////////////////////////////////////// 987 // deprecated function, will be removed in the future (this has been 988 // replaced by the function generate_static_dfa - see below). 989 template <typename Lexer> 990 inline bool generate_static(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())991 generate_static(Lexer const& lexer 992 , std::basic_ostream<typename Lexer::char_type>& os 993 , typename Lexer::char_type const* name_suffix = 994 detail::L<typename Lexer::char_type>()) 995 { 996 return generate_static(lexer, os, name_suffix 997 , &detail::generate_function_body_dfa<typename Lexer::char_type>); 998 } 999 1000 /////////////////////////////////////////////////////////////////////////// 1001 template <typename Lexer> 1002 inline bool generate_static_dfa(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())1003 generate_static_dfa(Lexer const& lexer 1004 , std::basic_ostream<typename Lexer::char_type>& os 1005 , typename Lexer::char_type const* name_suffix = 1006 detail::L<typename Lexer::char_type>()) 1007 { 1008 return generate_static(lexer, os, name_suffix 1009 , &detail::generate_function_body_dfa<typename Lexer::char_type>); 1010 } 1011 1012 /////////////////////////////////////////////////////////////////////////// 1013 template <typename Lexer> 1014 inline bool generate_static_switch(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())1015 generate_static_switch(Lexer const& lexer 1016 , std::basic_ostream<typename Lexer::char_type>& os 1017 , typename Lexer::char_type const* name_suffix = 1018 detail::L<typename Lexer::char_type>()) 1019 { 1020 return generate_static(lexer, os, name_suffix 1021 , &detail::generate_function_body_switch<typename Lexer::char_type>); 1022 } 1023 1024 /////////////////////////////////////////////////////////////////////////////// 1025 }}}} 1026 1027 #endif 1028