1 //  Copyright (c) 2008-2009 Ben Hanson
2 //  Copyright (c) 2008-2011 Hartmut Kaiser
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
5 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 
7 #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
8 #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
9 
10 #if defined(_MSC_VER)
11 #pragma once
12 #endif
13 
14 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
15 #include <boost/spirit/home/support/detail/lexer/consts.hpp>
16 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
17 #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
18 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
19 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
20 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
21 #include <boost/algorithm/string.hpp>
22 #include <boost/scoped_array.hpp>
23 
24 ///////////////////////////////////////////////////////////////////////////////
25 namespace boost { namespace spirit { namespace lex { namespace lexertl
26 {
27     namespace detail
28     {
29 
30     ///////////////////////////////////////////////////////////////////////////
// Converts narrow characters and narrow C strings to the character type
// CharT. Only the explicit specializations are defined.
template <typename CharT>
struct string_lit;

// Narrow target type: no conversion is needed, the input is handed back.
template <>
struct string_lit<char>
{
    // Returns the character unchanged.
    static char get(char c)
    {
        return c;
    }

    // Returns a std::string holding a copy of 'str' (empty by default).
    static std::string get(char const* str = "")
    {
        return std::string(str);
    }
};
40 
41     template <>
42     struct string_lit<wchar_t>
43     {
getboost::spirit::lex::lexertl::detail::string_lit44         static wchar_t get(char c)
45         {
46             typedef std::ctype<wchar_t> ctype_t;
47             return std::use_facet<ctype_t>(std::locale()).widen(c);
48         }
getboost::spirit::lex::lexertl::detail::string_lit49         static std::basic_string<wchar_t> get(char const* source = "")
50         {
51             using namespace std;        // some systems have size_t in ns std
52             size_t len = strlen(source);
53             boost::scoped_array<wchar_t> result (new wchar_t[len+1]);
54             result.get()[len] = '\0';
55 
56             // working with wide character streams is supported only if the
57             // platform provides the std::ctype<wchar_t> facet
58             BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale()));
59 
60             std::use_facet<std::ctype<wchar_t> >(std::locale())
61                 .widen(source, source + len, result.get());
62             return result.get();
63         }
64     };
65 
66     template <typename Char>
L(char c)67     inline Char L(char c)
68     {
69         return string_lit<Char>::get(c);
70     }
71 
72     template <typename Char>
L(char const * c="")73     inline std::basic_string<Char> L(char const* c = "")
74     {
75         return string_lit<Char>::get(c);
76     }
77 
78     ///////////////////////////////////////////////////////////////////////////
// Writes a separator line consisting of 80 '/' characters followed by a
// newline to 'os_'. Returns whether the stream is still good afterwards.
template <typename Char>
inline bool
generate_delimiter(std::basic_ostream<Char> &os_)
{
    std::basic_string<Char> const rule(80, '/');
    os_ << rule;
    os_ << "\n";
    return os_.good();
}
86 
87     ///////////////////////////////////////////////////////////////////////////
88     // Generate a table of the names of the used lexer states, which is a bit
89     // tricky, because the table stored with the rules is sorted based on the
90     // names, but we need it sorted using the state ids.
91     template <typename Char>
92     inline bool
generate_cpp_state_info(boost::lexer::basic_rules<Char> const & rules_,std::basic_ostream<Char> & os_,Char const * name_suffix)93     generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
94       , std::basic_ostream<Char> &os_, Char const* name_suffix)
95     {
96         // we need to re-sort the state names in ascending order of the state
97         // ids, filling possible gaps in between later
98         typedef typename
99             boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
100         state_iterator;
101         typedef std::map<std::size_t, Char const*> reverse_state_map_type;
102 
103         reverse_state_map_type reverse_state_map;
104         state_iterator send = rules_.statemap().end();
105         for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
106         {
107             typedef typename reverse_state_map_type::value_type value_type;
108             reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
109         }
110 
111         generate_delimiter(os_);
112         os_ << "// this table defines the names of the lexer states\n";
113         os_ << boost::lexer::detail::strings<Char>::char_name()
114             << " const* const lexer_state_names"
115             << (name_suffix[0] ? "_" : "") << name_suffix
116             << "[" << rules_.statemap().size() << "] = \n{\n";
117 
118         typedef typename reverse_state_map_type::iterator iterator;
119         iterator rend = reverse_state_map.end();
120         std::size_t last_id = 0;
121         for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
122         {
123             for (/**/; last_id < (*rit).first; ++last_id)
124             {
125                 os_ << "    0,  // \"<undefined state>\"\n";
126             }
127             os_ << "    "
128                 << boost::lexer::detail::strings<Char>::char_prefix()
129                 << "\"" << (*rit).second << "\"";
130             if (++rit != rend)
131                 os_ << ",\n";
132             else
133                 os_ << "\n";        // don't generate the final comma
134         }
135         os_ << "};\n\n";
136 
137         generate_delimiter(os_);
138         os_ << "// this variable defines the number of lexer states\n";
139         os_ << "std::size_t const lexer_state_count"
140             << (name_suffix[0] ? "_" : "") << name_suffix
141             << " = " << rules_.statemap().size() << ";\n\n";
142         return os_.good();
143     }
144 
145     template <typename Char>
146     inline bool
generate_cpp_state_table(std::basic_ostream<Char> & os_,Char const * name_suffix,bool bol,bool eol)147     generate_cpp_state_table (std::basic_ostream<Char> &os_
148       , Char const* name_suffix, bool bol, bool eol)
149     {
150         std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : ""));
151         suffix += name_suffix;
152 
153         generate_delimiter(os_);
154         os_ << "// this defines a generic accessors for the information above\n";
155         os_ << "struct lexer" << suffix << "\n{\n";
156         os_ << "    // version number and feature-set of compatible static lexer engine\n";
157         os_ << "    enum\n";
158         os_ << "    {\n        static_version = " << SPIRIT_STATIC_LEXER_VERSION << ",\n";
159         os_ << "        supports_bol = " << std::boolalpha << bol << ",\n";
160         os_ << "        supports_eol = " << std::boolalpha << eol << "\n";
161         os_ << "    };\n\n";
162         os_ << "    // return the number of lexer states\n";
163         os_ << "    static std::size_t state_count()\n";
164         os_ << "    {\n        return lexer_state_count" << suffix << "; \n    }\n\n";
165         os_ << "    // return the name of the lexer state as given by 'idx'\n";
166         os_ << "    static " << boost::lexer::detail::strings<Char>::char_name()
167             << " const* state_name(std::size_t idx)\n";
168         os_ << "    {\n        return lexer_state_names" << suffix << "[idx]; \n    }\n\n";
169         os_ << "    // return the next matched token\n";
170         os_ << "    template<typename Iterator>\n";
171         os_ << "    static std::size_t next(std::size_t &start_state_, bool& bol_\n";
172         os_ << "      , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
173         os_ << "    {\n        return next_token" << suffix
174             << "(start_state_, bol_, start_token_, end_, unique_id_);\n    }\n";
175         os_ << "};\n\n";
176         return os_.good();
177     }
178 
179     ///////////////////////////////////////////////////////////////////////////
180     // generate function body based on traversing the DFA tables
181     template <typename Char>
generate_function_body_dfa(std::basic_ostream<Char> & os_,boost::lexer::basic_state_machine<Char> const & sm_)182     bool generate_function_body_dfa(std::basic_ostream<Char>& os_
183       , boost::lexer::basic_state_machine<Char> const &sm_)
184     {
185         std::size_t const dfas_ = sm_.data()._dfa->size();
186         std::size_t const lookups_ = sm_.data()._lookup->front()->size();
187 
188         os_ << "    enum {end_state_index, id_index, unique_id_index, "
189                "state_index, bol_index,\n";
190         os_ << "        eol_index, dead_state_index, dfa_offset};\n\n";
191         os_ << "    static std::size_t const npos = "
192                "static_cast<std::size_t>(~0);\n";
193 
194         if (dfas_ > 1)
195         {
196             for (std::size_t state_ = 0; state_ < dfas_; ++state_)
197             {
198                 std::size_t i_ = 0;
199                 std::size_t j_ = 1;
200                 std::size_t count_ = lookups_ / 8;
201                 std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
202                 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
203 
204                 os_ << "    static std::size_t const lookup" << state_
205                     << "_[" << lookups_ << "] = {\n        ";
206                 for (/**/; i_ < count_; ++i_)
207                 {
208                     std::size_t const index_ = i_ * 8;
209                     os_ << lookup_[index_];
210                     for (/**/; j_ < 8; ++j_)
211                     {
212                         os_ << ", " << lookup_[index_ + j_];
213                     }
214                     if (i_ < count_ - 1)
215                     {
216                         os_ << ",\n        ";
217                     }
218                     j_ = 1;
219                 }
220                 os_ << " };\n";
221 
222                 count_ = sm_.data()._dfa[state_]->size ();
223                 os_ << "    static const std::size_t dfa" << state_ << "_["
224                     << count_ << "] = {\n        ";
225                 count_ /= 8;
226                 for (i_ = 0; i_ < count_; ++i_)
227                 {
228                     std::size_t const index_ = i_ * 8;
229                     os_ << dfa_[index_];
230                     for (j_ = 1; j_ < 8; ++j_)
231                     {
232                         os_ << ", " << dfa_[index_ + j_];
233                     }
234                     if (i_ < count_ - 1)
235                     {
236                         os_ << ",\n        ";
237                     }
238                 }
239 
240                 std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
241                 if (mod_)
242                 {
243                     std::size_t const index_ = count_ * 8;
244                     if (count_)
245                     {
246                         os_ << ",\n        ";
247                     }
248                     os_ << dfa_[index_];
249                     for (j_ = 1; j_ < mod_; ++j_)
250                     {
251                         os_ << ", " << dfa_[index_ + j_];
252                     }
253                 }
254                 os_ << " };\n";
255             }
256 
257             std::size_t count_ = sm_.data()._dfa_alphabet.size();
258             std::size_t i_ = 1;
259 
260             os_ << "    static std::size_t const* lookup_arr_[" << count_
261                 << "] = { lookup0_";
262             for (i_ = 1; i_ < count_; ++i_)
263             {
264                 os_ << ", " << "lookup" << i_ << "_";
265             }
266             os_ << " };\n";
267 
268             os_ << "    static std::size_t const dfa_alphabet_arr_["
269                 << count_ << "] = { ";
270             os_ << sm_.data()._dfa_alphabet.front ();
271             for (i_ = 1; i_ < count_; ++i_)
272             {
273                 os_ << ", " << sm_.data()._dfa_alphabet[i_];
274             }
275             os_ << " };\n";
276 
277             os_ << "    static std::size_t const* dfa_arr_[" << count_
278                 << "] = { ";
279             os_ << "dfa0_";
280             for (i_ = 1; i_ < count_; ++i_)
281             {
282                 os_ << ", " << "dfa" << i_ << "_";
283             }
284             os_ << " };\n";
285         }
286         else
287         {
288             std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
289             std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
290             std::size_t i_ = 0;
291             std::size_t j_ = 1;
292             std::size_t count_ = lookups_ / 8;
293 
294             os_ << "    static std::size_t const lookup_[";
295             os_ << sm_.data()._lookup[0]->size() << "] = {\n        ";
296             for (/**/; i_ < count_; ++i_)
297             {
298                 const std::size_t index_ = i_ * 8;
299                 os_ << lookup_[index_];
300                 for (/**/; j_ < 8; ++j_)
301                 {
302                     os_ << ", " << lookup_[index_ + j_];
303                 }
304                 if (i_ < count_ - 1)
305                 {
306                     os_ << ",\n        ";
307                 }
308                 j_ = 1;
309             }
310             os_ << " };\n";
311 
312             os_ << "    static std::size_t const dfa_alphabet_ = "
313                 << sm_.data()._dfa_alphabet.front () << ";\n";
314             os_ << "    static std::size_t const dfa_["
315                 << sm_.data()._dfa[0]->size () << "] = {\n        ";
316             count_ = sm_.data()._dfa[0]->size () / 8;
317             for (i_ = 0; i_ < count_; ++i_)
318             {
319                 const std::size_t index_ = i_ * 8;
320                 os_ << dfa_[index_];
321                 for (j_ = 1; j_ < 8; ++j_)
322                 {
323                     os_ << ", " << dfa_[index_ + j_];
324                 }
325                 if (i_ < count_ - 1)
326                 {
327                     os_ << ",\n        ";
328                 }
329             }
330 
331             const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
332             if (mod_)
333             {
334                 const std::size_t index_ = count_ * 8;
335                 if (count_)
336                 {
337                     os_ << ",\n        ";
338                 }
339                 os_ << dfa_[index_];
340                 for (j_ = 1; j_ < mod_; ++j_)
341                 {
342                     os_ << ", " << dfa_[index_ + j_];
343                 }
344             }
345             os_ << " };\n";
346         }
347 
348         os_ << "\n    if (start_token_ == end_)\n";
349         os_ << "    {\n";
350         os_ << "        unique_id_ = npos;\n";
351         os_ << "        return 0;\n";
352         os_ << "    }\n\n";
353         if (sm_.data()._seen_BOL_assertion)
354         {
355             os_ << "    bool bol = bol_;\n\n";
356         }
357 
358         if (dfas_ > 1)
359         {
360             os_ << "again:\n";
361             os_ << "    std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
362             os_ << "    std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
363             os_ << "    std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
364         }
365 
366         os_ << "    std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
367         os_ << "    Iterator curr_ = start_token_;\n";
368         os_ << "    bool end_state_ = *ptr_ != 0;\n";
369         os_ << "    std::size_t id_ = *(ptr_ + id_index);\n";
370         os_ << "    std::size_t uid_ = *(ptr_ + unique_id_index);\n";
371         if (dfas_ > 1)
372         {
373             os_ << "    std::size_t end_start_state_ = start_state_;\n";
374         }
375         if (sm_.data()._seen_BOL_assertion)
376         {
377             os_ << "    bool end_bol_ = bol_;\n";
378         }
379         os_ << "    Iterator end_token_ = start_token_;\n\n";
380 
381         os_ << "    while (curr_ != end_)\n";
382         os_ << "    {\n";
383 
384         if (sm_.data()._seen_BOL_assertion)
385         {
386             os_ << "        std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
387         }
388 
389         if (sm_.data()._seen_EOL_assertion)
390         {
391             os_ << "        std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
392         }
393 
394         if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
395         {
396             os_ << "        if (BOL_state_ && bol)\n";
397             os_ << "        {\n";
398             os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
399             os_ << "        }\n";
400             os_ << "        else if (EOL_state_ && *curr_ == '\\n')\n";
401             os_ << "        {\n";
402             os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
403             os_ << "        }\n";
404             os_ << "        else\n";
405             os_ << "        {\n";
406             if (lookups_ == 256)
407             {
408                 os_ << "            unsigned char index = \n";
409                 os_ << "                static_cast<unsigned char>(*curr_++);\n";
410             }
411             else
412             {
413                 os_ << "            std::size_t index = *curr_++\n";
414             }
415             os_ << "            bol = (index == '\\n') ? true : false;\n";
416             os_ << "            std::size_t const state_ = ptr_[\n";
417             os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
418 
419             os_ << '\n';
420             os_ << "            if (state_ == 0) break;\n";
421             os_ << '\n';
422             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
423             os_ << "        }\n\n";
424         }
425         else if (sm_.data()._seen_BOL_assertion)
426         {
427             os_ << "        if (BOL_state_ && bol)\n";
428             os_ << "        {\n";
429             os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
430             os_ << "        }\n";
431             os_ << "        else\n";
432             os_ << "        {\n";
433             if (lookups_ == 256)
434             {
435                 os_ << "            unsigned char index = \n";
436                 os_ << "                static_cast<unsigned char>(*curr_++);\n";
437             }
438             else
439             {
440                 os_ << "            std::size_t index = *curr_++\n";
441             }
442             os_ << "            bol = (index == '\\n') ? true : false;\n";
443             os_ << "            std::size_t const state_ = ptr_[\n";
444             os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
445 
446             os_ << '\n';
447             os_ << "            if (state_ == 0) break;\n";
448             os_ << '\n';
449             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
450             os_ << "        }\n\n";
451         }
452         else if (sm_.data()._seen_EOL_assertion)
453         {
454             os_ << "        if (EOL_state_ && *curr_ == '\\n')\n";
455             os_ << "        {\n";
456             os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
457             os_ << "        }\n";
458             os_ << "        else\n";
459             os_ << "        {\n";
460             if (lookups_ == 256)
461             {
462                 os_ << "            unsigned char index = \n";
463                 os_ << "                static_cast<unsigned char>(*curr_++);\n";
464             }
465             else
466             {
467                 os_ << "            std::size_t index = *curr_++\n";
468             }
469             os_ << "            bol = (index == '\\n') ? true : false;\n";
470             os_ << "            std::size_t const state_ = ptr_[\n";
471             os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
472 
473             os_ << '\n';
474             os_ << "            if (state_ == 0) break;\n";
475             os_ << '\n';
476             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
477             os_ << "        }\n\n";
478         }
479         else
480         {
481             os_ << "        std::size_t const state_ =\n";
482 
483             if (lookups_ == 256)
484             {
485                 os_ << "            ptr_[lookup_["
486                        "static_cast<unsigned char>(*curr_++)]];\n";
487             }
488             else
489             {
490                 os_ << "            ptr_[lookup_[*curr_++]];\n";
491             }
492 
493             os_ << '\n';
494             os_ << "        if (state_ == 0) break;\n";
495             os_ << '\n';
496             os_ << "        ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
497         }
498 
499         os_ << "        if (*ptr_)\n";
500         os_ << "        {\n";
501         os_ << "            end_state_ = true;\n";
502         os_ << "            id_ = *(ptr_ + id_index);\n";
503         os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
504         if (dfas_ > 1)
505         {
506             os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
507         }
508         if (sm_.data()._seen_BOL_assertion)
509         {
510             os_ << "            end_bol_ = bol;\n";
511         }
512         os_ << "            end_token_ = curr_;\n";
513         os_ << "        }\n";
514         os_ << "    }\n\n";
515 
516         if (sm_.data()._seen_EOL_assertion)
517         {
518             os_ << "    std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
519 
520             os_ << "    if (EOL_state_ && curr_ == end_)\n";
521             os_ << "    {\n";
522             os_ << "        ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
523 
524             os_ << "        if (*ptr_)\n";
525             os_ << "        {\n";
526             os_ << "            end_state_ = true;\n";
527             os_ << "            id_ = *(ptr_ + id_index);\n";
528             os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
529             if (dfas_ > 1)
530             {
531                 os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
532             }
533             if (sm_.data()._seen_BOL_assertion)
534             {
535                 os_ << "            end_bol_ = bol;\n";
536             }
537             os_ << "            end_token_ = curr_;\n";
538             os_ << "        }\n";
539             os_ << "    }\n\n";
540         }
541 
542         os_ << "    if (end_state_)\n";
543         os_ << "    {\n";
544         os_ << "        // return longest match\n";
545         os_ << "        start_token_ = end_token_;\n";
546 
547         if (dfas_ > 1)
548         {
549             os_ << "        start_state_ = end_start_state_;\n";
550             os_ << "        if (id_ == 0)\n";
551             os_ << "        {\n";
552             if (sm_.data()._seen_BOL_assertion)
553             {
554                 os_ << "            bol = end_bol_;\n";
555             }
556             os_ << "            goto again;\n";
557             os_ << "        }\n";
558             if (sm_.data()._seen_BOL_assertion)
559             {
560                 os_ << "        else\n";
561                 os_ << "        {\n";
562                 os_ << "            bol_ = end_bol_;\n";
563                 os_ << "        }\n";
564             }
565         }
566         else if (sm_.data()._seen_BOL_assertion)
567         {
568             os_ << "        bol_ = end_bol_;\n";
569         }
570 
571         os_ << "    }\n";
572         os_ << "    else\n";
573         os_ << "    {\n";
574 
575         if (sm_.data()._seen_BOL_assertion)
576         {
577             os_ << "        bol_ = (*start_token_ == '\\n') ? true : false;\n";
578         }
579 
580         os_ << "        id_ = npos;\n";
581         os_ << "        uid_ = npos;\n";
582         os_ << "    }\n\n";
583 
584         os_ << "    unique_id_ = uid_;\n";
585         os_ << "    return id_;\n";
586         return os_.good();
587     }
588 
589     ///////////////////////////////////////////////////////////////////////////
590     template <typename Char>
get_charlit(Char ch)591     inline std::basic_string<Char> get_charlit(Char ch)
592     {
593         std::basic_string<Char> result;
594         boost::lexer::basic_string_token<Char>::escape_char(ch, result);
595         return result;
596     }
597 
598     // check whether state0_0 is referenced from any of the other states
599     template <typename Char>
need_label0_0(boost::lexer::basic_state_machine<Char> const & sm_)600     bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
601     {
602         typedef typename boost::lexer::basic_state_machine<Char>::iterator
603             iterator_type;
604         iterator_type iter_ = sm_.begin();
605         std::size_t const states_ = iter_->states;
606 
607         for (std::size_t state_ = 0; state_ < states_; ++state_)
608         {
609             if (0 == iter_->bol_index || 0 == iter_->eol_index)
610             {
611                 return true;
612             }
613 
614             std::size_t const transitions_ = iter_->transitions;
615             for (std::size_t t_ = 0; t_ < transitions_; ++t_)
616             {
617                 if (0 == iter_->goto_state)
618                 {
619                     return true;
620                 }
621                 ++iter_;
622             }
623             if (transitions_ == 0) ++iter_;
624         }
625         return false;
626     }
627 
628     ///////////////////////////////////////////////////////////////////////////
629     template <typename Char>
generate_function_body_switch(std::basic_ostream<Char> & os_,boost::lexer::basic_state_machine<Char> const & sm_)630     bool generate_function_body_switch(std::basic_ostream<Char> & os_
631       , boost::lexer::basic_state_machine<Char> const &sm_)
632     {
633         typedef typename boost::lexer::basic_state_machine<Char>::iterator
634             iterator_type;
635 
636         std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
637         iterator_type iter_ = sm_.begin();
638         iterator_type labeliter_ = iter_;
639         iterator_type end_ = sm_.end();
640         std::size_t const dfas_ = sm_.data()._dfa->size ();
641 
642         os_ << "    static std::size_t const npos = "
643                "static_cast<std::size_t>(~0);\n";
644 
645         os_ << "\n    if (start_token_ == end_)\n";
646         os_ << "    {\n";
647         os_ << "        unique_id_ = npos;\n";
648         os_ << "        return 0;\n";
649         os_ << "    }\n\n";
650 
651         if (sm_.data()._seen_BOL_assertion)
652         {
653             os_ << "    bool bol = bol_;\n";
654         }
655 
656         if (dfas_ > 1)
657         {
658             os_ << "again:\n";
659         }
660 
661         os_ << "    Iterator curr_ = start_token_;\n";
662         os_ << "    bool end_state_ = false;\n";
663         os_ << "    std::size_t id_ = npos;\n";
664         os_ << "    std::size_t uid_ = npos;\n";
665 
666         if (dfas_ > 1)
667         {
668             os_ << "    std::size_t end_start_state_ = start_state_;\n";
669         }
670 
671         if (sm_.data()._seen_BOL_assertion)
672         {
673             os_ << "    bool end_bol_ = bol_;\n";
674         }
675 
676         os_ << "    Iterator end_token_ = start_token_;\n";
677         os_ << '\n';
678 
679         os_ << "    " << ((lookups_ == 256) ? "char" : "wchar_t")
680             << " ch_ = 0;\n\n";
681 
682         if (dfas_ > 1)
683         {
684             os_ << "    switch (start_state_)\n";
685             os_ << "    {\n";
686 
687             for (std::size_t i_ = 0; i_ < dfas_; ++i_)
688             {
689                 os_ << "    case " << i_ << ":\n";
690                 os_ << "        goto state" << i_ << "_0;\n";
691                 os_ << "        break;\n";
692             }
693 
694             os_ << "    default:\n";
695             os_ << "        goto end;\n";
696             os_ << "        break;\n";
697             os_ << "    }\n";
698         }
699 
700         bool need_state0_0_label = need_label0_0(sm_);
701 
702         for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
703         {
704             std::size_t const states_ = iter_->states;
705             for (std::size_t state_ = 0; state_ < states_; ++state_)
706             {
707                 std::size_t const transitions_ = iter_->transitions;
708                 std::size_t t_ = 0;
709 
710                 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
711                 {
712                     os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
713                 }
714 
715                 if (iter_->end_state)
716                 {
717                     os_ << "    end_state_ = true;\n";
718                     os_ << "    id_ = " << iter_->id << ";\n";
719                     os_ << "    uid_ = " << iter_->unique_id << ";\n";
720                     os_ << "    end_token_ = curr_;\n";
721 
722                     if (dfas_ > 1)
723                     {
724                         os_ << "    end_start_state_ = " << iter_->goto_dfa <<
725                             ";\n";
726                     }
727 
728                     if (sm_.data()._seen_BOL_assertion)
729                     {
730                         os_ << "    end_bol_ = bol;\n";
731                     }
732 
733                     if (transitions_) os_ << '\n';
734                 }
735 
736                 if (t_ < transitions_ ||
737                     iter_->bol_index != boost::lexer::npos ||
738                     iter_->eol_index != boost::lexer::npos)
739                 {
740                     os_ << "    if (curr_ == end_) goto end;\n";
741                     os_ << "    ch_ = *curr_;\n";
742                     if (iter_->bol_index != boost::lexer::npos)
743                     {
744                         os_ << "\n    if (bol) goto state" << dfa_ << '_'
745                             << iter_->bol_index << ";\n";
746                     }
747                     if (iter_->eol_index != boost::lexer::npos)
748                     {
749                         os_ << "\n    if (ch_ == '\\n') goto state" << dfa_
750                             << '_' << iter_->eol_index << ";\n";
751                     }
752                     os_ << "    ++curr_;\n";
753                 }
754 
755                 for (/**/; t_ < transitions_; ++t_)
756                 {
757                     Char const *ptr_ = iter_->token._charset.c_str();
758                     Char const *end_ = ptr_ + iter_->token._charset.size();
759                     Char start_char_ = 0;
760                     Char curr_char_ = 0;
761                     bool range_ = false;
762                     bool first_char_ = true;
763 
764                     os_ << "\n    if (";
765 
766                     while (ptr_ != end_)
767                     {
768                         curr_char_ = *ptr_++;
769 
770                         if (*ptr_ == curr_char_ + 1)
771                         {
772                             if (!range_)
773                             {
774                                 start_char_ = curr_char_;
775                             }
776                             range_ = true;
777                         }
778                         else
779                         {
780                             if (!first_char_)
781                             {
782                                 os_ << ((iter_->token._negated) ? " && " : " || ");
783                             }
784                             else
785                             {
786                                 first_char_ = false;
787                             }
788                             if (range_)
789                             {
790                                 if (iter_->token._negated)
791                                 {
792                                     os_ << "!";
793                                 }
794                                 os_ << "(ch_ >= '" << get_charlit(start_char_)
795                                     << "' && ch_ <= '"
796                                     << get_charlit(curr_char_) << "')";
797                                 range_ = false;
798                             }
799                             else
800                             {
801                                 os_ << "ch_ "
802                                     << ((iter_->token._negated) ? "!=" : "==")
803                                     << " '" << get_charlit(curr_char_) << "'";
804                             }
805                         }
806                     }
807 
808                     os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
809                         << ";\n";
810                     ++iter_;
811                 }
812 
813                 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
814                 {
815                     os_ << "    goto end;\n";
816                 }
817 
818                 if (transitions_ == 0) ++iter_;
819             }
820         }
821 
822         os_ << "\nend:\n";
823         os_ << "    if (end_state_)\n";
824         os_ << "    {\n";
825         os_ << "        // return longest match\n";
826         os_ << "        start_token_ = end_token_;\n";
827 
828         if (dfas_ > 1)
829         {
830             os_ << "        start_state_ = end_start_state_;\n";
831             os_ << "\n        if (id_ == 0)\n";
832             os_ << "        {\n";
833 
834             if (sm_.data()._seen_BOL_assertion)
835             {
836                 os_ << "            bol = end_bol_;\n";
837             }
838 
839             os_ << "            goto again;\n";
840             os_ << "        }\n";
841 
842             if (sm_.data()._seen_BOL_assertion)
843             {
844                 os_ << "        else\n";
845                 os_ << "        {\n";
846                 os_ << "            bol_ = end_bol_;\n";
847                 os_ << "        }\n";
848             }
849         }
850         else if (sm_.data()._seen_BOL_assertion)
851         {
852             os_ << "        bol_ = end_bol_;\n";
853         }
854 
855         os_ << "    }\n";
856         os_ << "    else\n";
857         os_ << "    {\n";
858 
859         if (sm_.data()._seen_BOL_assertion)
860         {
861             os_ << "        bol_ = (*start_token_ == '\\n') ? true : false;\n";
862         }
863         os_ << "        id_ = npos;\n";
864         os_ << "        uid_ = npos;\n";
865         os_ << "    }\n\n";
866 
867         os_ << "    unique_id_ = uid_;\n";
868         os_ << "    return id_;\n";
869         return os_.good();
870     }
871 
872     ///////////////////////////////////////////////////////////////////////////
873     // Generate a tokenizer for the given state machine.
874     template <typename Char, typename F>
875     inline bool
generate_cpp(boost::lexer::basic_state_machine<Char> const & sm_,boost::lexer::basic_rules<Char> const & rules_,std::basic_ostream<Char> & os_,Char const * name_suffix,F generate_function_body)876     generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
877       , boost::lexer::basic_rules<Char> const& rules_
878       , std::basic_ostream<Char> &os_, Char const* name_suffix
879       , F generate_function_body)
880     {
881         if (sm_.data()._lookup->empty())
882             return false;
883 
884         std::size_t const dfas_ = sm_.data()._dfa->size();
885 //         std::size_t const lookups_ = sm_.data()._lookup->front()->size();
886 
887         os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
888         os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
889         os_ << "//\n";
890         os_ << "// Distributed under the Boost Software License, "
891             "Version 1.0. (See accompanying\n";
892         os_ << "// file licence_1_0.txt or copy at "
893             "http://www.boost.org/LICENSE_1_0.txt)\n\n";
894         os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
895 
896         std::basic_string<Char> guard(name_suffix);
897         guard += L<Char>(name_suffix[0] ? "_" : "");
898         guard += L<Char>(__DATE__ "_" __TIME__);
899         typename std::basic_string<Char>::size_type p =
900             guard.find_first_of(L<Char>(": "));
901         while (std::string::npos != p)
902         {
903             guard.replace(p, 1, L<Char>("_"));
904             p = guard.find_first_of(L<Char>(": "), p);
905         }
906         boost::to_upper(guard);
907 
908         os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
909         os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
910 
911         os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
912 
913         generate_delimiter(os_);
914         os_ << "// the generated table of state names and the tokenizer have to be\n"
915                "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
916         os_ << "namespace boost { namespace spirit { namespace lex { "
917             "namespace lexertl { namespace static_ {\n\n";
918 
919         // generate the lexer state information variables
920         if (!generate_cpp_state_info(rules_, os_, name_suffix))
921             return false;
922 
923         generate_delimiter(os_);
924         os_ << "// this function returns the next matched token\n";
925         os_ << "template<typename Iterator>\n";
926         os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
927             << name_suffix  << " (";
928 
929         if (dfas_ > 1)
930         {
931             os_ << "std::size_t& start_state_, ";
932         }
933         else
934         {
935             os_ << "std::size_t& /*start_state_*/, ";
936         }
937         if (sm_.data()._seen_BOL_assertion)
938         {
939             os_ << "bool& bol_, ";
940         }
941         else
942         {
943             os_ << "bool& /*bol_*/, ";
944         }
945         os_ << "\n    ";
946 
947         os_ << "Iterator &start_token_, Iterator const& end_, ";
948         os_ << "std::size_t& unique_id_)\n";
949         os_ << "{\n";
950         if (!generate_function_body(os_, sm_))
951             return false;
952         os_ << "}\n\n";
953 
954         if (!generate_cpp_state_table<Char>(os_, name_suffix
955             , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
956         {
957             return false;
958         }
959 
960         os_ << "}}}}}  // namespace boost::spirit::lex::lexertl::static_\n\n";
961 
962         os_ << "#endif\n";
963 
964         return os_.good();
965     }
966 
967     }   // namespace detail
968 
969     ///////////////////////////////////////////////////////////////////////////
970     template <typename Lexer, typename F>
971     inline bool
generate_static(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix,F f)972     generate_static(Lexer const& lexer
973       , std::basic_ostream<typename Lexer::char_type>& os
974       , typename Lexer::char_type const* name_suffix, F f)
975     {
976         if (!lexer.init_dfa(true))    // always minimize DFA for static lexers
977             return false;
978         return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
979           , name_suffix, f);
980     }
981 
982     ///////////////////////////////////////////////////////////////////////////
983     // deprecated function, will be removed in the future (this has been
984     // replaced by the function generate_static_dfa - see below).
985     template <typename Lexer>
986     inline bool
generate_static(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())987     generate_static(Lexer const& lexer
988       , std::basic_ostream<typename Lexer::char_type>& os
989       , typename Lexer::char_type const* name_suffix =
990           detail::L<typename Lexer::char_type>())
991     {
992         return generate_static(lexer, os, name_suffix
993           , &detail::generate_function_body_dfa<typename Lexer::char_type>);
994     }
995 
996     ///////////////////////////////////////////////////////////////////////////
997     template <typename Lexer>
998     inline bool
generate_static_dfa(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())999     generate_static_dfa(Lexer const& lexer
1000       , std::basic_ostream<typename Lexer::char_type>& os
1001       , typename Lexer::char_type const* name_suffix =
1002           detail::L<typename Lexer::char_type>())
1003     {
1004         return generate_static(lexer, os, name_suffix
1005           , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1006     }
1007 
1008     ///////////////////////////////////////////////////////////////////////////
1009     template <typename Lexer>
1010     inline bool
generate_static_switch(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())1011     generate_static_switch(Lexer const& lexer
1012       , std::basic_ostream<typename Lexer::char_type>& os
1013       , typename Lexer::char_type const* name_suffix =
1014           detail::L<typename Lexer::char_type>())
1015     {
1016         return generate_static(lexer, os, name_suffix
1017           , &detail::generate_function_body_switch<typename Lexer::char_type>);
1018     }
1019 
1020 ///////////////////////////////////////////////////////////////////////////////
1021 }}}}
1022 
1023 #endif
1024