1 //  Copyright (c) 2008-2009 Ben Hanson
2 //  Copyright (c) 2008-2011 Hartmut Kaiser
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
5 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 
7 #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
8 #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
9 
10 #if defined(_MSC_VER)
11 #pragma once
12 #endif
13 
14 #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
15 #include <boost/spirit/home/support/detail/lexer/consts.hpp>
16 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
17 #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
18 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
19 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
20 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
21 #include <boost/algorithm/string.hpp>
22 #include <boost/lexical_cast.hpp>
23 #include <boost/scoped_array.hpp>
24 
25 ///////////////////////////////////////////////////////////////////////////////
26 namespace boost { namespace spirit { namespace lex { namespace lexertl
27 {
28     namespace detail
29     {
30 
31     ///////////////////////////////////////////////////////////////////////////
// string_lit<CharT> turns narrow characters and narrow C strings into the
// corresponding CharT based representation. Only the specializations below
// are defined.
template <typename CharT>
struct string_lit;

// Narrow flavour: no conversion is required.
template <>
struct string_lit<char>
{
    // a single narrow character is handed back unchanged
    static char get(char c)
    {
        return c;
    }

    // a narrow C string (the empty string by default) is copied into a
    // std::string
    static std::string get(char const* str = "")
    {
        return std::string(str);
    }
};
41 
42     template <>
43     struct string_lit<wchar_t>
44     {
getboost::spirit::lex::lexertl::detail::string_lit45         static wchar_t get(char c)
46         {
47             typedef std::ctype<wchar_t> ctype_t;
48             return std::use_facet<ctype_t>(std::locale()).widen(c);
49         }
getboost::spirit::lex::lexertl::detail::string_lit50         static std::basic_string<wchar_t> get(char const* source = "")
51         {
52             using namespace std;        // some systems have size_t in ns std
53             size_t len = strlen(source);
54             boost::scoped_array<wchar_t> result (new wchar_t[len+1]);
55             result.get()[len] = '\0';
56 
57             // working with wide character streams is supported only if the
58             // platform provides the std::ctype<wchar_t> facet
59             BOOST_ASSERT(std::has_facet<std::ctype<wchar_t> >(std::locale()));
60 
61             std::use_facet<std::ctype<wchar_t> >(std::locale())
62                 .widen(source, source + len, result.get());
63             return result.get();
64         }
65     };
66 
67     template <typename Char>
L(char c)68     inline Char L(char c)
69     {
70         return string_lit<Char>::get(c);
71     }
72 
73     template <typename Char>
L(char const * c="")74     inline std::basic_string<Char> L(char const* c = "")
75     {
76         return string_lit<Char>::get(c);
77     }
78 
79     ///////////////////////////////////////////////////////////////////////////
// Write a separator line made of 80 '/' characters followed by a newline.
// Returns the good() state of the stream after writing.
template <typename Char>
inline bool
generate_delimiter(std::basic_ostream<Char> &os_)
{
    std::basic_string<Char> const rule_(80, '/');
    os_ << rule_ << "\n";
    return os_.good();
}
87 
88     ///////////////////////////////////////////////////////////////////////////
89     // Generate a table of the names of the used lexer states, which is a bit
90     // tricky, because the table stored with the rules is sorted based on the
91     // names, but we need it sorted using the state ids.
92     template <typename Char>
93     inline bool
generate_cpp_state_info(boost::lexer::basic_rules<Char> const & rules_,std::basic_ostream<Char> & os_,Char const * name_suffix)94     generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
95       , std::basic_ostream<Char> &os_, Char const* name_suffix)
96     {
97         // we need to re-sort the state names in ascending order of the state
98         // ids, filling possible gaps in between later
99         typedef typename
100             boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
101         state_iterator;
102         typedef std::map<std::size_t, Char const*> reverse_state_map_type;
103 
104         reverse_state_map_type reverse_state_map;
105         state_iterator send = rules_.statemap().end();
106         for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
107         {
108             typedef typename reverse_state_map_type::value_type value_type;
109             reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
110         }
111 
112         generate_delimiter(os_);
113         os_ << "// this table defines the names of the lexer states\n";
114         os_ << boost::lexer::detail::strings<Char>::char_name()
115             << " const* const lexer_state_names"
116             << (name_suffix[0] ? "_" : "") << name_suffix
117             << "[" << rules_.statemap().size() << "] = \n{\n";
118 
119         typedef typename reverse_state_map_type::iterator iterator;
120         iterator rend = reverse_state_map.end();
121         std::size_t last_id = 0;
122         for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
123         {
124             for (/**/; last_id < (*rit).first; ++last_id)
125             {
126                 os_ << "    0,  // \"<undefined state>\"\n";
127             }
128             os_ << "    "
129                 << boost::lexer::detail::strings<Char>::char_prefix()
130                 << "\"" << (*rit).second << "\"";
131             if (++rit != rend)
132                 os_ << ",\n";
133             else
134                 os_ << "\n";        // don't generate the final comma
135         }
136         os_ << "};\n\n";
137 
138         generate_delimiter(os_);
139         os_ << "// this variable defines the number of lexer states\n";
140         os_ << "std::size_t const lexer_state_count"
141             << (name_suffix[0] ? "_" : "") << name_suffix
142             << " = " << rules_.statemap().size() << ";\n\n";
143         return os_.good();
144     }
145 
146     template <typename Char>
147     inline bool
generate_cpp_state_table(std::basic_ostream<Char> & os_,Char const * name_suffix,bool bol,bool eol)148     generate_cpp_state_table (std::basic_ostream<Char> &os_
149       , Char const* name_suffix, bool bol, bool eol)
150     {
151         std::basic_string<Char> suffix(L<Char>(name_suffix[0] ? "_" : ""));
152         suffix += name_suffix;
153 
154         generate_delimiter(os_);
155         os_ << "// this defines a generic accessors for the information above\n";
156         os_ << "struct lexer" << suffix << "\n{\n";
157         os_ << "    // version number and feature-set of compatible static lexer engine\n";
158         os_ << "    enum\n";
159         os_ << "    {\n        static_version = "
160             << boost::lexical_cast<std::basic_string<Char> >(SPIRIT_STATIC_LEXER_VERSION)
161             << ",\n";
162         os_ << "        supports_bol = " << std::boolalpha << bol << ",\n";
163         os_ << "        supports_eol = " << std::boolalpha << eol << "\n";
164         os_ << "    };\n\n";
165         os_ << "    // return the number of lexer states\n";
166         os_ << "    static std::size_t state_count()\n";
167         os_ << "    {\n        return lexer_state_count" << suffix << "; \n    }\n\n";
168         os_ << "    // return the name of the lexer state as given by 'idx'\n";
169         os_ << "    static " << boost::lexer::detail::strings<Char>::char_name()
170             << " const* state_name(std::size_t idx)\n";
171         os_ << "    {\n        return lexer_state_names" << suffix << "[idx]; \n    }\n\n";
172         os_ << "    // return the next matched token\n";
173         os_ << "    template<typename Iterator>\n";
174         os_ << "    static std::size_t next(std::size_t &start_state_, bool& bol_\n";
175         os_ << "      , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
176         os_ << "    {\n        return next_token" << suffix
177             << "(start_state_, bol_, start_token_, end_, unique_id_);\n    }\n";
178         os_ << "};\n\n";
179         return os_.good();
180     }
181 
182     ///////////////////////////////////////////////////////////////////////////
183     // generate function body based on traversing the DFA tables
184     template <typename Char>
generate_function_body_dfa(std::basic_ostream<Char> & os_,boost::lexer::basic_state_machine<Char> const & sm_)185     bool generate_function_body_dfa(std::basic_ostream<Char>& os_
186       , boost::lexer::basic_state_machine<Char> const &sm_)
187     {
188         std::size_t const dfas_ = sm_.data()._dfa->size();
189         std::size_t const lookups_ = sm_.data()._lookup->front()->size();
190 
191         os_ << "    enum {end_state_index, id_index, unique_id_index, "
192                "state_index, bol_index,\n";
193         os_ << "        eol_index, dead_state_index, dfa_offset};\n\n";
194         os_ << "    static std::size_t const npos = "
195                "static_cast<std::size_t>(~0);\n";
196 
197         if (dfas_ > 1)
198         {
199             for (std::size_t state_ = 0; state_ < dfas_; ++state_)
200             {
201                 std::size_t i_ = 0;
202                 std::size_t j_ = 1;
203                 std::size_t count_ = lookups_ / 8;
204                 std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
205                 std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
206 
207                 os_ << "    static std::size_t const lookup" << state_
208                     << "_[" << lookups_ << "] = {\n        ";
209                 for (/**/; i_ < count_; ++i_)
210                 {
211                     std::size_t const index_ = i_ * 8;
212                     os_ << lookup_[index_];
213                     for (/**/; j_ < 8; ++j_)
214                     {
215                         os_ << ", " << lookup_[index_ + j_];
216                     }
217                     if (i_ < count_ - 1)
218                     {
219                         os_ << ",\n        ";
220                     }
221                     j_ = 1;
222                 }
223                 os_ << " };\n";
224 
225                 count_ = sm_.data()._dfa[state_]->size ();
226                 os_ << "    static const std::size_t dfa" << state_ << "_["
227                     << count_ << "] = {\n        ";
228                 count_ /= 8;
229                 for (i_ = 0; i_ < count_; ++i_)
230                 {
231                     std::size_t const index_ = i_ * 8;
232                     os_ << dfa_[index_];
233                     for (j_ = 1; j_ < 8; ++j_)
234                     {
235                         os_ << ", " << dfa_[index_ + j_];
236                     }
237                     if (i_ < count_ - 1)
238                     {
239                         os_ << ",\n        ";
240                     }
241                 }
242 
243                 std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
244                 if (mod_)
245                 {
246                     std::size_t const index_ = count_ * 8;
247                     if (count_)
248                     {
249                         os_ << ",\n        ";
250                     }
251                     os_ << dfa_[index_];
252                     for (j_ = 1; j_ < mod_; ++j_)
253                     {
254                         os_ << ", " << dfa_[index_ + j_];
255                     }
256                 }
257                 os_ << " };\n";
258             }
259 
260             std::size_t count_ = sm_.data()._dfa_alphabet.size();
261             std::size_t i_ = 1;
262 
263             os_ << "    static std::size_t const* lookup_arr_[" << count_
264                 << "] = { lookup0_";
265             for (i_ = 1; i_ < count_; ++i_)
266             {
267                 os_ << ", " << "lookup" << i_ << "_";
268             }
269             os_ << " };\n";
270 
271             os_ << "    static std::size_t const dfa_alphabet_arr_["
272                 << count_ << "] = { ";
273             os_ << sm_.data()._dfa_alphabet.front ();
274             for (i_ = 1; i_ < count_; ++i_)
275             {
276                 os_ << ", " << sm_.data()._dfa_alphabet[i_];
277             }
278             os_ << " };\n";
279 
280             os_ << "    static std::size_t const* dfa_arr_[" << count_
281                 << "] = { ";
282             os_ << "dfa0_";
283             for (i_ = 1; i_ < count_; ++i_)
284             {
285                 os_ << ", " << "dfa" << i_ << "_";
286             }
287             os_ << " };\n";
288         }
289         else
290         {
291             std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
292             std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
293             std::size_t i_ = 0;
294             std::size_t j_ = 1;
295             std::size_t count_ = lookups_ / 8;
296 
297             os_ << "    static std::size_t const lookup_[";
298             os_ << sm_.data()._lookup[0]->size() << "] = {\n        ";
299             for (/**/; i_ < count_; ++i_)
300             {
301                 const std::size_t index_ = i_ * 8;
302                 os_ << lookup_[index_];
303                 for (/**/; j_ < 8; ++j_)
304                 {
305                     os_ << ", " << lookup_[index_ + j_];
306                 }
307                 if (i_ < count_ - 1)
308                 {
309                     os_ << ",\n        ";
310                 }
311                 j_ = 1;
312             }
313             os_ << " };\n";
314 
315             os_ << "    static std::size_t const dfa_alphabet_ = "
316                 << sm_.data()._dfa_alphabet.front () << ";\n";
317             os_ << "    static std::size_t const dfa_["
318                 << sm_.data()._dfa[0]->size () << "] = {\n        ";
319             count_ = sm_.data()._dfa[0]->size () / 8;
320             for (i_ = 0; i_ < count_; ++i_)
321             {
322                 const std::size_t index_ = i_ * 8;
323                 os_ << dfa_[index_];
324                 for (j_ = 1; j_ < 8; ++j_)
325                 {
326                     os_ << ", " << dfa_[index_ + j_];
327                 }
328                 if (i_ < count_ - 1)
329                 {
330                     os_ << ",\n        ";
331                 }
332             }
333 
334             const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
335             if (mod_)
336             {
337                 const std::size_t index_ = count_ * 8;
338                 if (count_)
339                 {
340                     os_ << ",\n        ";
341                 }
342                 os_ << dfa_[index_];
343                 for (j_ = 1; j_ < mod_; ++j_)
344                 {
345                     os_ << ", " << dfa_[index_ + j_];
346                 }
347             }
348             os_ << " };\n";
349         }
350 
351         os_ << "\n    if (start_token_ == end_)\n";
352         os_ << "    {\n";
353         os_ << "        unique_id_ = npos;\n";
354         os_ << "        return 0;\n";
355         os_ << "    }\n\n";
356         if (sm_.data()._seen_BOL_assertion)
357         {
358             os_ << "    bool bol = bol_;\n\n";
359         }
360 
361         if (dfas_ > 1)
362         {
363             os_ << "again:\n";
364             os_ << "    std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
365             os_ << "    std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
366             os_ << "    std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
367         }
368 
369         os_ << "    std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
370         os_ << "    Iterator curr_ = start_token_;\n";
371         os_ << "    bool end_state_ = *ptr_ != 0;\n";
372         os_ << "    std::size_t id_ = *(ptr_ + id_index);\n";
373         os_ << "    std::size_t uid_ = *(ptr_ + unique_id_index);\n";
374         if (dfas_ > 1)
375         {
376             os_ << "    std::size_t end_start_state_ = start_state_;\n";
377         }
378         if (sm_.data()._seen_BOL_assertion)
379         {
380             os_ << "    bool end_bol_ = bol_;\n";
381         }
382         os_ << "    Iterator end_token_ = start_token_;\n\n";
383 
384         os_ << "    while (curr_ != end_)\n";
385         os_ << "    {\n";
386 
387         if (sm_.data()._seen_BOL_assertion)
388         {
389             os_ << "        std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
390         }
391 
392         if (sm_.data()._seen_EOL_assertion)
393         {
394             os_ << "        std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
395         }
396 
397         if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
398         {
399             os_ << "        if (BOL_state_ && bol)\n";
400             os_ << "        {\n";
401             os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
402             os_ << "        }\n";
403             os_ << "        else if (EOL_state_ && *curr_ == '\\n')\n";
404             os_ << "        {\n";
405             os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
406             os_ << "        }\n";
407             os_ << "        else\n";
408             os_ << "        {\n";
409             if (lookups_ == 256)
410             {
411                 os_ << "            unsigned char index = \n";
412                 os_ << "                static_cast<unsigned char>(*curr_++);\n";
413             }
414             else
415             {
416                 os_ << "            std::size_t index = *curr_++\n";
417             }
418             os_ << "            bol = (index == '\n') ? true : false;\n";
419             os_ << "            std::size_t const state_ = ptr_[\n";
420             os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
421 
422             os_ << '\n';
423             os_ << "            if (state_ == 0) break;\n";
424             os_ << '\n';
425             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
426             os_ << "        }\n\n";
427         }
428         else if (sm_.data()._seen_BOL_assertion)
429         {
430             os_ << "        if (BOL_state_ && bol)\n";
431             os_ << "        {\n";
432             os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
433             os_ << "        }\n";
434             os_ << "        else\n";
435             os_ << "        {\n";
436             if (lookups_ == 256)
437             {
438                 os_ << "            unsigned char index = \n";
439                 os_ << "                static_cast<unsigned char>(*curr_++);\n";
440             }
441             else
442             {
443                 os_ << "            std::size_t index = *curr_++\n";
444             }
445             os_ << "            bol = (index == '\n') ? true : false;\n";
446             os_ << "            std::size_t const state_ = ptr_[\n";
447             os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
448 
449             os_ << '\n';
450             os_ << "            if (state_ == 0) break;\n";
451             os_ << '\n';
452             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
453             os_ << "        }\n\n";
454         }
455         else if (sm_.data()._seen_EOL_assertion)
456         {
457             os_ << "        if (EOL_state_ && *curr_ == '\\n')\n";
458             os_ << "        {\n";
459             os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
460             os_ << "        }\n";
461             os_ << "        else\n";
462             os_ << "        {\n";
463             if (lookups_ == 256)
464             {
465                 os_ << "            unsigned char index = \n";
466                 os_ << "                static_cast<unsigned char>(*curr_++);\n";
467             }
468             else
469             {
470                 os_ << "            std::size_t index = *curr_++\n";
471             }
472             os_ << "            bol = (index == '\n') ? true : false;\n";
473             os_ << "            std::size_t const state_ = ptr_[\n";
474             os_ << "                lookup_[static_cast<std::size_t>(index)]];\n";
475 
476             os_ << '\n';
477             os_ << "            if (state_ == 0) break;\n";
478             os_ << '\n';
479             os_ << "            ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
480             os_ << "        }\n\n";
481         }
482         else
483         {
484             os_ << "        std::size_t const state_ =\n";
485 
486             if (lookups_ == 256)
487             {
488                 os_ << "            ptr_[lookup_["
489                        "static_cast<unsigned char>(*curr_++)]];\n";
490             }
491             else
492             {
493                 os_ << "            ptr_[lookup_[*curr_++]];\n";
494             }
495 
496             os_ << '\n';
497             os_ << "        if (state_ == 0) break;\n";
498             os_ << '\n';
499             os_ << "        ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
500         }
501 
502         os_ << "        if (*ptr_)\n";
503         os_ << "        {\n";
504         os_ << "            end_state_ = true;\n";
505         os_ << "            id_ = *(ptr_ + id_index);\n";
506         os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
507         if (dfas_ > 1)
508         {
509             os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
510         }
511         if (sm_.data()._seen_BOL_assertion)
512         {
513             os_ << "            end_bol_ = bol;\n";
514         }
515         os_ << "            end_token_ = curr_;\n";
516         os_ << "        }\n";
517         os_ << "    }\n\n";
518 
519         if (sm_.data()._seen_EOL_assertion)
520         {
521             os_ << "    std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
522 
523             os_ << "    if (EOL_state_ && curr_ == end_)\n";
524             os_ << "    {\n";
525             os_ << "        ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
526 
527             os_ << "        if (*ptr_)\n";
528             os_ << "        {\n";
529             os_ << "            end_state_ = true;\n";
530             os_ << "            id_ = *(ptr_ + id_index);\n";
531             os_ << "            uid_ = *(ptr_ + unique_id_index);\n";
532             if (dfas_ > 1)
533             {
534                 os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
535             }
536             if (sm_.data()._seen_BOL_assertion)
537             {
538                 os_ << "            end_bol_ = bol;\n";
539             }
540             os_ << "            end_token_ = curr_;\n";
541             os_ << "        }\n";
542             os_ << "    }\n\n";
543         }
544 
545         os_ << "    if (end_state_)\n";
546         os_ << "    {\n";
547         os_ << "        // return longest match\n";
548         os_ << "        start_token_ = end_token_;\n";
549 
550         if (dfas_ > 1)
551         {
552             os_ << "        start_state_ = end_start_state_;\n";
553             os_ << "        if (id_ == 0)\n";
554             os_ << "        {\n";
555             if (sm_.data()._seen_BOL_assertion)
556             {
557                 os_ << "            bol = end_bol_;\n";
558             }
559             os_ << "            goto again;\n";
560             os_ << "        }\n";
561             if (sm_.data()._seen_BOL_assertion)
562             {
563                 os_ << "        else\n";
564                 os_ << "        {\n";
565                 os_ << "            bol_ = end_bol_;\n";
566                 os_ << "        }\n";
567             }
568         }
569         else if (sm_.data()._seen_BOL_assertion)
570         {
571             os_ << "        bol_ = end_bol_;\n";
572         }
573 
574         os_ << "    }\n";
575         os_ << "    else\n";
576         os_ << "    {\n";
577 
578         if (sm_.data()._seen_BOL_assertion)
579         {
580             os_ << "        bol_ = (*start_token_ == '\n') ? true : false;\n";
581         }
582 
583         os_ << "        id_ = npos;\n";
584         os_ << "        uid_ = npos;\n";
585         os_ << "    }\n\n";
586 
587         os_ << "    unique_id_ = uid_;\n";
588         os_ << "    return id_;\n";
589         return os_.good();
590     }
591 
592     ///////////////////////////////////////////////////////////////////////////
593     template <typename Char>
get_charlit(Char ch)594     inline std::basic_string<Char> get_charlit(Char ch)
595     {
596         std::basic_string<Char> result;
597         boost::lexer::basic_string_token<Char>::escape_char(ch, result);
598         return result;
599     }
600 
601     // check whether state0_0 is referenced from any of the other states
602     template <typename Char>
need_label0_0(boost::lexer::basic_state_machine<Char> const & sm_)603     bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
604     {
605         typedef typename boost::lexer::basic_state_machine<Char>::iterator
606             iterator_type;
607         iterator_type iter_ = sm_.begin();
608         std::size_t const states_ = iter_->states;
609 
610         for (std::size_t state_ = 0; state_ < states_; ++state_)
611         {
612             if (0 == iter_->bol_index || 0 == iter_->eol_index)
613             {
614                 return true;
615             }
616 
617             std::size_t const transitions_ = iter_->transitions;
618             for (std::size_t t_ = 0; t_ < transitions_; ++t_)
619             {
620                 if (0 == iter_->goto_state)
621                 {
622                     return true;
623                 }
624                 ++iter_;
625             }
626             if (transitions_ == 0) ++iter_;
627         }
628         return false;
629     }
630 
631     ///////////////////////////////////////////////////////////////////////////
632     template <typename Char>
generate_function_body_switch(std::basic_ostream<Char> & os_,boost::lexer::basic_state_machine<Char> const & sm_)633     bool generate_function_body_switch(std::basic_ostream<Char> & os_
634       , boost::lexer::basic_state_machine<Char> const &sm_)
635     {
636         typedef typename boost::lexer::basic_state_machine<Char>::iterator
637             iterator_type;
638 
639         std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
640         iterator_type iter_ = sm_.begin();
641         iterator_type labeliter_ = iter_;
642         iterator_type end_ = sm_.end();
643         std::size_t const dfas_ = sm_.data()._dfa->size ();
644 
645         os_ << "    static std::size_t const npos = "
646                "static_cast<std::size_t>(~0);\n";
647 
648         os_ << "\n    if (start_token_ == end_)\n";
649         os_ << "    {\n";
650         os_ << "        unique_id_ = npos;\n";
651         os_ << "        return 0;\n";
652         os_ << "    }\n\n";
653 
654         if (sm_.data()._seen_BOL_assertion)
655         {
656             os_ << "    bool bol = bol_;\n";
657         }
658 
659         if (dfas_ > 1)
660         {
661             os_ << "again:\n";
662         }
663 
664         os_ << "    Iterator curr_ = start_token_;\n";
665         os_ << "    bool end_state_ = false;\n";
666         os_ << "    std::size_t id_ = npos;\n";
667         os_ << "    std::size_t uid_ = npos;\n";
668 
669         if (dfas_ > 1)
670         {
671             os_ << "    std::size_t end_start_state_ = start_state_;\n";
672         }
673 
674         if (sm_.data()._seen_BOL_assertion)
675         {
676             os_ << "    bool end_bol_ = bol_;\n";
677         }
678 
679         os_ << "    Iterator end_token_ = start_token_;\n";
680         os_ << '\n';
681 
682         os_ << "    " << ((lookups_ == 256) ? "char" : "wchar_t")
683             << " ch_ = 0;\n\n";
684 
685         if (dfas_ > 1)
686         {
687             os_ << "    switch (start_state_)\n";
688             os_ << "    {\n";
689 
690             for (std::size_t i_ = 0; i_ < dfas_; ++i_)
691             {
692                 os_ << "    case " << i_ << ":\n";
693                 os_ << "        goto state" << i_ << "_0;\n";
694                 os_ << "        break;\n";
695             }
696 
697             os_ << "    default:\n";
698             os_ << "        goto end;\n";
699             os_ << "        break;\n";
700             os_ << "    }\n";
701         }
702 
703         bool need_state0_0_label = need_label0_0(sm_);
704 
705         for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
706         {
707             std::size_t const states_ = iter_->states;
708             for (std::size_t state_ = 0; state_ < states_; ++state_)
709             {
710                 std::size_t const transitions_ = iter_->transitions;
711                 std::size_t t_ = 0;
712 
713                 if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
714                 {
715                     os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
716                 }
717 
718                 if (iter_->end_state)
719                 {
720                     os_ << "    end_state_ = true;\n";
721                     os_ << "    id_ = " << iter_->id << ";\n";
722                     os_ << "    uid_ = " << iter_->unique_id << ";\n";
723                     os_ << "    end_token_ = curr_;\n";
724 
725                     if (dfas_ > 1)
726                     {
727                         os_ << "    end_start_state_ = " << iter_->goto_dfa <<
728                             ";\n";
729                     }
730 
731                     if (sm_.data()._seen_BOL_assertion)
732                     {
733                         os_ << "    end_bol_ = bol;\n";
734                     }
735 
736                     if (transitions_) os_ << '\n';
737                 }
738 
739                 if (t_ < transitions_ ||
740                     iter_->bol_index != boost::lexer::npos ||
741                     iter_->eol_index != boost::lexer::npos)
742                 {
743                     os_ << "    if (curr_ == end_) goto end;\n";
744                     os_ << "    ch_ = *curr_;\n";
745                     if (iter_->bol_index != boost::lexer::npos)
746                     {
747                         os_ << "\n    if (bol) goto state" << dfa_ << '_'
748                             << iter_->bol_index << ";\n";
749                     }
750                     if (iter_->eol_index != boost::lexer::npos)
751                     {
752                         os_ << "\n    if (ch_ == '\n') goto state" << dfa_
753                             << '_' << iter_->eol_index << ";\n";
754                     }
755                     os_ << "    ++curr_;\n";
756                 }
757 
758                 for (/**/; t_ < transitions_; ++t_)
759                 {
760                     Char const *ptr_ = iter_->token._charset.c_str();
761                     Char const *end_ = ptr_ + iter_->token._charset.size();
762                     Char start_char_ = 0;
763                     Char curr_char_ = 0;
764                     bool range_ = false;
765                     bool first_char_ = true;
766 
767                     os_ << "\n    if (";
768 
769                     while (ptr_ != end_)
770                     {
771                         curr_char_ = *ptr_++;
772 
773                         if (*ptr_ == curr_char_ + 1)
774                         {
775                             if (!range_)
776                             {
777                                 start_char_ = curr_char_;
778                             }
779                             range_ = true;
780                         }
781                         else
782                         {
783                             if (!first_char_)
784                             {
785                                 os_ << ((iter_->token._negated) ? " && " : " || ");
786                             }
787                             else
788                             {
789                                 first_char_ = false;
790                             }
791                             if (range_)
792                             {
793                                 if (iter_->token._negated)
794                                 {
795                                     os_ << "!";
796                                 }
797                                 os_ << "(ch_ >= '" << get_charlit(start_char_)
798                                     << "' && ch_ <= '"
799                                     << get_charlit(curr_char_) << "')";
800                                 range_ = false;
801                             }
802                             else
803                             {
804                                 os_ << "ch_ "
805                                     << ((iter_->token._negated) ? "!=" : "==")
806                                     << " '" << get_charlit(curr_char_) << "'";
807                             }
808                         }
809                     }
810 
811                     os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
812                         << ";\n";
813                     ++iter_;
814                 }
815 
816                 if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
817                 {
818                     os_ << "    goto end;\n";
819                 }
820 
821                 if (transitions_ == 0) ++iter_;
822             }
823         }
824 
825         os_ << "\nend:\n";
826         os_ << "    if (end_state_)\n";
827         os_ << "    {\n";
828         os_ << "        // return longest match\n";
829         os_ << "        start_token_ = end_token_;\n";
830 
831         if (dfas_ > 1)
832         {
833             os_ << "        start_state_ = end_start_state_;\n";
834             os_ << "\n        if (id_ == 0)\n";
835             os_ << "        {\n";
836 
837             if (sm_.data()._seen_BOL_assertion)
838             {
839                 os_ << "            bol = end_bol_;\n";
840             }
841 
842             os_ << "            goto again;\n";
843             os_ << "        }\n";
844 
845             if (sm_.data()._seen_BOL_assertion)
846             {
847                 os_ << "        else\n";
848                 os_ << "        {\n";
849                 os_ << "            bol_ = end_bol_;\n";
850                 os_ << "        }\n";
851             }
852         }
853         else if (sm_.data()._seen_BOL_assertion)
854         {
855             os_ << "        bol_ = end_bol_;\n";
856         }
857 
858         os_ << "    }\n";
859         os_ << "    else\n";
860         os_ << "    {\n";
861 
862         if (sm_.data()._seen_BOL_assertion)
863         {
864             os_ << "        bol_ = (*start_token_ == '\\n') ? true : false;\n";
865         }
866         os_ << "        id_ = npos;\n";
867         os_ << "        uid_ = npos;\n";
868         os_ << "    }\n\n";
869 
870         os_ << "    unique_id_ = uid_;\n";
871         os_ << "    return id_;\n";
872         return os_.good();
873     }
874 
875     ///////////////////////////////////////////////////////////////////////////
876     // Generate a tokenizer for the given state machine.
877     template <typename Char, typename F>
878     inline bool
generate_cpp(boost::lexer::basic_state_machine<Char> const & sm_,boost::lexer::basic_rules<Char> const & rules_,std::basic_ostream<Char> & os_,Char const * name_suffix,F generate_function_body)879     generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
880       , boost::lexer::basic_rules<Char> const& rules_
881       , std::basic_ostream<Char> &os_, Char const* name_suffix
882       , F generate_function_body)
883     {
884         if (sm_.data()._lookup->empty())
885             return false;
886 
887         std::size_t const dfas_ = sm_.data()._dfa->size();
888 //         std::size_t const lookups_ = sm_.data()._lookup->front()->size();
889 
890         os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
891         os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
892         os_ << "//\n";
893         os_ << "// Distributed under the Boost Software License, "
894             "Version 1.0. (See accompanying\n";
895         os_ << "// file licence_1_0.txt or copy at "
896             "http://www.boost.org/LICENSE_1_0.txt)\n\n";
897         os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
898 
899         std::basic_string<Char> guard(name_suffix);
900         guard += L<Char>(name_suffix[0] ? "_" : "");
901         guard += L<Char>(__DATE__ "_" __TIME__);
902         typename std::basic_string<Char>::size_type p =
903             guard.find_first_of(L<Char>(": "));
904         while (std::string::npos != p)
905         {
906             guard.replace(p, 1, L<Char>("_"));
907             p = guard.find_first_of(L<Char>(": "), p);
908         }
909         boost::to_upper(guard);
910 
911         os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
912         os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
913 
914         os_ << "#include <boost/detail/iterator.hpp>\n";
915         os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
916 
917         generate_delimiter(os_);
918         os_ << "// the generated table of state names and the tokenizer have to be\n"
919                "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
920         os_ << "namespace boost { namespace spirit { namespace lex { "
921             "namespace lexertl { namespace static_ {\n\n";
922 
923         // generate the lexer state information variables
924         if (!generate_cpp_state_info(rules_, os_, name_suffix))
925             return false;
926 
927         generate_delimiter(os_);
928         os_ << "// this function returns the next matched token\n";
929         os_ << "template<typename Iterator>\n";
930         os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
931             << name_suffix  << " (";
932 
933         if (dfas_ > 1)
934         {
935             os_ << "std::size_t& start_state_, ";
936         }
937         else
938         {
939             os_ << "std::size_t& /*start_state_*/, ";
940         }
941         if (sm_.data()._seen_BOL_assertion)
942         {
943             os_ << "bool& bol_, ";
944         }
945         else
946         {
947             os_ << "bool& /*bol_*/, ";
948         }
949         os_ << "\n    ";
950 
951         os_ << "Iterator &start_token_, Iterator const& end_, ";
952         os_ << "std::size_t& unique_id_)\n";
953         os_ << "{\n";
954         if (!generate_function_body(os_, sm_))
955             return false;
956         os_ << "}\n\n";
957 
958         if (!generate_cpp_state_table<Char>(os_, name_suffix
959             , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
960         {
961             return false;
962         }
963 
964         os_ << "}}}}}  // namespace boost::spirit::lex::lexertl::static_\n\n";
965 
966         os_ << "#endif\n";
967 
968         return os_.good();
969     }
970 
971     }   // namespace detail
972 
973     ///////////////////////////////////////////////////////////////////////////
974     template <typename Lexer, typename F>
975     inline bool
generate_static(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix,F f)976     generate_static(Lexer const& lexer
977       , std::basic_ostream<typename Lexer::char_type>& os
978       , typename Lexer::char_type const* name_suffix, F f)
979     {
980         if (!lexer.init_dfa(true))    // always minimize DFA for static lexers
981             return false;
982         return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
983           , name_suffix, f);
984     }
985 
986     ///////////////////////////////////////////////////////////////////////////
987     // deprecated function, will be removed in the future (this has been
988     // replaced by the function generate_static_dfa - see below).
989     template <typename Lexer>
990     inline bool
generate_static(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())991     generate_static(Lexer const& lexer
992       , std::basic_ostream<typename Lexer::char_type>& os
993       , typename Lexer::char_type const* name_suffix =
994           detail::L<typename Lexer::char_type>())
995     {
996         return generate_static(lexer, os, name_suffix
997           , &detail::generate_function_body_dfa<typename Lexer::char_type>);
998     }
999 
1000     ///////////////////////////////////////////////////////////////////////////
1001     template <typename Lexer>
1002     inline bool
generate_static_dfa(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())1003     generate_static_dfa(Lexer const& lexer
1004       , std::basic_ostream<typename Lexer::char_type>& os
1005       , typename Lexer::char_type const* name_suffix =
1006           detail::L<typename Lexer::char_type>())
1007     {
1008         return generate_static(lexer, os, name_suffix
1009           , &detail::generate_function_body_dfa<typename Lexer::char_type>);
1010     }
1011 
1012     ///////////////////////////////////////////////////////////////////////////
1013     template <typename Lexer>
1014     inline bool
generate_static_switch(Lexer const & lexer,std::basic_ostream<typename Lexer::char_type> & os,typename Lexer::char_type const * name_suffix=detail::L<typename Lexer::char_type> ())1015     generate_static_switch(Lexer const& lexer
1016       , std::basic_ostream<typename Lexer::char_type>& os
1017       , typename Lexer::char_type const* name_suffix =
1018           detail::L<typename Lexer::char_type>())
1019     {
1020         return generate_static(lexer, os, name_suffix
1021           , &detail::generate_function_body_switch<typename Lexer::char_type>);
1022     }
1023 
1024 ///////////////////////////////////////////////////////////////////////////////
1025 }}}}
1026 
1027 #endif
1028