1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEX_STATIC_LEXER_FEB_10_2008_0753PM)
7 #define BOOST_SPIRIT_LEX_STATIC_LEXER_FEB_10_2008_0753PM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/lex/lexer/lexertl/token.hpp>
14 #include <boost/spirit/home/lex/lexer/lexertl/functor.hpp>
15 #include <boost/spirit/home/lex/lexer/lexertl/static_functor_data.hpp>
16 #include <boost/spirit/home/lex/lexer/lexertl/iterator.hpp>
17 #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
18 #if defined(BOOST_SPIRIT_DEBUG)
19 #include <boost/spirit/home/support/detail/lexer/debug.hpp>
20 #endif
21 
22 namespace boost { namespace spirit { namespace lex { namespace lexertl
23 {
24     ///////////////////////////////////////////////////////////////////////////
25     //  forward declaration
26     ///////////////////////////////////////////////////////////////////////////
27     namespace static_
28     {
29         struct lexer;
30     }
31 
32     ///////////////////////////////////////////////////////////////////////////
33     //
34     //  Every lexer type to be used as a lexer for Spirit has to conform to
35     //  the following public interface:
36     //
37     //    typedefs:
38     //        iterator_type   The type of the iterator exposed by this lexer.
39     //        token_type      The type of the tokens returned from the exposed
40     //                        iterators.
41     //
42     //    functions:
43     //        default constructor
44     //                        Since lexers are instantiated as base classes
45     //                        only it might be a good idea to make this
46     //                        constructor protected.
47     //        begin, end      Return a pair of iterators, when dereferenced
48     //                        returning the sequence of tokens recognized in
49     //                        the input stream given as the parameters to the
50     //                        begin() function.
51     //        add_token       Should add the definition of a token to be
52     //                        recognized by this lexer.
53     //        clear           Should delete all current token definitions
54     //                        associated with the given state of this lexer
55     //                        object.
56     //
57     //    template parameters:
58     //        Token           The type of the tokens to be returned from the
59     //                        exposed token iterator.
60     //        LexerTables     See explanations below.
61     //        Iterator        The type of the iterator used to access the
62     //                        underlying character stream.
63     //        Functor         The type of the InputPolicy to use to instantiate
64     //                        the multi_pass iterator type to be used as the
65     //                        token iterator (returned from begin()/end()).
66     //
67     //    Additionally, this implementation of a static lexer has a template
68     //    parameter LexerTables allowing to customize the static lexer tables
69     //    to be used. The LexerTables is expected to be a type exposing
70     //    the following functions:
71     //
72     //        static std::size_t const state_count()
73     //
74     //                This function needs to return the number of lexer states
75     //                contained in the table returned from the state_names()
76     //                function.
77     //
78     //        static char const* const* state_names()
79     //
80     //                This function needs to return a pointer to a table of
81     //                names of all lexer states. The table needs to have as
82     //                many entries as the state_count() function returns.
83     //
84     //        template<typename Iterator>
85     //        std::size_t next(std::size_t &start_state_, Iterator const& start_
86     //          , Iterator &start_token_, Iterator const& end_
87     //          , std::size_t& unique_id_);
88     //
89     //                This function is expected to return the next matched
90     //                token from the underlying input stream.
91     //
92     ///////////////////////////////////////////////////////////////////////////
93 
94     ///////////////////////////////////////////////////////////////////////////
95     //
96     //  The static_lexer class is an implementation of a Spirit.Lex
97     //  lexer on top of Ben Hanson's lexertl library (For more information
98     //  about lexertl go here: http://www.benhanson.net/lexertl.html).
99     //
100     //  This class is designed to be used in conjunction with a generated,
101     //  static lexer. For more information see the documentation (The Static
102     //  Lexer Model).
103     //
104     //  This class is supposed to be used as the first and only template
105     //  parameter while instantiating instances of a lex::lexer class.
106     //
107     ///////////////////////////////////////////////////////////////////////////
108     template <typename Token = token<>
109       , typename LexerTables = static_::lexer
110       , typename Iterator = typename Token::iterator_type
111       , typename Functor = functor<Token, detail::static_data, Iterator> >
112     class static_lexer
113     {
114     private:
true_boost::spirit::lex::lexertl::static_lexer::dummy115         struct dummy { void true_() {} };
116         typedef void (dummy::*safe_bool)();
117 
118     public:
119         // object is always valid
operator safe_bool() const120         operator safe_bool() const { return &dummy::true_; }
121 
122         typedef typename boost::detail::iterator_traits<Iterator>::value_type
123             char_type;
124         typedef std::basic_string<char_type> string_type;
125 
126         //  Every lexer type to be used as a lexer for Spirit has to conform to
127         //  a public interface
128         typedef Token token_type;
129         typedef typename Token::id_type id_type;
130         typedef iterator<Functor> iterator_type;
131 
132     private:
133         // this type is purely used for the iterator_type construction below
134         struct iterator_data_type
135         {
136             typedef typename Functor::next_token_functor next_token_functor;
137             typedef typename Functor::semantic_actions_type semantic_actions_type;
138             typedef typename Functor::get_state_name_type get_state_name_type;
139 
iterator_data_typeboost::spirit::lex::lexertl::static_lexer::iterator_data_type140             iterator_data_type(next_token_functor next
141                   , semantic_actions_type const& actions
142                   , get_state_name_type get_state_name, std::size_t num_states
143                   , bool bol)
144               : next_(next), actions_(actions), get_state_name_(get_state_name)
145               , num_states_(num_states), bol_(bol)
146             {}
147 
148             next_token_functor next_;
149             semantic_actions_type const& actions_;
150             get_state_name_type get_state_name_;
151             std::size_t num_states_;
152             bool bol_;
153 
154         private:
155             // silence MSVC warning C4512: assignment operator could not be generated
156             iterator_data_type& operator= (iterator_data_type const&);
157         };
158 
159         typedef LexerTables tables_type;
160 
161         // The following static assertion fires if the referenced static lexer
162         // tables are generated by a different static lexer version as used for
163         // the current compilation unit. Please regenerate your static lexer
164         // tables before trying to create a static_lexer<> instance.
165         BOOST_SPIRIT_ASSERT_MSG(
166             tables_type::static_version == SPIRIT_STATIC_LEXER_VERSION
167           , incompatible_static_lexer_version, (LexerTables));
168 
169     public:
170         //  Return the start iterator usable for iterating over the generated
171         //  tokens, the generated function next_token(...) is called to match
172         //  the next token from the input.
173         template <typename Iterator_>
begin(Iterator_ & first,Iterator_ const & last,char_type const * initial_state=0) const174         iterator_type begin(Iterator_& first, Iterator_ const& last
175           , char_type const* initial_state = 0) const
176         {
177             iterator_data_type iterator_data(
178                     &tables_type::template next<Iterator_>, actions_
179                   , &tables_type::state_name, tables_type::state_count()
180                   , tables_type::supports_bol
181                 );
182             return iterator_type(iterator_data, first, last, initial_state);
183         }
184 
185         //  Return the end iterator usable to stop iterating over the generated
186         //  tokens.
end() const187         iterator_type end() const
188         {
189             return iterator_type();
190         }
191 
192     protected:
193         //  Lexer instances can be created by means of a derived class only.
static_lexer(unsigned int)194         static_lexer(unsigned int) : unique_id_(0) {}
195 
196     public:
197         // interface for token definition management
add_token(char_type const *,char_type,std::size_t,char_type const *)198         std::size_t add_token (char_type const*, char_type, std::size_t
199           , char_type const*)
200         {
201             return unique_id_++;
202         }
add_token(char_type const *,string_type const &,std::size_t,char_type const *)203         std::size_t add_token (char_type const*, string_type const&
204           , std::size_t, char_type const*)
205         {
206             return unique_id_++;
207         }
208 
209         // interface for pattern definition management
add_pattern(char_type const *,string_type const &,string_type const &)210         void add_pattern (char_type const*, string_type const&
211           , string_type const&) {}
212 
clear(char_type const *)213         void clear(char_type const*) {}
214 
add_state(char_type const * state)215         std::size_t add_state(char_type const* state)
216         {
217             return detail::get_state_id(state, &tables_type::state_name
218               , tables_type::state_count());
219         }
initial_state() const220         string_type initial_state() const
221         {
222             return tables_type::state_name(0);
223         }
224 
225         // register a semantic action with the given id
226         template <typename F>
add_action(id_type unique_id,std::size_t state,F act)227         void add_action(id_type unique_id, std::size_t state, F act)
228         {
229             typedef typename Functor::wrap_action_type wrapper_type;
230             actions_.add_action(unique_id, state, wrapper_type::call(act));
231         }
232 
init_dfa(bool minimize=false) const233         bool init_dfa(bool minimize = false) const { return true; }
234 
235     private:
236         typename Functor::semantic_actions_type actions_;
237         std::size_t unique_id_;
238     };
239 
240     ///////////////////////////////////////////////////////////////////////////
241     //
242     //  The static_actor_lexer class is another implementation of a
243     //  Spirit.Lex lexer on top of Ben Hanson's lexertl library as outlined
244     //  above (For more information about lexertl go here:
245     //  http://www.benhanson.net/lexertl.html).
246     //
247     //  Just as the static_lexer class it is meant to be used with
248     //  a statically generated lexer as outlined above.
249     //
250     //  The only difference to the static_lexer class above is that
251     //  token_def definitions may have semantic (lexer) actions attached while
252     //  being defined:
253     //
254     //      int w;
255     //      token_def<> word = "[^ \t\n]+";
256     //      self = word[++ref(w)];        // see example: word_count_lexer
257     //
258     //  This class is supposed to be used as the first and only template
259     //  parameter while instantiating instances of a lex::lexer class.
260     //
261     ///////////////////////////////////////////////////////////////////////////
262     template <typename Token = token<>
263       , typename LexerTables = static_::lexer
264       , typename Iterator = typename Token::iterator_type
265       , typename Functor
266           = functor<Token, detail::static_data, Iterator, mpl::true_> >
267     class static_actor_lexer
268       : public static_lexer<Token, LexerTables, Iterator, Functor>
269     {
270     protected:
271         // Lexer instances can be created by means of a derived class only.
static_actor_lexer(unsigned int flags)272         static_actor_lexer(unsigned int flags)
273           : static_lexer<Token, LexerTables, Iterator, Functor>(flags)
274         {}
275     };
276 
277 }}}}
278 
279 #endif
280