1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM)
7 #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/support/info.hpp>
14 #include <boost/spirit/home/qi/skip_over.hpp>
15 #include <boost/spirit/home/qi/parser.hpp>
16 #include <boost/spirit/home/qi/detail/assign_to.hpp>
17 #include <boost/spirit/home/lex/reference.hpp>
18 #include <boost/spirit/home/lex/meta_compiler.hpp>
19 #include <boost/spirit/home/lex/lexer_type.hpp>
20 #include <boost/spirit/home/lex/lexer/token_def.hpp>
21 #include <boost/assert.hpp>
22 #include <boost/noncopyable.hpp>
23 #include <boost/detail/iterator.hpp>
24 #include <boost/fusion/include/vector.hpp>
25 #include <boost/mpl/assert.hpp>
26 #include <boost/range/iterator_range.hpp>
27 #include <string>
28 
29 namespace boost { namespace spirit { namespace lex
30 {
31     ///////////////////////////////////////////////////////////////////////////
32     namespace detail
33     {
34         ///////////////////////////////////////////////////////////////////////
35         template <typename LexerDef>
36         struct lexer_def_
37           : proto::extends<
38                 typename proto::terminal<
39                    lex::reference<lexer_def_<LexerDef> const>
40                 >::type
41               , lexer_def_<LexerDef> >
42           , qi::parser<lexer_def_<LexerDef> >
43           , lex::lexer_type<lexer_def_<LexerDef> >
44         {
45         private:
46             // avoid warnings about using 'this' in constructor
this_boost::spirit::lex::detail::lexer_def_47             lexer_def_& this_() { return *this; }
48 
49             typedef typename LexerDef::char_type char_type;
50             typedef typename LexerDef::string_type string_type;
51             typedef typename LexerDef::id_type id_type;
52 
53             typedef lex::reference<lexer_def_ const> reference_;
54             typedef typename proto::terminal<reference_>::type terminal_type;
55             typedef proto::extends<terminal_type, lexer_def_> proto_base_type;
56 
aliasboost::spirit::lex::detail::lexer_def_57             reference_ alias() const
58             {
59                 return reference_(*this);
60             }
61 
62         public:
63             // Qi interface: metafunction calculating parser attribute type
64             template <typename Context, typename Iterator>
65             struct attribute
66             {
67                 //  the return value of a token set contains the matched token
68                 //  id, and the corresponding pair of iterators
69                 typedef typename Iterator::base_iterator_type iterator_type;
70                 typedef
71                     fusion::vector2<id_type, iterator_range<iterator_type> >
72                 type;
73             };
74 
75             // Qi interface: parse functionality
76             template <typename Iterator, typename Context
77               , typename Skipper, typename Attribute>
parseboost::spirit::lex::detail::lexer_def_78             bool parse(Iterator& first, Iterator const& last
79               , Context& /*context*/, Skipper const& skipper
80               , Attribute& attr) const
81             {
82                 qi::skip_over(first, last, skipper);   // always do a pre-skip
83 
84                 if (first != last) {
85                     typedef typename
86                         boost::detail::iterator_traits<Iterator>::value_type
87                     token_type;
88 
89                     token_type const& t = *first;
90                     if (token_is_valid(t) && t.state() == first.get_state()) {
91                     // any of the token definitions matched
92                         spirit::traits::assign_to(t, attr);
93                         ++first;
94                         return true;
95                     }
96                 }
97                 return false;
98             }
99 
100             // Qi interface: 'what' functionality
101             template <typename Context>
whatboost::spirit::lex::detail::lexer_def_102             info what(Context& /*context*/) const
103             {
104                 return info("lexer");
105             }
106 
107         private:
108             // allow to use the lexer.self.add("regex1", id1)("regex2", id2);
109             // syntax
110             struct adder
111             {
adderboost::spirit::lex::detail::lexer_def_::adder112                 adder(lexer_def_& def_)
113                   : def(def_) {}
114 
115                 // Add a token definition based on a single character as given
116                 // by the first parameter, the second parameter allows to
117                 // specify the token id to use for the new token. If no token
118                 // id is given the character code is used.
operator ()boost::spirit::lex::detail::lexer_def_::adder119                 adder const& operator()(char_type c
120                   , id_type token_id = id_type()) const
121                 {
122                     if (id_type() == token_id)
123                         token_id = static_cast<id_type>(c);
124                     def.def.add_token (def.state.c_str(), c, token_id
125                         , def.targetstate.empty() ? 0 : def.targetstate.c_str());
126                     return *this;
127                 }
128 
129                 // Add a token definition based on a character sequence as
130                 // given by the first parameter, the second parameter allows to
131                 // specify the token id to use for the new token. If no token
132                 // id is given this function will generate a unique id to be
133                 // used as the token's id.
operator ()boost::spirit::lex::detail::lexer_def_::adder134                 adder const& operator()(string_type const& s
135                   , id_type token_id = id_type()) const
136                 {
137                     if (id_type() == token_id)
138                         token_id = def.def.get_next_id();
139                     def.def.add_token (def.state.c_str(), s, token_id
140                         , def.targetstate.empty() ? 0 : def.targetstate.c_str());
141                     return *this;
142                 }
143 
144                 template <typename Attribute>
operator ()boost::spirit::lex::detail::lexer_def_::adder145                 adder const& operator()(
146                     token_def<Attribute, char_type, id_type>& tokdef
147                   , id_type token_id = id_type()) const
148                 {
149                     // make sure we have a token id
150                     if (id_type() == token_id) {
151                         if (id_type() == tokdef.id()) {
152                             token_id = def.def.get_next_id();
153                             tokdef.id(token_id);
154                         }
155                         else {
156                             token_id = tokdef.id();
157                         }
158                     }
159                     else {
160                     // the following assertion makes sure that the token_def
161                     // instance has not been assigned a different id earlier
162                         BOOST_ASSERT(id_type() == tokdef.id()
163                                   || token_id == tokdef.id());
164                         tokdef.id(token_id);
165                     }
166 
167                     def.define(tokdef);
168                     return *this;
169                 }
170 
171 //                 template <typename F>
172 //                 adder const& operator()(char_type c, id_type token_id, F act) const
173 //                 {
174 //                     if (id_type() == token_id)
175 //                         token_id = def.def.get_next_id();
176 //                     std::size_t unique_id =
177 //                         def.def.add_token (def.state.c_str(), s, token_id);
178 //                     def.def.add_action(unique_id, def.state.c_str(), act);
179 //                     return *this;
180 //                 }
181 
182                 lexer_def_& def;
183 
184             private:
185                 // silence MSVC warning C4512: assignment operator could not be generated
186                 adder& operator= (adder const&);
187             };
188             friend struct adder;
189 
190             // allow to use lexer.self.add_pattern("pattern1", "regex1")(...);
191             // syntax
192             struct pattern_adder
193             {
pattern_adderboost::spirit::lex::detail::lexer_def_::pattern_adder194                 pattern_adder(lexer_def_& def_)
195                   : def(def_) {}
196 
operator ()boost::spirit::lex::detail::lexer_def_::pattern_adder197                 pattern_adder const& operator()(string_type const& p
198                   , string_type const& s) const
199                 {
200                     def.def.add_pattern (def.state.c_str(), p, s);
201                     return *this;
202                 }
203 
204                 lexer_def_& def;
205 
206             private:
207                 // silence MSVC warning C4512: assignment operator could not be generated
208                 pattern_adder& operator= (pattern_adder const&);
209             };
210             friend struct pattern_adder;
211 
212         private:
213             // Helper function to invoke the necessary 2 step compilation
214             // process on token definition expressions
215             template <typename TokenExpr>
compile2passboost::spirit::lex::detail::lexer_def_216             void compile2pass(TokenExpr const& expr)
217             {
218                 expr.collect(def, state, targetstate);
219                 expr.add_actions(def);
220             }
221 
222         public:
223             ///////////////////////////////////////////////////////////////////
224             template <typename Expr>
defineboost::spirit::lex::detail::lexer_def_225             void define(Expr const& expr)
226             {
227                 compile2pass(compile<lex::domain>(expr));
228             }
229 
lexer_def_boost::spirit::lex::detail::lexer_def_230             lexer_def_(LexerDef& def_, string_type const& state_
231                   , string_type const& targetstate_ = string_type())
232               : proto_base_type(terminal_type::make(alias()))
233               , add(this_()), add_pattern(this_()), def(def_)
234               , state(state_), targetstate(targetstate_)
235             {}
236 
237             // allow to switch states
operator ()boost::spirit::lex::detail::lexer_def_238             lexer_def_ operator()(char_type const* state) const
239             {
240                 return lexer_def_(def, state);
241             }
operator ()boost::spirit::lex::detail::lexer_def_242             lexer_def_ operator()(char_type const* state
243               , char_type const* targetstate) const
244             {
245                 return lexer_def_(def, state, targetstate);
246             }
operator ()boost::spirit::lex::detail::lexer_def_247             lexer_def_ operator()(string_type const& state
248               , string_type const& targetstate = string_type()) const
249             {
250                 return lexer_def_(def, state, targetstate);
251             }
252 
253             // allow to assign a token definition expression
254             template <typename Expr>
operator =boost::spirit::lex::detail::lexer_def_255             lexer_def_& operator= (Expr const& xpr)
256             {
257                 // Report invalid expression error as early as possible.
258                 // If you got an error_invalid_expression error message here,
259                 // then the expression (expr) is not a valid spirit lex
260                 // expression.
261                 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
262 
263                 def.clear(state.c_str());
264                 define(xpr);
265                 return *this;
266             }
267 
268             // explicitly tell the lexer that the given state will be defined
269             // (useful in conjunction with "*")
add_stateboost::spirit::lex::detail::lexer_def_270             std::size_t add_state(char_type const* state = 0)
271             {
272                 return def.add_state(state ? state : def.initial_state().c_str());
273             }
274 
275             adder add;
276             pattern_adder add_pattern;
277 
278         private:
279             LexerDef& def;
280             string_type state;
281             string_type targetstate;
282 
283         private:
284             // silence MSVC warning C4512: assignment operator could not be generated
285             lexer_def_& operator= (lexer_def_ const&);
286         };
287 
288 #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES)
289         // allow to assign a token definition expression
290         template <typename LexerDef, typename Expr>
291         inline lexer_def_<LexerDef>&
operator +=(lexer_def_<LexerDef> & lexdef,Expr & xpr)292         operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr)
293         {
294             // Report invalid expression error as early as possible.
295             // If you got an error_invalid_expression error message here,
296             // then the expression (expr) is not a valid spirit lex
297             // expression.
298             BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
299 
300             lexdef.define(xpr);
301             return lexdef;
302         }
303 #else
304         // allow to assign a token definition expression
305         template <typename LexerDef, typename Expr>
306         inline lexer_def_<LexerDef>&
operator +=(lexer_def_<LexerDef> & lexdef,Expr && xpr)307         operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr)
308         {
309             // Report invalid expression error as early as possible.
310             // If you got an error_invalid_expression error message here,
311             // then the expression (expr) is not a valid spirit lex
312             // expression.
313             BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
314 
315             lexdef.define(xpr);
316             return lexdef;
317         }
318 #endif
319 
320         template <typename LexerDef, typename Expr>
321         inline lexer_def_<LexerDef>&
operator +=(lexer_def_<LexerDef> & lexdef,Expr const & xpr)322         operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr)
323         {
324             // Report invalid expression error as early as possible.
325             // If you got an error_invalid_expression error message here,
326             // then the expression (expr) is not a valid spirit lex
327             // expression.
328             BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr);
329 
330             lexdef.define(xpr);
331             return lexdef;
332         }
333     }
334 
335     ///////////////////////////////////////////////////////////////////////////
336     //  The match_flags flags are used to influence different matching
337     //  modes of the lexer
338     struct match_flags
339     {
340         enum enum_type
341         {
342             match_default = 0,          // no flags
343             match_not_dot_newline = 1,  // the regex '.' doesn't match newlines
344             match_icase = 2             // all matching operations are case insensitive
345         };
346     };
347 
348     ///////////////////////////////////////////////////////////////////////////
349     //  This represents a lexer object
350     ///////////////////////////////////////////////////////////////////////////
351 
352     ///////////////////////////////////////////////////////////////////////////
353     // This is the first token id automatically assigned by the library
354     // if needed
355     enum tokenids
356     {
357         min_token_id = 0x10000
358     };
359 
360     template <typename Lexer>
361     class lexer : public Lexer
362     {
363     private:
364         // avoid warnings about using 'this' in constructor
this_()365         lexer& this_() { return *this; }
366 
367         std::size_t next_token_id;   // has to be an integral type
368 
369     public:
370         typedef Lexer lexer_type;
371         typedef typename Lexer::id_type id_type;
372         typedef typename Lexer::char_type char_type;
373         typedef typename Lexer::iterator_type iterator_type;
374         typedef lexer base_type;
375 
376         typedef detail::lexer_def_<lexer> lexer_def;
377         typedef std::basic_string<char_type> string_type;
378 
lexer(unsigned int flags=match_flags::match_default,id_type first_id=id_type (min_token_id))379         lexer(unsigned int flags = match_flags::match_default
380             , id_type first_id = id_type(min_token_id))
381           : lexer_type(flags)
382           , next_token_id(first_id)
383           , self(this_(), lexer_type::initial_state())
384         {}
385 
386         // access iterator interface
387         template <typename Iterator>
begin(Iterator & first,Iterator const & last,char_type const * initial_state=0) const388         iterator_type begin(Iterator& first, Iterator const& last
389                 , char_type const* initial_state = 0) const
390             { return this->lexer_type::begin(first, last, initial_state); }
end() const391         iterator_type end() const
392             { return this->lexer_type::end(); }
393 
map_state(char_type const * state)394         std::size_t map_state(char_type const* state)
395             { return this->lexer_type::add_state(state); }
396 
397         //  create a unique token id
get_next_id()398         id_type get_next_id() { return id_type(next_token_id++); }
399 
400         lexer_def self;  // allow for easy token definition
401     };
402 
403 }}}
404 
405 #endif
406