1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM)
7 #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/qi/detail/assign_to.hpp>
14 #include <boost/spirit/home/support/detail/lexer/generator.hpp>
15 #include <boost/spirit/home/support/detail/lexer/rules.hpp>
16 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
17 #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp>
18 #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp>
19 #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp>
20 #include <boost/mpl/bool.hpp>
21 #include <boost/optional.hpp>
22 
23 namespace boost { namespace spirit { namespace lex { namespace lexertl
24 {
25     namespace detail
26     {
27         ///////////////////////////////////////////////////////////////////////
// Primary template of the lexer data holder. It is declared only and never
// defined: every usable combination of the HasActors/HasState flags (and of
// TokenValue) is supplied by one of the partial specializations that follow.
template <
    typename Iterator
  , typename HasActors
  , typename HasState
  , typename TokenValue
>
class data;
31 
32         ///////////////////////////////////////////////////////////////////////
33         //  neither supports state, nor actors
34         template <typename Iterator, typename TokenValue>
35         class data<Iterator, mpl::false_, mpl::false_, TokenValue>
36         {
37         protected:
38             typedef typename
39                 boost::detail::iterator_traits<Iterator>::value_type
40             char_type;
41 
42         public:
43             typedef Iterator base_iterator_type;
44             typedef iterator_range<Iterator> token_value_type;
45             typedef token_value_type get_value_type;
46             typedef std::size_t state_type;
47             typedef char_type const* state_name_type;
48             typedef unused_type semantic_actions_type;
49             typedef detail::wrap_action<unused_type, Iterator, data, std::size_t>
50                 wrap_action_type;
51 
52             typedef unused_type next_token_functor;
53             typedef unused_type get_state_name_type;
54 
55             // initialize the shared data
56             template <typename IterData>
data(IterData const & data_,Iterator & first,Iterator const & last)57             data (IterData const& data_, Iterator& first, Iterator const& last)
58               : first_(first), last_(last)
59               , state_machine_(data_.state_machine_)
60               , rules_(data_.rules_)
61               , bol_(data_.state_machine_.data()._seen_BOL_assertion) {}
62 
63             // The following functions are used by the implementation of the
64             // placeholder '_state'.
65             template <typename Char>
set_state_name(Char const *)66             void set_state_name (Char const*)
67             {
68 // some (random) versions of gcc instantiate this function even if it's not
69 // needed leading to false static asserts
70 #if !defined(__GNUC__)
71                 // If you see a compile time assertion below you're probably
72                 // using a token type not supporting lexer states (the 3rd
73                 // template parameter of the token is mpl::false_), but your
74                 // code uses state changes anyways.
75                 BOOST_STATIC_ASSERT(false);
76 #endif
77             }
get_state_name() const78             char_type const* get_state_name() const { return rules_.initial(); }
get_state_id(char_type const *) const79             std::size_t get_state_id (char_type const*) const
80             {
81                 return 0;
82             }
83 
84             // The function get_eoi() is used by the implementation of the
85             // placeholder '_eoi'.
get_eoi() const86             Iterator const& get_eoi() const { return last_; }
87 
88             // The function less() is used by the implementation of the support
89             // function lex::less(). Its functionality is equivalent to flex'
90             // function yyless(): it returns an iterator positioned to the
91             // nth input character beyond the current start iterator (i.e. by
92             // assigning the return value to the placeholder '_end' it is
93             // possible to return all but the first n characters of the current
94             // token back to the input stream.
95             //
96             // This function does nothing as long as no semantic actions are
97             // used.
less(Iterator const & it,int)98             Iterator const& less(Iterator const& it, int)
99             {
100                 // The following assertion fires most likely because you are
101                 // using lexer semantic actions without using the actor_lexer
102                 // as the base class for your token definition class.
103                 BOOST_ASSERT(false &&
104                     "Are you using lexer semantic actions without using the "
105                     "actor_lexer base?");
106                 return it;
107             }
108 
109             // The function more() is used by the implementation of the support
110             // function lex::more(). Its functionality is equivalent to flex'
111             // function yymore(): it tells the lexer that the next time it
112             // matches a rule, the corresponding token should be appended onto
113             // the current token value rather than replacing it.
114             //
115             // These functions do nothing as long as no semantic actions are
116             // used.
more()117             void more()
118             {
119                 // The following assertion fires most likely because you are
120                 // using lexer semantic actions without using the actor_lexer
121                 // as the base class for your token definition class.
122                 BOOST_ASSERT(false &&
123                     "Are you using lexer semantic actions without using the "
124                     "actor_lexer base?");
125             }
adjust_start()126             bool adjust_start() { return false; }
revert_adjust_start()127             void revert_adjust_start() {}
128 
129             // The function lookahead() is used by the implementation of the
130             // support function lex::lookahead. It can be used to implement
131             // lookahead for lexer engines not supporting constructs like flex'
132             // a/b  (match a, but only when followed by b):
133             //
134             // This function does nothing as long as no semantic actions are
135             // used.
lookahead(std::size_t,std::size_t=std::size_t (~0))136             bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0))
137             {
138                 // The following assertion fires most likely because you are
139                 // using lexer semantic actions without using the actor_lexer
140                 // as the base class for your token definition class.
141                 BOOST_ASSERT(false &&
142                     "Are you using lexer semantic actions without using the "
143                     "actor_lexer base?");
144                 return false;
145             }
146 
147             // the functions next, invoke_actions, and get_state are used by
148             // the functor implementation below
149 
150             // The function next() tries to match the next token from the
151             // underlying input sequence.
next(Iterator & end,std::size_t & unique_id,bool & prev_bol)152             std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
153             {
154                 prev_bol = bol_;
155 
156                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
157                 return tokenizer::next(state_machine_, bol_, end, last_
158                   , unique_id);
159             }
160 
161             // nothing to invoke, so this is empty
invoke_actions(std::size_t,std::size_t,std::size_t,Iterator const &)162             BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t
163               , std::size_t, std::size_t, Iterator const&)
164             {
165                 return pass_flags::pass_normal;    // always accept
166             }
167 
get_state() const168             std::size_t get_state() const { return 0; }
set_state(std::size_t)169             void set_state(std::size_t) {}
170 
set_end(Iterator const &)171             void set_end(Iterator const& /*it*/) {}
172 
get_first()173             Iterator& get_first() { return first_; }
get_first() const174             Iterator const& get_first() const { return first_; }
get_last() const175             Iterator const& get_last() const { return last_; }
176 
get_value() const177             iterator_range<Iterator> get_value() const
178             {
179                 return iterator_range<Iterator>(first_, last_);
180             }
has_value() const181             bool has_value() const { return false; }
reset_value()182             void reset_value() {}
183 
reset_bol(bool bol)184             void reset_bol(bool bol) { bol_ = bol; }
185 
186         protected:
187             Iterator& first_;
188             Iterator last_;
189 
190             boost::lexer::basic_state_machine<char_type> const& state_machine_;
191             boost::lexer::basic_rules<char_type> const& rules_;
192 
193             bool bol_;      // helper storing whether last character was \n
194 
195         private:
196             // silence MSVC warning C4512: assignment operator could not be generated
197             data& operator= (data const&);
198         };
199 
200         ///////////////////////////////////////////////////////////////////////
201         //  doesn't support lexer semantic actions, but supports state
202         template <typename Iterator, typename TokenValue>
203         class data<Iterator, mpl::false_, mpl::true_, TokenValue>
204           : public data<Iterator, mpl::false_, mpl::false_, TokenValue>
205         {
206         protected:
207             typedef data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type;
208             typedef typename base_type::char_type char_type;
209 
210         public:
211             typedef Iterator base_iterator_type;
212             typedef iterator_range<Iterator> token_value_type;
213             typedef token_value_type get_value_type;
214             typedef typename base_type::state_type state_type;
215             typedef typename base_type::state_name_type state_name_type;
216             typedef typename base_type::semantic_actions_type
217                 semantic_actions_type;
218 
219             // initialize the shared data
220             template <typename IterData>
data(IterData const & data_,Iterator & first,Iterator const & last)221             data (IterData const& data_, Iterator& first, Iterator const& last)
222               : base_type(data_, first, last)
223               , state_(0) {}
224 
225             // The following functions are used by the implementation of the
226             // placeholder '_state'.
set_state_name(char_type const * new_state)227             void set_state_name (char_type const* new_state)
228             {
229                 std::size_t state_id = this->rules_.state(new_state);
230 
231                 // If the following assertion fires you've probably been using
232                 // a lexer state name which was not defined in your token
233                 // definition.
234                 BOOST_ASSERT(state_id != boost::lexer::npos);
235 
236                 if (state_id != boost::lexer::npos)
237                     state_ = state_id;
238             }
get_state_name() const239             char_type const* get_state_name() const
240             {
241                 return this->rules_.state(state_);
242             }
get_state_id(char_type const * state) const243             std::size_t get_state_id (char_type const* state) const
244             {
245                 return this->rules_.state(state);
246             }
247 
248             // the functions next() and get_state() are used by the functor
249             // implementation below
250 
251             // The function next() tries to match the next token from the
252             // underlying input sequence.
next(Iterator & end,std::size_t & unique_id,bool & prev_bol)253             std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol)
254             {
255                 prev_bol = this->bol_;
256 
257                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
258                 return tokenizer::next(this->state_machine_, state_,
259                     this->bol_, end, this->get_eoi(), unique_id);
260             }
261 
get_state()262             std::size_t& get_state() { return state_; }
set_state(std::size_t state)263             void set_state(std::size_t state) { state_ = state; }
264 
265         protected:
266             std::size_t state_;
267 
268         private:
269             // silence MSVC warning C4512: assignment operator could not be generated
270             data& operator= (data const&);
271         };
272 
273         ///////////////////////////////////////////////////////////////////////
274         //  does support lexer semantic actions, may support state
275         template <typename Iterator, typename HasState, typename TokenValue>
276         class data<Iterator, mpl::true_, HasState, TokenValue>
277           : public data<Iterator, mpl::false_, HasState, TokenValue>
278         {
279         public:
280             typedef semantic_actions<Iterator, HasState, data>
281                 semantic_actions_type;
282 
283         protected:
284             typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type;
285             typedef typename base_type::char_type char_type;
286             typedef typename semantic_actions_type::functor_wrapper_type
287                 functor_wrapper_type;
288 
289         public:
290             typedef Iterator base_iterator_type;
291             typedef TokenValue token_value_type;
292             typedef TokenValue const& get_value_type;
293             typedef typename base_type::state_type state_type;
294             typedef typename base_type::state_name_type state_name_type;
295 
296             typedef detail::wrap_action<functor_wrapper_type
297               , Iterator, data, std::size_t> wrap_action_type;
298 
299             template <typename IterData>
data(IterData const & data_,Iterator & first,Iterator const & last)300             data (IterData const& data_, Iterator& first, Iterator const& last)
301               : base_type(data_, first, last)
302               , actions_(data_.actions_), hold_()
303               , value_(iterator_range<Iterator>(last, last))
304               , has_value_(false), has_hold_(false) {}
305 
306             // invoke attached semantic actions, if defined
invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)307             BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
308               , std::size_t& id, std::size_t unique_id, Iterator& end)
309             {
310                 return actions_.invoke_actions(state, id, unique_id, end, *this);
311             }
312 
313             // The function less() is used by the implementation of the support
314             // function lex::less(). Its functionality is equivalent to flex'
315             // function yyless(): it returns an iterator positioned to the
316             // nth input character beyond the current start iterator (i.e. by
317             // assigning the return value to the placeholder '_end' it is
318             // possible to return all but the first n characters of the current
319             // token back to the input stream).
less(Iterator & it,int n)320             Iterator const& less(Iterator& it, int n)
321             {
322                 it = this->get_first();
323                 std::advance(it, n);
324                 return it;
325             }
326 
327             // The function more() is used by the implementation of the support
328             // function lex::more(). Its functionality is equivalent to flex'
329             // function yymore(): it tells the lexer that the next time it
330             // matches a rule, the corresponding token should be appended onto
331             // the current token value rather than replacing it.
more()332             void more()
333             {
334                 hold_ = this->get_first();
335                 has_hold_ = true;
336             }
337 
338             // The function lookahead() is used by the implementation of the
339             // support function lex::lookahead. It can be used to implement
340             // lookahead for lexer engines not supporting constructs like flex'
341             // a/b  (match a, but only when followed by b)
lookahead(std::size_t id,std::size_t state=std::size_t (~0))342             bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
343             {
344                 Iterator end = end_;
345                 std::size_t unique_id = boost::lexer::npos;
346                 bool bol = this->bol_;
347 
348                 if (std::size_t(~0) == state)
349                     state = this->state_;
350 
351                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
352                 return id == tokenizer::next(this->state_machine_, state,
353                     bol, end, this->get_eoi(), unique_id);
354             }
355 
356             // The adjust_start() and revert_adjust_start() are helper
357             // functions needed to implement the functionality required for
358             // lex::more(). It is called from the functor body below.
adjust_start()359             bool adjust_start()
360             {
361                 if (!has_hold_)
362                     return false;
363 
364                 std::swap(this->get_first(), hold_);
365                 has_hold_ = false;
366                 return true;
367             }
revert_adjust_start()368             void revert_adjust_start()
369             {
370                 // this will be called only if adjust_start above returned true
371                 std::swap(this->get_first(), hold_);
372                 has_hold_ = true;
373             }
374 
get_value() const375             TokenValue const& get_value() const
376             {
377                 if (!has_value_) {
378                     value_ = iterator_range<Iterator>(this->get_first(), end_);
379                     has_value_ = true;
380                 }
381                 return value_;
382             }
383             template <typename Value>
set_value(Value const & val)384             void set_value(Value const& val)
385             {
386                 value_ = val;
387                 has_value_ = true;
388             }
set_end(Iterator const & it)389             void set_end(Iterator const& it)
390             {
391                 end_ = it;
392             }
has_value() const393             bool has_value() const { return has_value_; }
reset_value()394             void reset_value() { has_value_ = false; }
395 
396         protected:
397             semantic_actions_type const& actions_;
398             Iterator hold_;     // iterator needed to support lex::more()
399             Iterator end_;      // iterator pointing to end of matched token
400             mutable TokenValue value_;  // token value to use
401             mutable bool has_value_;    // 'true' if value_ is valid
402             bool has_hold_;     // 'true' if hold_ is valid
403 
404         private:
405             // silence MSVC warning C4512: assignment operator could not be generated
406             data& operator= (data const&);
407         };
408 
409         ///////////////////////////////////////////////////////////////////////
410         //  does support lexer semantic actions, may support state, is used for
411         //  position_token exposing exactly one type
412         template <typename Iterator, typename HasState, typename TokenValue>
413         class data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> >
414           : public data<Iterator, mpl::false_, HasState, TokenValue>
415         {
416         public:
417             typedef semantic_actions<Iterator, HasState, data>
418                 semantic_actions_type;
419 
420         protected:
421             typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type;
422             typedef typename base_type::char_type char_type;
423             typedef typename semantic_actions_type::functor_wrapper_type
424                 functor_wrapper_type;
425 
426         public:
427             typedef Iterator base_iterator_type;
428             typedef boost::optional<TokenValue> token_value_type;
429             typedef boost::optional<TokenValue> const& get_value_type;
430             typedef typename base_type::state_type state_type;
431             typedef typename base_type::state_name_type state_name_type;
432 
433             typedef detail::wrap_action<functor_wrapper_type
434               , Iterator, data, std::size_t> wrap_action_type;
435 
436             template <typename IterData>
data(IterData const & data_,Iterator & first,Iterator const & last)437             data (IterData const& data_, Iterator& first, Iterator const& last)
438               : base_type(data_, first, last)
439               , actions_(data_.actions_), hold_()
440               , has_value_(false), has_hold_(false)
441             {
442                 spirit::traits::assign_to(first, last, value_);
443                 has_value_ = true;
444             }
445 
446             // invoke attached semantic actions, if defined
invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)447             BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state
448               , std::size_t& id, std::size_t unique_id, Iterator& end)
449             {
450                 return actions_.invoke_actions(state, id, unique_id, end, *this);
451             }
452 
453             // The function less() is used by the implementation of the support
454             // function lex::less(). Its functionality is equivalent to flex'
455             // function yyless(): it returns an iterator positioned to the
456             // nth input character beyond the current start iterator (i.e. by
457             // assigning the return value to the placeholder '_end' it is
458             // possible to return all but the first n characters of the current
459             // token back to the input stream).
less(Iterator & it,int n)460             Iterator const& less(Iterator& it, int n)
461             {
462                 it = this->get_first();
463                 std::advance(it, n);
464                 return it;
465             }
466 
467             // The function more() is used by the implementation of the support
468             // function lex::more(). Its functionality is equivalent to flex'
469             // function yymore(): it tells the lexer that the next time it
470             // matches a rule, the corresponding token should be appended onto
471             // the current token value rather than replacing it.
more()472             void more()
473             {
474                 hold_ = this->get_first();
475                 has_hold_ = true;
476             }
477 
478             // The function lookahead() is used by the implementation of the
479             // support function lex::lookahead. It can be used to implement
480             // lookahead for lexer engines not supporting constructs like flex'
481             // a/b  (match a, but only when followed by b)
lookahead(std::size_t id,std::size_t state=std::size_t (~0))482             bool lookahead(std::size_t id, std::size_t state = std::size_t(~0))
483             {
484                 Iterator end = end_;
485                 std::size_t unique_id = boost::lexer::npos;
486                 bool bol = this->bol_;
487 
488                 if (std::size_t(~0) == state)
489                     state = this->state_;
490 
491                 typedef basic_iterator_tokeniser<Iterator> tokenizer;
492                 return id == tokenizer::next(this->state_machine_, state,
493                     bol, end, this->get_eoi(), unique_id);
494             }
495 
496             // The adjust_start() and revert_adjust_start() are helper
497             // functions needed to implement the functionality required for
498             // lex::more(). It is called from the functor body below.
adjust_start()499             bool adjust_start()
500             {
501                 if (!has_hold_)
502                     return false;
503 
504                 std::swap(this->get_first(), hold_);
505                 has_hold_ = false;
506                 return true;
507             }
revert_adjust_start()508             void revert_adjust_start()
509             {
510                 // this will be called only if adjust_start above returned true
511                 std::swap(this->get_first(), hold_);
512                 has_hold_ = true;
513             }
514 
get_value() const515             token_value_type const& get_value() const
516             {
517                 if (!has_value_) {
518                     spirit::traits::assign_to(this->get_first(), end_, value_);
519                     has_value_ = true;
520                 }
521                 return value_;
522             }
523             template <typename Value>
set_value(Value const & val)524             void set_value(Value const& val)
525             {
526                 value_ = val;
527                 has_value_ = true;
528             }
set_end(Iterator const & it)529             void set_end(Iterator const& it)
530             {
531                 end_ = it;
532             }
has_value() const533             bool has_value() const { return has_value_; }
reset_value()534             void reset_value() { has_value_ = false; }
535 
536         protected:
537             semantic_actions_type const& actions_;
538             Iterator hold_;     // iterator needed to support lex::more()
539             Iterator end_;      // iterator pointing to end of matched token
540             mutable token_value_type value_;  // token value to use
541             mutable bool has_value_;    // 'true' if value_ is valid
542             bool has_hold_;     // 'true' if hold_ is valid
543 
544         private:
545             // silence MSVC warning C4512: assignment operator could not be generated
546             data& operator= (data const&);
547         };
548     }
549 }}}}
550 
551 #endif
552 
553