1 // Copyright (c) 2001-2011 Hartmut Kaiser 2 // 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM) 7 #define BOOST_SPIRIT_LEX_LEXER_FUNCTOR_DATA_JUN_10_2009_0954AM 8 9 #if defined(_MSC_VER) 10 #pragma once 11 #endif 12 13 #include <boost/spirit/home/qi/detail/assign_to.hpp> 14 #include <boost/spirit/home/support/detail/lexer/generator.hpp> 15 #include <boost/spirit/home/support/detail/lexer/rules.hpp> 16 #include <boost/spirit/home/support/detail/lexer/state_machine.hpp> 17 #include <boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp> 18 #include <boost/spirit/home/lex/lexer/lexertl/semantic_action_data.hpp> 19 #include <boost/spirit/home/lex/lexer/lexertl/wrap_action.hpp> 20 #include <boost/mpl/bool.hpp> 21 #include <boost/optional.hpp> 22 23 namespace boost { namespace spirit { namespace lex { namespace lexertl 24 { 25 namespace detail 26 { 27 /////////////////////////////////////////////////////////////////////// 28 template <typename Iterator, typename HasActors, typename HasState 29 , typename TokenValue> 30 class data; // no default specialization 31 32 /////////////////////////////////////////////////////////////////////// 33 // neither supports state, nor actors 34 template <typename Iterator, typename TokenValue> 35 class data<Iterator, mpl::false_, mpl::false_, TokenValue> 36 { 37 protected: 38 typedef typename 39 boost::detail::iterator_traits<Iterator>::value_type 40 char_type; 41 42 public: 43 typedef Iterator base_iterator_type; 44 typedef iterator_range<Iterator> token_value_type; 45 typedef token_value_type get_value_type; 46 typedef std::size_t state_type; 47 typedef char_type const* state_name_type; 48 typedef unused_type semantic_actions_type; 49 typedef detail::wrap_action<unused_type, Iterator, data, std::size_t> 50 wrap_action_type; 51 52 typedef unused_type next_token_functor; 53 typedef unused_type get_state_name_type; 54 55 // initialize the shared data 56 template <typename IterData> data(IterData const & data_,Iterator & first,Iterator const & last)57 data (IterData const& data_, Iterator& first, Iterator const& last) 58 : first_(first), last_(last) 59 , state_machine_(data_.state_machine_) 60 , rules_(data_.rules_) 61 , bol_(data_.state_machine_.data()._seen_BOL_assertion) {} 62 63 // The following functions are used by the implementation of the 64 // placeholder '_state'. 65 template <typename Char> set_state_name(Char const *)66 void set_state_name (Char const*) 67 { 68 // some (random) versions of gcc instantiate this function even if it's not 69 // needed leading to false static asserts 70 #if !defined(__GNUC__) 71 // If you see a compile time assertion below you're probably 72 // using a token type not supporting lexer states (the 3rd 73 // template parameter of the token is mpl::false_), but your 74 // code uses state changes anyways. 75 BOOST_STATIC_ASSERT(false); 76 #endif 77 } get_state_name() const78 char_type const* get_state_name() const { return rules_.initial(); } get_state_id(char_type const *) const79 std::size_t get_state_id (char_type const*) const 80 { 81 return 0; 82 } 83 84 // The function get_eoi() is used by the implementation of the 85 // placeholder '_eoi'. get_eoi() const86 Iterator const& get_eoi() const { return last_; } 87 88 // The function less() is used by the implementation of the support 89 // function lex::less(). Its functionality is equivalent to flex' 90 // function yyless(): it returns an iterator positioned to the 91 // nth input character beyond the current start iterator (i.e. by 92 // assigning the return value to the placeholder '_end' it is 93 // possible to return all but the first n characters of the current 94 // token back to the input stream. 95 // 96 // This function does nothing as long as no semantic actions are 97 // used. less(Iterator const & it,int)98 Iterator const& less(Iterator const& it, int) 99 { 100 // The following assertion fires most likely because you are 101 // using lexer semantic actions without using the actor_lexer 102 // as the base class for your token definition class. 103 BOOST_ASSERT(false && 104 "Are you using lexer semantic actions without using the " 105 "actor_lexer base?"); 106 return it; 107 } 108 109 // The function more() is used by the implementation of the support 110 // function lex::more(). Its functionality is equivalent to flex' 111 // function yymore(): it tells the lexer that the next time it 112 // matches a rule, the corresponding token should be appended onto 113 // the current token value rather than replacing it. 114 // 115 // These functions do nothing as long as no semantic actions are 116 // used. more()117 void more() 118 { 119 // The following assertion fires most likely because you are 120 // using lexer semantic actions without using the actor_lexer 121 // as the base class for your token definition class. 122 BOOST_ASSERT(false && 123 "Are you using lexer semantic actions without using the " 124 "actor_lexer base?"); 125 } adjust_start()126 bool adjust_start() { return false; } revert_adjust_start()127 void revert_adjust_start() {} 128 129 // The function lookahead() is used by the implementation of the 130 // support function lex::lookahead. It can be used to implement 131 // lookahead for lexer engines not supporting constructs like flex' 132 // a/b (match a, but only when followed by b): 133 // 134 // This function does nothing as long as no semantic actions are 135 // used. lookahead(std::size_t,std::size_t=std::size_t (~0))136 bool lookahead(std::size_t, std::size_t /*state*/ = std::size_t(~0)) 137 { 138 // The following assertion fires most likely because you are 139 // using lexer semantic actions without using the actor_lexer 140 // as the base class for your token definition class. 141 BOOST_ASSERT(false && 142 "Are you using lexer semantic actions without using the " 143 "actor_lexer base?"); 144 return false; 145 } 146 147 // the functions next, invoke_actions, and get_state are used by 148 // the functor implementation below 149 150 // The function next() tries to match the next token from the 151 // underlying input sequence. next(Iterator & end,std::size_t & unique_id,bool & prev_bol)152 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) 153 { 154 prev_bol = bol_; 155 156 typedef basic_iterator_tokeniser<Iterator> tokenizer; 157 return tokenizer::next(state_machine_, bol_, end, last_ 158 , unique_id); 159 } 160 161 // nothing to invoke, so this is empty invoke_actions(std::size_t,std::size_t,std::size_t,Iterator const &)162 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t 163 , std::size_t, std::size_t, Iterator const&) 164 { 165 return pass_flags::pass_normal; // always accept 166 } 167 get_state() const168 std::size_t get_state() const { return 0; } set_state(std::size_t)169 void set_state(std::size_t) {} 170 set_end(Iterator const &)171 void set_end(Iterator const& /*it*/) {} 172 get_first()173 Iterator& get_first() { return first_; } get_first() const174 Iterator const& get_first() const { return first_; } get_last() const175 Iterator const& get_last() const { return last_; } 176 get_value() const177 iterator_range<Iterator> get_value() const 178 { 179 return iterator_range<Iterator>(first_, last_); 180 } has_value() const181 bool has_value() const { return false; } reset_value()182 void reset_value() {} 183 reset_bol(bool bol)184 void reset_bol(bool bol) { bol_ = bol; } 185 186 protected: 187 Iterator& first_; 188 Iterator last_; 189 190 boost::lexer::basic_state_machine<char_type> const& state_machine_; 191 boost::lexer::basic_rules<char_type> const& rules_; 192 193 bool bol_; // helper storing whether last character was \n 194 195 private: 196 // silence MSVC warning C4512: assignment operator could not be generated 197 data& operator= (data const&); 198 }; 199 200 /////////////////////////////////////////////////////////////////////// 201 // doesn't support lexer semantic actions, but supports state 202 template <typename Iterator, typename TokenValue> 203 class data<Iterator, mpl::false_, mpl::true_, TokenValue> 204 : public data<Iterator, mpl::false_, mpl::false_, TokenValue> 205 { 206 protected: 207 typedef data<Iterator, mpl::false_, mpl::false_, TokenValue> base_type; 208 typedef typename base_type::char_type char_type; 209 210 public: 211 typedef Iterator base_iterator_type; 212 typedef iterator_range<Iterator> token_value_type; 213 typedef token_value_type get_value_type; 214 typedef typename base_type::state_type state_type; 215 typedef typename base_type::state_name_type state_name_type; 216 typedef typename base_type::semantic_actions_type 217 semantic_actions_type; 218 219 // initialize the shared data 220 template <typename IterData> data(IterData const & data_,Iterator & first,Iterator const & last)221 data (IterData const& data_, Iterator& first, Iterator const& last) 222 : base_type(data_, first, last) 223 , state_(0) {} 224 225 // The following functions are used by the implementation of the 226 // placeholder '_state'. set_state_name(char_type const * new_state)227 void set_state_name (char_type const* new_state) 228 { 229 std::size_t state_id = this->rules_.state(new_state); 230 231 // If the following assertion fires you've probably been using 232 // a lexer state name which was not defined in your token 233 // definition. 234 BOOST_ASSERT(state_id != boost::lexer::npos); 235 236 if (state_id != boost::lexer::npos) 237 state_ = state_id; 238 } get_state_name() const239 char_type const* get_state_name() const 240 { 241 return this->rules_.state(state_); 242 } get_state_id(char_type const * state) const243 std::size_t get_state_id (char_type const* state) const 244 { 245 return this->rules_.state(state); 246 } 247 248 // the functions next() and get_state() are used by the functor 249 // implementation below 250 251 // The function next() tries to match the next token from the 252 // underlying input sequence. next(Iterator & end,std::size_t & unique_id,bool & prev_bol)253 std::size_t next(Iterator& end, std::size_t& unique_id, bool& prev_bol) 254 { 255 prev_bol = this->bol_; 256 257 typedef basic_iterator_tokeniser<Iterator> tokenizer; 258 return tokenizer::next(this->state_machine_, state_, 259 this->bol_, end, this->get_eoi(), unique_id); 260 } 261 get_state()262 std::size_t& get_state() { return state_; } set_state(std::size_t state)263 void set_state(std::size_t state) { state_ = state; } 264 265 protected: 266 std::size_t state_; 267 268 private: 269 // silence MSVC warning C4512: assignment operator could not be generated 270 data& operator= (data const&); 271 }; 272 273 /////////////////////////////////////////////////////////////////////// 274 // does support lexer semantic actions, may support state 275 template <typename Iterator, typename HasState, typename TokenValue> 276 class data<Iterator, mpl::true_, HasState, TokenValue> 277 : public data<Iterator, mpl::false_, HasState, TokenValue> 278 { 279 public: 280 typedef semantic_actions<Iterator, HasState, data> 281 semantic_actions_type; 282 283 protected: 284 typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type; 285 typedef typename base_type::char_type char_type; 286 typedef typename semantic_actions_type::functor_wrapper_type 287 functor_wrapper_type; 288 289 public: 290 typedef Iterator base_iterator_type; 291 typedef TokenValue token_value_type; 292 typedef TokenValue const& get_value_type; 293 typedef typename base_type::state_type state_type; 294 typedef typename base_type::state_name_type state_name_type; 295 296 typedef detail::wrap_action<functor_wrapper_type 297 , Iterator, data, std::size_t> wrap_action_type; 298 299 template <typename IterData> data(IterData const & data_,Iterator & first,Iterator const & last)300 data (IterData const& data_, Iterator& first, Iterator const& last) 301 : base_type(data_, first, last) 302 , actions_(data_.actions_), hold_() 303 , value_(iterator_range<Iterator>(last, last)) 304 , has_value_(false), has_hold_(false) {} 305 306 // invoke attached semantic actions, if defined invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)307 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state 308 , std::size_t& id, std::size_t unique_id, Iterator& end) 309 { 310 return actions_.invoke_actions(state, id, unique_id, end, *this); 311 } 312 313 // The function less() is used by the implementation of the support 314 // function lex::less(). Its functionality is equivalent to flex' 315 // function yyless(): it returns an iterator positioned to the 316 // nth input character beyond the current start iterator (i.e. by 317 // assigning the return value to the placeholder '_end' it is 318 // possible to return all but the first n characters of the current 319 // token back to the input stream). less(Iterator & it,int n)320 Iterator const& less(Iterator& it, int n) 321 { 322 it = this->get_first(); 323 std::advance(it, n); 324 return it; 325 } 326 327 // The function more() is used by the implementation of the support 328 // function lex::more(). Its functionality is equivalent to flex' 329 // function yymore(): it tells the lexer that the next time it 330 // matches a rule, the corresponding token should be appended onto 331 // the current token value rather than replacing it. more()332 void more() 333 { 334 hold_ = this->get_first(); 335 has_hold_ = true; 336 } 337 338 // The function lookahead() is used by the implementation of the 339 // support function lex::lookahead. It can be used to implement 340 // lookahead for lexer engines not supporting constructs like flex' 341 // a/b (match a, but only when followed by b) lookahead(std::size_t id,std::size_t state=std::size_t (~0))342 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) 343 { 344 Iterator end = end_; 345 std::size_t unique_id = boost::lexer::npos; 346 bool bol = this->bol_; 347 348 if (std::size_t(~0) == state) 349 state = this->state_; 350 351 typedef basic_iterator_tokeniser<Iterator> tokenizer; 352 return id == tokenizer::next(this->state_machine_, state, 353 bol, end, this->get_eoi(), unique_id); 354 } 355 356 // The adjust_start() and revert_adjust_start() are helper 357 // functions needed to implement the functionality required for 358 // lex::more(). It is called from the functor body below. adjust_start()359 bool adjust_start() 360 { 361 if (!has_hold_) 362 return false; 363 364 std::swap(this->get_first(), hold_); 365 has_hold_ = false; 366 return true; 367 } revert_adjust_start()368 void revert_adjust_start() 369 { 370 // this will be called only if adjust_start above returned true 371 std::swap(this->get_first(), hold_); 372 has_hold_ = true; 373 } 374 get_value() const375 TokenValue const& get_value() const 376 { 377 if (!has_value_) { 378 value_ = iterator_range<Iterator>(this->get_first(), end_); 379 has_value_ = true; 380 } 381 return value_; 382 } 383 template <typename Value> set_value(Value const & val)384 void set_value(Value const& val) 385 { 386 value_ = val; 387 has_value_ = true; 388 } set_end(Iterator const & it)389 void set_end(Iterator const& it) 390 { 391 end_ = it; 392 } has_value() const393 bool has_value() const { return has_value_; } reset_value()394 void reset_value() { has_value_ = false; } 395 396 protected: 397 semantic_actions_type const& actions_; 398 Iterator hold_; // iterator needed to support lex::more() 399 Iterator end_; // iterator pointing to end of matched token 400 mutable TokenValue value_; // token value to use 401 mutable bool has_value_; // 'true' if value_ is valid 402 bool has_hold_; // 'true' if hold_ is valid 403 404 private: 405 // silence MSVC warning C4512: assignment operator could not be generated 406 data& operator= (data const&); 407 }; 408 409 /////////////////////////////////////////////////////////////////////// 410 // does support lexer semantic actions, may support state, is used for 411 // position_token exposing exactly one type 412 template <typename Iterator, typename HasState, typename TokenValue> 413 class data<Iterator, mpl::true_, HasState, boost::optional<TokenValue> > 414 : public data<Iterator, mpl::false_, HasState, TokenValue> 415 { 416 public: 417 typedef semantic_actions<Iterator, HasState, data> 418 semantic_actions_type; 419 420 protected: 421 typedef data<Iterator, mpl::false_, HasState, TokenValue> base_type; 422 typedef typename base_type::char_type char_type; 423 typedef typename semantic_actions_type::functor_wrapper_type 424 functor_wrapper_type; 425 426 public: 427 typedef Iterator base_iterator_type; 428 typedef boost::optional<TokenValue> token_value_type; 429 typedef boost::optional<TokenValue> const& get_value_type; 430 typedef typename base_type::state_type state_type; 431 typedef typename base_type::state_name_type state_name_type; 432 433 typedef detail::wrap_action<functor_wrapper_type 434 , Iterator, data, std::size_t> wrap_action_type; 435 436 template <typename IterData> data(IterData const & data_,Iterator & first,Iterator const & last)437 data (IterData const& data_, Iterator& first, Iterator const& last) 438 : base_type(data_, first, last) 439 , actions_(data_.actions_), hold_() 440 , has_value_(false), has_hold_(false) 441 { 442 spirit::traits::assign_to(first, last, value_); 443 has_value_ = true; 444 } 445 446 // invoke attached semantic actions, if defined invoke_actions(std::size_t state,std::size_t & id,std::size_t unique_id,Iterator & end)447 BOOST_SCOPED_ENUM(pass_flags) invoke_actions(std::size_t state 448 , std::size_t& id, std::size_t unique_id, Iterator& end) 449 { 450 return actions_.invoke_actions(state, id, unique_id, end, *this); 451 } 452 453 // The function less() is used by the implementation of the support 454 // function lex::less(). Its functionality is equivalent to flex' 455 // function yyless(): it returns an iterator positioned to the 456 // nth input character beyond the current start iterator (i.e. by 457 // assigning the return value to the placeholder '_end' it is 458 // possible to return all but the first n characters of the current 459 // token back to the input stream). less(Iterator & it,int n)460 Iterator const& less(Iterator& it, int n) 461 { 462 it = this->get_first(); 463 std::advance(it, n); 464 return it; 465 } 466 467 // The function more() is used by the implementation of the support 468 // function lex::more(). Its functionality is equivalent to flex' 469 // function yymore(): it tells the lexer that the next time it 470 // matches a rule, the corresponding token should be appended onto 471 // the current token value rather than replacing it. more()472 void more() 473 { 474 hold_ = this->get_first(); 475 has_hold_ = true; 476 } 477 478 // The function lookahead() is used by the implementation of the 479 // support function lex::lookahead. It can be used to implement 480 // lookahead for lexer engines not supporting constructs like flex' 481 // a/b (match a, but only when followed by b) lookahead(std::size_t id,std::size_t state=std::size_t (~0))482 bool lookahead(std::size_t id, std::size_t state = std::size_t(~0)) 483 { 484 Iterator end = end_; 485 std::size_t unique_id = boost::lexer::npos; 486 bool bol = this->bol_; 487 488 if (std::size_t(~0) == state) 489 state = this->state_; 490 491 typedef basic_iterator_tokeniser<Iterator> tokenizer; 492 return id == tokenizer::next(this->state_machine_, state, 493 bol, end, this->get_eoi(), unique_id); 494 } 495 496 // The adjust_start() and revert_adjust_start() are helper 497 // functions needed to implement the functionality required for 498 // lex::more(). It is called from the functor body below. adjust_start()499 bool adjust_start() 500 { 501 if (!has_hold_) 502 return false; 503 504 std::swap(this->get_first(), hold_); 505 has_hold_ = false; 506 return true; 507 } revert_adjust_start()508 void revert_adjust_start() 509 { 510 // this will be called only if adjust_start above returned true 511 std::swap(this->get_first(), hold_); 512 has_hold_ = true; 513 } 514 get_value() const515 token_value_type const& get_value() const 516 { 517 if (!has_value_) { 518 spirit::traits::assign_to(this->get_first(), end_, value_); 519 has_value_ = true; 520 } 521 return value_; 522 } 523 template <typename Value> set_value(Value const & val)524 void set_value(Value const& val) 525 { 526 value_ = val; 527 has_value_ = true; 528 } set_end(Iterator const & it)529 void set_end(Iterator const& it) 530 { 531 end_ = it; 532 } has_value() const533 bool has_value() const { return has_value_; } reset_value()534 void reset_value() { has_value_ = false; } 535 536 protected: 537 semantic_actions_type const& actions_; 538 Iterator hold_; // iterator needed to support lex::more() 539 Iterator end_; // iterator pointing to end of matched token 540 mutable token_value_type value_; // token value to use 541 mutable bool has_value_; // 'true' if value_ is valid 542 bool has_hold_; // 'true' if hold_ is valid 543 544 private: 545 // silence MSVC warning C4512: assignment operator could not be generated 546 data& operator= (data const&); 547 }; 548 } 549 }}}} 550 551 #endif 552 553