1 // Copyright (c) 2001-2011 Hartmut Kaiser 2 // 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM) 7 #define BOOST_SPIRIT_LEX_LEXER_MAR_13_2007_0145PM 8 9 #if defined(_MSC_VER) 10 #pragma once 11 #endif 12 13 #include <boost/spirit/home/support/info.hpp> 14 #include <boost/spirit/home/qi/skip_over.hpp> 15 #include <boost/spirit/home/qi/parser.hpp> 16 #include <boost/spirit/home/qi/detail/assign_to.hpp> 17 #include <boost/spirit/home/lex/reference.hpp> 18 #include <boost/spirit/home/lex/meta_compiler.hpp> 19 #include <boost/spirit/home/lex/lexer_type.hpp> 20 #include <boost/spirit/home/lex/lexer/token_def.hpp> 21 #include <boost/assert.hpp> 22 #include <boost/noncopyable.hpp> 23 #include <boost/detail/iterator.hpp> 24 #include <boost/fusion/include/vector.hpp> 25 #include <boost/mpl/assert.hpp> 26 #include <boost/range/iterator_range.hpp> 27 #include <string> 28 29 namespace boost { namespace spirit { namespace lex 30 { 31 /////////////////////////////////////////////////////////////////////////// 32 namespace detail 33 { 34 /////////////////////////////////////////////////////////////////////// 35 template <typename LexerDef> 36 struct lexer_def_ 37 : proto::extends< 38 typename proto::terminal< 39 lex::reference<lexer_def_<LexerDef> const> 40 >::type 41 , lexer_def_<LexerDef> > 42 , qi::parser<lexer_def_<LexerDef> > 43 , lex::lexer_type<lexer_def_<LexerDef> > 44 { 45 private: 46 // avoid warnings about using 'this' in constructor this_boost::spirit::lex::detail::lexer_def_47 lexer_def_& this_() { return *this; } 48 49 typedef typename LexerDef::char_type char_type; 50 typedef typename LexerDef::string_type string_type; 51 typedef typename LexerDef::id_type id_type; 52 53 typedef lex::reference<lexer_def_ const> reference_; 54 typedef typename proto::terminal<reference_>::type terminal_type; 55 typedef proto::extends<terminal_type, lexer_def_> proto_base_type; 56 aliasboost::spirit::lex::detail::lexer_def_57 reference_ alias() const 58 { 59 return reference_(*this); 60 } 61 62 public: 63 // Qi interface: metafunction calculating parser attribute type 64 template <typename Context, typename Iterator> 65 struct attribute 66 { 67 // the return value of a token set contains the matched token 68 // id, and the corresponding pair of iterators 69 typedef typename Iterator::base_iterator_type iterator_type; 70 typedef 71 fusion::vector2<id_type, iterator_range<iterator_type> > 72 type; 73 }; 74 75 // Qi interface: parse functionality 76 template <typename Iterator, typename Context 77 , typename Skipper, typename Attribute> parseboost::spirit::lex::detail::lexer_def_78 bool parse(Iterator& first, Iterator const& last 79 , Context& /*context*/, Skipper const& skipper 80 , Attribute& attr) const 81 { 82 qi::skip_over(first, last, skipper); // always do a pre-skip 83 84 if (first != last) { 85 typedef typename 86 boost::detail::iterator_traits<Iterator>::value_type 87 token_type; 88 89 token_type const& t = *first; 90 if (token_is_valid(t) && t.state() == first.get_state()) { 91 // any of the token definitions matched 92 spirit::traits::assign_to(t, attr); 93 ++first; 94 return true; 95 } 96 } 97 return false; 98 } 99 100 // Qi interface: 'what' functionality 101 template <typename Context> whatboost::spirit::lex::detail::lexer_def_102 info what(Context& /*context*/) const 103 { 104 return info("lexer"); 105 } 106 107 private: 108 // allow to use the lexer.self.add("regex1", id1)("regex2", id2); 109 // syntax 110 struct adder 111 { adderboost::spirit::lex::detail::lexer_def_::adder112 adder(lexer_def_& def_) 113 : def(def_) {} 114 115 // Add a token definition based on a single character as given 116 // by the first parameter, the second parameter allows to 117 // specify the token id to use for the new token. If no token 118 // id is given the character code is used. operator ()boost::spirit::lex::detail::lexer_def_::adder119 adder const& operator()(char_type c 120 , id_type token_id = id_type()) const 121 { 122 if (id_type() == token_id) 123 token_id = static_cast<id_type>(c); 124 def.def.add_token (def.state.c_str(), c, token_id 125 , def.targetstate.empty() ? 0 : def.targetstate.c_str()); 126 return *this; 127 } 128 129 // Add a token definition based on a character sequence as 130 // given by the first parameter, the second parameter allows to 131 // specify the token id to use for the new token. If no token 132 // id is given this function will generate a unique id to be 133 // used as the token's id. operator ()boost::spirit::lex::detail::lexer_def_::adder134 adder const& operator()(string_type const& s 135 , id_type token_id = id_type()) const 136 { 137 if (id_type() == token_id) 138 token_id = def.def.get_next_id(); 139 def.def.add_token (def.state.c_str(), s, token_id 140 , def.targetstate.empty() ? 0 : def.targetstate.c_str()); 141 return *this; 142 } 143 144 template <typename Attribute> operator ()boost::spirit::lex::detail::lexer_def_::adder145 adder const& operator()( 146 token_def<Attribute, char_type, id_type>& tokdef 147 , id_type token_id = id_type()) const 148 { 149 // make sure we have a token id 150 if (id_type() == token_id) { 151 if (id_type() == tokdef.id()) { 152 token_id = def.def.get_next_id(); 153 tokdef.id(token_id); 154 } 155 else { 156 token_id = tokdef.id(); 157 } 158 } 159 else { 160 // the following assertion makes sure that the token_def 161 // instance has not been assigned a different id earlier 162 BOOST_ASSERT(id_type() == tokdef.id() 163 || token_id == tokdef.id()); 164 tokdef.id(token_id); 165 } 166 167 def.define(tokdef); 168 return *this; 169 } 170 171 // template <typename F> 172 // adder const& operator()(char_type c, id_type token_id, F act) const 173 // { 174 // if (id_type() == token_id) 175 // token_id = def.def.get_next_id(); 176 // std::size_t unique_id = 177 // def.def.add_token (def.state.c_str(), s, token_id); 178 // def.def.add_action(unique_id, def.state.c_str(), act); 179 // return *this; 180 // } 181 182 lexer_def_& def; 183 184 private: 185 // silence MSVC warning C4512: assignment operator could not be generated 186 adder& operator= (adder const&); 187 }; 188 friend struct adder; 189 190 // allow to use lexer.self.add_pattern("pattern1", "regex1")(...); 191 // syntax 192 struct pattern_adder 193 { pattern_adderboost::spirit::lex::detail::lexer_def_::pattern_adder194 pattern_adder(lexer_def_& def_) 195 : def(def_) {} 196 operator ()boost::spirit::lex::detail::lexer_def_::pattern_adder197 pattern_adder const& operator()(string_type const& p 198 , string_type const& s) const 199 { 200 def.def.add_pattern (def.state.c_str(), p, s); 201 return *this; 202 } 203 204 lexer_def_& def; 205 206 private: 207 // silence MSVC warning C4512: assignment operator could not be generated 208 pattern_adder& operator= (pattern_adder const&); 209 }; 210 friend struct pattern_adder; 211 212 private: 213 // Helper function to invoke the necessary 2 step compilation 214 // process on token definition expressions 215 template <typename TokenExpr> compile2passboost::spirit::lex::detail::lexer_def_216 void compile2pass(TokenExpr const& expr) 217 { 218 expr.collect(def, state, targetstate); 219 expr.add_actions(def); 220 } 221 222 public: 223 /////////////////////////////////////////////////////////////////// 224 template <typename Expr> defineboost::spirit::lex::detail::lexer_def_225 void define(Expr const& expr) 226 { 227 compile2pass(compile<lex::domain>(expr)); 228 } 229 lexer_def_boost::spirit::lex::detail::lexer_def_230 lexer_def_(LexerDef& def_, string_type const& state_ 231 , string_type const& targetstate_ = string_type()) 232 : proto_base_type(terminal_type::make(alias())) 233 , add(this_()), add_pattern(this_()), def(def_) 234 , state(state_), targetstate(targetstate_) 235 {} 236 237 // allow to switch states operator ()boost::spirit::lex::detail::lexer_def_238 lexer_def_ operator()(char_type const* state) const 239 { 240 return lexer_def_(def, state); 241 } operator ()boost::spirit::lex::detail::lexer_def_242 lexer_def_ operator()(char_type const* state 243 , char_type const* targetstate) const 244 { 245 return lexer_def_(def, state, targetstate); 246 } operator ()boost::spirit::lex::detail::lexer_def_247 lexer_def_ operator()(string_type const& state 248 , string_type const& targetstate = string_type()) const 249 { 250 return lexer_def_(def, state, targetstate); 251 } 252 253 // allow to assign a token definition expression 254 template <typename Expr> operator =boost::spirit::lex::detail::lexer_def_255 lexer_def_& operator= (Expr const& xpr) 256 { 257 // Report invalid expression error as early as possible. 258 // If you got an error_invalid_expression error message here, 259 // then the expression (expr) is not a valid spirit lex 260 // expression. 261 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); 262 263 def.clear(state.c_str()); 264 define(xpr); 265 return *this; 266 } 267 268 // explicitly tell the lexer that the given state will be defined 269 // (useful in conjunction with "*") add_stateboost::spirit::lex::detail::lexer_def_270 std::size_t add_state(char_type const* state = 0) 271 { 272 return def.add_state(state ? state : def.initial_state().c_str()); 273 } 274 275 adder add; 276 pattern_adder add_pattern; 277 278 private: 279 LexerDef& def; 280 string_type state; 281 string_type targetstate; 282 283 private: 284 // silence MSVC warning C4512: assignment operator could not be generated 285 lexer_def_& operator= (lexer_def_ const&); 286 }; 287 288 #if defined(BOOST_NO_CXX11_RVALUE_REFERENCES) 289 // allow to assign a token definition expression 290 template <typename LexerDef, typename Expr> 291 inline lexer_def_<LexerDef>& operator +=(lexer_def_<LexerDef> & lexdef,Expr & xpr)292 operator+= (lexer_def_<LexerDef>& lexdef, Expr& xpr) 293 { 294 // Report invalid expression error as early as possible. 295 // If you got an error_invalid_expression error message here, 296 // then the expression (expr) is not a valid spirit lex 297 // expression. 298 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); 299 300 lexdef.define(xpr); 301 return lexdef; 302 } 303 #else 304 // allow to assign a token definition expression 305 template <typename LexerDef, typename Expr> 306 inline lexer_def_<LexerDef>& operator +=(lexer_def_<LexerDef> & lexdef,Expr && xpr)307 operator+= (lexer_def_<LexerDef>& lexdef, Expr&& xpr) 308 { 309 // Report invalid expression error as early as possible. 310 // If you got an error_invalid_expression error message here, 311 // then the expression (expr) is not a valid spirit lex 312 // expression. 313 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); 314 315 lexdef.define(xpr); 316 return lexdef; 317 } 318 #endif 319 320 template <typename LexerDef, typename Expr> 321 inline lexer_def_<LexerDef>& operator +=(lexer_def_<LexerDef> & lexdef,Expr const & xpr)322 operator+= (lexer_def_<LexerDef>& lexdef, Expr const& xpr) 323 { 324 // Report invalid expression error as early as possible. 325 // If you got an error_invalid_expression error message here, 326 // then the expression (expr) is not a valid spirit lex 327 // expression. 328 BOOST_SPIRIT_ASSERT_MATCH(lex::domain, Expr); 329 330 lexdef.define(xpr); 331 return lexdef; 332 } 333 } 334 335 /////////////////////////////////////////////////////////////////////////// 336 // The match_flags flags are used to influence different matching 337 // modes of the lexer 338 struct match_flags 339 { 340 enum enum_type 341 { 342 match_default = 0, // no flags 343 match_not_dot_newline = 1, // the regex '.' doesn't match newlines 344 match_icase = 2 // all matching operations are case insensitive 345 }; 346 }; 347 348 /////////////////////////////////////////////////////////////////////////// 349 // This represents a lexer object 350 /////////////////////////////////////////////////////////////////////////// 351 352 /////////////////////////////////////////////////////////////////////////// 353 // This is the first token id automatically assigned by the library 354 // if needed 355 enum tokenids 356 { 357 min_token_id = 0x10000 358 }; 359 360 template <typename Lexer> 361 class lexer : public Lexer 362 { 363 private: 364 // avoid warnings about using 'this' in constructor this_()365 lexer& this_() { return *this; } 366 367 std::size_t next_token_id; // has to be an integral type 368 369 public: 370 typedef Lexer lexer_type; 371 typedef typename Lexer::id_type id_type; 372 typedef typename Lexer::char_type char_type; 373 typedef typename Lexer::iterator_type iterator_type; 374 typedef lexer base_type; 375 376 typedef detail::lexer_def_<lexer> lexer_def; 377 typedef std::basic_string<char_type> string_type; 378 lexer(unsigned int flags=match_flags::match_default,id_type first_id=id_type (min_token_id))379 lexer(unsigned int flags = match_flags::match_default 380 , id_type first_id = id_type(min_token_id)) 381 : lexer_type(flags) 382 , next_token_id(first_id) 383 , self(this_(), lexer_type::initial_state()) 384 {} 385 386 // access iterator interface 387 template <typename Iterator> begin(Iterator & first,Iterator const & last,char_type const * initial_state=0) const388 iterator_type begin(Iterator& first, Iterator const& last 389 , char_type const* initial_state = 0) const 390 { return this->lexer_type::begin(first, last, initial_state); } end() const391 iterator_type end() const 392 { return this->lexer_type::end(); } 393 map_state(char_type const * state)394 std::size_t map_state(char_type const* state) 395 { return this->lexer_type::add_state(state); } 396 397 // create a unique token id get_next_id()398 id_type get_next_id() { return id_type(next_token_id++); } 399 400 lexer_def self; // allow for easy token definition 401 }; 402 403 }}} 404 405 #endif 406