1 // Copyright (c) 2001-2011 Hartmut Kaiser 2 // 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM) 7 #define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM 8 9 #if defined(_MSC_VER) 10 #pragma once 11 #endif 12 13 #include <boost/spirit/home/qi/skip_over.hpp> 14 #include <boost/spirit/home/qi/parse.hpp> 15 #include <boost/spirit/home/qi/nonterminal/grammar.hpp> 16 #include <boost/spirit/home/support/unused.hpp> 17 #include <boost/spirit/home/lex/lexer.hpp> 18 #include <boost/mpl/assert.hpp> 19 20 namespace boost { namespace phoenix 21 { 22 template <typename Expr> 23 struct actor; 24 }} 25 26 namespace boost { namespace spirit { namespace lex 27 { 28 /////////////////////////////////////////////////////////////////////////// 29 // Import skip_flag enumerator type from Qi namespace 30 using qi::skip_flag; 31 32 /////////////////////////////////////////////////////////////////////////// 33 // 34 // The tokenize_and_parse() function is one of the main Spirit API 35 // functions. It simplifies using a lexer as the underlying token source 36 // while parsing a given input sequence. 37 // 38 // The function takes a pair of iterators spanning the underlying input 39 // stream to parse, the lexer object (built from the token definitions) 40 // and a parser object (built from the parser grammar definition). 41 // 42 // The second version of this function additionally takes an attribute to 43 // be used as the top level data structure instance the parser should use 44 // to store the recognized input to. 45 // 46 // The function returns true if the parsing succeeded (the given input 47 // sequence has been successfully matched by the given grammar). 48 // 49 // first, last: The pair of iterators spanning the underlying input 50 // sequence to parse. 
These iterators must at least 51 // conform to the requirements of the std::intput_iterator 52 // category. 53 // On exit the iterator 'first' will be updated to the 54 // position right after the last successfully matched 55 // token. 56 // lex: The lexer object (encoding the token definitions) to be 57 // used to convert the input sequence into a sequence of 58 // tokens. This token sequence is passed to the parsing 59 // process. The LexerExpr type must conform to the 60 // lexer interface described in the corresponding section 61 // of the documentation. 62 // xpr: The grammar object (encoding the parser grammar) to be 63 // used to match the token sequence generated by the lex 64 // object instance. The ParserExpr type must conform to 65 // the grammar interface described in the corresponding 66 // section of the documentation. 67 // attr: The top level attribute passed to the parser. It will 68 // be populated during the parsing of the input sequence. 69 // On exit it will hold the 'parser result' corresponding 70 // to the matched input sequence. 71 // 72 /////////////////////////////////////////////////////////////////////////// 73 template <typename Iterator, typename Lexer, typename ParserExpr> 74 inline bool tokenize_and_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr)75 tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex, 76 ParserExpr const& xpr) 77 { 78 // Report invalid expression error as early as possible. 79 // If you got an error_invalid_expression error message here, 80 // then the expression (expr) is not a valid spirit qi expression. 
81 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); 82 83 typename Lexer::iterator_type iter = lex.begin(first, last); 84 return compile<qi::domain>(xpr).parse( 85 iter, lex.end(), unused, unused, unused); 86 } 87 88 /////////////////////////////////////////////////////////////////////////// 89 template <typename Iterator, typename Lexer, typename ParserExpr 90 , typename Attribute> 91 inline bool tokenize_and_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Attribute & attr)92 tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex 93 , ParserExpr const& xpr, Attribute& attr) 94 { 95 // Report invalid expression error as early as possible. 96 // If you got an error_invalid_expression error message here, 97 // then the expression (expr) is not a valid spirit qi expression. 98 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); 99 100 typename Lexer::iterator_type iter = lex.begin(first, last); 101 return compile<qi::domain>(xpr).parse( 102 iter, lex.end(), unused, unused, attr); 103 } 104 105 /////////////////////////////////////////////////////////////////////////// 106 // 107 // The tokenize_and_phrase_parse() function is one of the main Spirit API 108 // functions. It simplifies using a lexer as the underlying token source 109 // while phrase parsing a given input sequence. 110 // 111 // The function takes a pair of iterators spanning the underlying input 112 // stream to parse, the lexer object (built from the token definitions) 113 // and a parser object (built from the parser grammar definition). The 114 // additional skipper parameter will be used as the skip parser during 115 // the parsing process. 116 // 117 // The second version of this function additionally takes an attribute to 118 // be used as the top level data structure instance the parser should use 119 // to store the recognized input to. 
    //
    //  The function returns true if the parsing succeeded (the given input
    //  sequence has been successfully matched by the given grammar).
    //
    //  first, last:    The pair of iterators spanning the underlying input
    //                  sequence to parse. These iterators must at least
    //                  conform to the requirements of the std::input_iterator
    //                  category.
    //                  On exit the iterator 'first' will be updated to the
    //                  position right after the last successfully matched
    //                  token.
    //  lex:            The lexer object (encoding the token definitions) to be
    //                  used to convert the input sequence into a sequence of
    //                  tokens. This token sequence is passed to the parsing
    //                  process. The LexerExpr type must conform to the
    //                  lexer interface described in the corresponding section
    //                  of the documentation.
    //  xpr:            The grammar object (encoding the parser grammar) to be
    //                  used to match the token sequence generated by the lex
    //                  object instance. The ParserExpr type must conform to
    //                  the grammar interface described in the corresponding
    //                  section of the documentation.
    //  skipper:        The skip parser to be used while parsing the given
    //                  input sequence. Note, the skip parser will have to
    //                  act on the same token sequence as the main parser
    //                  'xpr'.
    //  post_skip:      The post_skip flag controls whether the function will
    //                  invoke an additional post skip after the main parser
    //                  returned.
    //  attr:           The top level attribute passed to the parser. It will
    //                  be populated during the parsing of the input sequence.
    //                  On exit it will hold the 'parser result' corresponding
    //                  to the matched input sequence.
153 // 154 /////////////////////////////////////////////////////////////////////////// 155 template <typename Iterator, typename Lexer, typename ParserExpr 156 , typename Skipper> 157 inline bool tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,BOOST_SCOPED_ENUM (skip_flag)post_skip=skip_flag::postskip)158 tokenize_and_phrase_parse(Iterator& first, Iterator last 159 , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper 160 , BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip) 161 { 162 // Report invalid expression error as early as possible. 163 // If you got an error_invalid_expression error message here, 164 // then the expression (expr) is not a valid spirit qi expression. 165 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); 166 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper); 167 168 typedef 169 typename spirit::result_of::compile<qi::domain, Skipper>::type 170 skipper_type; 171 skipper_type const skipper_ = compile<qi::domain>(skipper); 172 173 typename Lexer::iterator_type iter = lex.begin(first, last); 174 typename Lexer::iterator_type end = lex.end(); 175 if (!compile<qi::domain>(xpr).parse( 176 iter, end, unused, skipper_, unused)) 177 return false; 178 179 // do a final post-skip 180 if (post_skip == skip_flag::postskip) 181 qi::skip_over(iter, end, skipper_); 182 return true; 183 } 184 185 template <typename Iterator, typename Lexer, typename ParserExpr 186 , typename Skipper, typename Attribute> 187 inline bool tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,BOOST_SCOPED_ENUM (skip_flag)post_skip,Attribute & attr)188 tokenize_and_phrase_parse(Iterator& first, Iterator last 189 , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper 190 , BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr) 191 { 192 // Report invalid expression error as early as possible. 
193 // If you got an error_invalid_expression error message here, 194 // then the expression (expr) is not a valid spirit qi expression. 195 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); 196 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper); 197 198 typedef 199 typename spirit::result_of::compile<qi::domain, Skipper>::type 200 skipper_type; 201 skipper_type const skipper_ = compile<qi::domain>(skipper); 202 203 typename Lexer::iterator_type iter = lex.begin(first, last); 204 typename Lexer::iterator_type end = lex.end(); 205 if (!compile<qi::domain>(xpr).parse( 206 iter, end, unused, skipper_, attr)) 207 return false; 208 209 // do a final post-skip 210 if (post_skip == skip_flag::postskip) 211 qi::skip_over(iter, end, skipper_); 212 return true; 213 } 214 215 /////////////////////////////////////////////////////////////////////////// 216 template <typename Iterator, typename Lexer, typename ParserExpr 217 , typename Skipper, typename Attribute> 218 inline bool tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,Attribute & attr)219 tokenize_and_phrase_parse(Iterator& first, Iterator last 220 , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper 221 , Attribute& attr) 222 { 223 return tokenize_and_phrase_parse(first, last, lex, xpr, skipper 224 , skip_flag::postskip, attr); 225 } 226 227 /////////////////////////////////////////////////////////////////////////// 228 // 229 // The tokenize() function is one of the main Spirit API functions. It 230 // simplifies using a lexer to tokenize a given input sequence. It's main 231 // purpose is to use the lexer to tokenize all the input. 232 // 233 // The second version below discards all generated tokens afterwards. 234 // This is useful whenever all the needed functionality has been 235 // implemented directly inside the lexer semantic actions, which are being 236 // executed while the tokens are matched. 
    //
    //  The function takes a pair of iterators spanning the underlying input
    //  stream to scan, the lexer object (built from the token definitions),
    //  and an (optional) functor being called for each of the generated tokens.
    //
    //  The function returns true if the scanning of the input succeeded (the
    //  given input sequence has been successfully matched by the given token
    //  definitions).
    //
    //  first, last:    The pair of iterators spanning the underlying input
    //                  sequence to parse. These iterators must at least
    //                  conform to the requirements of the std::input_iterator
    //                  category.
    //                  On exit the iterator 'first' will be updated to the
    //                  position right after the last successfully matched
    //                  token.
    //  lex:            The lexer object (encoding the token definitions) to be
    //                  used to convert the input sequence into a sequence of
    //                  tokens. The LexerExpr type must conform to the
    //                  lexer interface described in the corresponding section
    //                  of the documentation.
    //  f:              A functor (callable object) taking a single argument of
    //                  the token type and returning a bool, indicating whether
    //                  the tokenization should be canceled.
    //  initial_state:  The name of the state the lexer should start matching.
    //                  The default value is zero, causing the lexer to start
    //                  in its 'INITIAL' state.
264 // 265 /////////////////////////////////////////////////////////////////////////// 266 namespace detail 267 { 268 template <typename Token, typename F> tokenize_callback(Token const & t,F f)269 bool tokenize_callback(Token const& t, F f) 270 { 271 return f(t); 272 } 273 274 template <typename Token, typename Eval> tokenize_callback(Token const & t,phoenix::actor<Eval> const & f)275 bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f) 276 { 277 f(t); 278 return true; 279 } 280 281 template <typename Token> tokenize_callback(Token const & t,void (* f)(Token const &))282 bool tokenize_callback(Token const& t, void (*f)(Token const&)) 283 { 284 f(t); 285 return true; 286 } 287 288 template <typename Token> tokenize_callback(Token const & t,bool (* f)(Token const &))289 bool tokenize_callback(Token const& t, bool (*f)(Token const&)) 290 { 291 return f(t); 292 } 293 } 294 295 template <typename Iterator, typename Lexer, typename F> 296 inline bool tokenize(Iterator & first,Iterator last,Lexer const & lex,F f,typename Lexer::char_type const * initial_state=0)297 tokenize(Iterator& first, Iterator last, Lexer const& lex, F f 298 , typename Lexer::char_type const* initial_state = 0) 299 { 300 typedef typename Lexer::iterator_type iterator_type; 301 302 iterator_type iter = lex.begin(first, last, initial_state); 303 iterator_type end = lex.end(); 304 for (/**/; iter != end && token_is_valid(*iter); ++iter) 305 { 306 if (!detail::tokenize_callback(*iter, f)) 307 return false; 308 } 309 return (iter == end) ? 
true : false; 310 } 311 312 /////////////////////////////////////////////////////////////////////////// 313 template <typename Iterator, typename Lexer> 314 inline bool tokenize(Iterator & first,Iterator last,Lexer const & lex,typename Lexer::char_type const * initial_state=0)315 tokenize(Iterator& first, Iterator last, Lexer const& lex 316 , typename Lexer::char_type const* initial_state = 0) 317 { 318 typedef typename Lexer::iterator_type iterator_type; 319 320 iterator_type iter = lex.begin(first, last, initial_state); 321 iterator_type end = lex.end(); 322 323 while (iter != end && token_is_valid(*iter)) 324 ++iter; 325 326 return (iter == end) ? true : false; 327 } 328 329 }}} 330 331 #endif 332