1 // Copyright (c) 2001-2011 Hartmut Kaiser 2 // 3 // Distributed under the Boost Software License, Version 1.0. (See accompanying 4 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 5 6 #if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM) 7 #define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM 8 9 #if defined(_MSC_VER) 10 #pragma once 11 #endif 12 13 #include <boost/spirit/home/qi/skip_over.hpp> 14 #include <boost/spirit/home/qi/parse.hpp> 15 #include <boost/spirit/home/qi/nonterminal/grammar.hpp> 16 #include <boost/spirit/home/support/unused.hpp> 17 #include <boost/spirit/home/lex/lexer.hpp> 18 #include <boost/mpl/assert.hpp> 19 20 namespace boost { namespace spirit { namespace lex 21 { 22 /////////////////////////////////////////////////////////////////////////// 23 // Import skip_flag enumerator type from Qi namespace 24 using qi::skip_flag; 25 26 /////////////////////////////////////////////////////////////////////////// 27 // 28 // The tokenize_and_parse() function is one of the main Spirit API 29 // functions. It simplifies using a lexer as the underlying token source 30 // while parsing a given input sequence. 31 // 32 // The function takes a pair of iterators spanning the underlying input 33 // stream to parse, the lexer object (built from the token definitions) 34 // and a parser object (built from the parser grammar definition). 35 // 36 // The second version of this function additionally takes an attribute to 37 // be used as the top level data structure instance the parser should use 38 // to store the recognized input to. 39 // 40 // The function returns true if the parsing succeeded (the given input 41 // sequence has been successfully matched by the given grammar). 42 // 43 // first, last: The pair of iterators spanning the underlying input 44 // sequence to parse. These iterators must at least 45 // conform to the requirements of the std::intput_iterator 46 // category. 47 // On exit the iterator 'first' will be updated to the 48 // position right after the last successfully matched 49 // token. 50 // lex: The lexer object (encoding the token definitions) to be 51 // used to convert the input sequence into a sequence of 52 // tokens. This token sequence is passed to the parsing 53 // process. The LexerExpr type must conform to the 54 // lexer interface described in the corresponding section 55 // of the documentation. 56 // xpr: The grammar object (encoding the parser grammar) to be 57 // used to match the token sequence generated by the lex 58 // object instance. The ParserExpr type must conform to 59 // the grammar interface described in the corresponding 60 // section of the documentation. 61 // attr: The top level attribute passed to the parser. It will 62 // be populated during the parsing of the input sequence. 63 // On exit it will hold the 'parser result' corresponding 64 // to the matched input sequence. 65 // 66 /////////////////////////////////////////////////////////////////////////// 67 template <typename Iterator, typename Lexer, typename ParserExpr> 68 inline bool tokenize_and_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr)69 tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex, 70 ParserExpr const& xpr) 71 { 72 // Report invalid expression error as early as possible. 73 // If you got an error_invalid_expression error message here, 74 // then the expression (expr) is not a valid spirit qi expression. 75 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); 76 77 typename Lexer::iterator_type iter = lex.begin(first, last); 78 return compile<qi::domain>(xpr).parse( 79 iter, lex.end(), unused, unused, unused); 80 } 81 82 /////////////////////////////////////////////////////////////////////////// 83 template <typename Iterator, typename Lexer, typename ParserExpr 84 , typename Attribute> 85 inline bool tokenize_and_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Attribute & attr)86 tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex 87 , ParserExpr const& xpr, Attribute& attr) 88 { 89 // Report invalid expression error as early as possible. 90 // If you got an error_invalid_expression error message here, 91 // then the expression (expr) is not a valid spirit qi expression. 92 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); 93 94 typename Lexer::iterator_type iter = lex.begin(first, last); 95 return compile<qi::domain>(xpr).parse( 96 iter, lex.end(), unused, unused, attr); 97 } 98 99 /////////////////////////////////////////////////////////////////////////// 100 // 101 // The tokenize_and_phrase_parse() function is one of the main Spirit API 102 // functions. It simplifies using a lexer as the underlying token source 103 // while phrase parsing a given input sequence. 104 // 105 // The function takes a pair of iterators spanning the underlying input 106 // stream to parse, the lexer object (built from the token definitions) 107 // and a parser object (built from the parser grammar definition). The 108 // additional skipper parameter will be used as the skip parser during 109 // the parsing process. 110 // 111 // The second version of this function additionally takes an attribute to 112 // be used as the top level data structure instance the parser should use 113 // to store the recognized input to. 114 // 115 // The function returns true if the parsing succeeded (the given input 116 // sequence has been successfully matched by the given grammar). 117 // 118 // first, last: The pair of iterators spanning the underlying input 119 // sequence to parse. These iterators must at least 120 // conform to the requirements of the std::intput_iterator 121 // category. 122 // On exit the iterator 'first' will be updated to the 123 // position right after the last successfully matched 124 // token. 125 // lex: The lexer object (encoding the token definitions) to be 126 // used to convert the input sequence into a sequence of 127 // tokens. This token sequence is passed to the parsing 128 // process. The LexerExpr type must conform to the 129 // lexer interface described in the corresponding section 130 // of the documentation. 131 // xpr: The grammar object (encoding the parser grammar) to be 132 // used to match the token sequence generated by the lex 133 // object instance. The ParserExpr type must conform to 134 // the grammar interface described in the corresponding 135 // section of the documentation. 136 // skipper: The skip parser to be used while parsing the given 137 // input sequence. Note, the skip parser will have to 138 // act on the same token sequence as the main parser 139 // 'xpr'. 140 // post_skip: The post_skip flag controls whether the function will 141 // invoke an additional post skip after the main parser 142 // returned. 143 // attr: The top level attribute passed to the parser. It will 144 // be populated during the parsing of the input sequence. 145 // On exit it will hold the 'parser result' corresponding 146 // to the matched input sequence. 147 // 148 /////////////////////////////////////////////////////////////////////////// 149 template <typename Iterator, typename Lexer, typename ParserExpr 150 , typename Skipper> 151 inline bool tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,BOOST_SCOPED_ENUM (skip_flag)post_skip=skip_flag::postskip)152 tokenize_and_phrase_parse(Iterator& first, Iterator last 153 , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper 154 , BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip) 155 { 156 // Report invalid expression error as early as possible. 157 // If you got an error_invalid_expression error message here, 158 // then the expression (expr) is not a valid spirit qi expression. 159 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); 160 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper); 161 162 typedef 163 typename spirit::result_of::compile<qi::domain, Skipper>::type 164 skipper_type; 165 skipper_type const skipper_ = compile<qi::domain>(skipper); 166 167 typename Lexer::iterator_type iter = lex.begin(first, last); 168 typename Lexer::iterator_type end = lex.end(); 169 if (!compile<qi::domain>(xpr).parse( 170 iter, end, unused, skipper_, unused)) 171 return false; 172 173 // do a final post-skip 174 if (post_skip == skip_flag::postskip) 175 qi::skip_over(iter, end, skipper_); 176 return true; 177 } 178 179 template <typename Iterator, typename Lexer, typename ParserExpr 180 , typename Skipper, typename Attribute> 181 inline bool tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,BOOST_SCOPED_ENUM (skip_flag)post_skip,Attribute & attr)182 tokenize_and_phrase_parse(Iterator& first, Iterator last 183 , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper 184 , BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr) 185 { 186 // Report invalid expression error as early as possible. 187 // If you got an error_invalid_expression error message here, 188 // then the expression (expr) is not a valid spirit qi expression. 189 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr); 190 BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper); 191 192 typedef 193 typename spirit::result_of::compile<qi::domain, Skipper>::type 194 skipper_type; 195 skipper_type const skipper_ = compile<qi::domain>(skipper); 196 197 typename Lexer::iterator_type iter = lex.begin(first, last); 198 typename Lexer::iterator_type end = lex.end(); 199 if (!compile<qi::domain>(xpr).parse( 200 iter, end, unused, skipper_, attr)) 201 return false; 202 203 // do a final post-skip 204 if (post_skip == skip_flag::postskip) 205 qi::skip_over(iter, end, skipper_); 206 return true; 207 } 208 209 /////////////////////////////////////////////////////////////////////////// 210 template <typename Iterator, typename Lexer, typename ParserExpr 211 , typename Skipper, typename Attribute> 212 inline bool tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,Attribute & attr)213 tokenize_and_phrase_parse(Iterator& first, Iterator last 214 , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper 215 , Attribute& attr) 216 { 217 return tokenize_and_phrase_parse(first, last, lex, xpr, skipper 218 , skip_flag::postskip, attr); 219 } 220 221 /////////////////////////////////////////////////////////////////////////// 222 // 223 // The tokenize() function is one of the main Spirit API functions. It 224 // simplifies using a lexer to tokenize a given input sequence. It's main 225 // purpose is to use the lexer to tokenize all the input. 226 // 227 // The second version below discards all generated tokens afterwards. 228 // This is useful whenever all the needed functionality has been 229 // implemented directly inside the lexer semantic actions, which are being 230 // executed while the tokens are matched. 231 // 232 // The function takes a pair of iterators spanning the underlying input 233 // stream to scan, the lexer object (built from the token definitions), 234 // and a (optional) functor being called for each of the generated tokens. 235 // 236 // The function returns true if the scanning of the input succeeded (the 237 // given input sequence has been successfully matched by the given token 238 // definitions). 239 // 240 // first, last: The pair of iterators spanning the underlying input 241 // sequence to parse. These iterators must at least 242 // conform to the requirements of the std::intput_iterator 243 // category. 244 // On exit the iterator 'first' will be updated to the 245 // position right after the last successfully matched 246 // token. 247 // lex: The lexer object (encoding the token definitions) to be 248 // used to convert the input sequence into a sequence of 249 // tokens. The LexerExpr type must conform to the 250 // lexer interface described in the corresponding section 251 // of the documentation. 252 // f: A functor (callable object) taking a single argument of 253 // the token type and returning a bool, indicating whether 254 // the tokenization should be canceled. 255 // initial_state: The name of the state the lexer should start matching. 256 // The default value is zero, causing the lexer to start 257 // in its 'INITIAL' state. 258 // 259 /////////////////////////////////////////////////////////////////////////// 260 namespace detail 261 { 262 template <typename Token, typename F> tokenize_callback(Token const & t,F f)263 bool tokenize_callback(Token const& t, F f) 264 { 265 return f(t); 266 } 267 268 template <typename Token, typename Eval> tokenize_callback(Token const & t,phoenix::actor<Eval> const & f)269 bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f) 270 { 271 f(t); 272 return true; 273 } 274 275 template <typename Token> tokenize_callback(Token const & t,void (* f)(Token const &))276 bool tokenize_callback(Token const& t, void (*f)(Token const&)) 277 { 278 f(t); 279 return true; 280 } 281 282 template <typename Token> tokenize_callback(Token const & t,bool (* f)(Token const &))283 bool tokenize_callback(Token const& t, bool (*f)(Token const&)) 284 { 285 return f(t); 286 } 287 } 288 289 template <typename Iterator, typename Lexer, typename F> 290 inline bool tokenize(Iterator & first,Iterator last,Lexer const & lex,F f,typename Lexer::char_type const * initial_state=0)291 tokenize(Iterator& first, Iterator last, Lexer const& lex, F f 292 , typename Lexer::char_type const* initial_state = 0) 293 { 294 typedef typename Lexer::iterator_type iterator_type; 295 296 iterator_type iter = lex.begin(first, last, initial_state); 297 iterator_type end = lex.end(); 298 for (/**/; iter != end && token_is_valid(*iter); ++iter) 299 { 300 if (!detail::tokenize_callback(*iter, f)) 301 return false; 302 } 303 return (iter == end) ? true : false; 304 } 305 306 /////////////////////////////////////////////////////////////////////////// 307 template <typename Iterator, typename Lexer> 308 inline bool tokenize(Iterator & first,Iterator last,Lexer const & lex,typename Lexer::char_type const * initial_state=0)309 tokenize(Iterator& first, Iterator last, Lexer const& lex 310 , typename Lexer::char_type const* initial_state = 0) 311 { 312 typedef typename Lexer::iterator_type iterator_type; 313 314 iterator_type iter = lex.begin(first, last, initial_state); 315 iterator_type end = lex.end(); 316 317 while (iter != end && token_is_valid(*iter)) 318 ++iter; 319 320 return (iter == end) ? true : false; 321 } 322 323 }}} 324 325 #endif 326