1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM)
7 #define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/qi/skip_over.hpp>
14 #include <boost/spirit/home/qi/parse.hpp>
15 #include <boost/spirit/home/qi/nonterminal/grammar.hpp>
16 #include <boost/spirit/home/support/unused.hpp>
17 #include <boost/spirit/home/lex/lexer.hpp>
18 #include <boost/mpl/assert.hpp>
19 
namespace boost { namespace phoenix
{
    // Forward declaration only: we merely need to recognize phoenix actors
    // in the tokenize_callback() overload set below, so we avoid pulling in
    // the full Phoenix headers here.
    template <typename Expr>
    struct actor;
}}
25 
26 namespace boost { namespace spirit { namespace lex
27 {
28     ///////////////////////////////////////////////////////////////////////////
29     //  Import skip_flag enumerator type from Qi namespace
30     using qi::skip_flag;
31 
32     ///////////////////////////////////////////////////////////////////////////
33     //
34     //  The tokenize_and_parse() function is one of the main Spirit API
35     //  functions. It simplifies using a lexer as the underlying token source
36     //  while parsing a given input sequence.
37     //
38     //  The function takes a pair of iterators spanning the underlying input
39     //  stream to parse, the lexer object (built from the token definitions)
40     //  and a parser object (built from the parser grammar definition).
41     //
42     //  The second version of this function additionally takes an attribute to
43     //  be used as the top level data structure instance the parser should use
44     //  to store the recognized input to.
45     //
46     //  The function returns true if the parsing succeeded (the given input
47     //  sequence has been successfully matched by the given grammar).
48     //
49     //  first, last:    The pair of iterators spanning the underlying input
50     //                  sequence to parse. These iterators must at least
    //                  conform to the requirements of the std::input_iterator
52     //                  category.
53     //                  On exit the iterator 'first' will be updated to the
54     //                  position right after the last successfully matched
55     //                  token.
56     //  lex:            The lexer object (encoding the token definitions) to be
57     //                  used to convert the input sequence into a sequence of
58     //                  tokens. This token sequence is passed to the parsing
59     //                  process. The LexerExpr type must conform to the
60     //                  lexer interface described in the corresponding section
61     //                  of the documentation.
62     //  xpr:            The grammar object (encoding the parser grammar) to be
63     //                  used to match the token sequence generated by the lex
64     //                  object instance. The ParserExpr type must conform to
65     //                  the grammar interface described in the corresponding
66     //                  section of the documentation.
67     //  attr:           The top level attribute passed to the parser. It will
68     //                  be populated during the parsing of the input sequence.
69     //                  On exit it will hold the 'parser result' corresponding
70     //                  to the matched input sequence.
71     //
72     ///////////////////////////////////////////////////////////////////////////
73     template <typename Iterator, typename Lexer, typename ParserExpr>
74     inline bool
tokenize_and_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr)75     tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex,
76         ParserExpr const& xpr)
77     {
78         // Report invalid expression error as early as possible.
79         // If you got an error_invalid_expression error message here,
80         // then the expression (expr) is not a valid spirit qi expression.
81         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
82 
83         typename Lexer::iterator_type iter = lex.begin(first, last);
84         return compile<qi::domain>(xpr).parse(
85             iter, lex.end(), unused, unused, unused);
86     }
87 
88     ///////////////////////////////////////////////////////////////////////////
89     template <typename Iterator, typename Lexer, typename ParserExpr
90       , typename Attribute>
91     inline bool
tokenize_and_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Attribute & attr)92     tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex
93       , ParserExpr const& xpr, Attribute& attr)
94     {
95         // Report invalid expression error as early as possible.
96         // If you got an error_invalid_expression error message here,
97         // then the expression (expr) is not a valid spirit qi expression.
98         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
99 
100         typename Lexer::iterator_type iter = lex.begin(first, last);
101         return compile<qi::domain>(xpr).parse(
102             iter, lex.end(), unused, unused, attr);
103     }
104 
105     ///////////////////////////////////////////////////////////////////////////
106     //
107     //  The tokenize_and_phrase_parse() function is one of the main Spirit API
108     //  functions. It simplifies using a lexer as the underlying token source
109     //  while phrase parsing a given input sequence.
110     //
111     //  The function takes a pair of iterators spanning the underlying input
112     //  stream to parse, the lexer object (built from the token definitions)
113     //  and a parser object (built from the parser grammar definition). The
114     //  additional skipper parameter will be used as the skip parser during
115     //  the parsing process.
116     //
117     //  The second version of this function additionally takes an attribute to
118     //  be used as the top level data structure instance the parser should use
119     //  to store the recognized input to.
120     //
121     //  The function returns true if the parsing succeeded (the given input
122     //  sequence has been successfully matched by the given grammar).
123     //
124     //  first, last:    The pair of iterators spanning the underlying input
125     //                  sequence to parse. These iterators must at least
    //                  conform to the requirements of the std::input_iterator
127     //                  category.
128     //                  On exit the iterator 'first' will be updated to the
129     //                  position right after the last successfully matched
130     //                  token.
131     //  lex:            The lexer object (encoding the token definitions) to be
132     //                  used to convert the input sequence into a sequence of
133     //                  tokens. This token sequence is passed to the parsing
134     //                  process. The LexerExpr type must conform to the
135     //                  lexer interface described in the corresponding section
136     //                  of the documentation.
137     //  xpr:            The grammar object (encoding the parser grammar) to be
138     //                  used to match the token sequence generated by the lex
139     //                  object instance. The ParserExpr type must conform to
140     //                  the grammar interface described in the corresponding
141     //                  section of the documentation.
142     //  skipper:        The skip parser to be used while parsing the given
143     //                  input sequence. Note, the skip parser will have to
144     //                  act on the same token sequence as the main parser
145     //                  'xpr'.
146     //  post_skip:      The post_skip flag controls whether the function will
147     //                  invoke an additional post skip after the main parser
148     //                  returned.
149     //  attr:           The top level attribute passed to the parser. It will
150     //                  be populated during the parsing of the input sequence.
151     //                  On exit it will hold the 'parser result' corresponding
152     //                  to the matched input sequence.
153     //
154     ///////////////////////////////////////////////////////////////////////////
155     template <typename Iterator, typename Lexer, typename ParserExpr
156       , typename Skipper>
157     inline bool
tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,BOOST_SCOPED_ENUM (skip_flag)post_skip=skip_flag::postskip)158     tokenize_and_phrase_parse(Iterator& first, Iterator last
159       , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
160       , BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip)
161     {
162         // Report invalid expression error as early as possible.
163         // If you got an error_invalid_expression error message here,
164         // then the expression (expr) is not a valid spirit qi expression.
165         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
166         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
167 
168         typedef
169             typename spirit::result_of::compile<qi::domain, Skipper>::type
170         skipper_type;
171         skipper_type const skipper_ = compile<qi::domain>(skipper);
172 
173         typename Lexer::iterator_type iter = lex.begin(first, last);
174         typename Lexer::iterator_type end = lex.end();
175         if (!compile<qi::domain>(xpr).parse(
176                 iter, end, unused, skipper_, unused))
177             return false;
178 
179         // do a final post-skip
180         if (post_skip == skip_flag::postskip)
181             qi::skip_over(iter, end, skipper_);
182         return true;
183     }
184 
185     template <typename Iterator, typename Lexer, typename ParserExpr
186       , typename Skipper, typename Attribute>
187     inline bool
tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,BOOST_SCOPED_ENUM (skip_flag)post_skip,Attribute & attr)188     tokenize_and_phrase_parse(Iterator& first, Iterator last
189       , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
190       , BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr)
191     {
192         // Report invalid expression error as early as possible.
193         // If you got an error_invalid_expression error message here,
194         // then the expression (expr) is not a valid spirit qi expression.
195         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
196         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
197 
198         typedef
199             typename spirit::result_of::compile<qi::domain, Skipper>::type
200         skipper_type;
201         skipper_type const skipper_ = compile<qi::domain>(skipper);
202 
203         typename Lexer::iterator_type iter = lex.begin(first, last);
204         typename Lexer::iterator_type end = lex.end();
205         if (!compile<qi::domain>(xpr).parse(
206                 iter, end, unused, skipper_, attr))
207             return false;
208 
209         // do a final post-skip
210         if (post_skip == skip_flag::postskip)
211             qi::skip_over(iter, end, skipper_);
212         return true;
213     }
214 
215     ///////////////////////////////////////////////////////////////////////////
216     template <typename Iterator, typename Lexer, typename ParserExpr
217       , typename Skipper, typename Attribute>
218     inline bool
tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,Attribute & attr)219     tokenize_and_phrase_parse(Iterator& first, Iterator last
220       , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
221       , Attribute& attr)
222     {
223         return tokenize_and_phrase_parse(first, last, lex, xpr, skipper
224           , skip_flag::postskip, attr);
225     }
226 
227     ///////////////////////////////////////////////////////////////////////////
228     //
229     //  The tokenize() function is one of the main Spirit API functions. It
    //  simplifies using a lexer to tokenize a given input sequence. Its main
231     //  purpose is to use the lexer to tokenize all the input.
232     //
233     //  The second version below discards all generated tokens afterwards.
234     //  This is useful whenever all the needed functionality has been
235     //  implemented directly inside the lexer semantic actions, which are being
236     //  executed while the tokens are matched.
237     //
238     //  The function takes a pair of iterators spanning the underlying input
239     //  stream to scan, the lexer object (built from the token definitions),
240     //  and a (optional) functor being called for each of the generated tokens.
241     //
242     //  The function returns true if the scanning of the input succeeded (the
243     //  given input sequence has been successfully matched by the given token
244     //  definitions).
245     //
246     //  first, last:    The pair of iterators spanning the underlying input
247     //                  sequence to parse. These iterators must at least
    //                  conform to the requirements of the std::input_iterator
249     //                  category.
250     //                  On exit the iterator 'first' will be updated to the
251     //                  position right after the last successfully matched
252     //                  token.
253     //  lex:            The lexer object (encoding the token definitions) to be
254     //                  used to convert the input sequence into a sequence of
255     //                  tokens. The LexerExpr type must conform to the
256     //                  lexer interface described in the corresponding section
257     //                  of the documentation.
258     //  f:              A functor (callable object) taking a single argument of
259     //                  the token type and returning a bool, indicating whether
260     //                  the tokenization should be canceled.
261     //  initial_state:  The name of the state the lexer should start matching.
262     //                  The default value is zero, causing the lexer to start
263     //                  in its 'INITIAL' state.
264     //
265     ///////////////////////////////////////////////////////////////////////////
266     namespace detail
267     {
268         template <typename Token, typename F>
tokenize_callback(Token const & t,F f)269         bool tokenize_callback(Token const& t, F f)
270         {
271             return f(t);
272         }
273 
274         template <typename Token, typename Eval>
tokenize_callback(Token const & t,phoenix::actor<Eval> const & f)275         bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f)
276         {
277             f(t);
278             return true;
279         }
280 
281         template <typename Token>
tokenize_callback(Token const & t,void (* f)(Token const &))282         bool tokenize_callback(Token const& t, void (*f)(Token const&))
283         {
284             f(t);
285             return true;
286         }
287 
288         template <typename Token>
tokenize_callback(Token const & t,bool (* f)(Token const &))289         bool tokenize_callback(Token const& t, bool (*f)(Token const&))
290         {
291             return f(t);
292         }
293     }
294 
295     template <typename Iterator, typename Lexer, typename F>
296     inline bool
tokenize(Iterator & first,Iterator last,Lexer const & lex,F f,typename Lexer::char_type const * initial_state=0)297     tokenize(Iterator& first, Iterator last, Lexer const& lex, F f
298       , typename Lexer::char_type const* initial_state = 0)
299     {
300         typedef typename Lexer::iterator_type iterator_type;
301 
302         iterator_type iter = lex.begin(first, last, initial_state);
303         iterator_type end = lex.end();
304         for (/**/; iter != end && token_is_valid(*iter); ++iter)
305         {
306             if (!detail::tokenize_callback(*iter, f))
307                 return false;
308         }
309         return (iter == end) ? true : false;
310     }
311 
312     ///////////////////////////////////////////////////////////////////////////
313     template <typename Iterator, typename Lexer>
314     inline bool
tokenize(Iterator & first,Iterator last,Lexer const & lex,typename Lexer::char_type const * initial_state=0)315     tokenize(Iterator& first, Iterator last, Lexer const& lex
316       , typename Lexer::char_type const* initial_state = 0)
317     {
318         typedef typename Lexer::iterator_type iterator_type;
319 
320         iterator_type iter = lex.begin(first, last, initial_state);
321         iterator_type end = lex.end();
322 
323         while (iter != end && token_is_valid(*iter))
324             ++iter;
325 
326         return (iter == end) ? true : false;
327     }
328 
329 }}}
330 
331 #endif
332