1 //  Copyright (c) 2001-2011 Hartmut Kaiser
2 //
3 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
4 //  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5 
6 #if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM)
7 #define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM
8 
9 #if defined(_MSC_VER)
10 #pragma once
11 #endif
12 
13 #include <boost/spirit/home/qi/skip_over.hpp>
14 #include <boost/spirit/home/qi/parse.hpp>
15 #include <boost/spirit/home/qi/nonterminal/grammar.hpp>
16 #include <boost/spirit/home/support/unused.hpp>
17 #include <boost/spirit/home/lex/lexer.hpp>
18 #include <boost/mpl/assert.hpp>
19 
20 namespace boost { namespace spirit { namespace lex
21 {
22     ///////////////////////////////////////////////////////////////////////////
23     //  Import skip_flag enumerator type from Qi namespace
24     using qi::skip_flag;
25 
26     ///////////////////////////////////////////////////////////////////////////
27     //
28     //  The tokenize_and_parse() function is one of the main Spirit API
29     //  functions. It simplifies using a lexer as the underlying token source
30     //  while parsing a given input sequence.
31     //
32     //  The function takes a pair of iterators spanning the underlying input
33     //  stream to parse, the lexer object (built from the token definitions)
34     //  and a parser object (built from the parser grammar definition).
35     //
36     //  The second version of this function additionally takes an attribute to
37     //  be used as the top level data structure instance the parser should use
38     //  to store the recognized input to.
39     //
40     //  The function returns true if the parsing succeeded (the given input
41     //  sequence has been successfully matched by the given grammar).
42     //
43     //  first, last:    The pair of iterators spanning the underlying input
44     //                  sequence to parse. These iterators must at least
    //                  conform to the requirements of the std::input_iterator
46     //                  category.
47     //                  On exit the iterator 'first' will be updated to the
48     //                  position right after the last successfully matched
49     //                  token.
50     //  lex:            The lexer object (encoding the token definitions) to be
51     //                  used to convert the input sequence into a sequence of
52     //                  tokens. This token sequence is passed to the parsing
53     //                  process. The LexerExpr type must conform to the
54     //                  lexer interface described in the corresponding section
55     //                  of the documentation.
56     //  xpr:            The grammar object (encoding the parser grammar) to be
57     //                  used to match the token sequence generated by the lex
58     //                  object instance. The ParserExpr type must conform to
59     //                  the grammar interface described in the corresponding
60     //                  section of the documentation.
61     //  attr:           The top level attribute passed to the parser. It will
62     //                  be populated during the parsing of the input sequence.
63     //                  On exit it will hold the 'parser result' corresponding
64     //                  to the matched input sequence.
65     //
66     ///////////////////////////////////////////////////////////////////////////
67     template <typename Iterator, typename Lexer, typename ParserExpr>
68     inline bool
tokenize_and_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr)69     tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex,
70         ParserExpr const& xpr)
71     {
72         // Report invalid expression error as early as possible.
73         // If you got an error_invalid_expression error message here,
74         // then the expression (expr) is not a valid spirit qi expression.
75         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
76 
77         typename Lexer::iterator_type iter = lex.begin(first, last);
78         return compile<qi::domain>(xpr).parse(
79             iter, lex.end(), unused, unused, unused);
80     }
81 
82     ///////////////////////////////////////////////////////////////////////////
83     template <typename Iterator, typename Lexer, typename ParserExpr
84       , typename Attribute>
85     inline bool
tokenize_and_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Attribute & attr)86     tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex
87       , ParserExpr const& xpr, Attribute& attr)
88     {
89         // Report invalid expression error as early as possible.
90         // If you got an error_invalid_expression error message here,
91         // then the expression (expr) is not a valid spirit qi expression.
92         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
93 
94         typename Lexer::iterator_type iter = lex.begin(first, last);
95         return compile<qi::domain>(xpr).parse(
96             iter, lex.end(), unused, unused, attr);
97     }
98 
99     ///////////////////////////////////////////////////////////////////////////
100     //
101     //  The tokenize_and_phrase_parse() function is one of the main Spirit API
102     //  functions. It simplifies using a lexer as the underlying token source
103     //  while phrase parsing a given input sequence.
104     //
105     //  The function takes a pair of iterators spanning the underlying input
106     //  stream to parse, the lexer object (built from the token definitions)
107     //  and a parser object (built from the parser grammar definition). The
108     //  additional skipper parameter will be used as the skip parser during
109     //  the parsing process.
110     //
111     //  The second version of this function additionally takes an attribute to
112     //  be used as the top level data structure instance the parser should use
113     //  to store the recognized input to.
114     //
115     //  The function returns true if the parsing succeeded (the given input
116     //  sequence has been successfully matched by the given grammar).
117     //
118     //  first, last:    The pair of iterators spanning the underlying input
119     //                  sequence to parse. These iterators must at least
    //                  conform to the requirements of the std::input_iterator
121     //                  category.
122     //                  On exit the iterator 'first' will be updated to the
123     //                  position right after the last successfully matched
124     //                  token.
125     //  lex:            The lexer object (encoding the token definitions) to be
126     //                  used to convert the input sequence into a sequence of
127     //                  tokens. This token sequence is passed to the parsing
128     //                  process. The LexerExpr type must conform to the
129     //                  lexer interface described in the corresponding section
130     //                  of the documentation.
131     //  xpr:            The grammar object (encoding the parser grammar) to be
132     //                  used to match the token sequence generated by the lex
133     //                  object instance. The ParserExpr type must conform to
134     //                  the grammar interface described in the corresponding
135     //                  section of the documentation.
136     //  skipper:        The skip parser to be used while parsing the given
137     //                  input sequence. Note, the skip parser will have to
138     //                  act on the same token sequence as the main parser
139     //                  'xpr'.
140     //  post_skip:      The post_skip flag controls whether the function will
141     //                  invoke an additional post skip after the main parser
142     //                  returned.
143     //  attr:           The top level attribute passed to the parser. It will
144     //                  be populated during the parsing of the input sequence.
145     //                  On exit it will hold the 'parser result' corresponding
146     //                  to the matched input sequence.
147     //
148     ///////////////////////////////////////////////////////////////////////////
149     template <typename Iterator, typename Lexer, typename ParserExpr
150       , typename Skipper>
151     inline bool
tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,BOOST_SCOPED_ENUM (skip_flag)post_skip=skip_flag::postskip)152     tokenize_and_phrase_parse(Iterator& first, Iterator last
153       , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
154       , BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip)
155     {
156         // Report invalid expression error as early as possible.
157         // If you got an error_invalid_expression error message here,
158         // then the expression (expr) is not a valid spirit qi expression.
159         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
160         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
161 
162         typedef
163             typename spirit::result_of::compile<qi::domain, Skipper>::type
164         skipper_type;
165         skipper_type const skipper_ = compile<qi::domain>(skipper);
166 
167         typename Lexer::iterator_type iter = lex.begin(first, last);
168         typename Lexer::iterator_type end = lex.end();
169         if (!compile<qi::domain>(xpr).parse(
170                 iter, end, unused, skipper_, unused))
171             return false;
172 
173         // do a final post-skip
174         if (post_skip == skip_flag::postskip)
175             qi::skip_over(iter, end, skipper_);
176         return true;
177     }
178 
179     template <typename Iterator, typename Lexer, typename ParserExpr
180       , typename Skipper, typename Attribute>
181     inline bool
tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,BOOST_SCOPED_ENUM (skip_flag)post_skip,Attribute & attr)182     tokenize_and_phrase_parse(Iterator& first, Iterator last
183       , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
184       , BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr)
185     {
186         // Report invalid expression error as early as possible.
187         // If you got an error_invalid_expression error message here,
188         // then the expression (expr) is not a valid spirit qi expression.
189         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
190         BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
191 
192         typedef
193             typename spirit::result_of::compile<qi::domain, Skipper>::type
194         skipper_type;
195         skipper_type const skipper_ = compile<qi::domain>(skipper);
196 
197         typename Lexer::iterator_type iter = lex.begin(first, last);
198         typename Lexer::iterator_type end = lex.end();
199         if (!compile<qi::domain>(xpr).parse(
200                 iter, end, unused, skipper_, attr))
201             return false;
202 
203         // do a final post-skip
204         if (post_skip == skip_flag::postskip)
205             qi::skip_over(iter, end, skipper_);
206         return true;
207     }
208 
209     ///////////////////////////////////////////////////////////////////////////
210     template <typename Iterator, typename Lexer, typename ParserExpr
211       , typename Skipper, typename Attribute>
212     inline bool
tokenize_and_phrase_parse(Iterator & first,Iterator last,Lexer const & lex,ParserExpr const & xpr,Skipper const & skipper,Attribute & attr)213     tokenize_and_phrase_parse(Iterator& first, Iterator last
214       , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
215       , Attribute& attr)
216     {
217         return tokenize_and_phrase_parse(first, last, lex, xpr, skipper
218           , skip_flag::postskip, attr);
219     }
220 
221     ///////////////////////////////////////////////////////////////////////////
222     //
223     //  The tokenize() function is one of the main Spirit API functions. It
    //  simplifies using a lexer to tokenize a given input sequence. Its main
225     //  purpose is to use the lexer to tokenize all the input.
226     //
227     //  The second version below discards all generated tokens afterwards.
228     //  This is useful whenever all the needed functionality has been
229     //  implemented directly inside the lexer semantic actions, which are being
230     //  executed while the tokens are matched.
231     //
232     //  The function takes a pair of iterators spanning the underlying input
233     //  stream to scan, the lexer object (built from the token definitions),
    //  and an (optional) functor being called for each of the generated tokens.
235     //
236     //  The function returns true if the scanning of the input succeeded (the
237     //  given input sequence has been successfully matched by the given token
238     //  definitions).
239     //
240     //  first, last:    The pair of iterators spanning the underlying input
241     //                  sequence to parse. These iterators must at least
    //                  conform to the requirements of the std::input_iterator
243     //                  category.
244     //                  On exit the iterator 'first' will be updated to the
245     //                  position right after the last successfully matched
246     //                  token.
247     //  lex:            The lexer object (encoding the token definitions) to be
248     //                  used to convert the input sequence into a sequence of
249     //                  tokens. The LexerExpr type must conform to the
250     //                  lexer interface described in the corresponding section
251     //                  of the documentation.
252     //  f:              A functor (callable object) taking a single argument of
253     //                  the token type and returning a bool, indicating whether
254     //                  the tokenization should be canceled.
255     //  initial_state:  The name of the state the lexer should start matching.
256     //                  The default value is zero, causing the lexer to start
257     //                  in its 'INITIAL' state.
258     //
259     ///////////////////////////////////////////////////////////////////////////
260     namespace detail
261     {
262         template <typename Token, typename F>
tokenize_callback(Token const & t,F f)263         bool tokenize_callback(Token const& t, F f)
264         {
265             return f(t);
266         }
267 
268         template <typename Token, typename Eval>
tokenize_callback(Token const & t,phoenix::actor<Eval> const & f)269         bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f)
270         {
271             f(t);
272             return true;
273         }
274 
275         template <typename Token>
tokenize_callback(Token const & t,void (* f)(Token const &))276         bool tokenize_callback(Token const& t, void (*f)(Token const&))
277         {
278             f(t);
279             return true;
280         }
281 
282         template <typename Token>
tokenize_callback(Token const & t,bool (* f)(Token const &))283         bool tokenize_callback(Token const& t, bool (*f)(Token const&))
284         {
285             return f(t);
286         }
287     }
288 
289     template <typename Iterator, typename Lexer, typename F>
290     inline bool
tokenize(Iterator & first,Iterator last,Lexer const & lex,F f,typename Lexer::char_type const * initial_state=0)291     tokenize(Iterator& first, Iterator last, Lexer const& lex, F f
292       , typename Lexer::char_type const* initial_state = 0)
293     {
294         typedef typename Lexer::iterator_type iterator_type;
295 
296         iterator_type iter = lex.begin(first, last, initial_state);
297         iterator_type end = lex.end();
298         for (/**/; iter != end && token_is_valid(*iter); ++iter)
299         {
300             if (!detail::tokenize_callback(*iter, f))
301                 return false;
302         }
303         return (iter == end) ? true : false;
304     }
305 
306     ///////////////////////////////////////////////////////////////////////////
307     template <typename Iterator, typename Lexer>
308     inline bool
tokenize(Iterator & first,Iterator last,Lexer const & lex,typename Lexer::char_type const * initial_state=0)309     tokenize(Iterator& first, Iterator last, Lexer const& lex
310       , typename Lexer::char_type const* initial_state = 0)
311     {
312         typedef typename Lexer::iterator_type iterator_type;
313 
314         iterator_type iter = lex.begin(first, last, initial_state);
315         iterator_type end = lex.end();
316 
317         while (iter != end && token_is_valid(*iter))
318             ++iter;
319 
320         return (iter == end) ? true : false;
321     }
322 
323 }}}
324 
325 #endif
326