1 /*=============================================================================
2     Boost.Wave: A Standard compliant C++ preprocessor library
3 
4     Xpressive based generic lexer
5 
6     http://www.boost.org/
7 
8     Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
9     Software License, Version 1.0. (See accompanying file
10     LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11 =============================================================================*/
12 
13 #if !defined(BOOST_XPRESSIVE_LEXER_HPP)
14 #define BOOST_XPRESSIVE_LEXER_HPP
15 
16 #include <string>
17 #include <vector>
18 #include <utility>
19 #include <iterator>
20 #include <algorithm>
21 
22 #include <boost/xpressive/xpressive.hpp>
23 
24 namespace boost {
25 namespace wave {
26 namespace cpplexer {
27 namespace xlex {
28 
29 ///////////////////////////////////////////////////////////////////////////////
30 template <
31     typename Iterator = char const*,
32     typename Token = int,
33     typename Callback = bool (*)(
34         Iterator const&, Iterator&, Iterator const&, Token const&)
35 >
36 class xpressive_lexer
37 {
38 private:
39     typedef typename std::iterator_traits<Iterator>::value_type
40         char_type;
41     typedef std::basic_string<char_type> string_type;
42 
43     // this represents a single token to match
44     struct regex_info
45     {
46         typedef boost::xpressive::basic_regex<Iterator> regex_type;
47 
48         string_type str;
49         Token token;
50         regex_type regex;
51         Callback callback;
52 
regex_infoboost::wave::cpplexer::xlex::xpressive_lexer::regex_info53         regex_info(string_type const& str, Token const& token,
54                 Callback const& callback)
55         :   str(str), token(token),
56             regex(regex_type::compile(str)),
57             callback(callback)
58         {}
59 
60         // these structures are to be ordered by the token id
operator <(regex_info const & lhs,regex_info const & rhs)61         friend bool operator< (regex_info const& lhs, regex_info const& rhs)
62         {
63             return lhs.token < rhs.token;
64         }
65     };
66 
67     typedef std::vector<regex_info> regex_list_type;
68 
69 public:
70     typedef Callback callback_type;
71 
xpressive_lexer()72     xpressive_lexer() {}
73 
74     // register a the regex with the lexer
75     void register_regex(string_type const& regex, Token const& id,
76         Callback const& cb = Callback());
77 
78     // match the given input and return the next recognized token
79     Token next_token(Iterator &first, Iterator const& last, string_type& token);
80 
81 private:
82     regex_list_type regex_list;
83 };
84 
85 ///////////////////////////////////////////////////////////////////////////////
86 template <typename Iterator, typename Token, typename Callback>
87 inline void
register_regex(string_type const & regex,Token const & id,Callback const & cb)88 xpressive_lexer<Iterator, Token, Callback>::register_regex(
89     string_type const& regex, Token const& id, Callback const& cb)
90 {
91     regex_list.push_back(regex_info(regex, id, cb));
92 }
93 
94 ///////////////////////////////////////////////////////////////////////////////
95 template <typename Iterator, typename Token, typename Callback>
96 inline Token
next_token(Iterator & first,Iterator const & last,string_type & token)97 xpressive_lexer<Iterator, Token, Callback>::next_token(
98     Iterator &first, Iterator const& last, string_type& token)
99 {
100     typedef typename regex_list_type::iterator iterator;
101 
102     xpressive::match_results<Iterator> regex_result;
103     for (iterator it = regex_list.begin(), end = regex_list.end(); it != end; ++it)
104     {
105         namespace xpressive = boost::xpressive;
106 
107 //         regex_info const& curr_regex = *it;
108 //         xpressive::match_results<Iterator> regex_result;
109         if (xpressive::regex_search(first, last, regex_result, (*it).regex,
110             xpressive::regex_constants::match_continuous))
111         {
112             Iterator saved = first;
113             Token rval = (*it).token;
114 
115             std::advance(first, regex_result.length());
116             token = string_type(saved, first);
117 
118             if (NULL != (*it).callback) {
119             // execute corresponding callback
120                 if ((*it).callback(saved, first, last, (*it).token))
121                     rval = next_token(first, last, token);
122             }
123 
124             return rval;
125         }
126     }
127     return Token(-1);    // TODO: change this to use token_traits<Token>
128 }
129 
130 ///////////////////////////////////////////////////////////////////////////////
131 }}}} // boost::wave::cpplexer::xlex
132 
133 #endif // !defined(BOOST_XPRESSIVE_LEXER_HPP)
134 
135 
136