1 /*=============================================================================
2     Copyright (c) 2001-2010 Joel de Guzman
3 
4     Distributed under the Boost Software License, Version 1.0. (See accompanying
5     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 =============================================================================*/
7 ///////////////////////////////////////////////////////////////////////////////
8 //
9 //  A mini XML-like parser
10 //
11 //  [ JDG March 25, 2007 ]   spirit2
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14 
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/foreach.hpp>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
29 
30 namespace client
31 {
32     namespace fusion = boost::fusion;
33     namespace phoenix = boost::phoenix;
34     namespace qi = boost::spirit::qi;
35     namespace ascii = boost::spirit::ascii;
36 
37     ///////////////////////////////////////////////////////////////////////////
38     //  Our mini XML tree representation
39     ///////////////////////////////////////////////////////////////////////////
40     //[tutorial_xml1_structures
41     struct mini_xml;
42 
43     typedef
44         boost::variant<
45             boost::recursive_wrapper<mini_xml>
46           , std::string
47         >
48     mini_xml_node;
49 
50     struct mini_xml
51     {
52         std::string name;                           // tag name
53         std::vector<mini_xml_node> children;        // children
54     };
55     //]
56 }
57 
// We need to tell fusion about our mini_xml struct
// to make it a first-class fusion citizen.
// The members are listed in declaration order, so `name` is element 0 and
// `children` is element 1; the grammar's semantic actions address them as
// at_c<0> and at_c<1> respectively. The invocation sits at global scope,
// between the two `namespace client` sections, and names the struct with its
// fully qualified name.
//[tutorial_xml1_adapt_structures
BOOST_FUSION_ADAPT_STRUCT(
    client::mini_xml,
    (std::string, name)
    (std::vector<client::mini_xml_node>, children)
)
//]
67 
68 namespace client
69 {
70     ///////////////////////////////////////////////////////////////////////////
71     //  Print out the mini xml tree
72     ///////////////////////////////////////////////////////////////////////////
73     int const tabsize = 4;
74 
tab(int indent)75     void tab(int indent)
76     {
77         for (int i = 0; i < indent; ++i)
78             std::cout << ' ';
79     }
80 
81     struct mini_xml_printer
82     {
mini_xml_printerclient::mini_xml_printer83         mini_xml_printer(int indent = 0)
84           : indent(indent)
85         {
86         }
87 
88         void operator()(mini_xml const& xml) const;
89 
90         int indent;
91     };
92 
93     struct mini_xml_node_printer : boost::static_visitor<>
94     {
mini_xml_node_printerclient::mini_xml_node_printer95         mini_xml_node_printer(int indent = 0)
96           : indent(indent)
97         {
98         }
99 
operator ()client::mini_xml_node_printer100         void operator()(mini_xml const& xml) const
101         {
102             mini_xml_printer(indent+tabsize)(xml);
103         }
104 
operator ()client::mini_xml_node_printer105         void operator()(std::string const& text) const
106         {
107             tab(indent+tabsize);
108             std::cout << "text: \"" << text << '"' << std::endl;
109         }
110 
111         int indent;
112     };
113 
operator ()(mini_xml const & xml) const114     void mini_xml_printer::operator()(mini_xml const& xml) const
115     {
116         tab(indent);
117         std::cout << "tag: " << xml.name << std::endl;
118         tab(indent);
119         std::cout << '{' << std::endl;
120 
121         BOOST_FOREACH(mini_xml_node const& node, xml.children)
122         {
123             boost::apply_visitor(mini_xml_node_printer(indent), node);
124         }
125 
126         tab(indent);
127         std::cout << '}' << std::endl;
128     }
129 
130     ///////////////////////////////////////////////////////////////////////////
131     //  Our mini XML grammar definition
132     ///////////////////////////////////////////////////////////////////////////
133     //[tutorial_xml1_grammar
134     template <typename Iterator>
135     struct mini_xml_grammar : qi::grammar<Iterator, mini_xml(), ascii::space_type>
136     {
mini_xml_grammarclient::mini_xml_grammar137         mini_xml_grammar() : mini_xml_grammar::base_type(xml)
138         {
139             using qi::lit;
140             using qi::lexeme;
141             using ascii::char_;
142             using ascii::string;
143             using namespace qi::labels;
144 
145             using phoenix::at_c;
146             using phoenix::push_back;
147 
148             text = lexeme[+(char_ - '<')        [_val += _1]];
149             node = (xml | text)                 [_val = _1];
150 
151             start_tag =
152                     '<'
153                 >>  !lit('/')
154                 >>  lexeme[+(char_ - '>')       [_val += _1]]
155                 >>  '>'
156             ;
157 
158             end_tag =
159                     "</"
160                 >>  lit(_r1)
161                 >>  '>'
162             ;
163 
164             xml =
165                     start_tag                   [at_c<0>(_val) = _1]
166                 >>  *node                       [push_back(at_c<1>(_val), _1)]
167                 >>  end_tag(at_c<0>(_val))
168             ;
169         }
170 
171         qi::rule<Iterator, mini_xml(), ascii::space_type> xml;
172         qi::rule<Iterator, mini_xml_node(), ascii::space_type> node;
173         qi::rule<Iterator, std::string(), ascii::space_type> text;
174         qi::rule<Iterator, std::string(), ascii::space_type> start_tag;
175         qi::rule<Iterator, void(std::string), ascii::space_type> end_tag;
176     };
177     //]
178 }
179 
180 ///////////////////////////////////////////////////////////////////////////////
181 //  Main program
182 ///////////////////////////////////////////////////////////////////////////////
main(int argc,char ** argv)183 int main(int argc, char **argv)
184 {
185     char const* filename;
186     if (argc > 1)
187     {
188         filename = argv[1];
189     }
190     else
191     {
192         std::cerr << "Error: No input file provided." << std::endl;
193         return 1;
194     }
195 
196     std::ifstream in(filename, std::ios_base::in);
197 
198     if (!in)
199     {
200         std::cerr << "Error: Could not open input file: "
201             << filename << std::endl;
202         return 1;
203     }
204 
205     std::string storage; // We will read the contents here.
206     in.unsetf(std::ios::skipws); // No white space skipping!
207     std::copy(
208         std::istream_iterator<char>(in),
209         std::istream_iterator<char>(),
210         std::back_inserter(storage));
211 
212     typedef client::mini_xml_grammar<std::string::const_iterator> mini_xml_grammar;
213     mini_xml_grammar xml; // Our grammar
214     client::mini_xml ast; // Our tree
215 
216     using boost::spirit::ascii::space;
217     std::string::const_iterator iter = storage.begin();
218     std::string::const_iterator end = storage.end();
219     bool r = phrase_parse(iter, end, xml, space, ast);
220 
221     if (r && iter == end)
222     {
223         std::cout << "-------------------------\n";
224         std::cout << "Parsing succeeded\n";
225         std::cout << "-------------------------\n";
226         client::mini_xml_printer printer;
227         printer(ast);
228         return 0;
229     }
230     else
231     {
232         std::string::const_iterator some = iter + std::min(30, int(end - iter));
233         std::string context(iter, (some>end)?end:some);
234         std::cout << "-------------------------\n";
235         std::cout << "Parsing failed\n";
236         std::cout << "stopped at: \"" << context << "...\"\n";
237         std::cout << "-------------------------\n";
238         return 1;
239     }
240 }
241 
242 
243