1 //! Copyright (C) 2006 T. Zachary Laine
2 //!
3 //! This library is free software; you can redistribute it and/or
4 //! modify it under the terms of the GNU Lesser General Public License
5 //! as published by the Free Software Foundation; either version 2.1
6 //! of the License, or (at your option) any later version.
7 //!
8 //! This library is distributed in the hope that it will be useful,
9 //! but WITHOUT ANY WARRANTY; without even the implied warranty of
10 //! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 //! Lesser General Public License for more details.
12 //!
13 //! You should have received a copy of the GNU Lesser General Public
14 //! License along with this library; if not, write to the Free
15 //! Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
16 //! 02111-1307 USA
17 //!
18 //! If you do not wish to comply with the terms of the LGPL please
19 //! contact the author as other terms are available for a fee.
20 //!
21 //! ----
22 //!
23 //! Zach Laine
24 //! whatwasthataddress@hotmail.com/
25 //!
26 //! This notice came from the original file from which all the XML parsing code
27 //! was taken, part of the spirit distribution. The code was modified slightly
28 //! by me, and doesn't contain all the original code. Thanks to Daniel Nuffer
29 //! for his great work.
30 //!
31 //! ----
32 //!
33 //! simplexml.cpp
34 //!
35 //! Spirit V1.3
36 //! URL: http://spirit.sourceforge.net/
37 //!
38 //! Copyright (c) 2001, Daniel C. Nuffer
39 //!
40 //! This software is provided 'as-is', without any express or implied
41 //! warranty. In no event will the copyright holder be held liable for
42 //! any damages arising from the use of this software.
43 //!
44 //! Permission is granted to anyone to use this software for any purpose,
45 //! including commercial applications, and to alter it and redistribute
46 //! it freely, subject to the following restrictions:
47 //!
48 //! 1. The origin of this software must not be misrepresented; you must
49 //! not claim that you wrote the original software. If you use this
50 //! software in a product, an acknowledgment in the product documentation
51 //! would be appreciated but is not required.
52 //!
53 //! 2. Altered source versions must be plainly marked as such, and must
54 //! not be misrepresented as being the original software.
55 //!
56 //! 3. This notice may not be removed or altered from any source
57 //! distribution.
58
59
60 #include "XMLDoc.h"
61
62 #include <boost/spirit/include/classic.hpp>
63
64 #include <algorithm>
65 #include <stdexcept>
66 #include <sstream>
67
68
69 namespace {
70 using namespace boost::spirit::classic;
71
72 typedef chset<unsigned char> chset_t;
73
74 //! XML grammar rules
75 rule<> document, prolog, element, Misc, Reference, CData, doctypedecl,
76 XMLDecl, SDDecl, VersionInfo, EncodingDecl, VersionNum, Eq,
77 EmptyElemTag, STag, content, ETag, Attribute, AttValue, CharData,
78 Comment, CDSect, CharRef, EntityRef, EncName, Name, Comment1, S;
79
80 //! XML Character classes
81 chset_t Char("\x9\xA\xD\x20-\xFF");
82 chset_t Letter("\x41-\x5A\x61-\x7A\xC0-\xD6\xD8-\xF6\xF8-\xFF");
83 chset_t Digit("0-9");
84 chset_t Extender('\xB7');
85 chset_t NameChar = Letter | Digit | chset_t("._:-") | Extender;
86 chset_t Sch("\x20\x9\xD\xA");
87 }
88
89
Tag() const90 const std::string& XMLElement::Tag() const
91 { return m_tag; }
92
Text() const93 const std::string& XMLElement::Text() const
94 { return m_text; }
95
ContainsChild(const std::string & tag) const96 bool XMLElement::ContainsChild(const std::string& tag) const {
97 return children.end() != std::find_if(children.begin(), children.end(),
98 [&tag] (const XMLElement& e) { return e.m_tag == tag; });
99 }
100
Child(const std::string & tag) const101 const XMLElement& XMLElement::Child(const std::string& tag) const {
102 auto match = std::find_if(children.begin(), children.end(),
103 [&tag] (const XMLElement& e) { return e.m_tag == tag; });
104
105 if (match == children.end())
106 throw std::out_of_range("XMLElement::Child(): The XMLElement \"" + Tag() + "\" contains no child \"" + tag + "\".");
107
108 return *match;
109 }
110
WriteElement(int indent,bool whitespace) const111 std::string XMLElement::WriteElement(int indent/* = 0*/, bool whitespace/* = true*/) const {
112 std::stringstream ss;
113 WriteElement(ss, indent, whitespace);
114 return ss.str();
115 }
116
WriteElement(std::ostream & os,int indent,bool whitespace) const117 std::ostream& XMLElement::WriteElement(std::ostream& os, int indent/* = 0*/, bool whitespace/* = true*/) const {
118 if (whitespace)
119 os << std::string(indent * 2, ' ');
120 os << '<' << m_tag;
121 for (const auto& attribute : attributes)
122 os << ' ' << attribute.first << "=\"" << attribute.second << "\"";
123 if (children.empty() && m_text.empty() && !m_root) {
124 os << "/>";
125 if (whitespace)
126 os << "\n";
127 } else {
128 os << ">";
129 if (!m_text.empty() && m_text.find_first_of("<&") != std::string::npos) {
130 os << "<![CDATA[" << m_text << "]]>";
131 } else {
132 os << m_text;
133 }
134 if (whitespace && !children.empty())
135 os << "\n";
136 for (const XMLElement& child : children)
137 child.WriteElement(os, indent + 1, whitespace);
138 if (whitespace && !children.empty()) {
139 os << std::string(indent * 2, ' ');
140 }
141 os << "</" << m_tag << ">";
142 if (whitespace) os << "\n";
143 }
144 return os;
145 }
146
Child(const std::string & tag)147 XMLElement& XMLElement::Child(const std::string& tag) {
148 auto match = std::find_if(children.begin(), children.end(),
149 [&tag] (const XMLElement& e) { return e.m_tag == tag; });
150
151 if (match == children.end())
152 throw std::out_of_range("XMLElement::Child(): The XMLElement \"" + Tag() + "\" contains no child \"" + tag + "\".");
153
154 return *match;
155 }
156
SetTag(const std::string & tag)157 void XMLElement::SetTag(const std::string& tag)
158 { m_tag = tag; }
159
SetText(const std::string & text)160 void XMLElement::SetText(const std::string& text)
161 { m_text = text; }
162
163
164 XMLDoc* XMLDoc::s_curr_parsing_doc = nullptr;
165 std::vector<XMLElement*> XMLDoc::s_element_stack;
166 XMLDoc::RuleDefiner XMLDoc::s_rule_definer;
167 XMLElement XMLDoc::s_temp_elem;
168 std::string XMLDoc::s_temp_attr_name;
169
XMLDoc(const std::string & root_tag)170 XMLDoc::XMLDoc(const std::string& root_tag/*= "XMLDoc"*/) :
171 root_node(XMLElement(root_tag, true))
172 {}
173
XMLDoc(const std::istream & is)174 XMLDoc::XMLDoc(const std::istream& is) :
175 root_node(XMLElement())
176 {}
177
WriteDoc(std::ostream & os,bool whitespace) const178 std::ostream& XMLDoc::WriteDoc(std::ostream& os, bool whitespace/* = true*/) const {
179 os << "<?xml version=\"1.0\"?>";
180 if (whitespace) os << "\n";
181 return root_node.WriteElement(os, 0, whitespace);
182 }
183
ReadDoc(const std::string & s)184 void XMLDoc::ReadDoc(const std::string& s) {
185 std::stringstream ss(s);
186 ReadDoc(ss);
187 }
188
ReadDoc(std::istream & is)189 std::istream& XMLDoc::ReadDoc(std::istream& is) {
190 root_node = XMLElement(); // clear doc contents
191 s_element_stack.clear(); // clear this to start a fresh read
192 s_curr_parsing_doc = this; // indicate where to add elements
193 std::string parse_str;
194 std::string temp_str;
195 while (is) {
196 getline(is, temp_str);
197 parse_str += temp_str + '\n';
198 }
199 parse(parse_str.c_str(), document);
200 s_curr_parsing_doc = nullptr;
201 return is;
202 }
203
SetElemName(const char * first,const char * last)204 void XMLDoc::SetElemName(const char* first, const char* last)
205 { s_temp_elem = XMLElement(std::string(first, last)); }
206
SetAttributeName(const char * first,const char * last)207 void XMLDoc::SetAttributeName(const char* first, const char* last)
208 { s_temp_attr_name = std::string(first, last); }
209
AddAttribute(const char * first,const char * last)210 void XMLDoc::AddAttribute(const char* first, const char* last)
211 { s_temp_elem.attributes[s_temp_attr_name] = std::string(first, last); }
212
PushElem1(const char * first,const char * last)213 void XMLDoc::PushElem1(const char* first, const char* last) {
214 if (XMLDoc* this_ = XMLDoc::s_curr_parsing_doc) {
215 if (s_element_stack.empty()) {
216 this_->root_node = s_temp_elem;
217 s_element_stack.push_back(&this_->root_node);
218 } else {
219 s_element_stack.back()->children.push_back(s_temp_elem);
220 s_element_stack.push_back(&s_element_stack.back()->children.back());
221 }
222 }
223 }
224
PushElem2(const char * first,const char * last)225 void XMLDoc::PushElem2(const char* first, const char* last) {
226 if (XMLDoc* this_ = XMLDoc::s_curr_parsing_doc) {
227 if (s_element_stack.empty()) {
228 this_->root_node = s_temp_elem;
229 } else {
230 s_element_stack.back()->children.push_back(s_temp_elem);
231 }
232 }
233 }
234
PopElem(const char *,const char *)235 void XMLDoc::PopElem(const char*, const char*) {
236 if (!s_element_stack.empty())
237 s_element_stack.pop_back();
238 }
239
AppendToText(const char * first,const char * last)240 void XMLDoc::AppendToText(const char* first, const char* last) {
241 if (!s_element_stack.empty()) {
242 std::string text(first, last);
243 std::string::size_type first_good_posn = (text[0] != '\"') ? 0 : 1;
244 std::string::size_type last_good_posn = text.find_last_not_of(" \t\n\"\r\f");
245 // strip of leading quote and/or trailing quote, and/or trailing whitespace
246 if (last_good_posn != std::string::npos)
247 s_element_stack.back()->m_text += text.substr(first_good_posn, (last_good_posn + 1) - first_good_posn);
248 }
249 }
250
RuleDefiner()251 XMLDoc::RuleDefiner::RuleDefiner() {
252 // This is the start rule for XML parsing
253 document =
254 prolog >> element >> *Misc
255 ;
256
257 S =
258 +(Sch)
259 ;
260
261 Name =
262 (Letter | '_' | ':')
263 >> *(NameChar)
264 ;
265
266 AttValue =
267 '"'
268 >> (
269 (*(anychar_p - (chset_t('<') | '&' | '"')))[&XMLDoc::AddAttribute]
270 | *(Reference)
271 )
272 >> '"'
273 | '\''
274 >> (
275 (*(anychar_p - (chset_t('<') | '&' | '\'')))[&XMLDoc::AddAttribute]
276 | *(Reference)
277 )
278 >> '\''
279 ;
280
281 chset_t CharDataChar(anychar_p - (chset_t('<') | chset_t('&')));
282
283 CharData =
284 (*(CharDataChar - str_p("]]>")))[&XMLDoc::AppendToText]
285 ;
286
287 Comment1 =
288 *(
289 (Char - ch_p('-'))
290 | (ch_p('-') >> (Char - ch_p('-')))
291 )
292 ;
293
294 Comment =
295 str_p("<!--") >> Comment1 >> str_p("-->")
296 ;
297
298 CDSect =
299 str_p("<![CDATA[") >> CData >> str_p("]]>")
300 ;
301
302 CData =
303 (*(Char - str_p("]]>")))[&XMLDoc::AppendToText]
304 ;
305
306 prolog =
307 !XMLDecl >> *Misc >> !(doctypedecl >> *Misc)
308 ;
309
310 XMLDecl =
311 str_p("<?xml")
312 >> VersionInfo
313 >> !EncodingDecl
314 >> !SDDecl
315 >> !S
316 >> str_p("?>")
317 ;
318
319 VersionInfo =
320 S
321 >> str_p("version")
322 >> Eq
323 >> (
324 ch_p('\'') >> VersionNum >> '\''
325 | ch_p('"') >> VersionNum >> '"'
326 )
327 ;
328
329 Eq =
330 !S >> '=' >> !S
331 ;
332
333 chset_t VersionNumCh("A-Za-z0-9_.:-");
334
335 VersionNum =
336 +(VersionNumCh)
337 ;
338
339 Misc =
340 Comment | S
341 ;
342
343 doctypedecl =
344 str_p("<!DOCTYPE")
345 >> *(Char - (chset_t('[') | '>'))
346 >> !('[' >> *(Char - ']') >> ']')
347 >> '>'
348 ;
349
350 SDDecl =
351 S
352 >> str_p("standalone")
353 >> Eq
354 >> (
355 (ch_p('\'') >> (str_p("yes") | str_p("no")) >> '\'')
356 | (ch_p('"') >> (str_p("yes") | str_p("no")) >> '"')
357 )
358 ;
359
360 element =
361 STag[&XMLDoc::PushElem1] >> content >> ETag
362 | EmptyElemTag[&XMLDoc::PushElem2]
363 ;
364
365 STag =
366 '<'
367 >> Name[&XMLDoc::SetElemName]
368 >> *(S >> Attribute)
369 >> !S
370 >> '>'
371 ;
372
373 Attribute =
374 Name[&XMLDoc::SetAttributeName] >> Eq >> AttValue
375 ;
376
377 ETag =
378 str_p("</") >> Name[&XMLDoc::PopElem] >> !S >> '>'
379 ;
380
381 content =
382 !CharData
383 >> *(
384 (
385 element
386 | Reference
387 | CDSect
388 | Comment
389 )
390 >> !CharData
391 )
392 ;
393
394 EmptyElemTag =
395 '<'
396 >> Name[&XMLDoc::SetElemName]
397 >> *(S >> Attribute)
398 >> !S
399 >> str_p("/>")
400 ;
401
402 CharRef =
403 str_p("&#") >> +digit_p >> ';'
404 | str_p("&#x") >> +xdigit_p >> ';'
405 ;
406
407 Reference =
408 EntityRef
409 | CharRef
410 ;
411
412 EntityRef =
413 '&' >> Name >> ';'
414 ;
415
416 EncodingDecl =
417 S
418 >> str_p("encoding")
419 >> Eq
420 >> (
421 ch_p('"') >> EncName >> '"'
422 | ch_p('\'') >> EncName >> '\''
423 )
424 ;
425
426 chset_t EncNameCh = VersionNumCh - chset_t(':');
427
428 EncName =
429 alpha_p >> *(EncNameCh)
430 ;
431 }
432