1 //! Copyright (C) 2006 T. Zachary Laine
2 //!
3 //! This library is free software; you can redistribute it and/or
4 //! modify it under the terms of the GNU Lesser General Public License
5 //! as published by the Free Software Foundation; either version 2.1
6 //! of the License, or (at your option) any later version.
7 //!
8 //! This library is distributed in the hope that it will be useful,
9 //! but WITHOUT ANY WARRANTY; without even the implied warranty of
10 //! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11 //! Lesser General Public License for more details.
12 //!
13 //! You should have received a copy of the GNU Lesser General Public
14 //! License along with this library; if not, write to the Free
15 //! Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
16 //! 02111-1307 USA
17 //!
18 //! If you do not wish to comply with the terms of the LGPL please
19 //! contact the author as other terms are available for a fee.
20 //!
21 //! ----
22 //!
23 //! Zach Laine
24 //! whatwasthataddress@hotmail.com/
25 //!
26 //! This notice came from the original file from which all the XML parsing code
27 //! was taken, part of the spirit distribution.  The code was modified slightly
28 //! by me, and doesn't contain all the original code.  Thanks to Daniel Nuffer
29 //! for his great work.
30 //!
31 //! ----
32 //!
33 //! simplexml.cpp
34 //!
35 //! Spirit V1.3
36 //! URL: http://spirit.sourceforge.net/
37 //!
38 //! Copyright (c) 2001, Daniel C. Nuffer
39 //!
40 //! This software is provided 'as-is', without any express or implied
41 //! warranty. In no event will the copyright holder be held liable for
42 //! any damages arising from the use of this software.
43 //!
44 //! Permission is granted to anyone to use this software for any purpose,
45 //! including commercial applications, and to alter it and redistribute
46 //! it freely, subject to the following restrictions:
47 //!
48 //! 1.  The origin of this software must not be misrepresented; you must
49 //!     not claim that you wrote the original software. If you use this
50 //!     software in a product, an acknowledgment in the product documentation
51 //!     would be appreciated but is not required.
52 //!
53 //! 2.  Altered source versions must be plainly marked as such, and must
54 //!     not be misrepresented as being the original software.
55 //!
56 //! 3.  This notice may not be removed or altered from any source
57 //!     distribution.
58 
59 
60 #include "XMLDoc.h"
61 
62 #include <boost/spirit/include/classic.hpp>
63 
64 #include <algorithm>
65 #include <stdexcept>
66 #include <sstream>
67 
68 
69 namespace {
70     using namespace boost::spirit::classic;
71 
72     typedef chset<unsigned char> chset_t;
73 
74     //! XML grammar rules
75     rule<> document, prolog, element, Misc, Reference, CData, doctypedecl,
76            XMLDecl, SDDecl, VersionInfo, EncodingDecl, VersionNum, Eq,
77            EmptyElemTag, STag, content, ETag, Attribute, AttValue, CharData,
78            Comment, CDSect, CharRef, EntityRef, EncName, Name, Comment1, S;
79 
80     //! XML Character classes
81     chset_t Char("\x9\xA\xD\x20-\xFF");
82     chset_t Letter("\x41-\x5A\x61-\x7A\xC0-\xD6\xD8-\xF6\xF8-\xFF");
83     chset_t Digit("0-9");
84     chset_t Extender('\xB7');
85     chset_t NameChar = Letter | Digit | chset_t("._:-") | Extender;
86     chset_t Sch("\x20\x9\xD\xA");
87 }
88 
89 
Tag() const90 const std::string& XMLElement::Tag() const
91 { return m_tag; }
92 
Text() const93 const std::string& XMLElement::Text() const
94 { return m_text; }
95 
ContainsChild(const std::string & tag) const96 bool XMLElement::ContainsChild(const std::string& tag) const {
97     return children.end() != std::find_if(children.begin(), children.end(),
98         [&tag] (const XMLElement& e) { return e.m_tag == tag; });
99 }
100 
Child(const std::string & tag) const101 const XMLElement& XMLElement::Child(const std::string& tag) const {
102     auto match = std::find_if(children.begin(), children.end(),
103         [&tag] (const XMLElement& e) { return e.m_tag == tag; });
104 
105     if (match == children.end())
106         throw std::out_of_range("XMLElement::Child(): The XMLElement \"" + Tag() + "\" contains no child \"" + tag + "\".");
107 
108     return *match;
109 }
110 
WriteElement(int indent,bool whitespace) const111 std::string XMLElement::WriteElement(int indent/* = 0*/, bool whitespace/* = true*/) const {
112     std::stringstream ss;
113     WriteElement(ss, indent, whitespace);
114     return ss.str();
115 }
116 
WriteElement(std::ostream & os,int indent,bool whitespace) const117 std::ostream& XMLElement::WriteElement(std::ostream& os, int indent/* = 0*/, bool whitespace/* = true*/) const {
118     if (whitespace)
119         os << std::string(indent * 2, ' ');
120     os << '<' << m_tag;
121     for (const auto& attribute : attributes)
122         os << ' ' << attribute.first << "=\"" << attribute.second << "\"";
123     if (children.empty() && m_text.empty() && !m_root) {
124         os << "/>";
125         if (whitespace)
126             os << "\n";
127     } else {
128         os << ">";
129         if (!m_text.empty() && m_text.find_first_of("<&") != std::string::npos) {
130             os << "<![CDATA[" << m_text << "]]>";
131         } else {
132             os << m_text;
133         }
134         if (whitespace && !children.empty())
135             os << "\n";
136         for (const XMLElement& child : children)
137             child.WriteElement(os, indent + 1, whitespace);
138         if (whitespace && !children.empty()) {
139             os << std::string(indent * 2, ' ');
140         }
141         os << "</" << m_tag << ">";
142         if (whitespace) os << "\n";
143     }
144     return os;
145 }
146 
Child(const std::string & tag)147 XMLElement& XMLElement::Child(const std::string& tag) {
148     auto match = std::find_if(children.begin(), children.end(),
149         [&tag] (const XMLElement& e) { return e.m_tag == tag; });
150 
151     if (match == children.end())
152         throw std::out_of_range("XMLElement::Child(): The XMLElement \"" + Tag() + "\" contains no child \"" + tag + "\".");
153 
154     return *match;
155 }
156 
SetTag(const std::string & tag)157 void XMLElement::SetTag(const std::string& tag)
158 { m_tag = tag; }
159 
SetText(const std::string & text)160 void XMLElement::SetText(const std::string& text)
161 { m_text = text; }
162 
163 
164 XMLDoc*                  XMLDoc::s_curr_parsing_doc = nullptr;
165 std::vector<XMLElement*> XMLDoc::s_element_stack;
166 XMLDoc::RuleDefiner      XMLDoc::s_rule_definer;
167 XMLElement               XMLDoc::s_temp_elem;
168 std::string              XMLDoc::s_temp_attr_name;
169 
XMLDoc(const std::string & root_tag)170 XMLDoc::XMLDoc(const std::string& root_tag/*= "XMLDoc"*/) :
171     root_node(XMLElement(root_tag, true))
172 {}
173 
XMLDoc(const std::istream & is)174 XMLDoc::XMLDoc(const std::istream& is) :
175     root_node(XMLElement())
176 {}
177 
WriteDoc(std::ostream & os,bool whitespace) const178 std::ostream& XMLDoc::WriteDoc(std::ostream& os, bool whitespace/* = true*/) const {
179     os << "<?xml version=\"1.0\"?>";
180     if (whitespace) os << "\n";
181     return root_node.WriteElement(os, 0, whitespace);
182 }
183 
ReadDoc(const std::string & s)184 void XMLDoc::ReadDoc(const std::string& s) {
185     std::stringstream ss(s);
186     ReadDoc(ss);
187 }
188 
ReadDoc(std::istream & is)189 std::istream& XMLDoc::ReadDoc(std::istream& is) {
190     root_node = XMLElement(); // clear doc contents
191     s_element_stack.clear();  // clear this to start a fresh read
192     s_curr_parsing_doc = this;  // indicate where to add elements
193     std::string parse_str;
194     std::string temp_str;
195     while (is) {
196         getline(is, temp_str);
197         parse_str += temp_str + '\n';
198     }
199     parse(parse_str.c_str(), document);
200     s_curr_parsing_doc = nullptr;
201     return is;
202 }
203 
SetElemName(const char * first,const char * last)204 void XMLDoc::SetElemName(const char* first, const char* last)
205 { s_temp_elem = XMLElement(std::string(first, last)); }
206 
SetAttributeName(const char * first,const char * last)207 void XMLDoc::SetAttributeName(const char* first, const char* last)
208 { s_temp_attr_name = std::string(first, last); }
209 
AddAttribute(const char * first,const char * last)210 void XMLDoc::AddAttribute(const char* first, const char* last)
211 { s_temp_elem.attributes[s_temp_attr_name] = std::string(first, last); }
212 
PushElem1(const char * first,const char * last)213 void XMLDoc::PushElem1(const char* first, const char* last) {
214     if (XMLDoc* this_ = XMLDoc::s_curr_parsing_doc) {
215         if (s_element_stack.empty()) {
216             this_->root_node = s_temp_elem;
217             s_element_stack.push_back(&this_->root_node);
218         } else {
219             s_element_stack.back()->children.push_back(s_temp_elem);
220             s_element_stack.push_back(&s_element_stack.back()->children.back());
221         }
222     }
223 }
224 
PushElem2(const char * first,const char * last)225 void XMLDoc::PushElem2(const char* first, const char* last) {
226     if (XMLDoc* this_ = XMLDoc::s_curr_parsing_doc) {
227         if (s_element_stack.empty()) {
228             this_->root_node = s_temp_elem;
229         } else {
230             s_element_stack.back()->children.push_back(s_temp_elem);
231         }
232     }
233 }
234 
PopElem(const char *,const char *)235 void XMLDoc::PopElem(const char*, const char*) {
236     if (!s_element_stack.empty())
237         s_element_stack.pop_back();
238 }
239 
AppendToText(const char * first,const char * last)240 void XMLDoc::AppendToText(const char* first, const char* last) {
241     if (!s_element_stack.empty()) {
242         std::string text(first, last);
243         std::string::size_type first_good_posn = (text[0] != '\"') ? 0 : 1;
244         std::string::size_type last_good_posn = text.find_last_not_of(" \t\n\"\r\f");
245         // strip of leading quote and/or trailing quote, and/or trailing whitespace
246         if (last_good_posn != std::string::npos)
247             s_element_stack.back()->m_text += text.substr(first_good_posn, (last_good_posn + 1) - first_good_posn);
248     }
249 }
250 
//! Assigns a definition to every XML grammar rule declared in the anonymous
//! namespace at the top of this file, and attaches XMLDoc's static parse
//! callbacks to them as semantic actions.  The static member
//! XMLDoc::s_rule_definer defined in this file runs this constructor when it
//! is constructed.
XMLDoc::RuleDefiner::RuleDefiner() {
    // This is the start rule for XML parsing: a prolog, exactly one root
    // element, then any number of trailing comments / whitespace runs.
    document =
        prolog >> element >> *Misc
        ;

    // One or more whitespace characters.
    S =
        +(Sch)
        ;

    // A name: a letter, '_' or ':' followed by any number of name characters.
    Name =
        (Letter | '_' | ':')
            >> *(NameChar)
        ;

    // A quoted attribute value, delimited by either double or single quotes.
    // The run of characters up to the first '<', '&' or closing quote is
    // handed to AddAttribute.
    // NOTE(review): the first alternative inside the quotes is a Kleene star,
    // which can match an empty run, so the `*(Reference)` alternative looks
    // unreachable and values containing '&' presumably fail to parse --
    // confirm against Spirit's alternative semantics.
    AttValue =
        '"'
            >> (
                (*(anychar_p - (chset_t('<') | '&' | '"')))[&XMLDoc::AddAttribute]
                | *(Reference)
               )
            >> '"'
        |   '\''
            >> (
                (*(anychar_p - (chset_t('<') | '&' | '\'')))[&XMLDoc::AddAttribute]
                | *(Reference)
               )
            >> '\''
        ;

    // Characters allowed in character data: anything except '<' and '&'.
    chset_t CharDataChar(anychar_p - (chset_t('<') | chset_t('&')));

    // A (possibly empty) run of character data, stopping before "]]>".  The
    // matched range is handed to AppendToText.
    CharData =
        (*(CharDataChar - str_p("]]>")))[&XMLDoc::AppendToText]
        ;

    // The body of a comment: characters other than '-', or a single '-'
    // followed by a character other than '-' (so "--" cannot occur inside).
    Comment1 =
        *(
          (Char - ch_p('-'))
          | (ch_p('-') >> (Char - ch_p('-')))
          )
        ;

    // A complete comment.  No semantic action is attached, so comments are
    // not stored in the document.
    Comment =
        str_p("<!--") >> Comment1 >> str_p("-->")
        ;

    // A CDATA section.
    CDSect =
        str_p("<![CDATA[") >> CData >> str_p("]]>")
        ;

    // The contents of a CDATA section: any characters up to "]]>".  The
    // matched range is handed to AppendToText, like ordinary character data.
    CData =
        (*(Char - str_p("]]>")))[&XMLDoc::AppendToText]
        ;

    // Everything before the root element: an optional XML declaration,
    // comments / whitespace, and an optional document type declaration.
    prolog =
        !XMLDecl >> *Misc >> !(doctypedecl >> *Misc)
        ;

    // The XML declaration: a mandatory version, then optional encoding and
    // standalone declarations.  No semantic actions are attached.
    XMLDecl =
        str_p("<?xml")
            >> VersionInfo
            >> !EncodingDecl
            >> !SDDecl
            >> !S
            >> str_p("?>")
        ;

    // The version pseudo-attribute, with a single- or double-quoted value.
    VersionInfo =
        S
            >> str_p("version")
            >> Eq
            >> (
                ch_p('\'') >> VersionNum >> '\''
                | ch_p('"')  >> VersionNum >> '"'
                )
        ;

    // An equals sign with optional whitespace on either side.
    Eq =
        !S >> '=' >> !S
        ;

    // Characters allowed in a version number.
    chset_t VersionNumCh("A-Za-z0-9_.:-");

    // One or more version-number characters.
    VersionNum =
        +(VersionNumCh)
        ;

    // Content allowed between top-level constructs: a comment or whitespace.
    Misc =
        Comment | S
        ;

    // A document type declaration: skipped up to the closing '>', with an
    // optional bracketed section that may not itself contain ']'.  No
    // semantic actions are attached.
    doctypedecl =
        str_p("<!DOCTYPE")
            >> *(Char - (chset_t('[') | '>'))
            >> !('[' >> *(Char - ']') >> ']')
            >> '>'
        ;

    // The standalone pseudo-attribute: "yes" or "no" in matching quotes.
    SDDecl =
        S
            >> str_p("standalone")
            >> Eq
            >> (
                (ch_p('\'') >> (str_p("yes") | str_p("no")) >> '\'')
                | (ch_p('"')  >> (str_p("yes") | str_p("no")) >> '"')
                )
        ;

    // An element: either a start tag (PushElem1 adds and opens the element),
    // content and an end tag, or an empty-element tag (PushElem2 adds the
    // element without opening it).  The start-tag form is tried first.
    element =
        STag[&XMLDoc::PushElem1] >> content >> ETag
        | EmptyElemTag[&XMLDoc::PushElem2]
        ;

    // A start tag.  SetElemName starts a new temporary element from the
    // name; the attributes that follow are added to that temporary element.
    STag =
        '<'
            >> Name[&XMLDoc::SetElemName]
            >> *(S >> Attribute)
            >> !S
            >> '>'
        ;

    // A name="value" pair.  SetAttributeName records the name; the value is
    // stored by the action inside AttValue.
    Attribute =
        Name[&XMLDoc::SetAttributeName] >> Eq >> AttValue
        ;

    // An end tag.  PopElem fires as soon as the name is matched and ignores
    // the matched text, so the name is not compared with the start tag's.
    ETag =
        str_p("</") >> Name[&XMLDoc::PopElem] >> !S >> '>'
        ;

    // Element content: character data interleaved with child elements,
    // references, CDATA sections and comments.  References are matched but
    // have no semantic action attached.
    content =
        !CharData
            >> *(
                 (
                  element
                  | Reference
                  | CDSect
                  | Comment
                  )
                 >> !CharData
                 )
        ;

    // An empty-element tag; identical to a start tag except that it ends
    // with "/>".
    EmptyElemTag =
        '<'
            >> Name[&XMLDoc::SetElemName]
            >> *(S >> Attribute)
            >> !S
            >> str_p("/>")
        ;

    // A numeric character reference, in decimal or (with the "&#x" prefix)
    // hexadecimal form.
    CharRef =
        str_p("&#") >> +digit_p >> ';'
        | str_p("&#x") >> +xdigit_p >> ';'
        ;

    // Any reference: a named entity reference or a character reference.
    Reference =
        EntityRef
        | CharRef
        ;

    // A named entity reference.
    EntityRef =
        '&' >> Name >> ';'
        ;

    // The encoding pseudo-attribute, with a double- or single-quoted value.
    EncodingDecl =
        S
            >> str_p("encoding")
            >> Eq
            >> (
                ch_p('"')  >> EncName >> '"'
                | ch_p('\'') >> EncName >> '\''
                )
        ;

    // Characters allowed in an encoding name: the version-number characters
    // without ':'.
    chset_t EncNameCh = VersionNumCh - chset_t(':');

    // An encoding name: an alphabetic character followed by any number of
    // encoding-name characters.
    EncName =
        alpha_p >> *(EncNameCh)
        ;
}
432