/*
 *  Created by Phil on 19/07/2017.
 *
 *  Distributed under the Boost Software License, Version 1.0. (See accompanying
 *  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 */
7 
8 #include "catch_xmlwriter.h"
9 
10 #include "catch_enforce.h"
11 
12 #include <iomanip>
13 #include <type_traits>
14 
15 namespace Catch {
16 
17 namespace {
18 
trailingBytes(unsigned char c)19     size_t trailingBytes(unsigned char c) {
20         if ((c & 0xE0) == 0xC0) {
21             return 2;
22         }
23         if ((c & 0xF0) == 0xE0) {
24             return 3;
25         }
26         if ((c & 0xF8) == 0xF0) {
27             return 4;
28         }
29         CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
30     }
31 
headerValue(unsigned char c)32     uint32_t headerValue(unsigned char c) {
33         if ((c & 0xE0) == 0xC0) {
34             return c & 0x1F;
35         }
36         if ((c & 0xF0) == 0xE0) {
37             return c & 0x0F;
38         }
39         if ((c & 0xF8) == 0xF0) {
40             return c & 0x07;
41         }
42         CATCH_INTERNAL_ERROR("Invalid multibyte utf-8 start byte encountered");
43     }
44 
// Writes `c` to `os` as a backslash-x escape followed by exactly two
// uppercase hexadecimal digits (e.g. the byte 0x0A becomes the four
// characters \x0A).
//
// The stream's formatting state is left as it was found: both the format
// flags and the fill character are saved up front and restored afterwards.
// The fill character has to be handled separately because it is not part of
// fmtflags, and std::setfill is sticky - without restoring it, every later
// padded output on the same stream would be padded with '0'.
void hexEscapeChar(std::ostream& os, unsigned char c) {
    const std::ios_base::fmtflags savedFlags(os.flags());
    const char savedFill = os.fill();
    os << "\\x"
        << std::uppercase << std::hex << std::setfill('0') << std::setw(2)
        << static_cast<int>(c);
    os.fill(savedFill);
    os.flags(savedFlags);
}
52 
shouldNewline(XmlFormatting fmt)53     bool shouldNewline(XmlFormatting fmt) {
54         return !!(static_cast<std::underlying_type<XmlFormatting>::type>(fmt & XmlFormatting::Newline));
55     }
56 
shouldIndent(XmlFormatting fmt)57     bool shouldIndent(XmlFormatting fmt) {
58         return !!(static_cast<std::underlying_type<XmlFormatting>::type>(fmt & XmlFormatting::Indent));
59     }
60 
61 } // anonymous namespace
62 
operator |(XmlFormatting lhs,XmlFormatting rhs)63     XmlFormatting operator | (XmlFormatting lhs, XmlFormatting rhs) {
64         return static_cast<XmlFormatting>(
65             static_cast<std::underlying_type<XmlFormatting>::type>(lhs) |
66             static_cast<std::underlying_type<XmlFormatting>::type>(rhs)
67         );
68     }
69 
operator &(XmlFormatting lhs,XmlFormatting rhs)70     XmlFormatting operator & (XmlFormatting lhs, XmlFormatting rhs) {
71         return static_cast<XmlFormatting>(
72             static_cast<std::underlying_type<XmlFormatting>::type>(lhs) &
73             static_cast<std::underlying_type<XmlFormatting>::type>(rhs)
74         );
75     }
76 
77 
XmlEncode(std::string const & str,ForWhat forWhat)78     XmlEncode::XmlEncode( std::string const& str, ForWhat forWhat )
79     :   m_str( str ),
80         m_forWhat( forWhat )
81     {}
82 
encodeTo(std::ostream & os) const83     void XmlEncode::encodeTo( std::ostream& os ) const {
84         // Apostrophe escaping not necessary if we always use " to write attributes
85         // (see: http://www.w3.org/TR/xml/#syntax)
86 
87         for( std::size_t idx = 0; idx < m_str.size(); ++ idx ) {
88             unsigned char c = m_str[idx];
89             switch (c) {
90             case '<':   os << "&lt;"; break;
91             case '&':   os << "&amp;"; break;
92 
93             case '>':
94                 // See: http://www.w3.org/TR/xml/#syntax
95                 if (idx > 2 && m_str[idx - 1] == ']' && m_str[idx - 2] == ']')
96                     os << "&gt;";
97                 else
98                     os << c;
99                 break;
100 
101             case '\"':
102                 if (m_forWhat == ForAttributes)
103                     os << "&quot;";
104                 else
105                     os << c;
106                 break;
107 
108             default:
109                 // Check for control characters and invalid utf-8
110 
111                 // Escape control characters in standard ascii
112                 // see http://stackoverflow.com/questions/404107/why-are-control-characters-illegal-in-xml-1-0
113                 if (c < 0x09 || (c > 0x0D && c < 0x20) || c == 0x7F) {
114                     hexEscapeChar(os, c);
115                     break;
116                 }
117 
118                 // Plain ASCII: Write it to stream
119                 if (c < 0x7F) {
120                     os << c;
121                     break;
122                 }
123 
124                 // UTF-8 territory
125                 // Check if the encoding is valid and if it is not, hex escape bytes.
126                 // Important: We do not check the exact decoded values for validity, only the encoding format
127                 // First check that this bytes is a valid lead byte:
128                 // This means that it is not encoded as 1111 1XXX
129                 // Or as 10XX XXXX
130                 if (c <  0xC0 ||
131                     c >= 0xF8) {
132                     hexEscapeChar(os, c);
133                     break;
134                 }
135 
136                 auto encBytes = trailingBytes(c);
137                 // Are there enough bytes left to avoid accessing out-of-bounds memory?
138                 if (idx + encBytes - 1 >= m_str.size()) {
139                     hexEscapeChar(os, c);
140                     break;
141                 }
142                 // The header is valid, check data
143                 // The next encBytes bytes must together be a valid utf-8
144                 // This means: bitpattern 10XX XXXX and the extracted value is sane (ish)
145                 bool valid = true;
146                 uint32_t value = headerValue(c);
147                 for (std::size_t n = 1; n < encBytes; ++n) {
148                     unsigned char nc = m_str[idx + n];
149                     valid &= ((nc & 0xC0) == 0x80);
150                     value = (value << 6) | (nc & 0x3F);
151                 }
152 
153                 if (
154                     // Wrong bit pattern of following bytes
155                     (!valid) ||
156                     // Overlong encodings
157                     (value < 0x80) ||
158                     (0x80 <= value && value < 0x800   && encBytes > 2) ||
159                     (0x800 < value && value < 0x10000 && encBytes > 3) ||
160                     // Encoded value out of range
161                     (value >= 0x110000)
162                     ) {
163                     hexEscapeChar(os, c);
164                     break;
165                 }
166 
167                 // If we got here, this is in fact a valid(ish) utf-8 sequence
168                 for (std::size_t n = 0; n < encBytes; ++n) {
169                     os << m_str[idx + n];
170                 }
171                 idx += encBytes - 1;
172                 break;
173             }
174         }
175     }
176 
operator <<(std::ostream & os,XmlEncode const & xmlEncode)177     std::ostream& operator << ( std::ostream& os, XmlEncode const& xmlEncode ) {
178         xmlEncode.encodeTo( os );
179         return os;
180     }
181 
ScopedElement(XmlWriter * writer,XmlFormatting fmt)182     XmlWriter::ScopedElement::ScopedElement( XmlWriter* writer, XmlFormatting fmt )
183     :   m_writer( writer ),
184         m_fmt(fmt)
185     {}
186 
ScopedElement(ScopedElement && other)187     XmlWriter::ScopedElement::ScopedElement( ScopedElement&& other ) noexcept
188     :   m_writer( other.m_writer ),
189         m_fmt(other.m_fmt)
190     {
191         other.m_writer = nullptr;
192         other.m_fmt = XmlFormatting::None;
193     }
operator =(ScopedElement && other)194     XmlWriter::ScopedElement& XmlWriter::ScopedElement::operator=( ScopedElement&& other ) noexcept {
195         if ( m_writer ) {
196             m_writer->endElement();
197         }
198         m_writer = other.m_writer;
199         other.m_writer = nullptr;
200         m_fmt = other.m_fmt;
201         other.m_fmt = XmlFormatting::None;
202         return *this;
203     }
204 
205 
~ScopedElement()206     XmlWriter::ScopedElement::~ScopedElement() {
207         if (m_writer) {
208             m_writer->endElement(m_fmt);
209         }
210     }
211 
writeText(std::string const & text,XmlFormatting fmt)212     XmlWriter::ScopedElement& XmlWriter::ScopedElement::writeText( std::string const& text, XmlFormatting fmt ) {
213         m_writer->writeText( text, fmt );
214         return *this;
215     }
216 
XmlWriter(std::ostream & os)217     XmlWriter::XmlWriter( std::ostream& os ) : m_os( os )
218     {
219         writeDeclaration();
220     }
221 
~XmlWriter()222     XmlWriter::~XmlWriter() {
223         while (!m_tags.empty()) {
224             endElement();
225         }
226         newlineIfNecessary();
227     }
228 
startElement(std::string const & name,XmlFormatting fmt)229     XmlWriter& XmlWriter::startElement( std::string const& name, XmlFormatting fmt ) {
230         ensureTagClosed();
231         newlineIfNecessary();
232         if (shouldIndent(fmt)) {
233             m_os << m_indent;
234             m_indent += "  ";
235         }
236         m_os << '<' << name;
237         m_tags.push_back( name );
238         m_tagIsOpen = true;
239         applyFormatting(fmt);
240         return *this;
241     }
242 
scopedElement(std::string const & name,XmlFormatting fmt)243     XmlWriter::ScopedElement XmlWriter::scopedElement( std::string const& name, XmlFormatting fmt ) {
244         ScopedElement scoped( this, fmt );
245         startElement( name, fmt );
246         return scoped;
247     }
248 
endElement(XmlFormatting fmt)249     XmlWriter& XmlWriter::endElement(XmlFormatting fmt) {
250         m_indent = m_indent.substr(0, m_indent.size() - 2);
251 
252         if( m_tagIsOpen ) {
253             m_os << "/>";
254             m_tagIsOpen = false;
255         } else {
256             newlineIfNecessary();
257             if (shouldIndent(fmt)) {
258                 m_os << m_indent;
259             }
260             m_os << "</" << m_tags.back() << ">";
261         }
262         m_os << std::flush;
263         applyFormatting(fmt);
264         m_tags.pop_back();
265         return *this;
266     }
267 
writeAttribute(std::string const & name,std::string const & attribute)268     XmlWriter& XmlWriter::writeAttribute( std::string const& name, std::string const& attribute ) {
269         if( !name.empty() && !attribute.empty() )
270             m_os << ' ' << name << "=\"" << XmlEncode( attribute, XmlEncode::ForAttributes ) << '"';
271         return *this;
272     }
273 
writeAttribute(std::string const & name,bool attribute)274     XmlWriter& XmlWriter::writeAttribute( std::string const& name, bool attribute ) {
275         m_os << ' ' << name << "=\"" << ( attribute ? "true" : "false" ) << '"';
276         return *this;
277     }
278 
writeText(std::string const & text,XmlFormatting fmt)279     XmlWriter& XmlWriter::writeText( std::string const& text, XmlFormatting fmt) {
280         if( !text.empty() ){
281             bool tagWasOpen = m_tagIsOpen;
282             ensureTagClosed();
283             if (tagWasOpen && shouldIndent(fmt)) {
284                 m_os << m_indent;
285             }
286             m_os << XmlEncode( text );
287             applyFormatting(fmt);
288         }
289         return *this;
290     }
291 
writeComment(std::string const & text,XmlFormatting fmt)292     XmlWriter& XmlWriter::writeComment( std::string const& text, XmlFormatting fmt) {
293         ensureTagClosed();
294         if (shouldIndent(fmt)) {
295             m_os << m_indent;
296         }
297         m_os << "<!--" << text << "-->";
298         applyFormatting(fmt);
299         return *this;
300     }
301 
writeStylesheetRef(std::string const & url)302     void XmlWriter::writeStylesheetRef( std::string const& url ) {
303         m_os << "<?xml-stylesheet type=\"text/xsl\" href=\"" << url << "\"?>\n";
304     }
305 
writeBlankLine()306     XmlWriter& XmlWriter::writeBlankLine() {
307         ensureTagClosed();
308         m_os << '\n';
309         return *this;
310     }
311 
ensureTagClosed()312     void XmlWriter::ensureTagClosed() {
313         if( m_tagIsOpen ) {
314             m_os << '>' << std::flush;
315             newlineIfNecessary();
316             m_tagIsOpen = false;
317         }
318     }
319 
applyFormatting(XmlFormatting fmt)320     void XmlWriter::applyFormatting(XmlFormatting fmt) {
321         m_needsNewline = shouldNewline(fmt);
322     }
323 
writeDeclaration()324     void XmlWriter::writeDeclaration() {
325         m_os << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
326     }
327 
newlineIfNecessary()328     void XmlWriter::newlineIfNecessary() {
329         if( m_needsNewline ) {
330             m_os << std::endl;
331             m_needsNewline = false;
332         }
333     }
334 }
335