1 // Copyright 2008, Google Inc. All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are met:
5 //
6 // 1. Redistributions of source code must retain the above copyright notice,
7 // this list of conditions and the following disclaimer.
8 // 2. Redistributions in binary form must reproduce the above copyright notice,
9 // this list of conditions and the following disclaimer in the documentation
10 // and/or other materials provided with the distribution.
11 // 3. Neither the name of Google Inc. nor the names of its contributors may be
12 // used to endorse or promote products derived from this software without
13 // specific prior written permission.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
16 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
18 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
26 // This file contains the unit tests for the ExpatParser class.
27
28 #include "kml/base/expat_parser.h"
29 #include "kml/base/file.h"
30 #include "boost/scoped_ptr.hpp"
31 #include "gtest/gtest.h"
32 #include "kml/dom/kml_handler.h"
33 #include "kml/dom/parser_observer.h"
34
35 // The following define is a convenience for testing inside Google.
36 #ifdef GOOGLE_INTERNAL
37 #include "kml/base/google_internal_test.h"
38 #endif
39
40 #ifndef DATADIR
41 #error *** DATADIR must be defined! ***
42 #endif
43
44 namespace kmlbase {
45
46 // A basic ExpatParser handler that simply reconstructs parsed XML in a string.
47 class TestXmlHandler : public ExpatHandler {
48 public:
StartElement(const string & name,const StringVector & atts)49 virtual void StartElement(const string& name,
50 const StringVector& atts) {
51 xml_.append("<" + name + ">");
52 }
EndElement(const string & name)53 virtual void EndElement(const string& name) {
54 xml_.append("</" + string(name) + ">");
55 }
CharData(const string & data)56 virtual void CharData(const string& data) {
57 xml_.append(data);
58 }
get_xml() const59 const string& get_xml() const { return xml_; }
60
61 private:
62 string xml_;
63 };
64
65 class ExpatParserTest : public testing::Test {
66 protected:
67 string errors_;
68 TestXmlHandler handler_;
69 };
70
71 // Verify basic usage of the static ParseString method.
TEST_F(ExpatParserTest,TestPassingParseString)72 TEST_F(ExpatParserTest, TestPassingParseString) {
73 const string kXml("<Tom><dick>foo</dick><harry>bar</harry></Tom>");
74 ASSERT_TRUE(ExpatParser::ParseString(kXml, &handler_, &errors_, false));
75 ASSERT_TRUE(errors_.empty());
76 ASSERT_EQ(kXml, handler_.get_xml());
77 }
78
79 // Verify failure of ParseString on badly formed XML content.
TEST_F(ExpatParserTest,TestFailingParseString)80 TEST_F(ExpatParserTest, TestFailingParseString) {
81 // kXml is not well-formed.
82 const string kXml("<Tom><dick>foo</dick><harry>bar</harry>");
83 ASSERT_FALSE(ExpatParser::ParseString(kXml, &handler_, &errors_, false));
84 ASSERT_FALSE(errors_.empty());
85 ASSERT_EQ(kXml, handler_.get_xml());
86 }
87
88 // Verify basic usage of the ParseBuffer method.
TEST_F(ExpatParserTest,TestPassingParseBuffer)89 TEST_F(ExpatParserTest, TestPassingParseBuffer) {
90 const string kXml("<Tom><dick>foo</dick><harry>bar</harry></Tom>");
91 ExpatParser parser(&handler_, false);
92
93 // Parse the string one character at a time.
94 for (size_t i = 0; i < kXml.length(); ++i) {
95 ASSERT_TRUE(parser.ParseBuffer(kXml.substr(i, 1), &errors_,
96 i == kXml.length()-1));
97 }
98 ASSERT_EQ(kXml, handler_.get_xml());
99 }
100
101 // Verify failure of ParseBuffer on badly formed XML content.
TEST_F(ExpatParserTest,TestFailingParseBuffer)102 TEST_F(ExpatParserTest, TestFailingParseBuffer) {
103 // kXml is not well-formed.
104 const string kXml("<Tom><dick>foo</dick><harry>bar</harry>");
105 ExpatParser parser(&handler_, false);
106
107 // Parse the string one character at a time.
108 for (size_t i = 0; i < kXml.length(); ++i) {
109 ASSERT_TRUE(parser.ParseBuffer(kXml.substr(i, 1), &errors_, false));
110 ASSERT_TRUE(errors_.empty());
111 }
112 // Now set the is_final bool to true to indicate that we believe parsing
113 // is done. Expat will check and see that its own parsing state shows
114 // more content is necessary because our XML is missing the closing
115 // </Tom> tag.
116 ASSERT_FALSE(parser.ParseBuffer("", &errors_, true));
117 ASSERT_FALSE(errors_.empty());
118
119 ASSERT_EQ(kXml, handler_.get_xml());
120 }
121
122 // Assert that we detect a mid-stream parsing failure.
TEST_F(ExpatParserTest,TestMidstreamFailingParseBuffer)123 TEST_F(ExpatParserTest, TestMidstreamFailingParseBuffer) {
124 const string k0("<A><B><C><D>");
125 const string k1("</D>"); // This is fine.
126 const string k2("</B>"); // XML is badly formed here, missing </C>.
127 ExpatParser parser(&handler_, false);
128
129 ASSERT_TRUE(parser.ParseBuffer(k0, &errors_, false));
130 ASSERT_TRUE(errors_.empty());
131
132 ASSERT_TRUE(parser.ParseBuffer(k1, &errors_, false));
133 ASSERT_TRUE(errors_.empty());
134
135 ASSERT_FALSE(parser.ParseBuffer(k2, &errors_, false));
136 ASSERT_FALSE(errors_.empty());
137 }
138
139 // Verify basic usage of the GetInternalBuffer and ParseInternalBuffer methods.
TEST_F(ExpatParserTest,TestPassingParseInternalBuffer)140 TEST_F(ExpatParserTest, TestPassingParseInternalBuffer) {
141 const string kXml("<Tom><dick>foo</dick><harry>bar</harry></Tom>");
142 ExpatParser parser(&handler_, false);
143
144 // Parse the string one character at a time.
145 for (size_t i = 0; i < kXml.length(); ++i) {
146 char* buf = static_cast<char*>(parser.GetInternalBuffer(1));
147 *buf = kXml[i];
148 ASSERT_TRUE(parser.ParseInternalBuffer(1, &errors_, i == kXml.length()-1));
149 }
150 ASSERT_EQ(kXml, handler_.get_xml());
151 ASSERT_TRUE(errors_.empty());
152 }
153
// A well-formedness error is detected when the offending chunk is supplied
// through the GetInternalBuffer / ParseInternalBuffer path.
TEST_F(ExpatParserTest, TestFailingInternalBuffer) {
  const string open_tags = "<A><B><C><D>";
  const string close_d = "</D>";  // Valid: closes the innermost element.
  const string close_b = "</B>";  // Invalid: </C> has not been seen yet.
  ExpatParser parser(&handler_, false);

  // First chunk: the opening tags are accepted.
  void* chunk = parser.GetInternalBuffer(open_tags.size());
  memcpy(chunk, open_tags.data(), open_tags.size());
  bool accepted =
      parser.ParseInternalBuffer(open_tags.size(), &errors_, false);
  ASSERT_TRUE(accepted);
  ASSERT_TRUE(errors_.empty());

  // Second chunk: closing <D> is accepted as well.
  chunk = parser.GetInternalBuffer(close_d.size());
  memcpy(chunk, close_d.data(), close_d.size());
  accepted = parser.ParseInternalBuffer(close_d.size(), &errors_, false);
  ASSERT_TRUE(accepted);
  ASSERT_TRUE(errors_.empty());

  // Third and final chunk: the mismatched close tag must be rejected.
  chunk = parser.GetInternalBuffer(close_b.size());
  memcpy(chunk, close_b.data(), close_b.size());
  accepted = parser.ParseInternalBuffer(close_b.size(), &errors_, true);
  ASSERT_FALSE(accepted);
  ASSERT_FALSE(errors_.empty());
}
175
// The "billion laughs" buffer overflow attack must be handled: parsing the
// attack file fails with an error rather than hanging libkml, as it
// previously did.
TEST_F(ExpatParserTest, TestBillionLaughsAttack) {
  string attack_path(DATADIR);
  attack_path += "/kml/billion.kml";

  string contents;
  const bool was_read = File::ReadFileToString(attack_path, &contents);
  ASSERT_TRUE(was_read);

  const bool parsed =
      ExpatParser::ParseString(contents, &handler_, &errors_, false);
  ASSERT_FALSE(parsed);
  ASSERT_FALSE(errors_.empty());
  // Nothing at all was delivered to the handler.
  ASSERT_TRUE(handler_.get_xml().empty());
}
187
// Entity declarations inside the document must stop the parser: the parse
// fails with an error and the handler sees nothing past the opening
// <Placemark> tag.
TEST_F(ExpatParserTest, TestEntitiesStopParser) {
  // This is malformed XML.
  const string entity_bomb =
      "<Placemark>"
      "<!DOCTYPE billion ["
      "<!ELEMENT billion (#PCDATA)>"
      "<!ENTITY laugh0 \"ha\">"
      "<!ENTITY laugh1 \"&laugh0;&laugh0;\">"
      "<!ENTITY laugh2 \"&laugh1;&laugh1;\">"
      "<!ENTITY laugh3 \"&laugh2;&laugh2;\">"
      "<!ENTITY laugh4 \"&laugh3;&laugh3;\">"
      "<!ENTITY laugh5 \"&laugh4;&laugh4;\">"
      "<!ENTITY laugh6 \"&laugh5;&laugh5;\">"
      "<!ENTITY laugh7 \"&laugh6;&laugh6;\">"
      "<!ENTITY laugh8 \"&laugh7;&laugh7;\">"
      "<!ENTITY laugh9 \"&laugh8;&laugh8;\">"
      "<!ENTITY laugh10 \"&laugh9;&laugh9;\">"
      "<!ENTITY laugh11 \"&laugh10;&laugh10;\">"
      "<!ENTITY laugh12 \"&laugh11;&laugh11;\">"
      "<!ENTITY laugh13 \"&laugh12;&laugh12;\">"
      "<!ENTITY laugh14 \"&laugh13;&laugh13;\">"
      "<!ENTITY laugh15 \"&laugh14;&laugh14;\">"
      "<!ENTITY laugh16 \"&laugh15;&laugh15;\">"
      "<!ENTITY laugh17 \"&laugh16;&laugh16;\">"
      "<!ENTITY laugh18 \"&laugh17;&laugh17;\">"
      "<!ENTITY laugh19 \"&laugh18;&laugh18;\">"
      "<!ENTITY laugh20 \"&laugh19;&laugh19;\">"
      "<!ENTITY laugh21 \"&laugh20;&laugh20;\">"
      "<!ENTITY laugh22 \"&laugh21;&laugh21;\">"
      "<!ENTITY laugh23 \"&laugh22;&laugh22;\">"
      "<!ENTITY laugh24 \"&laugh23;&laugh23;\">"
      "<!ENTITY laugh25 \"&laugh24;&laugh24;\">"
      "<!ENTITY laugh26 \"&laugh25;&laugh25;\">"
      "<!ENTITY laugh27 \"&laugh26;&laugh26;\">"
      "<!ENTITY laugh28 \"&laugh27;&laugh27;\">"
      "<!ENTITY laugh29 \"&laugh28;&laugh28;\">"
      "<!ENTITY laugh30 \"&laugh29;&laugh29;\">"
      "]>"
      "<billion>&laugh30;</billion>"
      "</Placemark>";

  const bool parsed =
      ExpatParser::ParseString(entity_bomb, &handler_, &errors_, false);
  ASSERT_FALSE(parsed);
  ASSERT_FALSE(errors_.empty());

  const string expected_prefix("<Placemark>");
  ASSERT_EQ(expected_prefix, handler_.get_xml());
}
232
// Multi-byte UTF-8 character data must survive a trip through the parser and
// the handler unchanged.
TEST_F(ExpatParserTest, TestUnicode) {
  const string utf8_kml =
      "<Placemark>"
      "<name>"
      "one\xe2\x80\x93two"  // A UTF-8 encoded en-dash.
      "</name>"
      "</Placemark>";

  const bool parsed =
      ExpatParser::ParseString(utf8_kml, &handler_, &errors_, false);
  ASSERT_TRUE(parsed);
  ASSERT_TRUE(errors_.empty());
  // If this roundtrip fails, expect problems in the XML_UNICODE paths.
  ASSERT_EQ(utf8_kml, handler_.get_xml());
}
246
// Exercises xmlchar_to_utf8: null arguments must be tolerated, a plain ASCII
// character must convert to itself, and the remaining checks depend on
// whether expat was configured for wide (XML_UNICODE) characters.
TEST_F(ExpatParserTest, TestUnicodeToUtf8) {
  // Verify no crash on null inputs.
  string result_string;
  const XML_Char input_buffer = 'a';

  xmlchar_to_utf8(&input_buffer, NULL);
  xmlchar_to_utf8(NULL, &result_string);
  ASSERT_TRUE(result_string.empty());
  xmlchar_to_utf8(NULL, NULL);

  xmlchar_to_utf8(&input_buffer, &result_string);
  ASSERT_EQ("a", result_string);

  // Requires this file be built with the same flags used to build libexpat.
  // Use #ifdef rather than #if: expat's own headers may define XML_UNICODE
  // with no value (e.g. when XML_UNICODE_WCHAR_T is set), and "#if" on an
  // empty macro is a preprocessing error.
#ifdef XML_UNICODE
  ASSERT_TRUE(sizeof(XML_Char) > 1);

  // Verify successful two byte encoding.
  result_string.clear();
  const XML_Char kutf8_small_g = 0x262;
  xmlchar_to_utf8(&kutf8_small_g, &result_string);
  ASSERT_EQ("\xc9\xa2", result_string);

  // Verify three byte encoding.
  result_string.clear();
  const XML_Char kutf8_degree_celsius = 0x2103;
  xmlchar_to_utf8(&kutf8_degree_celsius, &result_string);
  ASSERT_EQ("\xe2\x84\x83", result_string);
#else
  // Verify we don't mangle UTF-8 start if we're just passing through.
  result_string.clear();
  const XML_Char kutf8 = 0xe2;
  xmlchar_to_utf8(&kutf8, &result_string);
  ASSERT_EQ("\xe2", result_string);
#endif  // XML_UNICODE
}
283
// Exercises the XML_Char-to-string helpers: xml_char_to_string,
// xml_char_to_string_n and xml_char_to_string_vec, including their handling
// of null and empty inputs.
//
// Every ASSERT_EQ below passes the expected value first, matching the rest of
// this file, so that failure output labels the two values correctly.
TEST_F(ExpatParserTest, TestXmlUnicodeHandlers) {
  // The contrived-looking array approach here is so we're safe with either
  // sane build options or XML_UNICODE.
  const XML_Char kXMLChar[] = {'<', 'A', '>', '<', 'B', '>', 0};
  const XML_Char kXMLChar2[] = {'<', 'C', '>', '<', 'D', '>', 0};
  const XML_Char kEmptyString[] = {0};
  string s1;
  // Ensure roundtrip is OK. If this fails, suspect XML_UNICODE mismatches
  // in linked expat lib and this source.
  ASSERT_EQ("<A><B>", xml_char_to_string(kXMLChar));

  // Check null inputs.
  s1 = xml_char_to_string(NULL);
  ASSERT_TRUE(s1.empty());

  // Check empty inputs.
  s1 = xml_char_to_string(kEmptyString);
  ASSERT_TRUE(s1.empty());

  // Exercise xml_char_to_string_n.
  s1 = xml_char_to_string_n(kXMLChar, 0);
  ASSERT_TRUE(s1.empty());
  s1 = xml_char_to_string_n(kXMLChar, 3);
  ASSERT_EQ("<A>", s1);

  // Now the array version.
  std::vector<string> a;
  xml_char_to_string_vec(NULL, &a);
  ASSERT_EQ(static_cast<size_t>(0), a.size());

  // Check empty array.
  const XML_Char* kXMLEmptyArray[] = {NULL};
  xml_char_to_string_vec(kXMLEmptyArray, &a);
  ASSERT_EQ(static_cast<size_t>(0), a.size());

  // Check common case.
  const XML_Char* kXMLArray[] = {kXMLChar, kXMLChar2, NULL};
  xml_char_to_string_vec(kXMLArray, &a);
  ASSERT_EQ(static_cast<size_t>(2), a.size());
  ASSERT_EQ("<A><B>", a.at(0));
  ASSERT_EQ("<C><D>", a.at(1));
}
326
327 } // end namespace kmlbase
328