1 // Copyright 2008, Google Inc. All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are met:
5 //
6 //  1. Redistributions of source code must retain the above copyright notice,
7 //     this list of conditions and the following disclaimer.
8 //  2. Redistributions in binary form must reproduce the above copyright notice,
9 //     this list of conditions and the following disclaimer in the documentation
10 //     and/or other materials provided with the distribution.
11 //  3. Neither the name of Google Inc. nor the names of its contributors may be
12 //     used to endorse or promote products derived from this software without
13 //     specific prior written permission.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
16 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
18 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 // This file contains the unit tests for the ExpatParser class.
27 
#include "kml/base/expat_parser.h"

#include <cstring>
#include <vector>

#include "boost/scoped_ptr.hpp"
#include "gtest/gtest.h"
#include "kml/base/file.h"
#include "kml/dom/kml_handler.h"
#include "kml/dom/parser_observer.h"
34 
35 // The following define is a convenience for testing inside Google.
36 #ifdef GOOGLE_INTERNAL
37 #include "kml/base/google_internal_test.h"
38 #endif
39 
40 #ifndef DATADIR
41 #error *** DATADIR must be defined! ***
42 #endif
43 
44 namespace kmlbase {
45 
46 // A basic ExpatParser handler that simply reconstructs parsed XML in a string.
47 class TestXmlHandler : public ExpatHandler {
48  public:
StartElement(const string & name,const StringVector & atts)49   virtual void StartElement(const string& name,
50                             const StringVector& atts) {
51     xml_.append("<" + name + ">");
52   }
EndElement(const string & name)53   virtual void EndElement(const string& name) {
54     xml_.append("</" + string(name) + ">");
55   }
CharData(const string & data)56   virtual void CharData(const string& data) {
57     xml_.append(data);
58   }
get_xml() const59   const string& get_xml() const { return xml_; }
60 
61  private:
62   string xml_;
63 };
64 
65 class ExpatParserTest : public testing::Test {
66  protected:
67   string errors_;
68   TestXmlHandler handler_;
69 };
70 
71 // Verify basic usage of the static ParseString method.
TEST_F(ExpatParserTest,TestPassingParseString)72 TEST_F(ExpatParserTest, TestPassingParseString) {
73   const string kXml("<Tom><dick>foo</dick><harry>bar</harry></Tom>");
74   ASSERT_TRUE(ExpatParser::ParseString(kXml, &handler_, &errors_, false));
75   ASSERT_TRUE(errors_.empty());
76   ASSERT_EQ(kXml, handler_.get_xml());
77 }
78 
79 // Verify failure of ParseString on badly formed XML content.
TEST_F(ExpatParserTest,TestFailingParseString)80 TEST_F(ExpatParserTest, TestFailingParseString) {
81   // kXml is not well-formed.
82   const string kXml("<Tom><dick>foo</dick><harry>bar</harry>");
83   ASSERT_FALSE(ExpatParser::ParseString(kXml, &handler_, &errors_, false));
84   ASSERT_FALSE(errors_.empty());
85   ASSERT_EQ(kXml, handler_.get_xml());
86 }
87 
88 // Verify basic usage of the ParseBuffer method.
TEST_F(ExpatParserTest,TestPassingParseBuffer)89 TEST_F(ExpatParserTest, TestPassingParseBuffer) {
90   const string kXml("<Tom><dick>foo</dick><harry>bar</harry></Tom>");
91   ExpatParser parser(&handler_, false);
92 
93   // Parse the string one character at a time.
94   for (size_t i = 0; i < kXml.length(); ++i) {
95     ASSERT_TRUE(parser.ParseBuffer(kXml.substr(i, 1), &errors_,
96                                    i == kXml.length()-1));
97   }
98   ASSERT_EQ(kXml, handler_.get_xml());
99 }
100 
101 // Verify failure of ParseBuffer on badly formed XML content.
TEST_F(ExpatParserTest,TestFailingParseBuffer)102 TEST_F(ExpatParserTest, TestFailingParseBuffer) {
103   // kXml is not well-formed.
104   const string kXml("<Tom><dick>foo</dick><harry>bar</harry>");
105   ExpatParser parser(&handler_, false);
106 
107   // Parse the string one character at a time.
108   for (size_t i = 0; i < kXml.length(); ++i) {
109     ASSERT_TRUE(parser.ParseBuffer(kXml.substr(i, 1), &errors_, false));
110     ASSERT_TRUE(errors_.empty());
111   }
112   // Now set the is_final bool to true to indicate that we believe parsing
113   // is done. Expat will check and see that its own parsing state shows
114   // more content is necessary because our XML is missing the closing
115   // </Tom> tag.
116   ASSERT_FALSE(parser.ParseBuffer("", &errors_, true));
117   ASSERT_FALSE(errors_.empty());
118 
119   ASSERT_EQ(kXml, handler_.get_xml());
120 }
121 
122 // Assert that we detect a mid-stream parsing failure.
TEST_F(ExpatParserTest,TestMidstreamFailingParseBuffer)123 TEST_F(ExpatParserTest, TestMidstreamFailingParseBuffer) {
124   const string k0("<A><B><C><D>");
125   const string k1("</D>");  // This is fine.
126   const string k2("</B>");  // XML is badly formed here, missing </C>.
127   ExpatParser parser(&handler_, false);
128 
129   ASSERT_TRUE(parser.ParseBuffer(k0, &errors_, false));
130   ASSERT_TRUE(errors_.empty());
131 
132   ASSERT_TRUE(parser.ParseBuffer(k1, &errors_, false));
133   ASSERT_TRUE(errors_.empty());
134 
135   ASSERT_FALSE(parser.ParseBuffer(k2, &errors_, false));
136   ASSERT_FALSE(errors_.empty());
137 }
138 
139 // Verify basic usage of the GetInternalBuffer and ParseInternalBuffer methods.
TEST_F(ExpatParserTest,TestPassingParseInternalBuffer)140 TEST_F(ExpatParserTest, TestPassingParseInternalBuffer) {
141   const string kXml("<Tom><dick>foo</dick><harry>bar</harry></Tom>");
142   ExpatParser parser(&handler_, false);
143 
144   // Parse the string one character at a time.
145   for (size_t i = 0; i < kXml.length(); ++i) {
146     char* buf = static_cast<char*>(parser.GetInternalBuffer(1));
147     *buf = kXml[i];
148     ASSERT_TRUE(parser.ParseInternalBuffer(1, &errors_, i == kXml.length()-1));
149   }
150   ASSERT_EQ(kXml, handler_.get_xml());
151   ASSERT_TRUE(errors_.empty());
152 }
153 
// Assert that a mid-stream parsing failure is detected when the input is
// supplied through GetInternalBuffer/ParseInternalBuffer.
TEST_F(ExpatParserTest, TestFailingInternalBuffer) {
  const string k0("<A><B><C><D>");
  const string k1("</D>");  // This is fine.
  const string k2("</B>");  // XML is badly formed here, missing </C>.
  ExpatParser parser(&handler_, false);

  // Each buffer request is checked before it is written to, so that a failed
  // request fails the test instead of copying into a null pointer.
  // NOTE(review): assumes GetInternalBuffer reports failure by returning
  // NULL, as Expat's XML_GetBuffer does - confirm against expat_parser.h.
  void* buf = parser.GetInternalBuffer(k0.size());
  ASSERT_TRUE(buf != NULL);
  std::memcpy(buf, k0.data(), k0.size());
  ASSERT_TRUE(parser.ParseInternalBuffer(k0.size(), &errors_, false));
  ASSERT_TRUE(errors_.empty());

  buf = parser.GetInternalBuffer(k1.size());
  ASSERT_TRUE(buf != NULL);
  std::memcpy(buf, k1.data(), k1.size());
  ASSERT_TRUE(parser.ParseInternalBuffer(k1.size(), &errors_, false));
  ASSERT_TRUE(errors_.empty());

  // The mismatched close tag must make the parse fail with an error message.
  buf = parser.GetInternalBuffer(k2.size());
  ASSERT_TRUE(buf != NULL);
  std::memcpy(buf, k2.data(), k2.size());
  ASSERT_FALSE(parser.ParseInternalBuffer(k2.size(), &errors_, true));
  ASSERT_FALSE(errors_.empty());
}
175 
// Regression test for the "billion laughs" attack. The original note on this
// test records that such input previously hung libkml; the parse must now
// fail, report an error, and deliver nothing to the handler.
TEST_F(ExpatParserTest, TestBillionLaughsAttack) {
  // The attack document lives in the test data tree rooted at DATADIR.
  string attack_path(DATADIR);
  attack_path += "/kml/billion.kml";

  string contents;
  ASSERT_TRUE(File::ReadFileToString(attack_path, &contents));

  const bool parsed =
      ExpatParser::ParseString(contents, &handler_, &errors_, false);
  ASSERT_FALSE(parsed);
  ASSERT_FALSE(errors_.empty());
  ASSERT_TRUE(handler_.get_xml().empty());
}
187 
// Entity declarations must stop the parse. Here the DOCTYPE with its nested
// entity definitions appears after the root element has already been opened,
// so the handler is expected to have seen only that opening tag.
TEST_F(ExpatParserTest, TestEntitiesStopParser) {
  // This is malformed XML.
  const string entity_bomb =
    "<Placemark>"
    "<!DOCTYPE billion ["
    "<!ELEMENT billion (#PCDATA)>"
    "<!ENTITY laugh0 \"ha\">"
    "<!ENTITY laugh1 \"&laugh0;&laugh0;\">"
    "<!ENTITY laugh2 \"&laugh1;&laugh1;\">"
    "<!ENTITY laugh3 \"&laugh2;&laugh2;\">"
    "<!ENTITY laugh4 \"&laugh3;&laugh3;\">"
    "<!ENTITY laugh5 \"&laugh4;&laugh4;\">"
    "<!ENTITY laugh6 \"&laugh5;&laugh5;\">"
    "<!ENTITY laugh7 \"&laugh6;&laugh6;\">"
    "<!ENTITY laugh8 \"&laugh7;&laugh7;\">"
    "<!ENTITY laugh9 \"&laugh8;&laugh8;\">"
    "<!ENTITY laugh10 \"&laugh9;&laugh9;\">"
    "<!ENTITY laugh11 \"&laugh10;&laugh10;\">"
    "<!ENTITY laugh12 \"&laugh11;&laugh11;\">"
    "<!ENTITY laugh13 \"&laugh12;&laugh12;\">"
    "<!ENTITY laugh14 \"&laugh13;&laugh13;\">"
    "<!ENTITY laugh15 \"&laugh14;&laugh14;\">"
    "<!ENTITY laugh16 \"&laugh15;&laugh15;\">"
    "<!ENTITY laugh17 \"&laugh16;&laugh16;\">"
    "<!ENTITY laugh18 \"&laugh17;&laugh17;\">"
    "<!ENTITY laugh19 \"&laugh18;&laugh18;\">"
    "<!ENTITY laugh20 \"&laugh19;&laugh19;\">"
    "<!ENTITY laugh21 \"&laugh20;&laugh20;\">"
    "<!ENTITY laugh22 \"&laugh21;&laugh21;\">"
    "<!ENTITY laugh23 \"&laugh22;&laugh22;\">"
    "<!ENTITY laugh24 \"&laugh23;&laugh23;\">"
    "<!ENTITY laugh25 \"&laugh24;&laugh24;\">"
    "<!ENTITY laugh26 \"&laugh25;&laugh25;\">"
    "<!ENTITY laugh27 \"&laugh26;&laugh26;\">"
    "<!ENTITY laugh28 \"&laugh27;&laugh27;\">"
    "<!ENTITY laugh29 \"&laugh28;&laugh28;\">"
    "<!ENTITY laugh30 \"&laugh29;&laugh29;\">"
    "]>"
    "<billion>&laugh30;</billion>"
    "</Placemark>";

  const bool parsed =
      ExpatParser::ParseString(entity_bomb, &handler_, &errors_, false);
  ASSERT_FALSE(parsed);
  ASSERT_FALSE(errors_.empty());
  // Only the root element's opening tag reached the handler.
  ASSERT_EQ(string("<Placemark>"), handler_.get_xml());
}
232 
// Non-ASCII character data must pass through the parser unchanged.
TEST_F(ExpatParserTest, TestUnicode) {
  const string unicode_kml =
      "<Placemark>"
      "<name>"
      "one\xe2\x80\x93two"  // Contains a UTF-8 encoded en-dash.
      "</name>"
      "</Placemark>";

  const bool parsed =
      ExpatParser::ParseString(unicode_kml, &handler_, &errors_, false);
  ASSERT_TRUE(parsed);
  ASSERT_TRUE(errors_.empty());
  // A mismatch on this roundtrip points at problems in the XML_UNICODE paths.
  ASSERT_EQ(unicode_kml, handler_.get_xml());
}
246 
// Exercises xmlchar_to_utf8 with null arguments, with a plain ASCII
// character, and with build-dependent wide or pass-through input.
TEST_F(ExpatParserTest, TestUnicodeToUtf8) {
  string utf8;
  const XML_Char ascii_a = 'a';

  // None of the null-argument combinations may crash, and a null input must
  // leave the output string untouched.
  xmlchar_to_utf8(&ascii_a, NULL);
  xmlchar_to_utf8(NULL, &utf8);
  ASSERT_TRUE(utf8.empty());
  xmlchar_to_utf8(NULL, NULL);

  // An ASCII character converts to itself.
  xmlchar_to_utf8(&ascii_a, &utf8);
  ASSERT_EQ("a", utf8);

  // The remaining checks depend on XML_UNICODE, so this file has to be built
  // with the same flags that were used to build libexpat.
#if XML_UNICODE
  ASSERT_TRUE(sizeof(XML_Char) > 1);

  // A character that needs a two-byte UTF-8 encoding.
  utf8.clear();
  const XML_Char two_byte_char = 0x262;
  xmlchar_to_utf8(&two_byte_char, &utf8);
  ASSERT_EQ("\xc9\xa2", utf8);

  // A character that needs a three-byte UTF-8 encoding.
  utf8.clear();
  const XML_Char three_byte_char = 0x2103;
  xmlchar_to_utf8(&three_byte_char, &utf8);
  ASSERT_EQ("\xe2\x84\x83", utf8);
#else
  // In pass-through mode a UTF-8 lead byte must come out unmodified.
  utf8.clear();
  const XML_Char lead_byte = 0xe2 ;
  xmlchar_to_utf8(&lead_byte, &utf8);
  ASSERT_EQ("\xe2", utf8);
#endif  // XML_UNICODE
}
283 
// Exercises the XML_Char conversion helpers xml_char_to_string,
// xml_char_to_string_n and xml_char_to_string_vec with null, empty and
// ordinary inputs. All ASSERT_EQ calls pass the expected value first, in line
// with the other tests in this file.
TEST_F(ExpatParserTest, TestXmlUnicodeHandlers) {
  // The contrived-looking array approach here is so we're safe with either
  // sane build options or XML_UNICODE.
  const XML_Char kXMLChar[] = {'<', 'A', '>', '<', 'B', '>', 0 };
  const XML_Char kXMLChar2[] = {'<', 'C', '>', '<', 'D', '>', 0 };
  const XML_Char kEmptyString[] = { 0 };
  string s1;
  // Ensure roundtrip is OK.  If this fails, suspect XML_UNICODE mismatches
  // in linked expat lib and this source.
  ASSERT_EQ("<A><B>", xml_char_to_string(kXMLChar));

  // Check null inputs.
  s1 = xml_char_to_string(NULL);
  ASSERT_TRUE(s1.empty());

  // Check empty inputs.
  s1 = xml_char_to_string(kEmptyString);
  ASSERT_TRUE(s1.empty());

  // Exercise xml_char_to_string_n: zero characters, then a three-character
  // prefix.
  s1 = xml_char_to_string_n(kXMLChar, 0);
  ASSERT_TRUE(s1.empty());
  s1 = xml_char_to_string_n(kXMLChar, 3);
  ASSERT_EQ("<A>", s1);

  // Now the array version. A null array must produce no entries.
  std::vector<string> a;
  xml_char_to_string_vec(NULL, &a);
  ASSERT_TRUE(a.empty());

  // Check empty array (only the NULL terminator).
  const XML_Char* kXMLEmptyArray[] = {NULL};
  xml_char_to_string_vec(kXMLEmptyArray, &a);
  ASSERT_TRUE(a.empty());

  // Check common case: two strings followed by the NULL terminator.
  const XML_Char* kXMLArray[] = {kXMLChar, kXMLChar2, NULL};
  xml_char_to_string_vec(kXMLArray, &a);
  ASSERT_EQ(static_cast<size_t>(2), a.size());
  ASSERT_EQ("<A><B>", a.at(0));
  ASSERT_EQ("<C><D>", a.at(1));
}
326 
327 }  // end namespace kmlbase
328