1 // Copyright 2008, Google Inc. All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are met:
5 //
6 // 1. Redistributions of source code must retain the above copyright notice,
7 // this list of conditions and the following disclaimer.
8 // 2. Redistributions in binary form must reproduce the above copyright notice,
9 // this list of conditions and the following disclaimer in the documentation
10 // and/or other materials provided with the distribution.
11 // 3. Neither the name of Google Inc. nor the names of its contributors may be
12 // used to endorse or promote products derived from this software without
13 // specific prior written permission.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
16 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
18 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
26 // This file implements the KML Parse() function. The parser uses expat.
27 // The overall process is as follows:
28 // 1) map instance file element name to id
29 // 2) create Element for each element and push onto stack at StartElement
30 // 2a) call NewElement() for each ParserObserver.
31 // 3) gather character data for simple elements
32 // 4) in EndElement pop off the child to add to the parent on the stack
33 // 4a) call AddChild() for each ParserObserver.
34
35 #include "kml/dom/kml_handler.h"
36 #include "boost/scoped_ptr.hpp"
37 #include "kml/base/attributes.h"
38 #include "kml/dom/element.h"
39 #include "kml/dom/kml_cast.h"
40 #include "kml/dom/kml_factory.h"
41 #include "kml/dom/parser_observer.h"
42 #include "kml/dom/xsd.h"
43
44 using kmlbase::Attributes;
45 using kmlbase::StringVector;
46
// The maximum nesting depth we permit. Depths beyond this are treated as
// errors. Override it by defining LIBKML_MAX_NESTING_DEPTH on the compiler
// command line (e.g. -DLIBKML_MAX_NESTING_DEPTH=200).
// The override has to select the initializer: a const object cannot be
// assigned to after its definition, and an assignment statement is not valid
// at namespace scope anyway.
// TODO: some flags-like solution would be preferable.
#ifdef LIBKML_MAX_NESTING_DEPTH
static const unsigned int kMaxNestingDepth = LIBKML_MAX_NESTING_DEPTH;
#else
static const unsigned int kMaxNestingDepth = 100;
#endif
55
56 namespace kmldom {
57
KmlHandler(parser_observer_vector_t & observers)58 KmlHandler::KmlHandler(parser_observer_vector_t& observers)
59 : kml_factory_(*KmlFactory::GetFactory()),
60 skip_depth_(0),
61 in_description_(0),
62 nesting_depth_(0),
63 in_old_schema_placemark_(false),
64 observers_(observers) {
65 }
66
~KmlHandler()67 KmlHandler::~KmlHandler() {
68 // stack_'s destructor calls the destructor of each ElementPtr releasing
69 // the reference and potentially freeing the associated storage.
70 }
71
StartElement(const string & name,const StringVector & attrs)72 void KmlHandler::StartElement(const string& name,
73 const StringVector& attrs) {
74 // Check that we're not nested beyond the max permissible depth.
75 if (++nesting_depth_ > kMaxNestingDepth) {
76 XML_StopParser(get_parser(), XML_TRUE);
77 return;
78 }
79 // 3 possibilities:
80 // 1) complex element: create an Element.
81 // 2) simple element: create a Field
82 // 3) unknown element: save XML as a string inside the parent element.
83 // No matter what an Element is pushed onto the stack and we always gather
84 // character data.
85
86 // See the comment towards the end of this function about permitting "raw"
87 // HTML inside <description> elements. This check will catch an instance
88 // of a <description> inside a <description> and permit us to handle it
89 // correctly as unknown text.
90 if (in_description_ > 0 && name.length() == 11 && name == "description") {
91 in_description_++;
92 }
93
94 if (skip_depth_ > 0) {
95 // We're already inside an unknown element. Stringify the next element and
96 // its attributes, increment the skip counter again, and return
97 // immediately.
98 InsertUnknownStartElement(name, attrs);
99 skip_depth_++;
100 return;
101 }
102
103 // If we see <Schema parent=""> then we attempt to parse the old Schema
104 // usage outlined in the header. The name of the schema is stored in the
105 // old_schema_name_ string.
106 // Yes, this means that we'll only do this kind of parse if the Schema
107 // defines its children before they appear. But, as mentioned in the header,
108 // this is exactly Google Earth's behavior. Likewise, only one <Schema>
109 // element is used to define a subclass of Placemark. In the case of
110 // multiple Schema elements appearing at the top of the file, the last
111 // one wins.
112 if (name.length() == 6 && name == "Schema") {
113 FindOldSchemaParentName(attrs, &old_schema_name_);
114 }
115
116 // Push a string onto the stack we'll use to manage the gathering of
117 // character data.
118 string element_char_data;
119 char_data_.push(element_char_data);
120
121 ElementPtr element;
122
123 KmlDomType type_id =
124 static_cast<KmlDomType>(Xsd::GetSchema()->ElementId(name));
125
126 // If we're parsing old Schema usage, we force the creation of a Placemark.
127 if (!old_schema_name_.empty() && name == old_schema_name_) {
128 // Treat this as a Placemark.
129 type_id = Type_Placemark;
130 }
131
132 XsdType xsd_type = Xsd::GetSchema()->ElementType(type_id);
133 if ((xsd_type == XSD_COMPLEX_TYPE) &&
134 (element = kml_factory_.CreateElementById(type_id))) {
135
136 // Icon as a child of IconStyle is really IconStyleIcon
137 if (element->Type() == Type_Icon) {
138 // If there is a parent and it is IconStyle...
139 if (!stack_.empty() && stack_.top()->Type() == Type_IconStyle) {
140 // ... delete the Icon and create an IconStyleIcon instead.
141 element = kml_factory_.CreateElementById(Type_IconStyleIcon);
142 }
143 }
144
145 // We parse attributes only if StartElement received any.
146 if (!attrs.empty()) {
147 // Element::ParseAttributes takes ownership of the created Attributes.
148 element->ParseAttributes(Attributes::Create(attrs));
149 }
150 } else if (xsd_type == XSD_SIMPLE_TYPE) {
151 element = kml_factory_.CreateFieldById(type_id);
152 } else if (xsd_type == XSD_UNKNOWN && !old_schema_name_.empty()) {
153 // We might be parsing one of the children of the old schema usage.
154 in_old_schema_placemark_ = ParseOldSchemaChild(name, simplefield_name_vec_,
155 &simpledata_vec_);
156 if (in_old_schema_placemark_) {
157 return;
158 }
159 }
160
161 if (!element) {
162 if (stack_.empty()) {
163 // Root element is not known. XML_TRUE causes XML_Parse() to return
164 // XML_STATUS_SUSPENDED. Returning XML_FALSE _can_ result in
165 // XML_Parse() returning XML_STATUS_OK.
166 XML_StopParser(get_parser(), XML_TRUE);
167 return;
168 }
169 // The transition point from known to unknown KML. We treat everything
170 // from this point as a string until EndElement has decremented the
171 // skip_depth_ counter to 0.
172 InsertUnknownStartElement(name, attrs);
173 skip_depth_++;
174 return;
175 }
176 // This is a known element. Push onto parse stack and gather content.
177 stack_.push(element);
178
179 // We need to permit parsing of un-CDATA'd markup inside <description>
180 // elements. We bump the skip counter here as if we'd encountered an unknown
181 // element, but only after we've allowed the description ElementPtr to be
182 // pushed onto the stack. In EndElement we'll check for the closing of
183 // description and decrement the skip counter before anything else happens.
184 if (element->Type() == Type_description) {
185 skip_depth_++;
186 in_description_++;
187 }
188
189 // Call the NewElement() method of each ParserObserver. The whole parse
190 // terminates if and when any observer's NewElement() returns false.
191 if (!CallNewElementObservers(observers_, element)) {
192 XML_StopParser(get_parser(), XML_TRUE);
193 }
194 }
195
196 // private
CallNewElementObservers(const parser_observer_vector_t & observers,const ElementPtr & element)197 bool KmlHandler::CallNewElementObservers(
198 const parser_observer_vector_t& observers, const ElementPtr& element) {
199 for (size_t i = 0; i < observers_.size(); ++i) {
200 if (!observers_[i]->NewElement(element)) {
201 return false;
202 }
203 }
204 return true;
205 }
206
EndElement(const string & name)207 void KmlHandler::EndElement(const string& name) {
208 --nesting_depth_;
209 // See the comment towards the end of StartElement about handling "raw" HTML
210 // inside <description> elements. Here we are checking to see if (1) we're
211 // inside a closing </description> element and (2) if we're at the end of any
212 // possible series of nested description elements.
213 if (name.length() == 11 && name == "description" && --in_description_ == 0) {
214 skip_depth_--;
215 }
216
217 if (skip_depth_ > 0) {
218 // We're inside an unknown element. Build the closing tag, decrement
219 // the skip counter and then check if we're back to known KML.
220 InsertUnknownEndElement(name);
221 if (--skip_depth_ == 0) {
222 // The next element will be known KML. Push the gathered char_data_ up
223 // to Element as a string for serializiation later on.
224 char_data_.top().append("\n");
225 stack_.top()->AddUnknownElement(char_data_.top());
226 char_data_.pop();
227 }
228 return;
229 }
230
231 // This is to cover the special case of "<unknown/>" in expat where
232 // endElement is still called even if XML_StopParser() was called in the
233 // beginElement handler. See the discussion on this matter in
234 // XML_StopParser comment in expat.h.
235 if (stack_.empty()) {
236 return;
237 }
238
239 // If we're parsing an old Schema placemark child, store the character data
240 // gathered into the SimpleData element we put on a stack in StartElement.
241 if (in_old_schema_placemark_ && simpledata_vec_.size() > 0) {
242 // TODO: the pretty serialization of SimpleData will produce some
243 // ugly (but harmless) whitespace and unnecessary line breaks. Fix this in
244 // the serializer.
245 simpledata_vec_.back()->set_text(char_data_.top());
246 char_data_.pop();
247 in_old_schema_placemark_ = false;
248 return;
249 }
250
251 // The top of the stack is the begin of the element ending here.
252 ElementPtr child = stack_.top();
253
254 string child_char_data_ = char_data_.top();
255 char_data_.pop();
256
257 child->set_char_data(child_char_data_);
258
259 if (child->Type() == Type_coordinates ||
260 child->Type() == Type_Snippet ||
261 child->Type() == Type_linkSnippet ||
262 child->Type() == Type_SimpleData) {
263 // These are effectively complex elements, but with character data.
264 child->AddElement(child); // "Parse yourself"
265 }
266
267 // Check if we're parsing old-style Schema KML. If we are, and if this
268 // EndElement is the closing </Schema>, give the schema an id (by appending
269 // "_id" to its name) and walk through its <SimpleField> children to
270 // discover what element name we should special-case in StartElement.
271
272 // Handle the case of reaching the closing of an old-style </Schema>.
273 if (!old_schema_name_.empty()) {
274 if (name.length() == 6 && name == "Schema") {
275 HandleOldSchemaEndElement(AsSchema(child), old_schema_name_,
276 &simplefield_name_vec_);
277 } else if (name == old_schema_name_) {
278 // Or that of its Placemark substitute.
279 HandleOldSchemaParentEndElement(AsPlacemark(child), old_schema_name_,
280 kml_factory_, simpledata_vec_);
281 }
282 }
283
284 // If stack_.size() == 1 this is the root element: leave it alone.
285 if (stack_.size() >= 2) {
286 // We have a parent. Pop ourselves off and hand to parent. Parent element
287 // always takes ownsership: 1) a known complex child, 2) a known field,
288 // or 3) unknown is passed onwards to its parent and possibly ultimately
289 // to the unknown element list in Element.
290 stack_.pop();
291 if (CallEndElementObservers(observers_, stack_.top(), child)) {
292 stack_.top()->AddElement(child);
293 }
294 if (!CallAddChildObservers(observers_, stack_.top(), child)) {
295 XML_StopParser(get_parser(), XML_TRUE);
296 }
297 }
298 }
299
CallEndElementObservers(const parser_observer_vector_t & observers,const ElementPtr & parent,const ElementPtr & child)300 bool KmlHandler::CallEndElementObservers(
301 const parser_observer_vector_t& observers, const ElementPtr& parent,
302 const ElementPtr& child) {
303 for (size_t i = 0; i < observers_.size(); ++i) {
304 if (!observers_[i]->EndElement(parent, child)) {
305 return false;
306 }
307 }
308 return true;
309 }
310
311 // private
CallAddChildObservers(const parser_observer_vector_t & observers,const ElementPtr & parent,const ElementPtr & child)312 bool KmlHandler::CallAddChildObservers(
313 const parser_observer_vector_t& observers, const ElementPtr& parent,
314 const ElementPtr& child) {
315 for (size_t i = 0; i < observers_.size(); ++i) {
316 if (!observers_[i]->AddChild(parent, child)) {
317 return false;
318 }
319 }
320 return true;
321 }
322
323 // Note the handling of char data w.r.t. unknown elements. If we are within
324 // a known element that cannot contain char data, setting it in EndElement is
325 // a no-op. For known elements within unknown elements, everything is treated
326 // as a string and _all_ data is saved.
327 // <Placemark><Point>foo<coordinates/>bar</Point></Placemark> becomes:
328 // <Placemark><Point><coordinates/></Point></Placemark>
329 // <X><Point>foo<coordinates/>bar</Point></P> remains as-is.
CharData(const string & s)330 void KmlHandler::CharData(const string& s) {
331 char_data_.top().append(s);
332 }
333
334 // As with STL pop() methods this is (potentially) destructive. If the
335 // parse succeeded the root element will be the only item on the stack and
336 // this method will detach it. Either way the destructor will delete all
337 // elements on the stack. This should only be called after XML_Parse()
338 // has completed.
PopRoot()339 ElementPtr KmlHandler::PopRoot() {
340 if (!stack_.empty() && stack_.size() == 1) {
341 ElementPtr root = stack_.top();
342 stack_.pop();
343 return root;
344 }
345 return NULL;
346 }
347
348 // Private.
InsertUnknownStartElement(const string & name,const StringVector & atts)349 void KmlHandler::InsertUnknownStartElement(const string& name,
350 const StringVector& atts) {
351 string& top = char_data_.top();
352 top.append("<");
353 top.append(name);
354 for (size_t i = 0; i < atts.size(); i += 2) {
355 top.append(" ");
356 top.append(atts.at(i));
357 top.append("=\"");
358 top.append(atts.at(i+1));
359 top.append("\"");
360 }
361 top.append(">");
362 }
363
364 // Private.
InsertUnknownEndElement(const string & name)365 void KmlHandler::InsertUnknownEndElement(const string& name) {
366 string& top = char_data_.top();
367 top.append("</");
368 top.append(name);
369 top.append(">");
370 }
371
372 // Static, private.
FindOldSchemaParentName(const StringVector & attrs,string * old_schema_name)373 void KmlHandler::FindOldSchemaParentName(const StringVector& attrs,
374 string* old_schema_name) {
375 boost::scoped_ptr<Attributes> schema_attrs(Attributes::Create(attrs));
376 if (schema_attrs.get() && old_schema_name &&
377 schema_attrs->FindValue("parent", NULL)) {
378 schema_attrs->FindValue("name", old_schema_name);
379 }
380 }
381
382 // Static, private.
ParseOldSchemaChild(const string & name,const StringVector & simplefield_name_vec,std::vector<SimpleDataPtr> * simpledata_vec)383 bool KmlHandler::ParseOldSchemaChild(
384 const string& name,
385 const StringVector& simplefield_name_vec,
386 std::vector<SimpleDataPtr>* simpledata_vec) {
387 // We'll iterate through a vector of possible names (created in
388 // EndElement) and check to see if we have a match. If we do, we'll make
389 // a SimpleData element and put it on a stack for later re-parenting to
390 // an ExtendedData element (again in EndElement).
391 if (!simpledata_vec) {
392 return false;
393 }
394 StringVector::const_iterator itr = simplefield_name_vec.begin();
395 for (; itr != simplefield_name_vec.end(); itr++) {
396 if (name == *itr) {
397 // Treat this as a SimpleData element.
398 SimpleDataPtr simpledata = KmlFactory::GetFactory()->CreateSimpleData();
399 simpledata->set_name(name);
400 simpledata_vec->push_back(simpledata);
401 return true;
402 }
403 }
404 return false;
405 }
406
407 // Static, private.
HandleOldSchemaEndElement(const SchemaPtr & schema,const string & old_schema_name,StringVector * simplefield_name_vec)408 void KmlHandler::HandleOldSchemaEndElement(
409 const SchemaPtr& schema,
410 const string& old_schema_name,
411 StringVector* simplefield_name_vec) {
412 if (!simplefield_name_vec) {
413 return;
414 }
415 schema->set_id(old_schema_name + "_id");
416 // TODO: nuke the parent="Placemark" attr.
417 for (size_t i = 0; i < schema->get_simplefield_array_size(); i++) {
418 if (const SimpleFieldPtr& simplefield =
419 AsSimpleField(schema->get_simplefield_array_at(i))) {
420 if (simplefield->has_name()) {
421 simplefield_name_vec->push_back(simplefield->get_name());
422 }
423 }
424 }
425 }
426
427 // Static, private.
HandleOldSchemaParentEndElement(const PlacemarkPtr & placemark,const string & old_schema_name,const KmlFactory & kml_factory,const std::vector<SimpleDataPtr> simpledata_vec)428 void KmlHandler::HandleOldSchemaParentEndElement(
429 const PlacemarkPtr& placemark,
430 const string& old_schema_name,
431 const KmlFactory& kml_factory,
432 const std::vector<SimpleDataPtr> simpledata_vec) {
433 // We've reached the closing tag of the old placemark substitute
434 // element. Take the SimpleData elements we've been creating from its
435 // children and hand them to an ExtendedData, then give that to the
436 // parent placemark.
437 ExtendedDataPtr extendeddata = kml_factory.CreateExtendedData();
438 SchemaDataPtr schemadata = kml_factory.CreateSchemaData();
439 schemadata->set_schemaurl(old_schema_name + "_id");
440 std::vector<SimpleDataPtr>::const_iterator itr =
441 simpledata_vec.begin();
442 for (; itr != simpledata_vec.end(); itr++) {
443 schemadata->add_simpledata(*itr);
444 }
445 extendeddata->add_schemadata(schemadata);
446 placemark->set_extendeddata(extendeddata);
447 }
448
449 } // end namespace kmldom
450
451