1 // Copyright 2008, Google Inc. All rights reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are met:
5 //
6 //  1. Redistributions of source code must retain the above copyright notice,
7 //     this list of conditions and the following disclaimer.
8 //  2. Redistributions in binary form must reproduce the above copyright notice,
9 //     this list of conditions and the following disclaimer in the documentation
10 //     and/or other materials provided with the distribution.
11 //  3. Neither the name of Google Inc. nor the names of its contributors may be
12 //     used to endorse or promote products derived from this software without
13 //     specific prior written permission.
14 //
15 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
16 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
18 // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 
26 // This file implements the KML Parse() function.  The parser uses expat.
27 // The overall process is as follows:
28 // 1) map instance file element name to id
29 // 2) create Element for each element and push onto stack at StartElement
30 // 2a) call NewElement() for each ParserObserver.
31 // 3) gather character data for simple elements
32 // 4) in EndElement pop off the child to add to the parent on the stack
33 // 4a) call AddChild() for each ParserObserver.
34 
35 #include "kml/dom/kml_handler.h"
36 #include "boost/scoped_ptr.hpp"
37 #include "kml/base/attributes.h"
38 #include "kml/dom/element.h"
39 #include "kml/dom/kml_cast.h"
40 #include "kml/dom/kml_factory.h"
41 #include "kml/dom/parser_observer.h"
42 #include "kml/dom/xsd.h"
43 
44 using kmlbase::Attributes;
45 using kmlbase::StringVector;
46 
// The maximum element nesting depth the parser permits. StartElement stops
// the parse once the depth exceeds this value. The default of 100 can be
// replaced at build time by defining LIBKML_MAX_NESTING_DEPTH (for example
// -DLIBKML_MAX_NESTING_DEPTH=200). The value is chosen with #ifdef/#else
// because a const cannot be assigned to after its definition.
// TODO: some flags-like solution would be preferable.
#ifdef LIBKML_MAX_NESTING_DEPTH
static const unsigned int kMaxNestingDepth = LIBKML_MAX_NESTING_DEPTH;
#else
static const unsigned int kMaxNestingDepth = 100;
#endif
55 
56 namespace kmldom {
57 
KmlHandler(parser_observer_vector_t & observers)58 KmlHandler::KmlHandler(parser_observer_vector_t& observers)
59   : kml_factory_(*KmlFactory::GetFactory()),
60     skip_depth_(0),
61     in_description_(0),
62     nesting_depth_(0),
63     in_old_schema_placemark_(false),
64     observers_(observers) {
65 }
66 
~KmlHandler()67 KmlHandler::~KmlHandler() {
68   // stack_'s destructor calls the destructor of each ElementPtr releasing
69   // the reference and potentially freeing the associated storage.
70 }
71 
StartElement(const string & name,const StringVector & attrs)72 void KmlHandler::StartElement(const string& name,
73                               const StringVector& attrs) {
74   // Check that we're not nested beyond the max permissible depth.
75   if (++nesting_depth_ > kMaxNestingDepth) {
76     XML_StopParser(get_parser(), XML_TRUE);
77     return;
78   }
79   // 3 possibilities:
80   // 1) complex element: create an Element.
81   // 2) simple element: create a Field
82   // 3) unknown element: save XML as a string inside the parent element.
83   // No matter what an Element is pushed onto the stack and we always gather
84   // character data.
85 
86   // See the comment towards the end of this function about permitting "raw"
87   // HTML inside <description> elements. This check will catch an instance
88   // of a <description> inside a <description> and permit us to handle it
89   // correctly as unknown text.
90   if (in_description_ > 0 && name.length() == 11 && name == "description") {
91     in_description_++;
92   }
93 
94   if (skip_depth_ > 0) {
95     // We're already inside an unknown element. Stringify the next element and
96     // its attributes, increment the skip counter again, and return
97     // immediately.
98     InsertUnknownStartElement(name, attrs);
99     skip_depth_++;
100     return;
101   }
102 
103   // If we see <Schema parent=""> then we attempt to parse the old Schema
104   // usage outlined in the header. The name of the schema is stored in the
105   // old_schema_name_ string.
106   // Yes, this means that we'll only do this kind of parse if the Schema
107   // defines its children before they appear. But, as mentioned in the header,
108   // this is exactly Google Earth's behavior. Likewise, only one <Schema>
109   // element is used to define a subclass of Placemark. In the case of
110   // multiple Schema elements appearing at the top of the file, the last
111   // one wins.
112   if (name.length() == 6 && name == "Schema") {
113     FindOldSchemaParentName(attrs, &old_schema_name_);
114   }
115 
116   // Push a string onto the stack we'll use to manage the gathering of
117   // character data.
118   string element_char_data;
119   char_data_.push(element_char_data);
120 
121   ElementPtr element;
122 
123   KmlDomType type_id =
124     static_cast<KmlDomType>(Xsd::GetSchema()->ElementId(name));
125 
126   // If we're parsing old Schema usage, we force the creation of a Placemark.
127   if (!old_schema_name_.empty() && name == old_schema_name_) {
128     // Treat this as a Placemark.
129     type_id = Type_Placemark;
130   }
131 
132   XsdType xsd_type = Xsd::GetSchema()->ElementType(type_id);
133   if ((xsd_type == XSD_COMPLEX_TYPE) &&
134       (element = kml_factory_.CreateElementById(type_id))) {
135 
136     // Icon as a child of IconStyle is really IconStyleIcon
137     if (element->Type() == Type_Icon) {
138       // If there is a parent and it is IconStyle...
139       if (!stack_.empty() && stack_.top()->Type() == Type_IconStyle) {
140         // ... delete the Icon and create an IconStyleIcon instead.
141         element = kml_factory_.CreateElementById(Type_IconStyleIcon);
142       }
143     }
144 
145     // We parse attributes only if StartElement received any.
146     if (!attrs.empty()) {
147       // Element::ParseAttributes takes ownership of the created Attributes.
148       element->ParseAttributes(Attributes::Create(attrs));
149     }
150   } else if (xsd_type == XSD_SIMPLE_TYPE) {
151     element = kml_factory_.CreateFieldById(type_id);
152   } else if (xsd_type == XSD_UNKNOWN && !old_schema_name_.empty()) {
153     // We might be parsing one of the children of the old schema usage.
154     in_old_schema_placemark_ = ParseOldSchemaChild(name, simplefield_name_vec_,
155                                                    &simpledata_vec_);
156     if (in_old_schema_placemark_) {
157       return;
158     }
159   }
160 
161   if (!element) {
162     if (stack_.empty()) {
163       // Root element is not known.  XML_TRUE causes XML_Parse() to return
164       // XML_STATUS_SUSPENDED.  Returning XML_FALSE _can_ result in
165       // XML_Parse() returning XML_STATUS_OK.
166       XML_StopParser(get_parser(), XML_TRUE);
167       return;
168     }
169     // The transition point from known to unknown KML. We treat everything
170     // from this point as a string until EndElement has decremented the
171     // skip_depth_ counter to 0.
172     InsertUnknownStartElement(name, attrs);
173     skip_depth_++;
174     return;
175   }
176   // This is a known element.  Push onto parse stack and gather content.
177   stack_.push(element);
178 
179   // We need to permit parsing of un-CDATA'd markup inside <description>
180   // elements. We bump the skip counter here as if we'd encountered an unknown
181   // element, but only after we've allowed the description ElementPtr to be
182   // pushed onto the stack. In EndElement we'll check for the closing of
183   // description and decrement the skip counter before anything else happens.
184   if (element->Type() == Type_description) {
185     skip_depth_++;
186     in_description_++;
187   }
188 
189   // Call the NewElement() method of each ParserObserver.  The whole parse
190   // terminates if and when any observer's NewElement() returns false.
191   if (!CallNewElementObservers(observers_, element)) {
192     XML_StopParser(get_parser(), XML_TRUE);
193   }
194 }
195 
196 // private
CallNewElementObservers(const parser_observer_vector_t & observers,const ElementPtr & element)197 bool KmlHandler::CallNewElementObservers(
198     const parser_observer_vector_t& observers, const ElementPtr& element) {
199   for (size_t i = 0; i < observers_.size(); ++i) {
200     if (!observers_[i]->NewElement(element)) {
201       return false;
202     }
203   }
204   return true;
205 }
206 
EndElement(const string & name)207 void KmlHandler::EndElement(const string& name) {
208   --nesting_depth_;
209   // See the comment towards the end of StartElement about handling "raw" HTML
210   // inside <description> elements. Here we are checking to see if (1) we're
211   // inside a closing </description> element and (2) if we're at the end of any
212   // possible series of nested description elements.
213   if (name.length() == 11 && name == "description" && --in_description_ == 0) {
214     skip_depth_--;
215   }
216 
217   if (skip_depth_ > 0) {
218     // We're inside an unknown element. Build the closing tag, decrement
219     // the skip counter and then check if we're back to known KML.
220     InsertUnknownEndElement(name);
221     if (--skip_depth_ == 0) {
222       // The next element will be known KML. Push the gathered char_data_ up
223       // to Element as a string for serializiation later on.
224       char_data_.top().append("\n");
225       stack_.top()->AddUnknownElement(char_data_.top());
226       char_data_.pop();
227     }
228     return;
229   }
230 
231   // This is to cover the special case of "<unknown/>" in expat where
232   // endElement is still called even if XML_StopParser() was called in the
233   // beginElement handler. See the discussion on this matter in
234   // XML_StopParser comment in expat.h.
235   if (stack_.empty()) {
236     return;
237   }
238 
239   // If we're parsing an old Schema placemark child, store the character data
240   // gathered into the SimpleData element we put on a stack in StartElement.
241   if (in_old_schema_placemark_ && simpledata_vec_.size() > 0) {
242     // TODO: the pretty serialization of SimpleData will produce some
243     // ugly (but harmless) whitespace and unnecessary line breaks. Fix this in
244     // the serializer.
245     simpledata_vec_.back()->set_text(char_data_.top());
246     char_data_.pop();
247     in_old_schema_placemark_ = false;
248     return;
249   }
250 
251   // The top of the stack is the begin of the element ending here.
252   ElementPtr child = stack_.top();
253 
254   string child_char_data_ = char_data_.top();
255   char_data_.pop();
256 
257   child->set_char_data(child_char_data_);
258 
259   if (child->Type() == Type_coordinates ||
260       child->Type() == Type_Snippet ||
261       child->Type() == Type_linkSnippet ||
262       child->Type() == Type_SimpleData) {
263     // These are effectively complex elements, but with character data.
264     child->AddElement(child);  // "Parse yourself"
265   }
266 
267   // Check if we're parsing old-style Schema KML. If we are, and if this
268   // EndElement is the closing </Schema>, give the schema an id (by appending
269   // "_id" to its name) and walk through its <SimpleField> children to
270   // discover what element name we should special-case in StartElement.
271 
272   // Handle the case of reaching the closing of an old-style </Schema>.
273   if (!old_schema_name_.empty()) {
274     if (name.length() == 6 && name == "Schema") {
275       HandleOldSchemaEndElement(AsSchema(child), old_schema_name_,
276                                 &simplefield_name_vec_);
277     } else if (name == old_schema_name_) {
278       // Or that of its Placemark substitute.
279       HandleOldSchemaParentEndElement(AsPlacemark(child), old_schema_name_,
280                                       kml_factory_, simpledata_vec_);
281     }
282   }
283 
284   // If stack_.size() == 1 this is the root element: leave it alone.
285   if (stack_.size() >= 2) {
286     // We have a parent.  Pop ourselves off and hand to parent.  Parent element
287     // always takes ownsership: 1) a known complex child, 2) a known field,
288     // or 3) unknown is passed onwards to its parent and possibly ultimately
289     // to the unknown element list in Element.
290     stack_.pop();
291     if (CallEndElementObservers(observers_, stack_.top(), child)) {
292       stack_.top()->AddElement(child);
293     }
294     if (!CallAddChildObservers(observers_, stack_.top(), child)) {
295       XML_StopParser(get_parser(), XML_TRUE);
296     }
297   }
298 }
299 
CallEndElementObservers(const parser_observer_vector_t & observers,const ElementPtr & parent,const ElementPtr & child)300 bool KmlHandler::CallEndElementObservers(
301     const parser_observer_vector_t& observers, const ElementPtr& parent,
302     const ElementPtr& child) {
303   for (size_t i = 0; i < observers_.size(); ++i) {
304     if (!observers_[i]->EndElement(parent, child)) {
305       return false;
306     }
307   }
308   return true;
309 }
310 
311 // private
CallAddChildObservers(const parser_observer_vector_t & observers,const ElementPtr & parent,const ElementPtr & child)312 bool KmlHandler::CallAddChildObservers(
313     const parser_observer_vector_t& observers, const ElementPtr& parent,
314     const ElementPtr& child) {
315   for (size_t i = 0; i < observers_.size(); ++i) {
316     if (!observers_[i]->AddChild(parent, child)) {
317       return false;
318     }
319   }
320   return true;
321 }
322 
323 // Note the handling of char data w.r.t. unknown elements. If we are within
324 // a known element that cannot contain char data, setting it in EndElement is
325 // a no-op. For known elements within unknown elements, everything is treated
326 // as a string and _all_ data is saved.
327 // <Placemark><Point>foo<coordinates/>bar</Point></Placemark> becomes:
328 // <Placemark><Point><coordinates/></Point></Placemark>
329 // <X><Point>foo<coordinates/>bar</Point></P> remains as-is.
CharData(const string & s)330 void KmlHandler::CharData(const string& s) {
331   char_data_.top().append(s);
332 }
333 
334 // As with STL pop() methods this is (potentially) destructive.  If the
335 // parse succeeded the root element will be the only item on the stack and
336 // this method will detach it.  Either way the destructor will delete all
337 // elements on the stack.  This should only be called after XML_Parse()
338 // has completed.
PopRoot()339 ElementPtr KmlHandler::PopRoot() {
340   if (!stack_.empty() && stack_.size() == 1) {
341     ElementPtr root = stack_.top();
342     stack_.pop();
343     return root;
344   }
345   return NULL;
346 }
347 
348 // Private.
InsertUnknownStartElement(const string & name,const StringVector & atts)349 void KmlHandler::InsertUnknownStartElement(const string& name,
350                                            const StringVector& atts) {
351   string& top = char_data_.top();
352   top.append("<");
353   top.append(name);
354   for (size_t i = 0; i < atts.size(); i += 2)  {
355     top.append(" ");
356     top.append(atts.at(i));
357     top.append("=\"");
358     top.append(atts.at(i+1));
359     top.append("\"");
360   }
361   top.append(">");
362 }
363 
364 // Private.
InsertUnknownEndElement(const string & name)365 void KmlHandler::InsertUnknownEndElement(const string& name) {
366   string& top = char_data_.top();
367   top.append("</");
368   top.append(name);
369   top.append(">");
370 }
371 
372 // Static, private.
FindOldSchemaParentName(const StringVector & attrs,string * old_schema_name)373 void KmlHandler::FindOldSchemaParentName(const StringVector& attrs,
374                                          string* old_schema_name) {
375   boost::scoped_ptr<Attributes> schema_attrs(Attributes::Create(attrs));
376   if (schema_attrs.get() && old_schema_name &&
377       schema_attrs->FindValue("parent", NULL)) {
378     schema_attrs->FindValue("name", old_schema_name);
379   }
380 }
381 
382 // Static, private.
ParseOldSchemaChild(const string & name,const StringVector & simplefield_name_vec,std::vector<SimpleDataPtr> * simpledata_vec)383 bool KmlHandler::ParseOldSchemaChild(
384     const string& name,
385     const StringVector& simplefield_name_vec,
386     std::vector<SimpleDataPtr>* simpledata_vec) {
387   // We'll iterate through a vector of possible names (created in
388   // EndElement) and check to see if we have a match. If we do, we'll make
389   // a SimpleData element and put it on a stack for later re-parenting to
390   // an ExtendedData element (again in EndElement).
391   if (!simpledata_vec) {
392     return false;
393   }
394   StringVector::const_iterator itr = simplefield_name_vec.begin();
395   for (; itr != simplefield_name_vec.end(); itr++) {
396     if (name == *itr) {
397       // Treat this as a SimpleData element.
398       SimpleDataPtr simpledata = KmlFactory::GetFactory()->CreateSimpleData();
399       simpledata->set_name(name);
400       simpledata_vec->push_back(simpledata);
401       return true;
402     }
403   }
404   return false;
405 }
406 
407 // Static, private.
HandleOldSchemaEndElement(const SchemaPtr & schema,const string & old_schema_name,StringVector * simplefield_name_vec)408 void KmlHandler::HandleOldSchemaEndElement(
409     const SchemaPtr& schema,
410     const string& old_schema_name,
411     StringVector* simplefield_name_vec) {
412   if (!simplefield_name_vec) {
413     return;
414   }
415   schema->set_id(old_schema_name + "_id");
416   // TODO: nuke the parent="Placemark" attr.
417   for (size_t i = 0; i < schema->get_simplefield_array_size(); i++) {
418     if (const SimpleFieldPtr& simplefield =
419         AsSimpleField(schema->get_simplefield_array_at(i))) {
420       if (simplefield->has_name()) {
421         simplefield_name_vec->push_back(simplefield->get_name());
422       }
423     }
424   }
425 }
426 
427 // Static, private.
HandleOldSchemaParentEndElement(const PlacemarkPtr & placemark,const string & old_schema_name,const KmlFactory & kml_factory,const std::vector<SimpleDataPtr> simpledata_vec)428 void KmlHandler::HandleOldSchemaParentEndElement(
429     const PlacemarkPtr& placemark,
430     const string& old_schema_name,
431     const KmlFactory& kml_factory,
432     const std::vector<SimpleDataPtr> simpledata_vec) {
433   // We've reached the closing tag of the old placemark substitute
434   // element. Take the SimpleData elements we've been creating from its
435   // children and hand them to an ExtendedData, then give that to the
436   // parent placemark.
437   ExtendedDataPtr extendeddata = kml_factory.CreateExtendedData();
438   SchemaDataPtr schemadata = kml_factory.CreateSchemaData();
439   schemadata->set_schemaurl(old_schema_name + "_id");
440   std::vector<SimpleDataPtr>::const_iterator itr =
441     simpledata_vec.begin();
442   for (; itr != simpledata_vec.end(); itr++) {
443     schemadata->add_simpledata(*itr);
444   }
445   extendeddata->add_schemadata(schemadata);
446   placemark->set_extendeddata(extendeddata);
447 }
448 
449 }  // end namespace kmldom
450 
451