1 /******************************************************************************
2 *
3 * Project: KML Driver
4 * Purpose: Class for reading, parsing and handling a kmlfile.
5 * Author: Jens Oberender, j.obi@troja.net
6 *
7 ******************************************************************************
8 * Copyright (c) 2007, Jens Oberender
9 * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included
19 * in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 * DEALINGS IN THE SOFTWARE.
28 ****************************************************************************/
29 #include "kmlnode.h"
30 #include "kml.h"
31
32 #include <cstring>
33 #include <cstdio>
34 #include <exception>
35 #include <iostream>
36 #include <string>
37
38 #include "cpl_conv.h"
39 #include "cpl_error.h"
40 #ifdef HAVE_EXPAT
41 # include "expat.h"
42 #endif
43
44 CPL_CVSID("$Id: kml.cpp c99a871a7bdedc751c503bb8cf508d9016510fe0 2020-01-08 14:54:05 +0100 Even Rouault $")
45
KML()46 KML::KML() :
47 poTrunk_(nullptr),
48 nNumLayers_(-1),
49 papoLayers_(nullptr),
50 nDepth_(0),
51 validity(KML_VALIDITY_UNKNOWN),
52 pKMLFile_(nullptr),
53 poCurrent_(nullptr),
54 oCurrentParser(nullptr),
55 nDataHandlerCounter(0),
56 nWithoutEventCounter(0)
57 {}
58
~KML()59 KML::~KML()
60 {
61 if( nullptr != pKMLFile_ )
62 VSIFCloseL(pKMLFile_);
63 CPLFree(papoLayers_);
64
65 delete poTrunk_;
66 }
67
open(const char * pszFilename)68 bool KML::open(const char * pszFilename)
69 {
70 if( nullptr != pKMLFile_ )
71 VSIFCloseL( pKMLFile_ );
72
73 pKMLFile_ = VSIFOpenL( pszFilename, "r" );
74 return pKMLFile_ != nullptr;
75 }
76
parse()77 bool KML::parse()
78 {
79 if( nullptr == pKMLFile_ )
80 {
81 sError_ = "No file given";
82 return false;
83 }
84
85 if(poTrunk_ != nullptr) {
86 delete poTrunk_;
87 poTrunk_ = nullptr;
88 }
89
90 if(poCurrent_ != nullptr)
91 {
92 delete poCurrent_;
93 poCurrent_ = nullptr;
94 }
95
96 XML_Parser oParser = OGRCreateExpatXMLParser();
97 XML_SetUserData(oParser, this);
98 XML_SetElementHandler(oParser, startElement, endElement);
99 XML_SetCharacterDataHandler(oParser, dataHandler);
100 oCurrentParser = oParser;
101 nWithoutEventCounter = 0;
102
103 int nDone = 0;
104 int nLen = 0;
105 char aBuf[BUFSIZ] = { 0 };
106 bool bError = false;
107
108 do
109 {
110 nDataHandlerCounter = 0;
111 nLen = (int)VSIFReadL( aBuf, 1, sizeof(aBuf), pKMLFile_ );
112 nDone = VSIFEofL(pKMLFile_);
113 if (XML_Parse(oParser, aBuf, nLen, nDone) == XML_STATUS_ERROR)
114 {
115 CPLError( CE_Failure, CPLE_AppDefined,
116 "XML parsing of KML file failed : %s at line %d, "
117 "column %d",
118 XML_ErrorString(XML_GetErrorCode(oParser)),
119 static_cast<int>(XML_GetCurrentLineNumber(oParser)),
120 static_cast<int>(XML_GetCurrentColumnNumber(oParser)));
121 bError = true;
122 break;
123 }
124 nWithoutEventCounter ++;
125 } while (!nDone && nLen > 0 && nWithoutEventCounter < 10);
126
127 XML_ParserFree(oParser);
128 VSIRewindL(pKMLFile_);
129
130 if (nWithoutEventCounter == 10)
131 {
132 CPLError(CE_Failure, CPLE_AppDefined,
133 "Too much data inside one element. File probably corrupted");
134 bError = true;
135 }
136
137 if( bError )
138 {
139 if( poCurrent_ != nullptr )
140 {
141 while( poCurrent_ )
142 {
143 KMLNode* poTemp = poCurrent_->getParent();
144 delete poCurrent_;
145 poCurrent_ = poTemp;
146 }
147 // No need to destroy poTrunk_ : it has been destroyed in
148 // the last iteration
149 }
150 else
151 {
152 // Case of invalid content after closing element matching
153 // first <kml> element
154 delete poTrunk_;
155 }
156 poTrunk_ = nullptr;
157 return false;
158 }
159
160 poCurrent_ = nullptr;
161 return true;
162 }
163
checkValidity()164 void KML::checkValidity()
165 {
166 if(poTrunk_ != nullptr)
167 {
168 delete poTrunk_;
169 poTrunk_ = nullptr;
170 }
171
172 if(poCurrent_ != nullptr)
173 {
174 delete poCurrent_;
175 poCurrent_ = nullptr;
176 }
177
178 if(pKMLFile_ == nullptr)
179 {
180 sError_ = "No file given";
181 return;
182 }
183
184 XML_Parser oParser = OGRCreateExpatXMLParser();
185 XML_SetUserData(oParser, this);
186 XML_SetElementHandler(oParser, startElementValidate, nullptr);
187 XML_SetCharacterDataHandler(oParser, dataHandlerValidate);
188 int nCount = 0;
189
190 oCurrentParser = oParser;
191
192 int nDone = 0;
193 int nLen = 0;
194 char aBuf[BUFSIZ] = { 0 };
195
196 // Parses the file until we find the first element.
197 do
198 {
199 nDataHandlerCounter = 0;
200 nLen = static_cast<int>(VSIFReadL( aBuf, 1, sizeof(aBuf), pKMLFile_ ));
201 nDone = VSIFEofL(pKMLFile_);
202 if (XML_Parse(oParser, aBuf, nLen, nDone) == XML_STATUS_ERROR)
203 {
204 if (nLen <= BUFSIZ-1)
205 aBuf[nLen] = 0;
206 else
207 aBuf[BUFSIZ-1] = 0;
208 if( strstr(aBuf, "<?xml") && (
209 strstr(aBuf, "<kml") ||
210 (strstr(aBuf, "<Document") && strstr(aBuf, "/kml/2.")) ) )
211 {
212 CPLError(CE_Failure, CPLE_AppDefined,
213 "XML parsing of KML file failed : %s at line %d, column %d",
214 XML_ErrorString(XML_GetErrorCode(oParser)),
215 (int)XML_GetCurrentLineNumber(oParser),
216 (int)XML_GetCurrentColumnNumber(oParser));
217 }
218
219 validity = KML_VALIDITY_INVALID;
220 XML_ParserFree(oParser);
221 VSIRewindL(pKMLFile_);
222 return;
223 }
224
225 nCount ++;
226 /* After reading 50 * BUFSIZE bytes, and not finding whether the file */
227 /* is KML or not, we give up and fail silently */
228 } while ( !nDone && nLen > 0 &&
229 validity == KML_VALIDITY_UNKNOWN && nCount < 50 );
230
231 XML_ParserFree(oParser);
232 VSIRewindL(pKMLFile_);
233 poCurrent_ = nullptr;
234 }
235
startElement(void * pUserData,const char * pszName,const char ** ppszAttr)236 void XMLCALL KML::startElement( void* pUserData, const char* pszName,
237 const char** ppszAttr )
238 {
239 KML* poKML = static_cast<KML*>(pUserData);
240 try
241 {
242 poKML->nWithoutEventCounter = 0;
243
244 const char* pszColumn = strchr(pszName, ':');
245 if( pszColumn)
246 pszName = pszColumn + 1;
247
248 if(poKML->poTrunk_ == nullptr
249 || (poKML->poCurrent_ != nullptr &&
250 poKML->poCurrent_->getName().compare("description") != 0))
251 {
252 if (poKML->nDepth_ == 1024)
253 {
254 CPLError( CE_Failure, CPLE_AppDefined,
255 "Too big depth level (%d) while parsing KML.",
256 poKML->nDepth_ );
257 XML_StopParser(poKML->oCurrentParser, XML_FALSE);
258 return;
259 }
260
261 KMLNode* poMynew = new KMLNode();
262 poMynew->setName(pszName);
263 poMynew->setLevel(poKML->nDepth_);
264
265 for( int i = 0; ppszAttr[i]; i += 2 )
266 {
267 Attribute* poAtt = new Attribute();
268 poAtt->sName = ppszAttr[i];
269 poAtt->sValue = ppszAttr[i + 1];
270 poMynew->addAttribute(poAtt);
271 }
272
273 if(poKML->poTrunk_ == nullptr)
274 poKML->poTrunk_ = poMynew;
275 if(poKML->poCurrent_ != nullptr)
276 poMynew->setParent(poKML->poCurrent_);
277 poKML->poCurrent_ = poMynew;
278
279 poKML->nDepth_++;
280 }
281 else if( poKML->poCurrent_ != nullptr )
282 {
283 std::string sNewContent = "<";
284 sNewContent += pszName;
285 for( int i = 0; ppszAttr[i]; i += 2 )
286 {
287 sNewContent += " ";
288 sNewContent += ppszAttr[i];
289 sNewContent += "=\"";
290 sNewContent += ppszAttr[i + 1];
291 sNewContent += "\"";
292 }
293 sNewContent += ">";
294 if(poKML->poCurrent_->numContent() == 0)
295 poKML->poCurrent_->addContent(sNewContent);
296 else
297 poKML->poCurrent_->appendContent(sNewContent);
298 }
299 }
300 catch(const std::exception& ex)
301 {
302 CPLError(CE_Failure, CPLE_AppDefined,
303 "KML: libstdc++ exception : %s", ex.what());
304 XML_StopParser(poKML->oCurrentParser, XML_FALSE);
305 }
306 }
307
startElementValidate(void * pUserData,const char * pszName,const char ** ppszAttr)308 void XMLCALL KML::startElementValidate( void* pUserData, const char* pszName,
309 const char** ppszAttr )
310 {
311 KML* poKML = static_cast<KML *>(pUserData);
312
313 if (poKML->validity != KML_VALIDITY_UNKNOWN)
314 return;
315
316 poKML->validity = KML_VALIDITY_INVALID;
317
318 const char* pszColumn = strchr(pszName, ':');
319 if( pszColumn)
320 pszName = pszColumn + 1;
321
322 if(strcmp(pszName, "kml") == 0 || strcmp(pszName, "Document") == 0)
323 {
324 // Check all Attributes
325 for( int i = 0; ppszAttr[i]; i += 2 )
326 {
327 // Find the namespace and determine the KML version
328 if(strcmp(ppszAttr[i], "xmlns") == 0)
329 {
330 // Is it KML 2.2?
331 if((strcmp(ppszAttr[i + 1], "http://earth.google.com/kml/2.2") == 0) ||
332 (strcmp(ppszAttr[i + 1], "http://www.opengis.net/kml/2.2") == 0))
333 {
334 poKML->validity = KML_VALIDITY_VALID;
335 poKML->sVersion_ = "2.2";
336 }
337 else if(strcmp(ppszAttr[i + 1], "http://earth.google.com/kml/2.1") == 0)
338 {
339 poKML->validity = KML_VALIDITY_VALID;
340 poKML->sVersion_ = "2.1";
341 }
342 else if(strcmp(ppszAttr[i + 1], "http://earth.google.com/kml/2.0") == 0)
343 {
344 poKML->validity = KML_VALIDITY_VALID;
345 poKML->sVersion_ = "2.0";
346 }
347 else
348 {
349 CPLDebug("KML",
350 "Unhandled xmlns value : %s. Going on though...",
351 ppszAttr[i]);
352 poKML->validity = KML_VALIDITY_VALID;
353 poKML->sVersion_ = "?";
354 }
355 }
356 }
357
358 if (poKML->validity == KML_VALIDITY_INVALID)
359 {
360 CPLDebug( "KML",
361 "Did not find xmlns attribute in <kml> element. "
362 "Going on though..." );
363 poKML->validity = KML_VALIDITY_VALID;
364 poKML->sVersion_ = "?";
365 }
366 }
367 }
368
dataHandlerValidate(void * pUserData,const char *,int)369 void XMLCALL KML::dataHandlerValidate( void * pUserData,
370 const char * /* pszData */,
371 int /* nLen */ )
372 {
373 KML* poKML = static_cast<KML *>(pUserData);
374
375 poKML->nDataHandlerCounter ++;
376 if (poKML->nDataHandlerCounter >= BUFSIZ)
377 {
378 CPLError( CE_Failure, CPLE_AppDefined,
379 "File probably corrupted (million laugh pattern)" );
380 XML_StopParser(poKML->oCurrentParser, XML_FALSE);
381 }
382 }
383
endElement(void * pUserData,const char * pszName)384 void XMLCALL KML::endElement(void* pUserData, const char* pszName)
385 {
386 KML* poKML = static_cast<KML *>(pUserData);
387
388 try
389 {
390 poKML->nWithoutEventCounter = 0;
391
392 const char* pszColumn = strchr(pszName, ':');
393 if( pszColumn)
394 pszName = pszColumn + 1;
395
396 if(poKML->poCurrent_ != nullptr &&
397 poKML->poCurrent_->getName().compare(pszName) == 0)
398 {
399 poKML->nDepth_--;
400 KMLNode* poTmp = poKML->poCurrent_;
401 // Split the coordinates
402 if(poKML->poCurrent_->getName().compare("coordinates") == 0 &&
403 poKML->poCurrent_->numContent() == 1)
404 {
405 const std::string sData = poKML->poCurrent_->getContent(0);
406 std::size_t nPos = 0;
407 const std::size_t nLength = sData.length();
408 const char* pszData = sData.c_str();
409 while( true )
410 {
411 // Cut off whitespaces
412 while( nPos < nLength &&
413 (pszData[nPos] == ' ' || pszData[nPos] == '\n'
414 || pszData[nPos] == '\r' || pszData[nPos] == '\t' ) )
415 nPos++;
416
417 if (nPos == nLength)
418 break;
419
420 const std::size_t nPosBegin = nPos;
421
422 // Get content
423 while(nPos < nLength &&
424 pszData[nPos] != ' ' && pszData[nPos] != '\n' &&
425 pszData[nPos] != '\r' &&
426 pszData[nPos] != '\t')
427 nPos++;
428
429 if(nPos - nPosBegin > 0)
430 {
431 std::string sTmp(pszData + nPosBegin, nPos - nPosBegin);
432 poKML->poCurrent_->addContent(sTmp);
433 }
434 }
435 if(poKML->poCurrent_->numContent() > 1)
436 poKML->poCurrent_->deleteContent(0);
437 }
438 else if (poKML->poCurrent_->numContent() == 1)
439 {
440 const std::string sData = poKML->poCurrent_->getContent(0);
441 std::string sDataWithoutNL;
442 std::size_t nPos = 0;
443 const std::size_t nLength = sData.length();
444 const char* pszData = sData.c_str();
445 std::size_t nLineStartPos = 0;
446 bool bLineStart = true;
447
448 // Re-assemble multi-line content by removing leading spaces for
449 // each line. I am not sure why we do that. Should we preserve
450 // content as such?
451 while(nPos < nLength)
452 {
453 const char ch = pszData[nPos];
454 if( bLineStart && (ch == ' ' || ch == '\t' || ch == '\n' ||
455 ch == '\r') )
456 nLineStartPos ++;
457 else if( ch == '\n' || ch == '\r' )
458 {
459 if( !bLineStart )
460 {
461 std::string sTmp( pszData + nLineStartPos,
462 nPos - nLineStartPos);
463 if( !sDataWithoutNL.empty() )
464 sDataWithoutNL += " ";
465 sDataWithoutNL += sTmp;
466 bLineStart = true;
467 }
468 nLineStartPos = nPos + 1;
469 }
470 else
471 {
472 bLineStart = false;
473 }
474 nPos++;
475 }
476
477 if( nLineStartPos > 0 )
478 {
479 if (nLineStartPos < nPos)
480 {
481 std::string sTmp( pszData + nLineStartPos,
482 nPos - nLineStartPos);
483 if( !sDataWithoutNL.empty() )
484 sDataWithoutNL += " ";
485 sDataWithoutNL += sTmp;
486 }
487
488 poKML->poCurrent_->deleteContent(0);
489 poKML->poCurrent_->addContent(sDataWithoutNL);
490 }
491 }
492
493 if(poKML->poCurrent_->getParent() != nullptr)
494 poKML->poCurrent_ = poKML->poCurrent_->getParent();
495 else
496 poKML->poCurrent_ = nullptr;
497
498 if(!poKML->isHandled(pszName))
499 {
500 CPLDebug("KML", "Not handled: %s", pszName);
501 delete poTmp;
502 if( poKML->poCurrent_ == poTmp )
503 poKML->poCurrent_ = nullptr;
504 if( poKML->poTrunk_ == poTmp )
505 poKML->poTrunk_ = nullptr;
506 }
507 else
508 {
509 if(poKML->poCurrent_ != nullptr)
510 poKML->poCurrent_->addChildren(poTmp);
511 }
512 }
513 else if(poKML->poCurrent_ != nullptr)
514 {
515 std::string sNewContent = "</";
516 sNewContent += pszName;
517 sNewContent += ">";
518 if(poKML->poCurrent_->numContent() == 0)
519 poKML->poCurrent_->addContent(sNewContent);
520 else
521 poKML->poCurrent_->appendContent(sNewContent);
522 }
523 }
524 catch(const std::exception& ex)
525 {
526 CPLError(CE_Failure, CPLE_AppDefined,
527 "KML: libstdc++ exception : %s", ex.what());
528 XML_StopParser(poKML->oCurrentParser, XML_FALSE);
529 }
530 }
531
dataHandler(void * pUserData,const char * pszData,int nLen)532 void XMLCALL KML::dataHandler(void* pUserData, const char* pszData, int nLen)
533 {
534 KML* poKML = static_cast<KML *>(pUserData);
535
536 poKML->nWithoutEventCounter = 0;
537
538 if(nLen < 1 || poKML->poCurrent_ == nullptr)
539 return;
540
541 poKML->nDataHandlerCounter ++;
542 if (poKML->nDataHandlerCounter >= BUFSIZ)
543 {
544 CPLError( CE_Failure, CPLE_AppDefined,
545 "File probably corrupted (million laugh pattern)" );
546 XML_StopParser(poKML->oCurrentParser, XML_FALSE);
547 }
548
549 try
550 {
551 std::string sData(pszData, nLen);
552
553 if(poKML->poCurrent_->numContent() == 0)
554 poKML->poCurrent_->addContent(sData);
555 else
556 poKML->poCurrent_->appendContent(sData);
557 }
558 catch(const std::exception& ex)
559 {
560 CPLError(CE_Failure, CPLE_AppDefined,
561 "KML: libstdc++ exception : %s", ex.what());
562 XML_StopParser(poKML->oCurrentParser, XML_FALSE);
563 }
564 }
565
isValid()566 bool KML::isValid()
567 {
568 checkValidity();
569
570 if( validity == KML_VALIDITY_VALID )
571 CPLDebug( "KML", "Valid: %d Version: %s",
572 validity == KML_VALIDITY_VALID, sVersion_.c_str());
573
574 return validity == KML_VALIDITY_VALID;
575 }
576
getError() const577 std::string KML::getError() const
578 {
579 return sError_;
580 }
581
classifyNodes()582 int KML::classifyNodes()
583 {
584 if( poTrunk_ == nullptr )
585 return false;
586 return poTrunk_->classify(this);
587 }
588
eliminateEmpty()589 void KML::eliminateEmpty()
590 {
591 if( poTrunk_ != nullptr )
592 poTrunk_->eliminateEmpty(this);
593 }
594
print(unsigned short nNum)595 void KML::print(unsigned short nNum)
596 {
597 if( poTrunk_ != nullptr )
598 poTrunk_->print(nNum);
599 }
600
isHandled(std::string const & elem) const601 bool KML::isHandled(std::string const& elem) const
602 {
603 return isLeaf(elem) || isFeature(elem) || isFeatureContainer(elem)
604 || isContainer(elem) || isRest(elem);
605 }
606
isLeaf(std::string const &) const607 bool KML::isLeaf( std::string const& /* elem */ ) const
608 {
609 return false;
610 }
611
isFeature(std::string const &) const612 bool KML::isFeature( std::string const& /* elem */ ) const
613 {
614 return false;
615 }
616
isFeatureContainer(std::string const &) const617 bool KML::isFeatureContainer( std::string const& /* elem */ ) const
618 {
619 return false;
620 }
621
isContainer(std::string const &) const622 bool KML::isContainer( std::string const& /* elem */ ) const
623 {
624 return false;
625 }
626
isRest(std::string const &) const627 bool KML::isRest( std::string const& /* elem */ ) const
628 {
629 return false;
630 }
631
findLayers(KMLNode *,int)632 void KML::findLayers( KMLNode* /* poNode */, int /* bKeepEmptyContainers */ )
633 {
634 // idle
635 }
636
hasOnlyEmpty() const637 bool KML::hasOnlyEmpty() const
638 {
639 return poTrunk_->hasOnlyEmpty();
640 }
641
getNumLayers() const642 int KML::getNumLayers() const
643 {
644 return nNumLayers_;
645 }
646
selectLayer(int nNum)647 bool KML::selectLayer(int nNum) {
648 if( nNumLayers_ < 1 || nNum >= nNumLayers_ )
649 return false;
650 poCurrent_ = papoLayers_[nNum];
651 return true;
652 }
653
getCurrentName() const654 std::string KML::getCurrentName() const
655 {
656 std::string tmp;
657 if( poCurrent_ != nullptr )
658 {
659 tmp = poCurrent_->getNameElement();
660 }
661 return tmp;
662 }
663
getCurrentType() const664 Nodetype KML::getCurrentType() const
665 {
666 if(poCurrent_ != nullptr)
667 return poCurrent_->getType();
668
669 return Unknown;
670 }
671
is25D() const672 int KML::is25D() const
673 {
674 if(poCurrent_ != nullptr)
675 return poCurrent_->is25D();
676
677 return Unknown;
678 }
679
getNumFeatures()680 int KML::getNumFeatures()
681 {
682 if(poCurrent_ == nullptr)
683 return -1;
684
685 return static_cast<int>(poCurrent_->getNumFeatures());
686 }
687
getFeature(std::size_t nNum,int & nLastAsked,int & nLastCount)688 Feature* KML::getFeature(std::size_t nNum, int& nLastAsked, int &nLastCount)
689 {
690 if(poCurrent_ == nullptr)
691 return nullptr;
692
693 return poCurrent_->getFeature(nNum, nLastAsked, nLastCount);
694 }
695
unregisterLayerIfMatchingThisNode(KMLNode * poNode)696 void KML::unregisterLayerIfMatchingThisNode(KMLNode* poNode)
697 {
698 for(int i=0;i<nNumLayers_;)
699 {
700 if( papoLayers_[i] == poNode )
701 {
702 if( i < nNumLayers_ - 1 )
703 {
704 memmove( papoLayers_ + i, papoLayers_ + i + 1,
705 (nNumLayers_ - 1 - i) * sizeof(KMLNode*) );
706 }
707 nNumLayers_ --;
708 break;
709 }
710 i++;
711 }
712 }
713