1 /****************************************************************************** 2 * Project: OGR 3 * Purpose: OGRGMLASDriver implementation 4 * Author: Even Rouault, <even dot rouault at spatialys dot com> 5 * 6 * Initial development funded by the European Earth observation programme 7 * Copernicus 8 * 9 ****************************************************************************** 10 * Copyright (c) 2016, Even Rouault, <even dot rouault at spatialys dot com> 11 * 12 * Permission is hereby granted, free of charge, to any person obtaining a 13 * copy of this software and associated documentation files (the "Software"), 14 * to deal in the Software without restriction, including without limitation 15 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 16 * and/or sell copies of the Software, and to permit persons to whom the 17 * Software is furnished to do so, subject to the following conditions: 18 * 19 * The above copyright notice and this permission notice shall be included 20 * in all copies or substantial portions of the Software. 21 * 22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 23 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 28 * DEALINGS IN THE SOFTWARE. 29 ****************************************************************************/ 30 31 #ifndef OGR_GMLAS_INCLUDED 32 #define OGR_GMLAS_INCLUDED 33 34 // Must be first for DEBUG_BOOL case 35 #include "xercesc_headers.h" 36 #include "ogr_xerces.h" 37 38 #include "gdal_priv.h" 39 #include "ogrsf_frmts.h" 40 41 #include "ogr_gmlas_consts.h" 42 43 #include <set> 44 #include <map> 45 #include <vector> 46 47 typedef std::pair<CPLString, CPLString> PairURIFilename; 48 49 typedef enum 50 { 51 GMLAS_SWAP_AUTO, 52 GMLAS_SWAP_YES, 53 GMLAS_SWAP_NO, 54 } GMLASSwapCoordinatesEnum; 55 56 GDALDataset *OGRGMLASDriverCreateCopy( 57 const char * pszFilename, 58 GDALDataset *poSrcDS, 59 int /*bStrict*/, char ** papszOptions, 60 GDALProgressFunc pfnProgress, void * pProgressData ); 61 62 /************************************************************************/ 63 /* IGMLASInputSourceClosing */ 64 /************************************************************************/ 65 66 class IGMLASInputSourceClosing 67 { 68 public: ~IGMLASInputSourceClosing()69 virtual ~IGMLASInputSourceClosing() {} 70 71 virtual void notifyClosing(const CPLString& osFilename) = 0; 72 }; 73 74 /************************************************************************/ 75 /* GMLASResourceCache */ 76 /************************************************************************/ 77 78 class GMLASResourceCache 79 { 80 protected: 81 bool m_bHasCheckedCacheDirectory; 82 CPLString m_osCacheDirectory; 83 bool m_bRefresh; 84 bool m_bAllowDownload; 85 std::set<CPLString> m_aoSetRefreshedFiles; 86 87 static bool RecursivelyCreateDirectoryIfNeeded( 88 const CPLString& osDirname); 89 bool RecursivelyCreateDirectoryIfNeeded(); 90 91 CPLString GetCachedFilename(const CPLString& osResource); 92 93 public: 94 GMLASResourceCache(); 95 virtual ~GMLASResourceCache(); 96 97 void SetCacheDirectory(const CPLString& osCacheDirectory); SetRefreshMode(bool bRefresh)98 void SetRefreshMode(bool bRefresh) 99 { m_bRefresh = bRefresh; } SetAllowDownload(bool bVal)100 void SetAllowDownload(bool bVal) 101 { m_bAllowDownload = bVal; } 102 }; 103 104 /************************************************************************/ 105 /* GMLASXSDCache */ 106 /************************************************************************/ 107 108 class GMLASXSDCache final: public GMLASResourceCache 109 { 110 public: 111 GMLASXSDCache(); 112 virtual ~GMLASXSDCache(); 113 114 VSILFILE* Open( const CPLString& osResource, 115 const CPLString& osBasePath, 116 CPLString& osOutFilename ); 117 }; 118 119 /************************************************************************/ 120 /* GMLASBaseEntityResolver */ 121 /************************************************************************/ 122 123 class GMLASBaseEntityResolver: public EntityResolver, 124 public IGMLASInputSourceClosing 125 { 126 protected: 127 std::vector<CPLString> m_aosPathStack; 128 GMLASXSDCache& m_oCache; 129 CPLString m_osGMLVersionFound; 130 std::set<CPLString> m_oSetSchemaURLs; 131 132 public: 133 GMLASBaseEntityResolver(const CPLString& osBasePath, 134 GMLASXSDCache& oCache); 135 virtual ~GMLASBaseEntityResolver(); 136 137 void SetBasePath(const CPLString& osBasePath); GetGMLVersionFound()138 const CPLString& GetGMLVersionFound() const 139 { return m_osGMLVersionFound; } GetSchemaURLS()140 const std::set<CPLString>& GetSchemaURLS() const 141 { return m_oSetSchemaURLs; } 142 143 virtual void notifyClosing(const CPLString& osFilename ) override; 144 virtual InputSource* resolveEntity( const XMLCh* const publicId, 145 const XMLCh* const systemId) override; 146 147 virtual void DoExtraSchemaProcessing(const CPLString& osFilename, 148 VSILFILE* fp); 149 }; 150 151 /************************************************************************/ 152 /* GMLASInputSource */ 153 /************************************************************************/ 154 155 class GMLASInputSource final: public InputSource 156 { 157 VSILFILE *m_fp; 158 bool m_bOwnFP; 159 int m_nCounter; 160 int *m_pnCounter; 161 CPLString m_osFilename; 162 IGMLASInputSourceClosing* m_cbk; 163 164 public: 165 GMLASInputSource(const char* pszFilename, 166 VSILFILE* fp, 167 bool bOwnFP, 168 MemoryManager* const manager = 169 XMLPlatformUtils::fgMemoryManager); 170 virtual ~GMLASInputSource(); 171 172 virtual BinInputStream* makeStream() const override; 173 174 void SetClosingCallback( IGMLASInputSourceClosing* cbk ); 175 }; 176 177 /************************************************************************/ 178 /* GMLASErrorHandler */ 179 /************************************************************************/ 180 181 class GMLASErrorHandler : public ErrorHandler 182 { 183 public: GMLASErrorHandler()184 GMLASErrorHandler () : m_bFailed (false), 185 m_bSchemaFullChecking (false), 186 m_bHandleMultipleImports (false) {} 187 SetSchemaFullCheckingEnabled(bool b)188 void SetSchemaFullCheckingEnabled(bool b) 189 { m_bSchemaFullChecking = b; } 190 SetHandleMultipleImportsEnabled(bool b)191 void SetHandleMultipleImportsEnabled(bool b) 192 { m_bHandleMultipleImports = b; } 193 hasFailed()194 bool hasFailed () const { return m_bFailed; } 195 196 virtual void warning (const SAXParseException& e) override; 197 virtual void error (const SAXParseException& e) override; 198 virtual void fatalError (const SAXParseException& e) override; 199 resetErrors()200 virtual void resetErrors () override { m_bFailed = false; } 201 202 private: 203 bool m_bFailed; 204 bool m_bSchemaFullChecking; 205 bool m_bHandleMultipleImports; 206 207 void handle (const SAXParseException& e, CPLErr eErr); 208 }; 209 210 /************************************************************************/ 211 /* GMLASXLinkResolutionConf */ 212 /************************************************************************/ 213 214 class GMLASXLinkResolutionConf 215 { 216 public: 217 /* See data/gmlasconf.xsd for docomentation of the fields */ 218 219 typedef enum 220 { 221 RawContent, 222 FieldsFromXPath 223 } ResolutionMode; 224 225 int m_nTimeOut; 226 227 int m_nMaxFileSize; 228 229 int m_nMaxGlobalResolutionTime; 230 231 CPLString m_osProxyServerPort; 232 233 CPLString m_osProxyUserPassword; 234 235 CPLString m_osProxyAuth; 236 237 CPLString m_osCacheDirectory; 238 239 bool m_bDefaultResolutionEnabled; 240 241 bool m_bDefaultAllowRemoteDownload; 242 243 ResolutionMode m_eDefaultResolutionMode; 244 245 int m_nDefaultResolutionDepth; 246 247 bool m_bDefaultCacheResults; 248 249 bool m_bResolveInternalXLinks; 250 251 class URLSpecificResolution 252 { 253 public: 254 255 CPLString m_osURLPrefix; 256 257 std::vector< std::pair<CPLString, CPLString> > m_aosNameValueHTTPHeaders; 258 259 bool m_bAllowRemoteDownload; 260 261 ResolutionMode m_eResolutionMode; 262 263 int m_nResolutionDepth; 264 265 bool m_bCacheResults; 266 267 class XPathDerivedField 268 { 269 public: 270 271 CPLString m_osName; 272 273 CPLString m_osType; 274 275 CPLString m_osXPath; 276 }; 277 278 std::vector<XPathDerivedField> m_aoFields; 279 280 URLSpecificResolution(); 281 }; 282 283 std::vector<URLSpecificResolution> m_aoURLSpecificRules; 284 285 GMLASXLinkResolutionConf(); 286 287 bool LoadFromXML(CPLXMLNode* psRoot); 288 }; 289 290 /************************************************************************/ 291 /* GMLASConfiguration */ 292 /************************************************************************/ 293 294 class GMLASConfiguration 295 { 296 public: 297 298 /** Whether remote schemas are allowed to be download. */ 299 bool m_bAllowRemoteSchemaDownload; 300 301 /** Whether a ogr_pkid attribute should always be generated. */ 302 bool m_bAlwaysGenerateOGRId; 303 304 /** Whether to remove layers found to be unused in initial scan pass */ 305 bool m_bRemoveUnusedLayers; 306 307 /** Whether to remove fields found to be unused in initial scan pass */ 308 bool m_bRemoveUnusedFields; 309 310 /** Whether repeated strings, integers, reals should be in corresponding 311 OGR array types. */ 312 bool m_bUseArrays; 313 314 /** Whether OGR field null state should be used. */ 315 bool m_bUseNullState; 316 317 /** Whether geometries should be stored as XML in a OGR string field. */ 318 bool m_bIncludeGeometryXML; 319 320 /** Whether, when dealing with schemas that import the 321 GML namespace, and that at least one of them has 322 elements that derive from gml:_Feature or 323 gml:AbstractFeatureonly, only such elements should be 324 instantiated as OGR layers, during the first pass that 325 iterates over top level elements of the imported 326 schemas. */ 327 bool m_bInstantiateGMLFeaturesOnly; 328 329 /** Maximum length of layer and field identifiers*/ 330 int m_nIdentifierMaxLength; 331 332 /** Whether case insensitive comparison should be used for identifier equality testing */ 333 bool m_bCaseInsensitiveIdentifier; 334 335 /** Whether to launder identifiers like postgresql does */ 336 bool m_bPGIdentifierLaundering; 337 338 /* Maximum number of fields in an element considered for flattening. */ 339 int m_nMaximumFieldsForFlattening; 340 341 /** Whether remote XSD schemas should be locally cached. */ 342 bool m_bAllowXSDCache; 343 344 /** Cache directory for cached XSD schemas. */ 345 CPLString m_osXSDCacheDirectory; 346 347 /** Whether to enable schema full checking. */ 348 bool m_bSchemaFullChecking; 349 350 /** Whether to allow multiple imports of the same namespace. */ 351 bool m_bHandleMultipleImports; 352 353 /** Whether validation of document against schema should be done. */ 354 bool m_bValidate; 355 356 /** Whether a validation error should prevent dataset opening. */ 357 bool m_bFailIfValidationError; 358 359 /** Whether technical layers should be exposed. */ 360 bool m_bExposeMetadataLayers; 361 362 /** For flatening rules, map prefix namespace to its URI */ 363 std::map<CPLString, CPLString> m_oMapPrefixToURIFlatteningRules; 364 365 std::vector<CPLString> m_osForcedFlattenedXPath; 366 367 std::vector<CPLString> m_osDisabledFlattenedXPath; 368 369 enum SWEActivationMode 370 { 371 SWE_ACTIVATE_IF_NAMESPACE_FOUND, 372 SWE_ACTIVATE_TRUE, 373 SWE_ACTIVATE_FALSE 374 }; 375 376 /** If and when activate SWE special processings */ 377 SWEActivationMode m_eSWEActivationMode; 378 379 /** If enabling swe:DataRecord parsing */ 380 bool m_bSWEProcessDataRecord; 381 382 /** If enabling swe:DataArray parsing */ 383 bool m_bSWEProcessDataArray; 384 385 /** For ignored xpaths, map prefix namespace to its URI */ 386 std::map<CPLString, CPLString> m_oMapPrefixToURIIgnoredXPaths; 387 388 /** Ignored xpaths */ 389 std::vector<CPLString> m_aosIgnoredXPaths; 390 391 /** For type constraints, map prefix namespace to its URI */ 392 std::map<CPLString, CPLString> m_oMapPrefixToURITypeConstraints; 393 394 /** Map an XPath to a list of potential types for its children */ 395 std::map<CPLString, std::vector<CPLString> > m_oMapChildrenElementsConstraints; 396 397 /* Beginning of Writer config */ 398 399 /** Number of spaces for indentation */ 400 int m_nIndentSize; 401 402 CPLString m_osComment; 403 404 /** End of line format: "CRLF" or "LR" */ 405 CPLString m_osLineFormat; 406 407 /** "SHORT", "OGC_URN" or "OGC_URL" */ 408 CPLString m_osSRSNameFormat; 409 410 /** "WFS2_FEATURECOLLECTION" or "GMLAS_FEATURECOLLECTION" */ 411 CPLString m_osWrapping; 412 413 /** XML datetime or empty for current time */ 414 CPLString m_osTimestamp; 415 416 /** Path or URL to OGC WFS 2.0 schema. */ 417 CPLString m_osWFS20SchemaLocation; 418 419 /* End of Writer config */ 420 421 /** Whether a warning should be emitted when an element or attribute is 422 found in the document parsed, but ignored because of the ignored 423 XPath defined. */ 424 std::map<CPLString, bool> m_oMapIgnoredXPathToWarn; 425 426 GMLASXLinkResolutionConf m_oXLinkResolution; 427 428 GMLASConfiguration(); 429 virtual ~GMLASConfiguration(); 430 431 bool Load(const char* pszFilename); 432 void Finalize(); 433 434 static CPLString GetBaseCacheDirectory(); 435 }; 436 437 /************************************************************************/ 438 /* GMLASXLinkResolver */ 439 /************************************************************************/ 440 441 class GMLASXLinkResolver final: public GMLASResourceCache 442 { 443 GMLASXLinkResolutionConf m_oConf; 444 int m_nGlobalResolutionTime; 445 446 std::map<CPLString, CPLString> m_oMapURLToContent; 447 std::map<size_t, std::vector<CPLString> > m_oMapFileSizeToURLs; 448 size_t m_nMaxRAMCacheSize; 449 size_t m_nCurrentRAMCacheSize; 450 451 CPLString FetchRawContent(const CPLString& osURL, 452 const char* pszHeaders); 453 454 CPLString GetRawContent(const CPLString& osURL, 455 const char* pszHeaders, 456 bool bAllowRemoteDownload, 457 bool bCacheResults); 458 public: 459 460 GMLASXLinkResolver(); 461 462 void SetConf( const GMLASXLinkResolutionConf& oConf ); GetConf()463 const GMLASXLinkResolutionConf& GetConf() const { return m_oConf; } 464 465 bool IsRawContentResolutionEnabled() const; 466 int GetMatchingResolutionRule(const CPLString& osURL) const; 467 CPLString GetRawContent(const CPLString& osURL); 468 CPLString GetRawContentForRule(const CPLString& osURL, int nIdxRule); 469 }; 470 471 /************************************************************************/ 472 /* GMLASXPathMatcher */ 473 /************************************************************************/ 474 475 /** Object to compares a user provided XPath against a set of test XPaths */ 476 class GMLASXPathMatcher 477 { 478 class XPathComponent 479 { 480 public: 481 CPLString m_osValue; 482 bool m_bDirectChild; 483 }; 484 485 /** For reference xpaths, map prefix namespace to its URI */ 486 std::map<CPLString, CPLString> m_oMapPrefixToURIReferenceXPaths; 487 488 /** Reference xpaths */ 489 std::vector<CPLString> m_aosReferenceXPathsUncompiled; 490 491 /** Reference xpaths "compiled" */ 492 std::vector< std::vector<XPathComponent> > m_aosReferenceXPaths; 493 494 static bool MatchesRefXPath( 495 const CPLString& osXPath, 496 const std::vector<XPathComponent>& oRefXPath); 497 498 public: 499 GMLASXPathMatcher(); 500 virtual ~GMLASXPathMatcher(); 501 502 void SetRefXPaths(const std::map<CPLString, CPLString>& 503 oMapPrefixToURIReferenceXPaths, 504 const std::vector<CPLString>& 505 aosReferenceXPaths); 506 507 void SetDocumentMapURIToPrefix( 508 const std::map<CPLString,CPLString>& oMapURIToPrefix ); 509 510 /** Return true if osXPath matches one of the XPath of 511 m_aosReferenceXPaths */ 512 bool MatchesRefXPath( 513 const CPLString& osXPath, 514 CPLString& osOutMatchedXPath ) const; 515 GetMapPrefixToURI()516 const std::map<CPLString, CPLString>& GetMapPrefixToURI() const 517 { return m_oMapPrefixToURIReferenceXPaths; } 518 }; 519 520 /************************************************************************/ 521 /* GMLASFieldType */ 522 /************************************************************************/ 523 524 /** Enumeration for XML primitive types */ 525 typedef enum 526 { 527 GMLAS_FT_STRING, 528 GMLAS_FT_ID, 529 GMLAS_FT_BOOLEAN, 530 GMLAS_FT_SHORT, 531 GMLAS_FT_INT32, 532 GMLAS_FT_INT64, 533 GMLAS_FT_FLOAT, 534 GMLAS_FT_DOUBLE, 535 GMLAS_FT_DECIMAL, 536 GMLAS_FT_DATE, 537 GMLAS_FT_GYEAR, 538 GMLAS_FT_GYEAR_MONTH, 539 GMLAS_FT_TIME, 540 GMLAS_FT_DATETIME, 541 GMLAS_FT_BASE64BINARY, 542 GMLAS_FT_HEXBINARY, 543 GMLAS_FT_ANYURI, 544 GMLAS_FT_ANYTYPE, 545 GMLAS_FT_ANYSIMPLETYPE, 546 GMLAS_FT_GEOMETRY, // this one isn't a XML primitive type. 547 } GMLASFieldType; 548 549 /************************************************************************/ 550 /* GMLASField */ 551 /************************************************************************/ 552 553 class GMLASField 554 { 555 public: 556 typedef enum 557 { 558 /** Field that is going to be instantiated as a OGR field */ 559 REGULAR, 560 561 /** Non-instanciable field. The corresponding element to the XPath 562 is stored in a child layer that will reference back to the 563 main layer. */ 564 PATH_TO_CHILD_ELEMENT_NO_LINK, 565 566 /** Field that will store the PKID of a child element */ 567 PATH_TO_CHILD_ELEMENT_WITH_LINK, 568 569 /** Non-instanciable field. The corresponding element to the XPath 570 is stored in a child layer. And the link between both will be 571 done through a junction table. */ 572 PATH_TO_CHILD_ELEMENT_WITH_JUNCTION_TABLE, 573 574 /** Non-instanciable field. Corresponds to a group of an element. */ 575 GROUP 576 } Category; 577 578 private: 579 CPLString m_osName; /**< Field name */ 580 GMLASFieldType m_eType; /**< Field type */ 581 OGRwkbGeometryType m_eGeomType; /**< Field geometry type */ 582 CPLString m_osTypeName; /**< Original XSD type */ 583 int m_nWidth; /**< Field width */ 584 bool m_bNotNullable; /**< If the field is not nullable */ 585 bool m_bArray; /**< If the field is an array (from OGR types point of view) */ 586 bool m_bList; /**< If the field is a list (a xs:list) */ 587 588 /** Category of the field. */ 589 Category m_eCategory; 590 591 /** XPath of the field. */ 592 CPLString m_osXPath; 593 594 /** Set of XPath that are linked to this field. 595 This is used for cases where a gml:AbstractGeometry element is 596 referenced. In which case all possible realizations of this 597 element are listed. Will be used with eType == GMLAS_FT_ANYTYPE 598 to store XML blob on parsing. */ 599 std::vector<CPLString> m_aosXPath; 600 601 CPLString m_osFixedValue; /**< Value of fixed='' attribute */ 602 CPLString m_osDefaultValue; /**< Value of default='' attribute */ 603 604 /** Minimum number of occurrences. Might be -1 if unset */ 605 int m_nMinOccurs; 606 607 /** Maximum number of occurrences, or MAXOCCURS_UNLIMITED. Might be 608 -1 if unset. */ 609 int m_nMaxOccurs; 610 611 /** For a PATH_TO_CHILD_ELEMENT_NO_LINK, whether maxOccurs>1 is on the 612 sequence rather than on the element */ 613 bool m_bRepetitionOnSequence; 614 615 /** In case of m_eType == GMLAS_FT_ANYTYPE whether the current element 616 must be stored in the XML blob (if false, only its children) */ 617 bool m_bIncludeThisEltInBlob; 618 619 /** Only used for PATH_TO_CHILD_ELEMENT_WITH_JUNCTION_TABLE. The XPath 620 of the abstract element (the concrete XPath is in m_osXPath). 621 e.g myns:mainElt/myns:subEltAbstract whereas the concrete XPath 622 is myns:mainElt/myns:subEltRealization */ 623 CPLString m_osAbstractElementXPath; 624 625 /** Only used for PATH_TO_CHILD_ELEMENT_WITH_LINK and 626 PATH_TO_CHILD_ELEMENT_WITH_JUNCTION_TABLE (and also for 627 PATH_TO_CHILD_ELEMENT_NO_LINK and GROUP but for metadata layers only). 628 The XPath of the child element. */ 629 CPLString m_osRelatedClassXPath; 630 631 /** Only use for PATH_TO_CHILD_ELEMENT_WITH_JUNCTION_TABLE. Name of 632 the junction layer to consult for this field. Only used by 633 writer code. */ 634 CPLString m_osJunctionLayer; 635 636 /** Dirty hack to register attributes with fixed values, despite being 637 in the XPath ignored list. Needed to avoid warning when doing validation */ 638 bool m_bIgnored; 639 640 /** Documentation from schema */ 641 CPLString m_osDoc; 642 643 /** For elements within xs:choice */ 644 bool m_bMayAppearOutOfOrder; 645 646 public: 647 GMLASField(); 648 SetName(const CPLString & osName)649 void SetName(const CPLString& osName) { m_osName = osName; } 650 void SetType(GMLASFieldType eType, const char* pszTypeName); SetGeomType(OGRwkbGeometryType eGeomType)651 void SetGeomType(OGRwkbGeometryType eGeomType) 652 { m_eGeomType = eGeomType; } SetWidth(int nWidth)653 void SetWidth(int nWidth) { m_nWidth = nWidth; } SetNotNullable(bool bNotNullable)654 void SetNotNullable(bool bNotNullable) 655 { m_bNotNullable = bNotNullable; } SetArray(bool bArray)656 void SetArray(bool bArray) { m_bArray = bArray; } SetList(bool bList)657 void SetList(bool bList) { m_bList = bList; } SetXPath(const CPLString & osXPath)658 void SetXPath(const CPLString& osXPath) { m_osXPath = osXPath; } AddAlternateXPath(const CPLString & osXPath)659 void AddAlternateXPath(const CPLString& osXPath) 660 { m_aosXPath.push_back(osXPath); } SetFixedValue(const CPLString & osFixedValue)661 void SetFixedValue(const CPLString& osFixedValue) 662 { m_osFixedValue = osFixedValue; } SetDefaultValue(const CPLString & osDefaultValue)663 void SetDefaultValue(const CPLString& osDefaultValue) 664 { m_osDefaultValue = osDefaultValue; } SetCategory(Category eCategory)665 void SetCategory(Category eCategory) { m_eCategory = eCategory; } SetMinOccurs(int nMinOccurs)666 void SetMinOccurs(int nMinOccurs) { m_nMinOccurs = nMinOccurs; } SetMaxOccurs(int nMaxOccurs)667 void SetMaxOccurs(int nMaxOccurs) { m_nMaxOccurs = nMaxOccurs; } SetRepetitionOnSequence(bool b)668 void SetRepetitionOnSequence(bool b) { m_bRepetitionOnSequence = b; } SetIncludeThisEltInBlob(bool b)669 void SetIncludeThisEltInBlob(bool b) { m_bIncludeThisEltInBlob = b; } SetAbstractElementXPath(const CPLString & osName)670 void SetAbstractElementXPath(const CPLString& osName) 671 { m_osAbstractElementXPath = osName; } 672 SetRelatedClassXPath(const CPLString & osName)673 void SetRelatedClassXPath(const CPLString& osName) 674 { m_osRelatedClassXPath = osName; } SetJunctionLayer(const CPLString & osName)675 void SetJunctionLayer(const CPLString& osName) 676 { m_osJunctionLayer = osName; } 677 SetIgnored()678 void SetIgnored() { m_bIgnored = true; } SetDocumentation(const CPLString & osDoc)679 void SetDocumentation(const CPLString& osDoc) { m_osDoc = osDoc; } SetMayAppearOutOfOrder(bool b)680 void SetMayAppearOutOfOrder(bool b) { m_bMayAppearOutOfOrder = b; } 681 MakePKIDFieldXPathFromXLinkHrefXPath(const CPLString & osBaseXPath)682 static CPLString MakePKIDFieldXPathFromXLinkHrefXPath( 683 const CPLString& osBaseXPath) 684 { return "{" + osBaseXPath + "}_pkid"; } 685 MakeXLinkRawContentFieldXPathFromXLinkHrefXPath(const CPLString & osBaseXPath)686 static CPLString MakeXLinkRawContentFieldXPathFromXLinkHrefXPath( 687 const CPLString& osBaseXPath) 688 { return "{" + osBaseXPath + "}_rawcontent"; } 689 MakeXLinkDerivedFieldXPathFromXLinkHrefXPath(const CPLString & osBaseXPath,const CPLString & osName)690 static CPLString MakeXLinkDerivedFieldXPathFromXLinkHrefXPath( 691 const CPLString& osBaseXPath, const CPLString& osName) 692 { return "{" + osBaseXPath + "}_derived_" + osName; } 693 GetName()694 const CPLString& GetName() const { return m_osName; } GetXPath()695 const CPLString& GetXPath() const { return m_osXPath; } GetAlternateXPaths()696 const std::vector<CPLString>& GetAlternateXPaths() const 697 { return m_aosXPath; } GetType()698 GMLASFieldType GetType() const { return m_eType; } GetGeomType()699 OGRwkbGeometryType GetGeomType() const { return m_eGeomType; } GetTypeName()700 const CPLString& GetTypeName() const { return m_osTypeName; } GetWidth()701 int GetWidth() const { return m_nWidth; } IsNotNullable()702 bool IsNotNullable() const { return m_bNotNullable; } IsArray()703 bool IsArray() const { return m_bArray; } IsList()704 bool IsList() const { return m_bList; } GetFixedValue()705 const CPLString& GetFixedValue() const { return m_osFixedValue; } GetDefaultValue()706 const CPLString& GetDefaultValue() const { return m_osDefaultValue; } GetCategory()707 Category GetCategory() const { return m_eCategory; } GetMinOccurs()708 int GetMinOccurs() const { return m_nMinOccurs; } GetMaxOccurs()709 int GetMaxOccurs() const { return m_nMaxOccurs; } GetRepetitionOnSequence()710 bool GetRepetitionOnSequence() const { return m_bRepetitionOnSequence; } GetIncludeThisEltInBlob()711 bool GetIncludeThisEltInBlob() const { return m_bIncludeThisEltInBlob; } GetAbstractElementXPath()712 const CPLString& GetAbstractElementXPath() const 713 { return m_osAbstractElementXPath; } GetJunctionLayer()714 const CPLString& GetJunctionLayer() const 715 { return m_osJunctionLayer; } GetRelatedClassXPath()716 const CPLString& GetRelatedClassXPath() const 717 { return m_osRelatedClassXPath; } IsIgnored()718 bool IsIgnored() const { return m_bIgnored; } GetDocumentation()719 const CPLString& GetDocumentation() const { return m_osDoc; } MayAppearOutOfOrder()720 bool MayAppearOutOfOrder() const { return m_bMayAppearOutOfOrder; } 721 722 static GMLASFieldType GetTypeFromString( const CPLString& osType ); 723 }; 724 725 /************************************************************************/ 726 /* GMLASFeatureClass */ 727 /************************************************************************/ 728 729 class GMLASFeatureClass 730 { 731 /** User facing name */ 732 CPLString m_osName; 733 734 /** XPath to the main element of the feature class */ 735 CPLString m_osXPath; 736 737 /** List of fields */ 738 std::vector<GMLASField> m_aoFields; 739 740 /** Child nested classes */ 741 std::vector<GMLASFeatureClass> m_aoNestedClasses; 742 743 /** Whether this layer corresponds to a (multiple instantiated) xs:group 744 or a repeated sequence */ 745 bool m_bIsRepeatedSequence; 746 747 /** Whether this is a repeated group. Should be set together with m_bIsRepeatedSequence */ 748 bool m_bIsGroup; 749 750 /** Only used for junction tables. The XPath to the parent table */ 751 CPLString m_osParentXPath; 752 753 /** Only used for junction tables. The XPath to the child table */ 754 CPLString m_osChildXPath; 755 756 /** Whether this corresponds to a top-level XSD element in the schema */ 757 bool m_bIsTopLevelElt; 758 759 /** Documentation from schema */ 760 CPLString m_osDoc; 761 762 public: 763 GMLASFeatureClass(); 764 765 void SetName(const CPLString& osName); 766 void SetXPath(const CPLString& osXPath); 767 void AddField( const GMLASField& oField ); 768 void PrependFields( const std::vector<GMLASField>& aoFields ); 769 void AppendFields( const std::vector<GMLASField>& aoFields ); 770 void AddNestedClass( const GMLASFeatureClass& oNestedClass ); SetIsRepeatedSequence(bool bIsRepeatedSequence)771 void SetIsRepeatedSequence( bool bIsRepeatedSequence ) 772 { m_bIsRepeatedSequence = bIsRepeatedSequence; } SetIsGroup(bool bIsGroup)773 void SetIsGroup( bool bIsGroup ) 774 { m_bIsGroup = bIsGroup; } SetParentXPath(const CPLString & osXPath)775 void SetParentXPath(const CPLString& osXPath) 776 { m_osParentXPath = osXPath; } SetChildXPath(const CPLString & osXPath)777 void SetChildXPath(const CPLString& osXPath) 778 { m_osChildXPath = osXPath; } SetIsTopLevelElt(bool bIsTopLevelElt)779 void SetIsTopLevelElt(bool bIsTopLevelElt ) 780 { m_bIsTopLevelElt = bIsTopLevelElt; } SetDocumentation(const CPLString & osDoc)781 void SetDocumentation(const CPLString& osDoc) { m_osDoc = osDoc; } 782 GetName()783 const CPLString& GetName() const { return m_osName; } GetXPath()784 const CPLString& GetXPath() const { return m_osXPath; } GetFields()785 const std::vector<GMLASField>& GetFields() const { return m_aoFields; } GetFields()786 std::vector<GMLASField>& GetFields() { return m_aoFields; } GetNestedClasses()787 const std::vector<GMLASFeatureClass>& GetNestedClasses() const 788 { return m_aoNestedClasses; } GetNestedClasses()789 std::vector<GMLASFeatureClass>& GetNestedClasses() 790 { return m_aoNestedClasses; } IsRepeatedSequence()791 bool IsRepeatedSequence() const { return m_bIsRepeatedSequence; } IsGroup()792 bool IsGroup() const { return m_bIsGroup; } GetParentXPath()793 const CPLString& GetParentXPath() const { return m_osParentXPath; } GetChildXPath()794 const CPLString& GetChildXPath() const { return m_osChildXPath; } IsTopLevelElt()795 bool IsTopLevelElt() const { return m_bIsTopLevelElt; } GetDocumentation()796 const CPLString& GetDocumentation() const { return m_osDoc; } 797 }; 798 799 /************************************************************************/ 800 /* GMLASSchemaAnalyzer */ 801 /************************************************************************/ 802 803 class GMLASSchemaAnalyzer 804 { 805 GMLASXPathMatcher& m_oIgnoredXPathMatcher; 806 807 GMLASXPathMatcher& m_oChildrenElementsConstraintsXPathMatcher; 808 809 GMLASXPathMatcher& m_oForcedFlattenedXPathMatcher; 810 811 GMLASXPathMatcher& m_oDisabledFlattenedXPathMatcher; 812 813 std::map<CPLString, std::vector<CPLString> > m_oMapChildrenElementsConstraints; 814 815 /** Whether repeated strings, integers, reals should be in corresponding 816 OGR array types. */ 817 bool m_bUseArrays; 818 819 /** Whether OGR field null state should be used. */ 820 bool m_bUseNullState; 821 822 /** Whether, when dealing with schemas that import the 823 GML namespace, and that at least one of them has 824 elements that derive from gml:_Feature or 825 gml:AbstractFeatureonly, only such elements should be 826 instantiated as OGR layers, during the first pass that 827 iterates over top level elements of the imported 828 schemas. */ 829 bool m_bInstantiateGMLFeaturesOnly; 830 831 /** Vector of feature classes */ 832 std::vector<GMLASFeatureClass> m_aoClasses; 833 834 /** Map from a namespace URI to the corresponding prefix */ 835 std::map<CPLString, CPLString> m_oMapURIToPrefix; 836 837 /** Map element XPath to its XSElementDeclaration* */ 838 std::map<CPLString, XSElementDeclaration*> m_oMapXPathToEltDecl; 839 840 typedef std::map<XSElementDeclaration*, 841 std::vector<XSElementDeclaration*> > 842 tMapParentEltToChildElt; 843 /** Map from a base/parent element to a vector of derived/children 844 elements that are substitutionGroup of it. The map only 845 addresses the direct derived types, and not the 2nd level or more 846 derived ones. For that recursion in the map must be used.*/ 847 tMapParentEltToChildElt m_oMapParentEltToChildElt; 848 849 /** Map from a XSModelGroup* object to the name of its group definition. */ 850 std::map< XSModelGroup*, XSModelGroupDefinition*> m_oMapModelGroupToMGD; 851 852 /** Map from (non namespace prefixed) element names to the number of 853 elements that share the same namespace (in different namespaces) */ 854 std::map<CPLString, int> m_oMapEltNamesToInstanceCount; 855 856 /** Set of elements that match a OGR layer */ 857 std::set<XSElementDeclaration*> m_oSetEltsForTopClass; 858 859 /** Set of elements that are simple enough to be inlined whenever they 860 are referenced with cardinality 1. The use case if base:identifier 861 used by Inspire schemas. */ 862 std::set<XSElementDeclaration*> m_oSetSimpleEnoughElts; 863 864 /** Maximum length of layer and field identifiers*/ 865 int m_nIdentifierMaxLength; 866 867 /** Whether case insensitive comparison should be used for identifier equality testing */ 868 bool m_bCaseInsensitiveIdentifier; 869 870 /** Whether to launder identifiers like postgresql does */ 871 bool m_bPGIdentifierLaundering; 872 873 /* Maximum number of fields in an element considered for flattening. */ 874 int m_nMaximumFieldsForFlattening; 875 876 /** GML version found: 2.1.1, 3.1.1 or 3.2.1 or empty*/ 877 CPLString m_osGMLVersionFound; 878 879 /** Set of schemas opened */ 880 std::set<CPLString> m_oSetSchemaURLs; 881 882 /** Map from namespace URI to namespace prefix coming from the 883 * examination of xmlns:foo=bar attributes of the top element of the 884 * GML document */ 885 std::map<CPLString,CPLString> m_oMapDocNSURIToPrefix; 886 887 bool m_bAlwaysGenerateOGRId; 888 889 static bool IsSame( const XSModelGroup* poModelGroup1, 890 const XSModelGroup* poModelGroup2 ); 891 XSModelGroupDefinition* GetGroupDefinition( const XSModelGroup* poModelGroup ); 892 void SetFieldFromAttribute(GMLASField& oField, 893 XSAttributeUse* poAttr, 894 const CPLString& osXPathPrefix, 895 const CPLString& osNamePrefix = CPLString()); 896 void GetConcreteImplementationTypes( 897 XSElementDeclaration* poParentElt, 898 std::vector<XSElementDeclaration*>& apoImplEltList); 899 std::vector<XSElementDeclaration*> 900 GetConstraintChildrenElements(const CPLString& osFullXPath); 901 bool FindElementsWithMustBeToLevel( 902 const CPLString& osParentXPath, 903 XSModelGroup* poModelGroup, 904 int nRecursionCounter, 905 std::set<XSElementDeclaration*>& oSetVisitedEltDecl, 906 std::set<XSModelGroup*>& oSetVisitedModelGroups, 907 std::vector<XSElementDeclaration*>& oVectorEltsForTopClass, 908 std::set<CPLString>& aoSetXPathEltsForTopClass, 909 XSModel* poModel, 910 bool& bSimpleEnoughOut, 911 int& nCountSubEltsOut); 912 void BuildMapCountOccurrencesOfSameName( 913 XSModelGroup* poModelGroup, 914 std::map< CPLString, int >& oMapCountOccurrencesOfSameName); 915 bool ExploreModelGroup( XSModelGroup* psMainModelGroup, 916 XSAttributeUseList* poMainAttrList, 917 GMLASFeatureClass& oClass, 918 int nRecursionCounter, 919 std::set<XSModelGroup*>& oSetVisitedModelGroups, 920 XSModel* poModel, 921 const std::map< CPLString, int >& oMapCountOccurrencesOfSameName); 922 void SetFieldTypeAndWidthFromDefinition( XSSimpleTypeDefinition* poST, 923 GMLASField& oField ); 924 CPLString GetPrefix( const CPLString& osNamespaceURI ); 925 CPLString MakeXPath( const CPLString& osNamespace, 926 const CPLString& osName ); 927 void LaunderFieldNames( GMLASFeatureClass& oClass ); 928 void LaunderClassNames(); 929 930 XSElementDeclaration* GetTopElementDeclarationFromXPath( 931 const CPLString& osXPath, 932 XSModel* poModel); 933 934 bool InstantiateClassFromEltDeclaration(XSElementDeclaration* poEltDecl, 935 XSModel* poModel, 936 bool& bError); 937 void CreateNonNestedRelationship( 938 XSElementDeclaration* poElt, 939 std::vector<XSElementDeclaration*>& apoSubEltList, 940 GMLASFeatureClass& oClass, 941 int nMaxOccurs, 942 bool bEltNameWillNeedPrefix, 943 bool bForceJunctionTable, 944 bool bCaseOfConstraintChildren); 945 946 bool IsGMLNamespace(const CPLString& osURI); 947 948 bool DerivesFromGMLFeature(XSElementDeclaration* poEltDecl); 949 950 bool IsIgnoredXPath(const CPLString& osXPath); 951 952 void CollectClassesReferences( 953 GMLASFeatureClass& oClass, 954 std::vector<GMLASFeatureClass*>& aoClasses ); 955 956 CPL_DISALLOW_COPY_ASSIGN(GMLASSchemaAnalyzer) 957 958 public: 959 GMLASSchemaAnalyzer( 960 GMLASXPathMatcher& oIgnoredXPathMatcher, 961 GMLASXPathMatcher& oChildrenElementsConstraintsXPathMatcher, 962 const std::map<CPLString, std::vector<CPLString> >& 963 oMapChildrenElementsConstraints, 964 GMLASXPathMatcher& oForcedFlattenedXPathMatcher, 965 GMLASXPathMatcher& oDisabledFlattenedXPathMatcher); 966 SetUseArrays(bool b)967 void SetUseArrays(bool b) { m_bUseArrays = b; } SetUseNullState(bool b)968 void SetUseNullState(bool b) { m_bUseNullState = b; } SetInstantiateGMLFeaturesOnly(bool b)969 void SetInstantiateGMLFeaturesOnly(bool b) 970 { m_bInstantiateGMLFeaturesOnly = b; } SetIdentifierMaxLength(int nLength)971 void SetIdentifierMaxLength(int nLength) 972 { m_nIdentifierMaxLength = nLength; } SetCaseInsensitiveIdentifier(bool b)973 void SetCaseInsensitiveIdentifier(bool b) 974 { m_bCaseInsensitiveIdentifier = b; } SetPGIdentifierLaundering(bool b)975 void SetPGIdentifierLaundering(bool b) 976 { m_bPGIdentifierLaundering = b; } SetMaximumFieldsForFlattening(int n)977 void SetMaximumFieldsForFlattening(int n) 978 { m_nMaximumFieldsForFlattening = n; } SetMapDocNSURIToPrefix(const std::map<CPLString,CPLString> & oMap)979 void SetMapDocNSURIToPrefix(const std::map<CPLString,CPLString>& oMap) 980 { m_oMapDocNSURIToPrefix = oMap; } SetAlwaysGenerateOGRId(bool b)981 void SetAlwaysGenerateOGRId(bool b) 982 { m_bAlwaysGenerateOGRId = b; } 983 984 bool Analyze(GMLASXSDCache& oCache, 985 const CPLString& osBaseDirname, 986 std::vector<PairURIFilename>& aoXSDs, 987 bool bSchemaFullChecking, 988 bool bHandleMultipleImports); GetClasses()989 const std::vector<GMLASFeatureClass>& GetClasses() const 990 { return m_aoClasses; } 991 GetMapURIToPrefix()992 const std::map<CPLString, CPLString>& GetMapURIToPrefix() const 993 { return m_oMapURIToPrefix; } 994 GetGMLVersionFound()995 const CPLString& GetGMLVersionFound() const 996 { return m_osGMLVersionFound; } GetSchemaURLS()997 const std::set<CPLString>& GetSchemaURLS() const 998 { return m_oSetSchemaURLs; } 999 BuildJunctionTableXPath(const CPLString & osEltXPath,const CPLString & osSubEltXPath)1000 static CPLString BuildJunctionTableXPath(const CPLString& osEltXPath, 1001 const CPLString& osSubEltXPath) 1002 { return osEltXPath + "|" + osSubEltXPath; } 1003 }; 1004 1005 /************************************************************************/ 1006 /* OGRGMLASDataSource */ 1007 /************************************************************************/ 1008 1009 class OGRGMLASLayer; 1010 class GMLASReader; 1011 1012 class OGRGMLASDataSource final: public GDALDataset 1013 { 1014 std::vector<OGRGMLASLayer*> m_apoLayers; 1015 std::map<CPLString, CPLString> m_oMapURIToPrefix; 1016 CPLString m_osGMLFilename; 1017 OGRLayer *m_poFieldsMetadataLayer; 1018 OGRLayer *m_poLayersMetadataLayer; 1019 OGRLayer *m_poRelationshipsLayer; 1020 OGRLayer *m_poOtherMetadataLayer; 1021 std::vector<OGRLayer*> m_apoRequestedMetadataLayers; 1022 VSILFILE *m_fpGML; 1023 VSILFILE *m_fpGMLParser; 1024 bool m_bLayerInitFinished; 1025 bool m_bSchemaFullChecking; 1026 bool m_bHandleMultipleImports; 1027 bool m_bValidate; 1028 bool m_bRemoveUnusedLayers; 1029 bool m_bRemoveUnusedFields; 1030 bool m_bFirstPassDone; 1031 /** Map from a SRS name to a boolean indicating if its coordinate 1032 order is inverted. */ 1033 std::map<CPLString, bool> m_oMapSRSNameToInvertedAxis; 1034 1035 /** Map from geometry field definition to its expected SRSName */ 1036 std::map<OGRGeomFieldDefn*, CPLString> m_oMapGeomFieldDefnToSRSName; 1037 1038 /* map the ID attribute to its belonging layer, e.g foo.1 -> layer Foo */ 1039 std::map<CPLString, OGRGMLASLayer*> m_oMapElementIdToLayer; 1040 1041 /* map the ID attribute to the feature PKID (when different from itself) */ 1042 std::map<CPLString, CPLString> m_oMapElementIdToPKID; 1043 1044 std::vector<PairURIFilename> m_aoXSDsManuallyPassed; 1045 1046 GMLASConfiguration m_oConf; 1047 1048 /** Schema cache */ 1049 GMLASXSDCache m_oCache; 1050 1051 GMLASXPathMatcher m_oIgnoredXPathMatcher; 1052 1053 GMLASXPathMatcher m_oChildrenElementsConstraintsXPathMatcher; 1054 1055 GMLASXPathMatcher m_oForcedFlattenedXPathMatcher; 1056 1057 GMLASXPathMatcher m_oDisabledFlattenedXPathMatcher; 1058 1059 GMLASSwapCoordinatesEnum m_eSwapCoordinates; 1060 1061 /** Base unique identifier */ 1062 CPLString m_osHash; 1063 1064 vsi_l_offset m_nFileSize; 1065 1066 GMLASReader* m_poReader; 1067 1068 bool m_bEndOfReaderLayers; 1069 1070 int m_nCurMetadataLayerIdx; 1071 1072 GMLASXLinkResolver m_oXLinkResolver; 1073 1074 CPLString m_osGMLVersionFound; 1075 1076 bool m_bFoundSWE; 1077 1078 // Pointers are also included in m_apoLayers 1079 std::vector<OGRGMLASLayer*> m_apoSWEDataArrayLayers; 1080 1081 void TranslateClasses( OGRGMLASLayer* poParentLayer, 1082 const GMLASFeatureClass& oFC ); 1083 1084 bool RunFirstPassIfNeeded( GMLASReader* poReader, 1085 GDALProgressFunc pfnProgress, 1086 void* pProgressData ); 1087 1088 void FillOtherMetadataLayer( 1089 GDALOpenInfo* poOpenInfo, 1090 const CPLString& osConfigFile, 1091 const std::vector<PairURIFilename>& aoXSDs, 1092 const std::set<CPLString>& oSetSchemaURLs); 1093 1094 static std::vector<PairURIFilename> BuildXSDVector( 1095 const CPLString& osXSDFilenames); 1096 1097 void InitReaderWithFirstPassElements(GMLASReader* poReader); 1098 1099 public: 1100 OGRGMLASDataSource(); 1101 virtual ~OGRGMLASDataSource(); 1102 1103 virtual int GetLayerCount() override; 1104 virtual OGRLayer *GetLayer(int) override; 1105 virtual OGRLayer *GetLayerByName(const char* pszName) override; 1106 1107 virtual void ResetReading() override; 1108 virtual OGRFeature* GetNextFeature( OGRLayer** ppoBelongingLayer, 1109 double* pdfProgressPct, 1110 GDALProgressFunc pfnProgress, 1111 void* pProgressData ) override; 1112 virtual int TestCapability( const char* ) override; 1113 1114 bool Open(GDALOpenInfo* poOpenInfo); 1115 GetLayers()1116 std::vector<OGRGMLASLayer*>* GetLayers() 1117 { return &m_apoLayers; } GetMapURIToPrefix()1118 const std::map<CPLString, CPLString>& GetMapURIToPrefix() const 1119 { return m_oMapURIToPrefix; } GetGMLFilename()1120 const CPLString& GetGMLFilename() const 1121 { return m_osGMLFilename; } GetGMLVersionFound()1122 const CPLString& GetGMLVersionFound() const 1123 { return m_osGMLVersionFound; } 1124 GetFieldsMetadataLayer()1125 OGRLayer* GetFieldsMetadataLayer() 1126 { return m_poFieldsMetadataLayer; } GetLayersMetadataLayer()1127 OGRLayer* GetLayersMetadataLayer() 1128 { return m_poLayersMetadataLayer; } GetRelationshipsLayer()1129 OGRLayer* GetRelationshipsLayer() 1130 { return m_poRelationshipsLayer; } 1131 OGRGMLASLayer* GetLayerByXPath( const CPLString& osXPath ); 1132 1133 GMLASReader* CreateReader( VSILFILE*& fpGML, 1134 GDALProgressFunc pfnProgress = nullptr, 1135 void* pProgressData = nullptr ); 1136 GetCache()1137 GMLASXSDCache& GetCache() { return m_oCache; } 1138 1139 void PushUnusedGMLFilePointer( VSILFILE* fpGML ); 1140 VSILFILE *PopUnusedGMLFilePointer(); IsLayerInitFinished()1141 bool IsLayerInitFinished() const { return m_bLayerInitFinished; } GetSwapCoordinates()1142 GMLASSwapCoordinatesEnum GetSwapCoordinates() const 1143 { return m_eSwapCoordinates; } 1144 GetMapIgnoredXPathToWarn()1145 const std::map<CPLString,bool>& GetMapIgnoredXPathToWarn() const { 1146 return m_oConf.m_oMapIgnoredXPathToWarn; } GetIgnoredXPathMatcher()1147 const GMLASXPathMatcher& GetIgnoredXPathMatcher() const 1148 { return m_oIgnoredXPathMatcher; } 1149 GetConf()1150 const GMLASConfiguration& GetConf() const { return m_oConf; } GetXSDsManuallyPassed()1151 const std::vector<PairURIFilename>& GetXSDsManuallyPassed() const { 1152 return m_aoXSDsManuallyPassed; } 1153 }; 1154 1155 /************************************************************************/ 1156 /* OGRGMLASLayer */ 1157 /************************************************************************/ 1158 1159 class OGRGMLASLayer final: public OGRLayer 1160 { 1161 friend class OGRGMLASDataSource; 1162 1163 OGRGMLASDataSource *m_poDS; 1164 GMLASFeatureClass m_oFC; 1165 bool m_bLayerDefnFinalized; 1166 int m_nMaxFieldIndex; 1167 OGRFeatureDefn *m_poFeatureDefn; 1168 1169 /** Map from XPath to corresponding field index in OGR layer 1170 definition */ 1171 std::map<CPLString, int> m_oMapFieldXPathToOGRFieldIdx; 1172 1173 /** Map from XPath to corresponding geometry field index in OGR layer 1174 definition */ 1175 std::map<CPLString, int> m_oMapFieldXPathToOGRGeomFieldIdx; 1176 1177 /** Map from a OGR field index to the corresponding field index in 1178 m_oFC.GetFields() */ 1179 std::map<int, int> m_oMapOGRFieldIdxtoFCFieldIdx; 1180 std::map<int, int> m_oMapOGRGeomFieldIdxtoFCFieldIdx; 1181 1182 /** Map from XPath to corresponding field index in m_oFC.GetFields() */ 1183 std::map<CPLString, int> m_oMapFieldXPathToFCFieldIdx; 1184 1185 bool m_bEOF; 1186 GMLASReader *m_poReader; 1187 VSILFILE *m_fpGML; 1188 /** OGR field index of the ID field */ 1189 int m_nIDFieldIdx; 1190 /** Whether the ID field is generated, or comes from the XML content */ 1191 bool m_bIDFieldIsGenerated; 1192 /** Pointer to parent layer */ 1193 OGRGMLASLayer *m_poParentLayer; 1194 /** OGR field index of the field that points to the parent ID */ 1195 int m_nParentIDFieldIdx; 1196 1197 std::map<CPLString, CPLString> m_oMapSWEFieldToOGRFieldName; 1198 1199 OGRFeature* GetNextRawFeature(); 1200 1201 bool InitReader(); 1202 SetLayerDefnFinalized(bool bVal)1203 void SetLayerDefnFinalized(bool bVal) 1204 { m_bLayerDefnFinalized = bVal; } 1205 1206 CPLString LaunderFieldName(const CPLString& osFieldName); 1207 1208 CPLString GetXPathFromOGRFieldIndex(int nIdx) const; 1209 1210 public: 1211 OGRGMLASLayer(OGRGMLASDataSource* poDS, 1212 const GMLASFeatureClass& oFC, 1213 OGRGMLASLayer* poParentLayer, 1214 bool bAlwaysGenerateOGRPKId); 1215 explicit OGRGMLASLayer(const char* pszLayerName); 1216 virtual ~OGRGMLASLayer(); 1217 GetName()1218 virtual const char* GetName() override { return GetDescription(); } 1219 virtual OGRFeatureDefn* GetLayerDefn() override; 1220 virtual void ResetReading() override; 1221 virtual OGRFeature* GetNextFeature() override; TestCapability(const char *)1222 virtual int TestCapability( const char* ) override { return FALSE; } 1223 SetDataSource(OGRGMLASDataSource * poDS)1224 void SetDataSource(OGRGMLASDataSource* poDS) { m_poDS = poDS; } 1225 1226 void PostInit(bool bIncludeGeometryXML); 1227 void ProcessDataRecordCreateFields(CPLXMLNode* psDataRecord, 1228 const std::vector<OGRFeature*>& apoFeatures, 1229 OGRLayer* poFieldsMetadataLayer); 1230 void ProcessDataRecordFillFeature(CPLXMLNode* psDataRecord, 1231 OGRFeature* poFeature); 1232 void ProcessDataRecordOfDataArrayCreateFields(OGRGMLASLayer* poParentLayer, 1233 CPLXMLNode* psDataRecord, 1234 OGRLayer* poFieldsMetadataLayer); 1235 void CreateCompoundFoldedMappings(); 1236 GetFeatureClass()1237 const GMLASFeatureClass& GetFeatureClass() const { return m_oFC; } 1238 int GetOGRFieldIndexFromXPath(const CPLString& osXPath) const; 1239 int GetOGRGeomFieldIndexFromXPath(const CPLString& osXPath) const; GetIDFieldIdx()1240 int GetIDFieldIdx() const { return m_nIDFieldIdx; } IsGeneratedIDField()1241 bool IsGeneratedIDField() const { return m_bIDFieldIsGenerated; } GetParent()1242 OGRGMLASLayer* GetParent() { return m_poParentLayer; } GetParentIDFieldIdx()1243 int GetParentIDFieldIdx() const { return m_nParentIDFieldIdx; } 1244 int GetFCFieldIndexFromOGRFieldIdx(int iOGRFieldIdx) const; 1245 int GetFCFieldIndexFromOGRGeomFieldIdx(int iOGRGeomFieldIdx) const; 1246 int GetFCFieldIndexFromXPath(const CPLString& osXPath) const; 1247 1248 bool EvaluateFilter( OGRFeature* poFeature ); 1249 1250 bool RemoveField( int nIdx ); 1251 void InsertNewField( int nInsertPos, 1252 OGRFieldDefn& oFieldDefn, 1253 const CPLString& osXPath ); 1254 1255 CPLString GetXPathOfFieldLinkForAttrToOtherLayer( 1256 const CPLString& osFieldName, 1257 const CPLString& osTargetLayerXPath ); 1258 CPLString CreateLinkForAttrToOtherLayer( const CPLString& osFieldName, 1259 const CPLString& osTargetLayerXPath ); 1260 }; 1261 1262 /************************************************************************/ 1263 /* GMLASReader */ 1264 /************************************************************************/ 1265 1266 class GMLASReader final: public DefaultHandler 1267 { 1268 /** Schema cache */ 1269 GMLASXSDCache& m_oCache; 1270 1271 /** Object to tell if a XPath must be ignored */ 1272 const GMLASXPathMatcher& m_oIgnoredXPathMatcher; 1273 1274 /** XLink resolver */ 1275 GMLASXLinkResolver& m_oXLinkResolver; 1276 1277 /** Whether we should stop parsing */ 1278 bool m_bParsingError; 1279 1280 /** Xerces reader object */ 1281 SAX2XMLReader *m_poSAXReader; 1282 1283 /** Token for Xerces */ 1284 XMLPScanToken m_oToFill; 1285 1286 /** File descriptor (not owned by this object) */ 1287 VSILFILE *m_fp; 1288 1289 /** Input source */ 1290 GMLASInputSource *m_GMLInputSource; 1291 1292 /** Whether we are at the first iteration */ 1293 bool m_bFirstIteration; 1294 1295 /** Whether we have reached end of file (or an error) */ 1296 bool m_bEOF; 1297 1298 /** Whether GetNextFeature() has been user interrupted (progress cbk) */ 1299 bool m_bInterrupted; 1300 1301 /** Error handler (for Xerces reader) */ 1302 GMLASErrorHandler m_oErrorHandler; 1303 1304 /** Map URI namespaces to their prefix */ 1305 std::map<CPLString, CPLString> m_oMapURIToPrefix; 1306 1307 /** List of OGR layers */ 1308 std::vector<OGRGMLASLayer*>* m_papoLayers; 1309 1310 /** Vector of features ready for consumption */ 1311 std::vector< std::pair<OGRFeature*, OGRGMLASLayer*> > m_aoFeaturesReady; 1312 1313 /** OGR field index of the current field */ 1314 int m_nCurFieldIdx; 1315 1316 /** OGR geometry field index of the current field */ 1317 int m_nCurGeomFieldIdx; 1318 1319 /** XML nested level of current field */ 1320 int m_nCurFieldLevel; 1321 1322 /** Whether we should store all content of the current field as XML */ 1323 bool m_bIsXMLBlob; 1324 bool m_bIsXMLBlobIncludeUpper; 1325 1326 /** Content of the current field */ 1327 CPLString m_osTextContent; 1328 1329 /** For list field types, list of content */ 1330 CPLStringList m_osTextContentList; 1331 /** Estimated memory footprint of m_osTextContentList */ 1332 size_t m_nTextContentListEstimatedSize; 1333 1334 /** Which layer is of interest for the reader, or NULL for all */ 1335 OGRGMLASLayer *m_poLayerOfInterest; 1336 1337 /** Stack of length of split XPath components */ 1338 std::vector<size_t> m_anStackXPathLength; 1339 1340 /** Current absolute XPath */ 1341 CPLString m_osCurXPath; 1342 1343 /** Current XPath, relative to top-level feature */ 1344 CPLString m_osCurSubXPath; 1345 1346 /** Current XML nesting level */ 1347 int m_nLevel; 1348 1349 /** Map layer to global FID */ 1350 std::map<OGRLayer*, int> m_oMapGlobalCounter; 1351 1352 /** Parsing context */ 1353 struct Context 1354 { 1355 /** XML nesting level */ 1356 int m_nLevel; 1357 1358 /** Current feature */ 1359 OGRFeature *m_poFeature; 1360 1361 /** Layer of m_poFeature */ 1362 OGRGMLASLayer *m_poLayer; 1363 1364 /** Current layer in a repeated group */ 1365 OGRGMLASLayer *m_poGroupLayer; 1366 1367 /** Nesting level of m_poCurGroupLayer */ 1368 int m_nGroupLayerLevel; 1369 1370 /** Index of the last processed OGR field in m_poCurGroupLayer */ 1371 int m_nLastFieldIdxGroupLayer; 1372 1373 /** Map layer to local FID */ 1374 std::map<OGRLayer*, int> m_oMapCounter; 1375 1376 /** Current XPath, relative to (current) top-level feature */ 1377 CPLString m_osCurSubXPath; 1378 1379 void Dump() const; 1380 }; 1381 1382 /** Current context */ 1383 Context m_oCurCtxt; 1384 1385 /** Stack of saved contexts */ 1386 std::vector<Context> m_aoStackContext; 1387 1388 /** Context used in m_apsXMLNodeStack */ 1389 typedef struct 1390 { 1391 /** Current node */ 1392 CPLXMLNode* psNode; 1393 1394 /** Last child of psNode (for fast append operations) */ 1395 CPLXMLNode* psLastChild; 1396 } NodeLastChild; 1397 1398 /** Stack of contexts to build XML tree of GML Geometry */ 1399 std::vector<NodeLastChild> m_apsXMLNodeStack; 1400 1401 /** Maximum allowed number of XML nesting level */ 1402 int m_nMaxLevel; 1403 1404 /** Maximum allowed size of XML content in byte */ 1405 size_t m_nMaxContentSize; 1406 1407 /** Map from a SRS name to a boolean indicating if its coordinate 1408 order is inverted. */ 1409 std::map<CPLString, bool> m_oMapSRSNameToInvertedAxis; 1410 1411 /** Set of geometry fields with unknown SRS */ 1412 std::set<OGRGeomFieldDefn*> m_oSetGeomFieldsWithUnknownSRS; 1413 1414 /** Map from geometry field definition to its expected SRSName. 1415 This is used to know if reprojection must be done */ 1416 std::map<OGRGeomFieldDefn*, CPLString> m_oMapGeomFieldDefnToSRSName; 1417 1418 /** Whether this parsing involves schema validation */ 1419 bool m_bValidate; 1420 1421 /** Entity resolver used during schema validation */ 1422 GMLASBaseEntityResolver* m_poEntityResolver; 1423 1424 /** First level from which warnings about ignored XPath should be 1425 silent. */ 1426 int m_nLevelSilentIgnoredXPath; 1427 1428 /** Whether a warning should be emitted when an element or attribute is 1429 found in the document parsed, but ignored because of the ignored 1430 XPath defined. */ 1431 std::map<CPLString, bool> m_oMapIgnoredXPathToWarn; 1432 1433 /** Policy to decide when to invert coordinates */ 1434 GMLASSwapCoordinatesEnum m_eSwapCoordinates; 1435 1436 /** Initial pass to guess SRS, etc... */ 1437 bool m_bInitialPass; 1438 1439 /** Whether to process swe:DataArray in a special way */ 1440 bool m_bProcessSWEDataArray; 1441 1442 /** Whether to process swe:DataArray in a special way */ 1443 bool m_bProcessSWEDataRecord; 1444 1445 /** Depth level of the swe:DataArray element */ 1446 int m_nSWEDataArrayLevel; 1447 1448 /** Field name to which the DataArray belongs to */ 1449 CPLString m_osSWEDataArrayParentField; 1450 1451 /** Depth level of the swe:DataRecord element */ 1452 int m_nSWEDataRecordLevel; 1453 1454 OGRLayer *m_poFieldsMetadataLayer; 1455 OGRLayer *m_poLayersMetadataLayer; 1456 OGRLayer *m_poRelationshipsLayer; 1457 1458 /** Base unique identifier */ 1459 CPLString m_osHash; 1460 1461 vsi_l_offset m_nFileSize; 1462 1463 bool m_bWarnUnexpected; 1464 1465 /** Map from layer to a map of field XPath to a set of matching 1466 URL specific resolution rule index */ 1467 std::map<OGRGMLASLayer*, std::map<CPLString, std::set<int> > > m_oMapXLinkFields; 1468 1469 /** Variables that could be local but more efficient to have same 1470 persistent, so as to save many memory allocations/deallocations */ 1471 CPLString m_osLocalname; 1472 CPLString m_osNSUri; 1473 CPLString m_osNSPrefix; 1474 CPLString m_osXPath; 1475 CPLString m_osLayerXPath; 1476 CPLString m_osAttrNSUri; 1477 CPLString m_osAttrNSPrefix; 1478 CPLString m_osAttrLocalName; 1479 CPLString m_osAttrXPath; 1480 CPLString m_osAttrValue; 1481 CPLString m_osText; 1482 1483 std::vector<OGRGMLASLayer*> m_apoSWEDataArrayLayers; 1484 int m_nSWEDataArrayLayerIdx; 1485 1486 /* Set of 3 maps used for xlink:href="#xxxx" internal links resolution */ 1487 /* 1) map the ID attribute to its belonging layer, e.g foo.1 -> layer Foo */ 1488 std::map<CPLString, OGRGMLASLayer*> m_oMapElementIdToLayer; 1489 /* 2) map the ID attribute to the feature PKID (when different from itself) */ 1490 std::map<CPLString, CPLString> m_oMapElementIdToPKID; 1491 /* 3) map each (layer, field_xpath) to the list of ID it refers to */ 1492 /* e.g (layer Bar, field_xpath) -> [foo.1, foo.2] */ 1493 std::map<std::pair<OGRGMLASLayer*, CPLString>, 1494 std::vector<CPLString> > m_oMapFieldXPathToLinkValue; 1495 1496 void SetField( OGRFeature* poFeature, 1497 OGRGMLASLayer* poLayer, 1498 int nAttrIdx, 1499 const CPLString& osAttrValue ); 1500 1501 void CreateNewFeature(const CPLString& osLocalname); 1502 1503 void PushFeatureReady( OGRFeature* poFeature, 1504 OGRGMLASLayer* poLayer ); 1505 1506 void PushContext( const Context& oContext ); 1507 void PopContext(); 1508 1509 void BuildXMLBlobStartElement(const CPLString& osXPath, 1510 const Attributes& attrs); 1511 1512 OGRGMLASLayer* GetLayerByXPath( const CPLString& osXPath ); 1513 1514 void AttachAsLastChild(CPLXMLNode* psNode); 1515 1516 void ProcessSWEDataArray(CPLXMLNode* psRoot); 1517 void ProcessSWEDataRecord(CPLXMLNode* psRoot); 1518 void ProcessGeometry(CPLXMLNode* psRoot); 1519 1520 void ProcessAttributes(const Attributes& attrs); 1521 void ProcessXLinkHref( int nAttrIdx, 1522 const CPLString& osAttrXPath, 1523 const CPLString& osAttrValue ); 1524 void ExploreXMLDoc( const CPLString& osAttrXPath, 1525 const GMLASXLinkResolutionConf::URLSpecificResolution& oRule, 1526 CPLXMLNode* psNode, 1527 const CPLString& osParentXPath, 1528 const GMLASXPathMatcher& oMatcher, 1529 const std::map<CPLString, size_t>& oMapFieldXPathToIdx ); 1530 1531 void CreateFieldsForURLSpecificRules(); 1532 void CreateFieldsForURLSpecificRule( 1533 OGRGMLASLayer* poLayer, 1534 int nFieldIdx, 1535 const CPLString& osFieldXPath, 1536 int& nInsertFieldIdx, 1537 const GMLASXLinkResolutionConf::URLSpecificResolution& oRule ); 1538 FillTextContent()1539 bool FillTextContent() const { return !m_bInitialPass && m_nCurFieldIdx >=0; } 1540 1541 void ProcessInternalXLinkFirstPass(bool bRemoveUnusedFields, 1542 std::map<OGRGMLASLayer*, std::set<CPLString> >&oMapUnusedFields); 1543 1544 public: 1545 GMLASReader(GMLASXSDCache& oCache, 1546 const GMLASXPathMatcher& oIgnoredXPathMatcher, 1547 GMLASXLinkResolver& oXLinkResolver); 1548 ~GMLASReader(); 1549 1550 bool Init(const char* pszFilename, 1551 VSILFILE* fp, 1552 const std::map<CPLString, CPLString>& oMapURIToPrefix, 1553 std::vector<OGRGMLASLayer*>* papoLayers, 1554 bool bValidate, 1555 const std::vector<PairURIFilename>& aoXSDs, 1556 bool bSchemaFullChecking, 1557 bool bHandleMultipleImports ); 1558 1559 void SetLayerOfInterest( OGRGMLASLayer* poLayer ); 1560 SetMapIgnoredXPathToWarn(const std::map<CPLString,bool> & oMap)1561 void SetMapIgnoredXPathToWarn(const std::map<CPLString,bool>& oMap) 1562 { m_oMapIgnoredXPathToWarn = oMap; } 1563 SetSwapCoordinates(GMLASSwapCoordinatesEnum eVal)1564 void SetSwapCoordinates(GMLASSwapCoordinatesEnum eVal) 1565 { m_eSwapCoordinates = eVal; } 1566 GetFP()1567 VSILFILE* GetFP() const { return m_fp; } 1568 GetMapSRSNameToInvertedAxis()1569 const std::map<CPLString, bool>& GetMapSRSNameToInvertedAxis() const 1570 { return m_oMapSRSNameToInvertedAxis; } SetMapSRSNameToInvertedAxis(const std::map<CPLString,bool> & oMap)1571 void SetMapSRSNameToInvertedAxis( const std::map<CPLString, bool>& oMap ) 1572 { m_oMapSRSNameToInvertedAxis = oMap; } 1573 GetMapGeomFieldDefnToSRSName()1574 const std::map<OGRGeomFieldDefn*, CPLString>& GetMapGeomFieldDefnToSRSName() const 1575 { return m_oMapGeomFieldDefnToSRSName; } SetMapGeomFieldDefnToSRSName(const std::map<OGRGeomFieldDefn *,CPLString> & oMap)1576 void SetMapGeomFieldDefnToSRSName(const std::map<OGRGeomFieldDefn*, CPLString>& oMap ) 1577 { m_oMapGeomFieldDefnToSRSName = oMap; } 1578 GetMapElementIdToLayer()1579 const std::map<CPLString, OGRGMLASLayer*>& GetMapElementIdToLayer() const 1580 { return m_oMapElementIdToLayer; } SetMapElementIdToLayer(std::map<CPLString,OGRGMLASLayer * > & oMap)1581 void SetMapElementIdToLayer(std::map<CPLString, OGRGMLASLayer*>& oMap) 1582 { m_oMapElementIdToLayer = oMap; } 1583 GetMapElementIdToPKID()1584 const std::map<CPLString, CPLString>& GetMapElementIdToPKID() const 1585 { return m_oMapElementIdToPKID; } SetMapElementIdToPKID(const std::map<CPLString,CPLString> & oMap)1586 void SetMapElementIdToPKID(const std::map<CPLString, CPLString>& oMap ) 1587 { m_oMapElementIdToPKID = oMap; } 1588 SetHash(const CPLString & osHash)1589 void SetHash(const CPLString& osHash) { m_osHash = osHash; } 1590 SetFileSize(vsi_l_offset nFileSize)1591 void SetFileSize(vsi_l_offset nFileSize) { m_nFileSize = nFileSize; } 1592 1593 OGRFeature* GetNextFeature( OGRGMLASLayer** ppoBelongingLayer = nullptr, 1594 GDALProgressFunc pfnProgress = nullptr, 1595 void* pProgressData = nullptr ); 1596 1597 virtual void startElement( 1598 const XMLCh* const uri, 1599 const XMLCh* const localname, 1600 const XMLCh* const qname, 1601 const Attributes& attrs 1602 ) override; 1603 virtual void endElement( 1604 const XMLCh* const uri, 1605 const XMLCh* const localname, 1606 const XMLCh* const qname 1607 ) override; 1608 1609 virtual void characters( const XMLCh *const chars, 1610 const XMLSize_t length ) override; 1611 1612 bool RunFirstPass(GDALProgressFunc pfnProgress, 1613 void* pProgressData, 1614 bool bRemoveUnusedLayers, 1615 bool bRemoveUnusedFields, 1616 bool bProcessSWEDataArray, 1617 OGRLayer* poFieldsMetadataLayer, 1618 OGRLayer* poLayersMetadataLayer, 1619 OGRLayer* poRelationshipsLayer, 1620 std::set<CPLString>& aoSetRemovedLayerNames); 1621 1622 static bool LoadXSDInParser( SAX2XMLReader* poParser, 1623 GMLASXSDCache& oCache, 1624 GMLASBaseEntityResolver& oXSDEntityResolver, 1625 const CPLString& osBaseDirname, 1626 const CPLString& osXSDFilename, 1627 Grammar** ppoGrammar, 1628 bool bSchemaFullChecking, 1629 bool bHandleMultipleImports ); 1630 1631 void SetSWEDataArrayLayers( const std::vector<OGRGMLASLayer*>& ar ); SetProcessDataRecord(bool b)1632 void SetProcessDataRecord(bool b) { m_bProcessSWEDataRecord = b; } GetSWEDataArrayLayers()1633 const std::vector<OGRGMLASLayer*>& GetSWEDataArrayLayers() const 1634 { return m_apoSWEDataArrayLayers; } 1635 }; 1636 1637 CPLString OGRGMLASTruncateIdentifier(const CPLString& osName, 1638 int nIdentMaxLength); 1639 1640 CPLString OGRGMLASAddSerialNumber(const CPLString& osNameIn, 1641 int iOccurrence, 1642 size_t nOccurrences, 1643 int nIdentMaxLength); 1644 1645 #endif // OGR_GMLAS_INCLUDED 1646