/******************************************************************************
 * $Id: gmlreaderp.h 2b939583852d95f8724a939c927361cf40e81500 2020-10-22 11:42:59 +0200 Even Rouault $
 *
 * Project:  GML Reader
 * Purpose:  Private Declarations for OGR free GML Reader code.
 * Author:   Frank Warmerdam, warmerdam@pobox.com
 *
 ******************************************************************************
 * Copyright (c) 2002, Frank Warmerdam
 * Copyright (c) 2008-2013, Even Rouault <even dot rouault at spatialys.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 ****************************************************************************/
30 
31 #ifndef CPL_GMLREADERP_H_INCLUDED
32 #define CPL_GMLREADERP_H_INCLUDED
33 
34 #if defined(HAVE_XERCES)
35 
36 // Must be first for DEBUG_BOOL case
37 #include "xercesc_headers.h"
38 #include "ogr_xerces.h"
39 
40 #endif /* HAVE_XERCES */
41 
42 #include "cpl_string.h"
43 #include "gmlreader.h"
44 #include "ogr_api.h"
45 #include "cpl_vsi.h"
46 #include "cpl_multiproc.h"
47 #include "gmlutils.h"
48 
49 #include <string>
50 #include <vector>
51 
/** Parser buffer size constant: 10 * 8192 = 81920.
 *  NOTE(review): presumably the size in bytes of the chunks read from the
 *  GML file and handed to the XML parser -- confirm against the reader
 *  implementation, which is not part of this header. */
#define PARSER_BUF_SIZE (10*8192)

// Forward declaration so that the handler classes below can hold a
// GMLReader pointer before the class itself is defined.
class GMLReader;

// Opaque type: only the name is introduced here, the structure layout is
// defined elsewhere.
typedef struct _GeometryNamesStruct GeometryNamesStruct;

/** Takes an XML element name and returns a boolean.
 *  NOTE(review): by its name, reports whether pszElement is a recognized
 *  GML geometry element -- the definition is not visible in this header. */
bool OGRGMLIsGeometryElement(const char* pszElement);
59 
/************************************************************************/
/*                        GFSTemplateList                               */
/************************************************************************/

class GFSTemplateItem;

/**
 * List of GFSTemplateItem entries, tracked through a first and a last
 * pointer, together with a "sequential layers" flag.
 *
 * Only the two inline accessors are defined in this header; the
 * constructor, destructor, Insert(), Update() and GetClassCount() are
 * implemented elsewhere.
 *
 * NOTE(review): the class has raw pointer members and a user-declared
 * destructor but does not disable copying -- confirm that instances are
 * never copied.
 */
class GFSTemplateList
{
  private:
    bool            m_bSequentialLayers;
    GFSTemplateItem *pFirst;
    GFSTemplateItem *pLast;
    GFSTemplateItem *Insert( const char *pszName );

  public:
                    GFSTemplateList();
                    ~GFSTemplateList();
    void            Update( const char *pszName, int bHasGeom );

    /** Returns the pFirst pointer (head of the list). */
    GFSTemplateItem *GetFirst() const { return pFirst; }

    /** Returns the current value of the sequential-layers flag. */
    bool            HaveSequentialLayers() const { return m_bSequentialLayers; }

    int             GetClassCount();
};
82 
83 void gmlUpdateFeatureClasses ( GFSTemplateList *pCC,
84                                GMLReader *pReader,
85                                int *pnHasSequentialLayers );
86 
/************************************************************************/
/*                              GMLHandler                              */
/************************************************************************/

/** Number of entries in the GMLHandler::stateStack array. */
#define STACK_SIZE 5

/**
 * States stored in the GMLHandler state stack.
 *
 * The enumerators take the implicit values 0 (STATE_TOP) through
 * 8 (STATE_CITYGML_ATTRIBUTE).
 * NOTE(review): each state appears to pair with the
 * startElementXXX / endElementXXX / dataHandlerXXX private methods of
 * GMLHandler -- confirm against the handler implementation.
 */
typedef enum
{
    STATE_TOP,
    STATE_DEFAULT,
    STATE_FEATURE,
    STATE_PROPERTY,
    STATE_FEATUREPROPERTY,
    STATE_GEOMETRY,
    STATE_IGNORED_FEATURE,
    STATE_BOUNDED_BY,
    STATE_CITYGML_ATTRIBUTE
} HandlerState;
105 
106 typedef struct
107 {
108     CPLXMLNode* psNode;
109     CPLXMLNode* psLastChild;
110 } NodeLastChild;
111 
/**
 * Application schema flavours distinguished by the handler.
 *
 * Enumerators take the implicit values 0 (APPSCHEMA_GENERIC) through
 * 3 (APPSCHEMA_MTKGML).
 */
typedef enum
{
    APPSCHEMA_GENERIC,
    APPSCHEMA_CITYGML,
    APPSCHEMA_AIXM,
    APPSCHEMA_MTKGML /* format of the National Land Survey of Finland */
} GMLAppSchemaType;
119 
120 class GMLHandler
121 {
122     char      *m_pszCurField;
123     unsigned int m_nCurFieldAlloc;
124     unsigned int m_nCurFieldLen;
125     bool       m_bInCurField;
126     int        m_nAttributeIndex;
127     int        m_nAttributeDepth;
128 
129     char      *m_pszGeometry;
130     unsigned int m_nGeomAlloc;
131     unsigned int m_nGeomLen;
132     int        m_nGeometryDepth;
133     bool       m_bAlreadyFoundGeometry;
134     int        m_nGeometryPropertyIndex;
135 
136     int        m_nDepth;
137     int        m_nDepthFeature;
138 
139     int        m_inBoundedByDepth;
140 
141     char      *m_pszCityGMLGenericAttrName;
142     int        m_inCityGMLGenericAttrDepth;
143 
144     bool       m_bReportHref;
145     char      *m_pszHref;
146     char      *m_pszUom;
147     char      *m_pszValue;
148     char      *m_pszKieli;
149 
150     GeometryNamesStruct* pasGeometryNames;
151 
152     std::vector<NodeLastChild> apsXMLNode;
153 
154     int        m_nSRSDimensionIfMissing;
155 
156     OGRErr     startElementTop(const char *pszName, int nLenName, void* attr);
157 
158     OGRErr     endElementIgnoredFeature();
159 
160     OGRErr     startElementBoundedBy(const char *pszName, int nLenName, void* attr);
161     OGRErr     endElementBoundedBy();
162 
163     OGRErr     startElementFeatureAttribute(const char *pszName, int nLenName, void* attr);
164     OGRErr     endElementFeature();
165 
166     OGRErr     startElementCityGMLGenericAttr(const char *pszName, int nLenName, void* attr);
167     OGRErr     endElementCityGMLGenericAttr();
168 
169     OGRErr     startElementGeometry(const char *pszName, int nLenName, void* attr);
170     CPLXMLNode* ParseAIXMElevationPoint(CPLXMLNode*);
171     OGRErr     endElementGeometry();
172     OGRErr     dataHandlerGeometry(const char *data, int nLen);
173 
174     OGRErr     endElementAttribute();
175     OGRErr     dataHandlerAttribute(const char *data, int nLen);
176 
177     OGRErr     startElementDefault(const char *pszName, int nLenName, void* attr);
178     OGRErr     endElementDefault();
179 
180     OGRErr     startElementFeatureProperty(const char *pszName, int nLenName, void* attr);
181     OGRErr     endElementFeatureProperty();
182 
183     void       DealWithAttributes(const char *pszName, int nLenName, void* attr );
184     bool       IsConditionMatched(const char* pszCondition, void* attr);
185     int        FindRealPropertyByCheckingConditions(int nIdx, void* attr);
186 
187 protected:
188     GMLReader  *m_poReader;
189     GMLAppSchemaType eAppSchemaType;
190 
191     int              nStackDepth;
192     HandlerState     stateStack[STACK_SIZE];
193 
194     CPLString           m_osFID;
195     virtual const char* GetFID(void* attr) = 0;
196 
197     virtual CPLXMLNode* AddAttributes(CPLXMLNode* psNode, void* attr) = 0;
198 
199     OGRErr      startElement(const char *pszName, int nLenName, void* attr);
200     OGRErr      endElement();
201     OGRErr      dataHandler(const char *data, int nLen);
202 
203     bool       IsGeometryElement( const char *pszElement );
204 
205 public:
206     explicit GMLHandler( GMLReader *poReader );
207     virtual ~GMLHandler();
208 
209     virtual char*       GetAttributeValue(void* attr, const char* pszAttributeName) = 0;
210     virtual char*       GetAttributeByIdx(void* attr, unsigned int idx, char** ppszKey) = 0;
211 };
212 
#if defined(HAVE_XERCES)

/************************************************************************/
/*                         GMLXercesHandler                             */
/************************************************************************/

/**
 * GML handler for the Xerces parser; only compiled when HAVE_XERCES is
 * defined.
 *
 * Derives from both the Xerces DefaultHandler (SAX callbacks) and
 * GMLHandler (parser-neutral logic), and implements the four pure virtual
 * attribute accessors of GMLHandler.
 *
 * Note that the startElement()/endElement() overrides declared here hide
 * the protected GMLHandler overloads of the same names, so those must be
 * called with an explicit GMLHandler:: qualification from this class.
 */
class GMLXercesHandler final: public DefaultHandler, public GMLHandler
{
    int        m_nEntityCounter;
    CPLString  m_osElement;
    CPLString  m_osCharacters;
    CPLString  m_osAttrName;
    CPLString  m_osAttrValue;

  public:
    explicit GMLXercesHandler( GMLReader *poReader );

    // SAX callbacks overridden from DefaultHandler.
    void startElement(
        const   XMLCh* const    uri,
        const   XMLCh* const    localname,
        const   XMLCh* const    qname,
        const   Attributes& attrs
    ) override;
    void endElement(
        const   XMLCh* const    uri,
        const   XMLCh* const    localname,
        const   XMLCh* const    qname
    ) override;
    void characters( const XMLCh *const chars,
                     const XMLSize_t length ) override;

    void fatalError(const SAXParseException&) override;

    void startEntity (const XMLCh *const name) override;

    // Implementation of the GMLHandler attribute interface.
    const char* GetFID(void* attr) override;
    CPLXMLNode* AddAttributes(CPLXMLNode* psNode, void* attr) override;
    char*       GetAttributeValue(void* attr, const char* pszAttributeName) override;
    char*       GetAttributeByIdx(void* attr, unsigned int idx, char** ppszKey) override;
};

#endif
254 
#if defined(HAVE_EXPAT)

#include "ogr_expat.h"

/************************************************************************/
/*                           GMLExpatHandler                            */
/************************************************************************/

/**
 * GML handler for the Expat parser; only compiled when HAVE_EXPAT is
 * defined.
 *
 * Implements the four pure virtual attribute accessors of GMLHandler and
 * exposes three static XMLCALL callbacks taking a void* user-data pointer.
 * NOTE(review): presumably the callbacks are registered on the XML_Parser
 * with the handler instance as user data -- confirm in the implementation.
 */
class GMLExpatHandler final: public GMLHandler
{
    XML_Parser m_oParser;
    bool       m_bStopParsing;
    int        m_nDataHandlerCounter;

public:
    GMLExpatHandler( GMLReader *poReader, XML_Parser oParser );

    /** Returns the current value of the stop-parsing flag. */
    bool        HasStoppedParsing() const { return m_bStopParsing; }

    /** Resets the data-handler call counter to zero. */
    void        ResetDataHandlerCounter() { m_nDataHandlerCounter = 0; }

    // Implementation of the GMLHandler attribute interface.
    const char* GetFID(void* attr) override;
    CPLXMLNode* AddAttributes(CPLXMLNode* psNode, void* attr) override;
    char*       GetAttributeValue(void* attr, const char* pszAttributeName) override;
    char*       GetAttributeByIdx(void* attr, unsigned int idx, char** ppszKey) override;

    // Static callbacks with the Expat calling convention (XMLCALL).
    static void XMLCALL startElementCbk(void *pUserData, const char *pszName,
                                        const char **ppszAttr);

    static void XMLCALL endElementCbk(void *pUserData, const char *pszName);

    static void XMLCALL dataHandlerCbk(void *pUserData, const char *data, int nLen);
};

#endif
289 
290 /************************************************************************/
291 /*                             GMLReadState                             */
292 /************************************************************************/
293 
294 class GMLReadState
295 {
296     std::vector<std::string> aosPathComponents;
297 
298 public:
299     GMLReadState();
300     ~GMLReadState();
301 
302     void        PushPath( const char *pszElement, int nLen = -1 );
303     void        PopPath();
304 
GetLastComponent()305     const char  *GetLastComponent() const {
306         return ( m_nPathLength == 0 ) ? "" : aosPathComponents[m_nPathLength-1].c_str();
307     }
308 
GetLastComponentLen()309     size_t GetLastComponentLen() const {
310         return ( m_nPathLength == 0 ) ? 0: aosPathComponents[m_nPathLength-1].size();
311     }
312 
313     void        Reset();
314 
315     GMLFeature  *m_poFeature;
316     GMLReadState *m_poParentState;
317 
318     std::string  osPath; // element path ... | as separator.
319     int          m_nPathLength;
320 };
321 
322 /************************************************************************/
323 /*                              GMLReader                               */
324 /************************************************************************/
325 
326 class GMLReader final: public IGMLReader
327 {
328   private:
329     bool          m_bClassListLocked;
330 
331     int         m_nClassCount;
332     GMLFeatureClass **m_papoClass;
333     bool          m_bLookForClassAtAnyLevel;
334 
335     char          *m_pszFilename;
336 
337     bool           bUseExpatReader;
338 
339     GMLHandler    *m_poGMLHandler;
340 
341 #if defined(HAVE_XERCES)
342     SAX2XMLReader *m_poSAXReader;
343     XMLPScanToken m_oToFill;
344     GMLFeature   *m_poCompleteFeature;
345     InputSource  *m_GMLInputSource;
346     bool          m_bEOF;
347     bool          m_bXercesInitialized;
348     bool          SetupParserXerces();
349     GMLFeature   *NextFeatureXerces();
350 #endif
351 
352 #if defined(HAVE_EXPAT)
353     XML_Parser    oParser;
354     GMLFeature ** ppoFeatureTab;
355     int           nFeatureTabLength;
356     int           nFeatureTabIndex;
357     int           nFeatureTabAlloc;
358     bool          SetupParserExpat();
359     GMLFeature   *NextFeatureExpat();
360     char         *pabyBuf;
361     CPLString     m_osErrorMessage{};
362 #endif
363 
364     VSILFILE*     fpGML;
365     bool          m_bReadStarted;
366 
367     GMLReadState *m_poState;
368     GMLReadState *m_poRecycledState;
369 
370     bool          m_bStopParsing;
371 
372     bool          SetupParser();
373     void          CleanupParser();
374 
375     bool          m_bFetchAllGeometries;
376 
377     bool          m_bInvertAxisOrderIfLatLong;
378     bool          m_bConsiderEPSGAsURN;
379     GMLSwapCoordinatesEnum m_eSwapCoordinates;
380     bool          m_bGetSecondaryGeometryOption;
381 
382     int           ParseFeatureType(CPLXMLNode *psSchemaNode,
383                                 const char* pszName,
384                                 const char *pszType);
385 
386     char         *m_pszGlobalSRSName;
387     bool          m_bCanUseGlobalSRSName;
388 
389     char         *m_pszFilteredClassName;
390     int           m_nFilteredClassIndex;
391 
392     int           m_nHasSequentialLayers;
393 
394     std::string   osElemPath;
395 
396     bool          m_bFaceHoleNegative;
397 
398     bool          m_bSetWidthFlag;
399 
400     bool          m_bReportAllAttributes;
401 
402     bool          m_bIsWFSJointLayer;
403 
404     bool          m_bEmptyAsNull;
405 
406     bool          ParseXMLHugeFile( const char *pszOutputFilename,
407                                     const bool bSqliteIsTempFile,
408                                     const int iSqliteCacheMB );
409 
410 public:
411                 GMLReader(bool bExpatReader, bool bInvertAxisOrderIfLatLong,
412                           bool bConsiderEPSGAsURN,
413                           GMLSwapCoordinatesEnum eSwapCoordinates,
414                           bool bGetSecondaryGeometryOption);
415     virtual     ~GMLReader();
416 
IsClassListLocked()417     bool             IsClassListLocked() const override { return m_bClassListLocked; }
SetClassListLocked(bool bFlag)418     void             SetClassListLocked( bool bFlag ) override
419         { m_bClassListLocked = bFlag; }
420 
421     void             SetSourceFile( const char *pszFilename ) override;
422     void             SetFP( VSILFILE* fp ) override;
423     const char*      GetSourceFileName() override;
424 
GetClassCount()425     int              GetClassCount() const override { return m_nClassCount; }
426     GMLFeatureClass *GetClass( int i ) const override;
427     GMLFeatureClass *GetClass( const char *pszName ) const override;
428 
429     int              AddClass( GMLFeatureClass *poClass ) override;
430     void             ClearClasses() override;
431 
432     GMLFeature       *NextFeature() override;
433 
434     bool             LoadClasses( const char *pszFile = nullptr ) override;
435     bool             SaveClasses( const char *pszFile = nullptr ) override;
436 
437     bool             ResolveXlinks( const char *pszFile,
438                                     bool* pbOutIsTempFile,
439                                     char **papszSkip = nullptr,
440                                     const bool bStrict = false ) override;
441 
442     bool             HugeFileResolver( const char *pszFile,
443                                        bool bSqliteIsTempFile,
444                                        int iSqliteCacheMB ) override;
445 
446     bool             PrescanForSchema(bool bGetExtents = true,
447                                       bool bOnlyDetectSRS = false ) override;
448     bool             PrescanForTemplate() override;
449     bool             ReArrangeTemplateClasses( GFSTemplateList *pCC );
450     void             ResetReading() override;
451 
452 // ---
453 
GetState()454     GMLReadState     *GetState() const { return m_poState; }
455     void             PopState();
456     void             PushState( GMLReadState * );
457 
ShouldLookForClassAtAnyLevel()458     bool             ShouldLookForClassAtAnyLevel() { return m_bLookForClassAtAnyLevel; }
459 
460     int         GetFeatureElementIndex( const char *pszElement, int nLen, GMLAppSchemaType eAppSchemaType );
461     int         GetAttributeElementIndex( const char *pszElement, int nLen, const char* pszAttrKey = nullptr );
462     bool        IsCityGMLGenericAttributeElement( const char *pszElement, void* attr );
463 
464     void        PushFeature( const char *pszElement,
465                              const char *pszFID,
466                              int nClassIndex );
467 
468     void        SetFeaturePropertyDirectly( const char *pszElement,
469                                             char *pszValue,
470                                             int iPropertyIn,
471                                             GMLPropertyType eType = GMLPT_Untyped );
472 
SetWidthFlag(bool bFlag)473     void        SetWidthFlag(bool bFlag) { m_bSetWidthFlag = bFlag; }
474 
HasStoppedParsing()475     bool        HasStoppedParsing() override { return m_bStopParsing; }
476 
FetchAllGeometries()477     bool       FetchAllGeometries() { return m_bFetchAllGeometries; }
478 
479     void        SetGlobalSRSName( const char* pszGlobalSRSName ) override ;
GetGlobalSRSName()480     const char* GetGlobalSRSName() override { return m_pszGlobalSRSName; }
481 
CanUseGlobalSRSName()482     bool        CanUseGlobalSRSName() override { return m_bCanUseGlobalSRSName; }
483 
484     bool        SetFilteredClassName(const char* pszClassName) override;
GetFilteredClassName()485     const char* GetFilteredClassName() override { return m_pszFilteredClassName; }
GetFilteredClassIndex()486     int         GetFilteredClassIndex() { return m_nFilteredClassIndex; }
487 
IsSequentialLayers()488     bool        IsSequentialLayers() const override { return m_nHasSequentialLayers == TRUE; }
489 
SetReportAllAttributes(bool bFlag)490     void        SetReportAllAttributes(bool bFlag) { m_bReportAllAttributes = bFlag; }
ReportAllAttributes()491     bool        ReportAllAttributes() const { return m_bReportAllAttributes; }
492 
SetIsWFSJointLayer(bool bFlag)493     void             SetIsWFSJointLayer( bool bFlag ) { m_bIsWFSJointLayer = bFlag; }
IsWFSJointLayer()494     bool             IsWFSJointLayer() const { return m_bIsWFSJointLayer; }
495 
SetEmptyAsNull(bool bFlag)496     void             SetEmptyAsNull( bool bFlag ) { m_bEmptyAsNull = bFlag; }
IsEmptyAsNull()497     bool             IsEmptyAsNull() const { return m_bEmptyAsNull; }
498 
499     static CPLMutex* hMutex;
500 };
501 
502 #endif /* CPL_GMLREADERP_H_INCLUDED */
503