1 /******************************************************************************
2  *
3  * Project:  GML Reader
4  * Purpose:  Implementation of GMLParseXSD()
5  * Author:   Frank Warmerdam, warmerdam@pobox.com
6  *
7  ******************************************************************************
8  * Copyright (c) 2005, Frank Warmerdam
9  * Copyright (c) 2010-2014, Even Rouault <even dot rouault at spatialys.com>
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 #include "cpl_port.h"
31 #include "parsexsd.h"
32 
33 #include <cstdlib>
34 #include <cstring>
35 #include <set>
36 #include <string>
37 #include <utility>
38 
39 #include "cpl_conv.h"
40 #include "cpl_error.h"
41 #include "cpl_http.h"
42 #include "cpl_minixml.h"
43 #include "cpl_string.h"
44 #include "ogr_core.h"
45 
46 CPL_CVSID("$Id: parsexsd.cpp 7c24d0c095ca05eba45ed40003d37e698bbee248 2020-09-24 14:51:48 +0200 Even Rouault $")
47 
48 /************************************************************************/
49 /*                              StripNS()                               */
50 /*                                                                      */
51 /*      Return potentially shortened form of string with namespace      */
52 /*      stripped off if there is one.  Returns pointer into             */
53 /*      original string.                                                */
54 /************************************************************************/
// Strip a leading "prefix:" namespace qualifier from a qualified XML name.
//
// pszFullValue must be a valid NUL-terminated string. The returned pointer
// aliases pszFullValue (no allocation): it points just past the first ':'
// when there is one, otherwise at the start of the input.
static const char *StripNS(const char *pszFullValue)

{
    // Single-character search: strchr() is the idiomatic (and cheaper)
    // call compared to strstr() with a one-character needle.
    const char *pszColon = strchr(pszFullValue, ':');
    return pszColon != nullptr ? pszColon + 1 : pszFullValue;
}
64 
65 /************************************************************************/
66 /*                   GetSimpleTypeProperties()                          */
67 /************************************************************************/
68 
69 static
GetSimpleTypeProperties(CPLXMLNode * psTypeNode,GMLPropertyType * pGMLType,int * pnWidth,int * pnPrecision)70 bool GetSimpleTypeProperties(CPLXMLNode *psTypeNode,
71                              GMLPropertyType *pGMLType,
72                              int *pnWidth,
73                              int *pnPrecision)
74 {
75     const char *pszBase =
76         StripNS(CPLGetXMLValue(psTypeNode, "restriction.base", ""));
77 
78     if( EQUAL(pszBase, "decimal") )
79     {
80         *pGMLType = GMLPT_Real;
81         const char *pszWidth =
82             CPLGetXMLValue(psTypeNode, "restriction.totalDigits.value", "0");
83         const char *pszPrecision =
84             CPLGetXMLValue(psTypeNode, "restriction.fractionDigits.value", "0");
85         *pnWidth = atoi(pszWidth);
86         *pnPrecision = atoi(pszPrecision);
87         return true;
88     }
89 
90      else if( EQUAL(pszBase, "float") )
91     {
92         *pGMLType = GMLPT_Float;
93         return true;
94     }
95 
96     else if( EQUAL(pszBase, "double") )
97     {
98         *pGMLType = GMLPT_Real;
99         return true;
100     }
101 
102     else if( EQUAL(pszBase, "integer") )
103     {
104         *pGMLType = GMLPT_Integer;
105         const char *pszWidth =
106             CPLGetXMLValue(psTypeNode, "restriction.totalDigits.value", "0");
107         *pnWidth = atoi(pszWidth);
108         return true;
109     }
110 
111     else if( EQUAL(pszBase, "long") )
112     {
113         *pGMLType = GMLPT_Integer64;
114         const char *pszWidth =
115             CPLGetXMLValue(psTypeNode, "restriction.totalDigits.value", "0");
116         *pnWidth = atoi(pszWidth);
117         return true;
118     }
119 
120     else if( EQUAL(pszBase, "unsignedLong") )
121     {
122         // Optimistically map to signed integer...
123         *pGMLType = GMLPT_Integer64;
124         const char *pszWidth =
125             CPLGetXMLValue(psTypeNode, "restriction.totalDigits.value", "0");
126         *pnWidth = atoi(pszWidth);
127         return true;
128     }
129 
130     else if( EQUAL(pszBase, "string") )
131     {
132         *pGMLType = GMLPT_String;
133         const char *pszWidth =
134             CPLGetXMLValue(psTypeNode, "restriction.maxLength.value", "0");
135         *pnWidth = atoi(pszWidth);
136         return true;
137     }
138 
139     else if( EQUAL(pszBase, "date")  )
140     {
141         *pGMLType = GMLPT_Date;
142         return true;
143     }
144 
145     else if( EQUAL(pszBase, "time")  )
146     {
147         *pGMLType = GMLPT_Time;
148         return true;
149     }
150 
151     else if( EQUAL(pszBase, "dateTime") )
152     {
153         *pGMLType = GMLPT_DateTime;
154         return true;
155     }
156 
157     else if( EQUAL(pszBase, "boolean") )
158     {
159         *pGMLType = GMLPT_Boolean;
160         return true;
161     }
162 
163     else if( EQUAL(pszBase, "short") )
164     {
165         *pGMLType = GMLPT_Short;
166         return true;
167     }
168 
169     return false;
170 }
171 
172 /************************************************************************/
173 /*                      LookForSimpleType()                             */
174 /************************************************************************/
175 
176 static
LookForSimpleType(CPLXMLNode * psSchemaNode,const char * pszStrippedNSType,GMLPropertyType * pGMLType,int * pnWidth,int * pnPrecision)177 bool LookForSimpleType(CPLXMLNode *psSchemaNode,
178                       const char* pszStrippedNSType,
179                       GMLPropertyType *pGMLType,
180                       int *pnWidth,
181                       int *pnPrecision)
182 {
183     CPLXMLNode *psThis = psSchemaNode->psChild;
184     for( ; psThis != nullptr; psThis = psThis->psNext )
185     {
186         if( psThis->eType == CXT_Element &&
187             EQUAL(psThis->pszValue, "simpleType") &&
188             EQUAL(CPLGetXMLValue(psThis, "name", ""), pszStrippedNSType) )
189         {
190             break;
191         }
192     }
193     if( psThis == nullptr )
194         return false;
195 
196     return GetSimpleTypeProperties(psThis, pGMLType, pnWidth, pnPrecision);
197 }
198 
199 /************************************************************************/
200 /*                      GetSingleChildElement()                         */
201 /************************************************************************/
202 
203 /* Returns the child element whose name is pszExpectedValue only if */
204 /* there is only one child that is an element. */
205 static
GetSingleChildElement(CPLXMLNode * psNode,const char * pszExpectedValue)206 CPLXMLNode *GetSingleChildElement(CPLXMLNode *psNode,
207                                   const char *pszExpectedValue)
208 {
209     if( psNode == nullptr )
210         return nullptr;
211 
212     CPLXMLNode *psIter = psNode->psChild;
213     if( psIter == nullptr )
214         return nullptr;
215 
216     CPLXMLNode *psChild = nullptr;
217     while( psIter != nullptr )
218     {
219         if( psIter->eType == CXT_Element )
220         {
221             if( psChild != nullptr )
222                 return nullptr;
223             if( pszExpectedValue != nullptr &&
224                 strcmp(psIter->pszValue, pszExpectedValue) != 0 )
225                 return nullptr;
226             psChild = psIter;
227         }
228         psIter = psIter->psNext;
229     }
230     return psChild;
231 }
232 
233 /************************************************************************/
234 /*                      CheckMinMaxOccursCardinality()                  */
235 /************************************************************************/
236 
CheckMinMaxOccursCardinality(CPLXMLNode * psNode)237 static int CheckMinMaxOccursCardinality(CPLXMLNode *psNode)
238 {
239     const char *pszMinOccurs = CPLGetXMLValue(psNode, "minOccurs", nullptr);
240     const char *pszMaxOccurs = CPLGetXMLValue(psNode, "maxOccurs", nullptr);
241     return (pszMinOccurs == nullptr || EQUAL(pszMinOccurs, "0") ||
242             EQUAL(pszMinOccurs, "1")) &&
243            (pszMaxOccurs == nullptr || EQUAL(pszMaxOccurs, "1"));
244 }
245 
246 /************************************************************************/
247 /*                     GetListTypeFromSingleType()                      */
248 /************************************************************************/
249 
GetListTypeFromSingleType(GMLPropertyType eType)250 static GMLPropertyType GetListTypeFromSingleType(GMLPropertyType eType)
251 {
252     if( eType == GMLPT_String )
253         return GMLPT_StringList;
254     if( eType == GMLPT_Integer || eType == GMLPT_Short )
255         return GMLPT_IntegerList;
256     if( eType == GMLPT_Integer64 )
257         return GMLPT_Integer64List;
258     if( eType == GMLPT_Real || eType == GMLPT_Float )
259         return GMLPT_RealList;
260     if( eType == GMLPT_Boolean )
261         return GMLPT_BooleanList;
262     if( eType == GMLPT_FeatureProperty )
263         return GMLPT_FeaturePropertyList;
264     return eType;
265 }
266 
267 /************************************************************************/
268 /*                      ParseFeatureType()                              */
269 /************************************************************************/
270 
271 typedef struct
272 {
273     const char *pszName;
274     OGRwkbGeometryType eType;
275 } AssocNameType;
276 
277 static const AssocNameType apsPropertyTypes[] =
278 {
279     {"GeometryPropertyType", wkbUnknown},
280     {"PointPropertyType", wkbPoint},
281     {"LineStringPropertyType", wkbLineString},
282     {"CurvePropertyType", wkbCompoundCurve},
283     {"PolygonPropertyType", wkbPolygon},
284     {"SurfacePropertyType", wkbCurvePolygon},
285     {"MultiPointPropertyType", wkbMultiPoint},
286     {"MultiLineStringPropertyType", wkbMultiLineString},
287     {"MultiCurvePropertyType", wkbMultiCurve},
288     {"MultiPolygonPropertyType", wkbMultiPolygon},
289     {"MultiSurfacePropertyType", wkbMultiSurface},
290     {"MultiGeometryPropertyType", wkbGeometryCollection},
291     {"GeometryAssociationType", wkbUnknown},
292     {nullptr, wkbUnknown},
293 };
294 
295 /* Found in FME .xsd  (e.g. <element ref="gml:curveProperty" minOccurs="0"/>) */
296 static const AssocNameType apsRefTypes[] =
297 {
298     {"pointProperty", wkbPoint},
299     {"curveProperty", wkbLineString}, // Should we promote to wkbCompoundCurve?
300     {"surfaceProperty", wkbPolygon},  // Should we promote to wkbCurvePolygon?
301     {"multiPointProperty", wkbMultiPoint},
302     {"multiCurveProperty", wkbMultiLineString},
303     // Should we promote to wkbMultiSurface?
304     {"multiSurfaceProperty", wkbMultiPolygon},
305     {nullptr, wkbUnknown},
306 };
307 
308 static
309 GMLFeatureClass *GMLParseFeatureType(CPLXMLNode *psSchemaNode,
310                                      const char *pszName,
311                                      CPLXMLNode *psThis);
312 
313 static
GMLParseFeatureType(CPLXMLNode * psSchemaNode,const char * pszName,const char * pszType)314 GMLFeatureClass *GMLParseFeatureType(CPLXMLNode *psSchemaNode,
315                                 const char *pszName,
316                                 const char *pszType)
317 {
318     CPLXMLNode *psThis = psSchemaNode->psChild;
319     for( ; psThis != nullptr; psThis = psThis->psNext )
320     {
321         if( psThis->eType == CXT_Element &&
322             EQUAL(psThis->pszValue, "complexType") &&
323             EQUAL(CPLGetXMLValue(psThis, "name", ""), pszType) )
324         {
325             break;
326         }
327     }
328     if( psThis == nullptr )
329         return nullptr;
330 
331     return GMLParseFeatureType(psSchemaNode, pszName, psThis);
332 }
333 
334 static
GMLParseFeatureType(CPLXMLNode * psSchemaNode,const char * pszName,CPLXMLNode * psComplexType)335 GMLFeatureClass *GMLParseFeatureType(CPLXMLNode *psSchemaNode,
336                                      const char *pszName,
337                                      CPLXMLNode *psComplexType)
338 {
339 
340 /* -------------------------------------------------------------------- */
341 /*      Grab the sequence of extensions greatgrandchild.                */
342 /* -------------------------------------------------------------------- */
343     CPLXMLNode *psAttrSeq =
344         CPLGetXMLNode(psComplexType, "complexContent.extension.sequence");
345 
346     if( psAttrSeq == nullptr )
347     {
348         return nullptr;
349     }
350 
351 /* -------------------------------------------------------------------- */
352 /*      We are pretty sure this going to be a valid Feature class       */
353 /*      now, so create it.                                              */
354 /* -------------------------------------------------------------------- */
355     GMLFeatureClass *poClass = new GMLFeatureClass(pszName);
356 
357 /* -------------------------------------------------------------------- */
358 /*      Loop over each of the attribute elements being defined for      */
359 /*      this feature class.                                             */
360 /* -------------------------------------------------------------------- */
361     int nAttributeIndex = 0;
362 
363     bool bGotUnrecognizedType = false;
364 
365     CPLXMLNode *psAttrDef = psAttrSeq->psChild;
366     for( ; psAttrDef != nullptr; psAttrDef = psAttrDef->psNext )
367     {
368         if( strcmp(psAttrDef->pszValue, "group") == 0 )
369         {
370             /* Too complex schema for us. Aborts parsing */
371             delete poClass;
372             return nullptr;
373         }
374 
375         /* Parse stuff like:
376         <xs:choice>
377             <xs:element ref="gml:polygonProperty"/>
378             <xs:element ref="gml:multiPolygonProperty"/>
379         </xs:choice>
380         as found in https://downloadagiv.blob.core.windows.net/overstromingsgebieden-en-oeverzones/2014_01/Overstromingsgebieden_en_oeverzones_2014_01_GML.zip
381         */
382         if( strcmp(psAttrDef->pszValue, "choice") == 0 )
383         {
384             CPLXMLNode *psChild = psAttrDef->psChild;
385             bool bPolygon = false;
386             bool bMultiPolygon = false;
387             for( ; psChild; psChild = psChild->psNext )
388             {
389                 if( psChild->eType != CXT_Element )
390                     continue;
391                 if( strcmp(psChild->pszValue, "element") == 0 )
392                 {
393                     const char *pszRef = CPLGetXMLValue(psChild, "ref", nullptr);
394                     if( pszRef != nullptr )
395                     {
396                         if( strcmp(pszRef, "gml:polygonProperty") == 0 )
397                         {
398                             bPolygon = true;
399                         }
400                         else if( strcmp(pszRef, "gml:multiPolygonProperty") == 0 )
401                         {
402                             bMultiPolygon = true;
403                         }
404                         else
405                         {
406                             delete poClass;
407                             return nullptr;
408                         }
409                     }
410                     else
411                     {
412                         delete poClass;
413                         return nullptr;
414                     }
415                 }
416             }
417             if( bPolygon && bMultiPolygon )
418             {
419                 poClass->AddGeometryProperty(new GMLGeometryPropertyDefn(
420                     "", "", wkbMultiPolygon, nAttributeIndex, true));
421 
422                 nAttributeIndex++;
423             }
424             continue;
425         }
426 
427         if( !EQUAL(psAttrDef->pszValue, "element") )
428             continue;
429 
430         // MapServer WFS writes element type as an attribute of element
431         // not as a simpleType definition.
432         const char *pszType = CPLGetXMLValue(psAttrDef, "type", nullptr);
433         const char *pszElementName = CPLGetXMLValue(psAttrDef, "name", nullptr);
434         bool bNullable =
435             EQUAL(CPLGetXMLValue(psAttrDef, "minOccurs", "1"), "0");
436         const char *pszMaxOccurs = CPLGetXMLValue(psAttrDef, "maxOccurs", nullptr);
437         if (pszType != nullptr)
438         {
439             const char *pszStrippedNSType = StripNS(pszType);
440             int nWidth = 0;
441             int nPrecision = 0;
442 
443             GMLPropertyType gmlType = GMLPT_Untyped;
444             if (EQUAL(pszStrippedNSType, "string") ||
445                 EQUAL(pszStrippedNSType, "Character"))
446                 gmlType = GMLPT_String;
447             else if (EQUAL(pszStrippedNSType, "date"))
448                 gmlType = GMLPT_Date;
449             else if (EQUAL(pszStrippedNSType, "time"))
450                 gmlType = GMLPT_Time;
451             else if (EQUAL(pszStrippedNSType, "dateTime"))
452                 gmlType = GMLPT_DateTime;
453             else if (EQUAL(pszStrippedNSType, "real") ||
454                      EQUAL(pszStrippedNSType, "double") ||
455                      EQUAL(pszStrippedNSType, "decimal"))
456                 gmlType = GMLPT_Real;
457             else if (EQUAL(pszStrippedNSType, "float") )
458                 gmlType = GMLPT_Float;
459             else if (EQUAL(pszStrippedNSType, "int") ||
460                      EQUAL(pszStrippedNSType, "integer"))
461                 gmlType = GMLPT_Integer;
462             else if (EQUAL(pszStrippedNSType, "long"))
463                 gmlType = GMLPT_Integer64;
464             else if (EQUAL(pszStrippedNSType, "unsignedLong"))
465             {
466                 // Optimistically map to signed integer
467                 gmlType = GMLPT_Integer64;
468             }
469             else if (EQUAL(pszStrippedNSType, "short") )
470                 gmlType = GMLPT_Short;
471             else if (EQUAL(pszStrippedNSType, "boolean") )
472                 gmlType = GMLPT_Boolean;
473             // TODO: Would be nice to have a binary type.
474             else if (EQUAL(pszStrippedNSType, "hexBinary"))
475                 gmlType = GMLPT_String;
476             else if (strcmp(pszType, "gml:FeaturePropertyType") == 0 )
477             {
478                 gmlType = GMLPT_FeatureProperty;
479             }
480             else if (STARTS_WITH(pszType, "gml:"))
481             {
482                 const AssocNameType *psIter = apsPropertyTypes;
483                 while(psIter->pszName)
484                 {
485                     if (strncmp(pszType + 4, psIter->pszName,
486                                 strlen(psIter->pszName)) == 0)
487                     {
488                         OGRwkbGeometryType eType = psIter->eType;
489 
490                         // Look if there's a comment restricting to subclasses.
491                         if( psAttrDef->psNext != nullptr &&
492                             psAttrDef->psNext->eType == CXT_Comment )
493                         {
494                             if( strstr(psAttrDef->psNext->pszValue,
495                                        "restricted to Polygon") )
496                                 eType = wkbPolygon;
497                             else if( strstr(psAttrDef->psNext->pszValue,
498                                             "restricted to LineString") )
499                                 eType = wkbLineString;
500                             else if( strstr(psAttrDef->psNext->pszValue,
501                                             "restricted to MultiPolygon") )
502                                 eType = wkbMultiPolygon;
503                             else if( strstr(psAttrDef->psNext->pszValue,
504                                             "restricted to MultiLineString") )
505                                 eType = wkbMultiLineString;
506                         }
507 
508                         GMLGeometryPropertyDefn* poDefn =
509                             new GMLGeometryPropertyDefn(
510                                 pszElementName, pszElementName, eType,
511                                 nAttributeIndex, bNullable);
512 
513                         if( poClass->AddGeometryProperty(poDefn) < 0 )
514                             delete poDefn;
515                         else
516                             nAttributeIndex++;
517 
518                         break;
519                     }
520 
521                     psIter++;
522                 }
523 
524                 if (psIter->pszName == nullptr)
525                 {
526                     // Can be a non geometry gml type.
527                     // Too complex schema for us. Aborts parsing.
528                     delete poClass;
529                     return nullptr;
530                 }
531 
532                 if (poClass->GetGeometryPropertyCount() == 0)
533                     bGotUnrecognizedType = true;
534 
535                 continue;
536             }
537 
538             /* Integraph stuff */
539             else if (strcmp(pszType, "G:Point_MultiPointPropertyType") == 0 ||
540                      strcmp(pszType, "gmgml:Point_MultiPointPropertyType") == 0)
541             {
542                 GMLGeometryPropertyDefn* poDefn =
543                     new GMLGeometryPropertyDefn(
544                         pszElementName, pszElementName, wkbMultiPoint,
545                         nAttributeIndex, bNullable);
546 
547                 if( poClass->AddGeometryProperty(poDefn) < 0 )
548                     delete poDefn;
549                 else
550                     nAttributeIndex++;
551 
552                 continue;
553             }
554             else if (strcmp(pszType,
555                             "G:LineString_MultiLineStringPropertyType") == 0 ||
556                      strcmp(pszType,
557                             "gmgml:LineString_MultiLineStringPropertyType") == 0)
558             {
559                 GMLGeometryPropertyDefn* poDefn =
560                     new GMLGeometryPropertyDefn(
561                         pszElementName, pszElementName, wkbMultiLineString,
562                         nAttributeIndex, bNullable);
563 
564                 if( poClass->AddGeometryProperty(poDefn) < 0 )
565                     delete poDefn;
566                 else
567                     nAttributeIndex++;
568 
569                 continue;
570             }
571             else if (strcmp(pszType,
572                             "G:Polygon_MultiPolygonPropertyType") == 0 ||
573                      strcmp(pszType,
574                             "gmgml:Polygon_MultiPolygonPropertyType") == 0 ||
575                      strcmp(pszType,
576                             "gmgml:Polygon_Surface_MultiSurface_CompositeSurfacePropertyType") == 0)
577             {
578                 GMLGeometryPropertyDefn* poDefn =
579                     new GMLGeometryPropertyDefn(
580                         pszElementName, pszElementName, wkbMultiPolygon,
581                         nAttributeIndex, bNullable);
582 
583                 if( poClass->AddGeometryProperty(poDefn) < 0 )
584                     delete poDefn;
585                 else
586                     nAttributeIndex++;
587 
588                 continue;
589             }
590 
591             // ERDAS Apollo stufflike in
592             // http://apollo.erdas.com/erdas-apollo/vector/WORLDWIDE?SERVICE=WFS&VERSION=1.0.0&REQUEST=DescribeFeatureType&TYPENAME=wfs:cntry98)
593             else if (strcmp(pszType, "wfs:MixedPolygonPropertyType") == 0)
594             {
595                 GMLGeometryPropertyDefn* poDefn =
596                     new GMLGeometryPropertyDefn(
597                         pszElementName, pszElementName, wkbMultiPolygon,
598                         nAttributeIndex, bNullable);
599 
600                 if( poClass->AddGeometryProperty(poDefn) < 0 )
601                     delete poDefn;
602                 else
603                     nAttributeIndex++;
604 
605                 continue;
606             }
607 
608             else
609             {
610                 gmlType = GMLPT_Untyped;
611                 if ( !LookForSimpleType(psSchemaNode, pszStrippedNSType,
612                                         &gmlType, &nWidth, &nPrecision) )
613                 {
614                     // Too complex schema for us. Aborts parsing.
615                     delete poClass;
616                     return nullptr;
617                 }
618             }
619 
620             if (pszElementName == nullptr)
621                 pszElementName = "unnamed";
622             const char *pszPropertyName = pszElementName;
623             if( gmlType == GMLPT_FeatureProperty )
624             {
625                 pszPropertyName = CPLSPrintf("%s_href", pszElementName);
626             }
627             GMLPropertyDefn *poProp =
628                 new GMLPropertyDefn(pszPropertyName, pszElementName);
629 
630             if( pszMaxOccurs != nullptr && strcmp(pszMaxOccurs, "1") != 0 )
631                 gmlType = GetListTypeFromSingleType(gmlType);
632 
633             poProp->SetType(gmlType);
634             poProp->SetWidth(nWidth);
635             poProp->SetPrecision(nPrecision);
636             poProp->SetNullable(bNullable);
637 
638             if (poClass->AddProperty( poProp ) < 0)
639                 delete poProp;
640             else
641                 nAttributeIndex++;
642 
643             continue;
644         }
645 
646         // For now we skip geometries.  Fixup later.
647         CPLXMLNode *psSimpleType = CPLGetXMLNode(psAttrDef, "simpleType");
648         if( psSimpleType == nullptr )
649         {
650             const char *pszRef = CPLGetXMLValue(psAttrDef, "ref", nullptr);
651 
652             // FME .xsd
653             if (pszRef != nullptr && STARTS_WITH(pszRef, "gml:"))
654             {
655                 const AssocNameType *psIter = apsRefTypes;
656                 while(psIter->pszName)
657                 {
658                     if (strncmp(pszRef + 4, psIter->pszName,
659                                 strlen(psIter->pszName)) == 0)
660                     {
661                         if (poClass->GetGeometryPropertyCount() > 0)
662                         {
663                             OGRwkbGeometryType eNewType = psIter->eType;
664                             OGRwkbGeometryType eOldType =
665                                 (OGRwkbGeometryType)poClass
666                                     ->GetGeometryProperty(0)
667                                     ->GetType();
668 
669                             if ((eNewType == wkbMultiPoint &&
670                                  eOldType == wkbPoint) ||
671                                 (eNewType == wkbMultiLineString &&
672                                  eOldType == wkbLineString) ||
673                                 (eNewType == wkbMultiPolygon &&
674                                  eOldType == wkbPolygon))
675                             {
676                                 poClass->GetGeometryProperty(0)->SetType(
677                                     eNewType);
678                             }
679                             else
680                             {
681                                 CPLDebug(
682                                     "GML",
683                                     "Geometry field already found ! "
684                                     "Ignoring the following ones");
685                             }
686                         }
687                         else
688                         {
689                             GMLGeometryPropertyDefn* poDefn =
690                                 new GMLGeometryPropertyDefn(
691                                     pszElementName, pszElementName,
692                                     psIter->eType, nAttributeIndex, true);
693 
694                             if( poClass->AddGeometryProperty(poDefn) < 0 )
695                                 delete poDefn;
696                             else
697                                 nAttributeIndex++;
698                         }
699 
700                         break;
701                     }
702 
703                     psIter++;
704                 }
705 
706                 if (psIter->pszName == nullptr)
707                 {
708                     // Can be a non geometry gml type .
709                     // Too complex schema for us. Aborts parsing.
710                     delete poClass;
711                     return nullptr;
712                 }
713 
714                 if (poClass->GetGeometryPropertyCount() == 0)
715                     bGotUnrecognizedType = true;
716 
717                 continue;
718             }
719 
720             /* Parse stuff like the following found in http://199.29.1.81:8181/miwfs/GetFeature.ashx?REQUEST=GetFeature&MAXFEATURES=1&SERVICE=WFS&VERSION=1.0.0&TYPENAME=miwfs:World :
721             <xs:element name="Obj" minOccurs="0" maxOccurs="1">
722                 <xs:complexType>
723                     <xs:sequence>
724                         <xs:element ref="gml:_Geometry"/>
725                     </xs:sequence>
726                 </xs:complexType>
727             </xs:element>
728             */
729             CPLXMLNode *l_psComplexType =
730                 GetSingleChildElement(psAttrDef, "complexType");
731             CPLXMLNode *psComplexTypeSequence =
732                 GetSingleChildElement(l_psComplexType, "sequence");
733             CPLXMLNode *psComplexTypeSequenceElement =
734                 GetSingleChildElement(psComplexTypeSequence, "element");
735 
736             if( pszElementName != nullptr &&
737                 CheckMinMaxOccursCardinality(psAttrDef) &&
738                 psComplexTypeSequenceElement != nullptr &&
739                 CheckMinMaxOccursCardinality(psComplexTypeSequence) &&
740                 strcmp(CPLGetXMLValue(psComplexTypeSequenceElement, "ref", ""),
741                        "gml:_Geometry") == 0 )
742             {
743                 GMLGeometryPropertyDefn* poDefn =
744                     new GMLGeometryPropertyDefn(
745                         pszElementName, pszElementName, wkbUnknown, nAttributeIndex,
746                         bNullable);
747 
748                 if( poClass->AddGeometryProperty(poDefn) < 0 )
749                     delete poDefn;
750                 else
751                     nAttributeIndex++;
752 
753                 continue;
754             }
755             else
756             {
757                 // Too complex schema for us. Aborts parsing.
758                 delete poClass;
759                 return nullptr;
760             }
761         }
762 
763         if (pszElementName == nullptr)
764             pszElementName = "unnamed";
765         GMLPropertyDefn *poProp =
766             new GMLPropertyDefn(pszElementName, pszElementName);
767 
768         GMLPropertyType eType = GMLPT_Untyped;
769         int nWidth = 0;
770         int nPrecision = 0;
771         GetSimpleTypeProperties(psSimpleType, &eType, &nWidth, &nPrecision);
772 
773         if( pszMaxOccurs != nullptr && strcmp(pszMaxOccurs, "1") != 0 )
774             eType = GetListTypeFromSingleType(eType);
775 
776         poProp->SetType(eType);
777         poProp->SetWidth(nWidth);
778         poProp->SetPrecision(nPrecision);
779         poProp->SetNullable(bNullable);
780 
781         if (poClass->AddProperty(poProp) < 0)
782             delete poProp;
783         else
784             nAttributeIndex++;
785     }
786 
787     // If we have found an unknown types, let's be on the side of caution and
788     // create a geometry field.
789     if( poClass->GetGeometryPropertyCount() == 0 &&
790         bGotUnrecognizedType )
791     {
792         poClass->AddGeometryProperty(
793             new GMLGeometryPropertyDefn("", "", wkbUnknown, -1, true));
794     }
795 
796 /* -------------------------------------------------------------------- */
797 /*      Class complete, add to reader class list.                       */
798 /* -------------------------------------------------------------------- */
799     poClass->SetSchemaLocked(true);
800 
801     return poClass;
802 }
803 
804 /************************************************************************/
805 /*                         GMLParseXMLFile()                            */
806 /************************************************************************/
807 
GMLParseXMLFile(const char * pszFilename)808 static CPLXMLNode *GMLParseXMLFile(const char *pszFilename)
809 {
810     if( STARTS_WITH(pszFilename, "http://") ||
811         STARTS_WITH(pszFilename, "https://") )
812     {
813         CPLXMLNode *psRet = nullptr;
814         CPLHTTPResult *psResult = CPLHTTPFetch(pszFilename, nullptr);
815         if( psResult != nullptr )
816         {
817             if( psResult->pabyData != nullptr )
818             {
819                 psRet = CPLParseXMLString((const char *)psResult->pabyData);
820             }
821             CPLHTTPDestroyResult(psResult);
822         }
823         return psRet;
824     }
825     else
826     {
827         return CPLParseXMLFile(pszFilename);
828     }
829 }
830 
831 /************************************************************************/
832 /*                       CPLGetFirstChildNode()                         */
833 /************************************************************************/
834 
CPLGetFirstChildNode(CPLXMLNode * psNode)835 static CPLXMLNode *CPLGetFirstChildNode(CPLXMLNode *psNode)
836 {
837     if( psNode == nullptr )
838         return nullptr;
839     CPLXMLNode *psIter = psNode->psChild;
840     while( psIter != nullptr )
841     {
842         if( psIter->eType == CXT_Element )
843             return psIter;
844         psIter = psIter->psNext;
845     }
846     return nullptr;
847 }
848 
849 /************************************************************************/
850 /*                          CPLGetLastNode()                            */
851 /************************************************************************/
852 
CPLGetLastNode(CPLXMLNode * psNode)853 static CPLXMLNode *CPLGetLastNode(CPLXMLNode *psNode)
854 {
855     CPLXMLNode *psIter = psNode;
856     while( psIter->psNext != nullptr )
857         psIter = psIter->psNext;
858     return psIter;
859 }
860 
861 /************************************************************************/
862 /*                       CPLXMLSchemaResolveInclude()                   */
863 /************************************************************************/
864 
865 static
CPLXMLSchemaResolveInclude(const char * pszMainSchemaLocation,CPLXMLNode * psSchemaNode)866 void CPLXMLSchemaResolveInclude( const char *pszMainSchemaLocation,
867                                  CPLXMLNode *psSchemaNode )
868 {
869     std::set<CPLString> osAlreadyIncluded;
870 
871     bool bTryAgain;
872     do
873     {
874         CPLXMLNode *psLast = nullptr;
875         bTryAgain = false;
876 
877         CPLXMLNode *psThis = psSchemaNode->psChild;
878         for( ; psThis != nullptr; psThis = psThis->psNext )
879         {
880             if( psThis->eType == CXT_Element &&
881                 EQUAL(psThis->pszValue, "include") )
882             {
883                 const char *pszSchemaLocation =
884                     CPLGetXMLValue(psThis, "schemaLocation", nullptr);
885                 if( pszSchemaLocation != nullptr &&
886                     osAlreadyIncluded.count( pszSchemaLocation) == 0 )
887                 {
888                     osAlreadyIncluded.insert( pszSchemaLocation );
889 
890                     if( !STARTS_WITH(pszSchemaLocation, "http://") &&
891                         !STARTS_WITH(pszSchemaLocation, "https://") &&
892                         CPLIsFilenameRelative(pszSchemaLocation) )
893                     {
894                         pszSchemaLocation =
895                             CPLFormFilename(CPLGetPath(pszMainSchemaLocation),
896                                             pszSchemaLocation, nullptr);
897                     }
898 
899                     CPLXMLNode *psIncludedXSDTree =
900                                 GMLParseXMLFile( pszSchemaLocation );
901                     if( psIncludedXSDTree != nullptr )
902                     {
903                         CPLStripXMLNamespace(psIncludedXSDTree, nullptr, TRUE);
904                         CPLXMLNode *psIncludedSchemaNode =
905                             CPLGetXMLNode(psIncludedXSDTree, "=schema");
906                         if( psIncludedSchemaNode != nullptr )
907                         {
908                             // Substitute de <include> node by its content.
909                             CPLXMLNode *psFirstChildElement =
910                                 CPLGetFirstChildNode(psIncludedSchemaNode);
911                             if( psFirstChildElement != nullptr )
912                             {
913                                 CPLXMLNode *psCopy =
914                                     CPLCloneXMLTree(psFirstChildElement);
915                                 if( psLast != nullptr )
916                                     psLast->psNext = psCopy;
917                                 else
918                                     psSchemaNode->psChild = psCopy;
919                                 CPLXMLNode *psNext = psThis->psNext;
920                                 psThis->psNext = nullptr;
921                                 CPLDestroyXMLNode(psThis);
922                                 psThis = CPLGetLastNode(psCopy);
923                                 psThis->psNext = psNext;
924 
925                                 // In case the included schema also contains
926                                 // includes.
927                                 bTryAgain = true;
928                             }
929                         }
930                         CPLDestroyXMLNode(psIncludedXSDTree);
931                     }
932                 }
933             }
934 
935             psLast = psThis;
936         }
937     } while( bTryAgain );
938 
939     const char *pszSchemaOutputName =
940         CPLGetConfigOption("GML_SCHEMA_OUTPUT_NAME", nullptr);
941     if( pszSchemaOutputName != nullptr )
942     {
943         CPLSerializeXMLTreeToFile(psSchemaNode, pszSchemaOutputName);
944     }
945 }
946 
947 /************************************************************************/
948 /*                       GetUniqueConstraints()                         */
949 /************************************************************************/
950 
951 static std::set<std::pair<std::string, std::string>>
GetUniqueConstraints(const CPLXMLNode * psNode)952                                 GetUniqueConstraints(const CPLXMLNode* psNode)
953 {
954     /* Parse
955         <xs:unique name="uniqueConstraintpolyeas_id">
956             <xs:selector xpath="ogr:featureMember/ogr:poly"/>
957             <xs:field xpath="ogr:eas_id"/>
958         </xs:unique>
959     */
960     std::set<std::pair<std::string, std::string>> oSet;
961     for( const auto* psIter= psNode->psChild; psIter != nullptr; psIter = psIter->psNext )
962     {
963         if( psIter->eType == CXT_Element &&
964             EQUAL(psIter->pszValue, "unique") )
965         {
966             const char* pszSelector = CPLGetXMLValue(psIter, "selector.xpath", nullptr);
967             const char* pszField = CPLGetXMLValue(psIter, "field.xpath", nullptr);
968             if( pszSelector && pszField && pszField[0] != '@' )
969             {
970                 const char* pszSlash = strchr(pszSelector, '/');
971                 if( pszSlash )
972                 {
973                     oSet.insert(std::pair<std::string,std::string>(
974                         StripNS(pszSlash+1), StripNS(pszField)));
975                 }
976             }
977         }
978     }
979     return oSet;
980 }
981 
982 /************************************************************************/
983 /*                          GMLParseXSD()                               */
984 /************************************************************************/
985 
GMLParseXSD(const char * pszFile,std::vector<GMLFeatureClass * > & aosClasses,bool & bFullyUnderstood)986 bool GMLParseXSD( const char *pszFile,
987                  std::vector<GMLFeatureClass*> &aosClasses,
988                  bool &bFullyUnderstood)
989 
990 {
991     bFullyUnderstood = false;
992 
993     if( pszFile == nullptr )
994         return false;
995 
996 /* -------------------------------------------------------------------- */
997 /*      Load the raw XML file.                                          */
998 /* -------------------------------------------------------------------- */
999     CPLXMLNode *psXSDTree = GMLParseXMLFile(pszFile);
1000 
1001     if( psXSDTree == nullptr )
1002         return false;
1003 
1004 /* -------------------------------------------------------------------- */
1005 /*      Strip off any namespace qualifiers.                             */
1006 /* -------------------------------------------------------------------- */
1007     CPLStripXMLNamespace( psXSDTree, nullptr, TRUE );
1008 
1009 /* -------------------------------------------------------------------- */
1010 /*      Find <schema> root element.                                     */
1011 /* -------------------------------------------------------------------- */
1012     CPLXMLNode *psSchemaNode = CPLGetXMLNode(psXSDTree, "=schema");
1013     if( psSchemaNode == nullptr )
1014     {
1015         CPLDestroyXMLNode( psXSDTree );
1016         return false;
1017     }
1018 
1019 /* ==================================================================== */
1020 /*      Process each include directive.                                 */
1021 /* ==================================================================== */
1022     CPLXMLSchemaResolveInclude(pszFile, psSchemaNode);
1023 
1024     // CPLSerializeXMLTreeToFile(psSchemaNode, "/vsistdout/");
1025 
1026     bFullyUnderstood = true;
1027 
1028 /* ==================================================================== */
1029 /*      Process each feature class definition.                          */
1030 /* ==================================================================== */
1031     CPLXMLNode *psThis = psSchemaNode->psChild;
1032 
1033     std::set<std::pair<std::string, std::string>> oSetUniqueConstraints;
1034 
1035     for( ; psThis != nullptr; psThis = psThis->psNext )
1036     {
1037 /* -------------------------------------------------------------------- */
1038 /*      Check for <xs:element> node.                                    */
1039 /* -------------------------------------------------------------------- */
1040         if( psThis->eType != CXT_Element
1041             || !EQUAL(psThis->pszValue, "element") )
1042             continue;
1043 
1044 /* -------------------------------------------------------------------- */
1045 /*      Get name                                                        */
1046 /* -------------------------------------------------------------------- */
1047         const char *pszName = CPLGetXMLValue(psThis, "name", nullptr);
1048         if( pszName == nullptr )
1049         {
1050             continue;
1051         }
1052 
1053 /* -------------------------------------------------------------------- */
1054 /*      Check the substitution group.                                   */
1055 /* -------------------------------------------------------------------- */
1056         const char *pszSubGroup =
1057             StripNS(CPLGetXMLValue(psThis, "substitutionGroup", ""));
1058 
1059         if( EQUAL(pszName, "FeatureCollection") &&
1060             (EQUAL(pszSubGroup, "_FeatureCollection") ||
1061              EQUAL(pszSubGroup, "_GML") ||
1062              EQUAL(pszSubGroup, "AbstractFeature")) )
1063         {
1064             oSetUniqueConstraints = GetUniqueConstraints(psThis);
1065             continue;
1066         }
1067 
1068         // AbstractFeature used by GML 3.2.
1069         if( !EQUAL(pszSubGroup, "_Feature") &&
1070             !EQUAL(pszSubGroup, "AbstractFeature") )
1071         {
1072             continue;
1073         }
1074 
1075 /* -------------------------------------------------------------------- */
1076 /*      Get type and verify relationship with name.                     */
1077 /* -------------------------------------------------------------------- */
1078         const char *pszType = CPLGetXMLValue(psThis, "type", nullptr);
1079         if (pszType == nullptr)
1080         {
1081             CPLXMLNode *psComplexType = CPLGetXMLNode(psThis, "complexType");
1082             if (psComplexType)
1083             {
1084                 GMLFeatureClass *poClass =
1085                     GMLParseFeatureType(psSchemaNode, pszName, psComplexType);
1086                 if (poClass)
1087                     aosClasses.push_back(poClass);
1088                 else
1089                     bFullyUnderstood = false;
1090             }
1091             continue;
1092         }
1093         if( strstr(pszType, ":") != nullptr )
1094             pszType = strstr(pszType, ":") + 1;
1095         if( EQUAL(pszType, pszName) )
1096         {
1097             // A few WFS servers return a type name which is the element name
1098             // without any _Type or Type suffix
1099             // e.g.:
1100             // http://apollo.erdas.com/erdas-apollo/vector/Cherokee?SERVICE=WFS&VERSION=1.0.0&REQUEST=DescribeFeatureType&TYPENAME=iwfs:Air */
1101 
1102             // TODO(schwehr): What was supposed to go here?
1103         }
1104 
1105         // <element name="RekisteriyksikonPalstanTietoja" type="ktjkiiwfs:PalstanTietojaType" substitutionGroup="gml:_Feature" />
1106         else if( strlen(pszType) > 4 &&
1107                  strcmp(pszType + strlen(pszType) - 4, "Type") == 0 &&
1108                  strlen(pszName) > strlen(pszType) - 4 &&
1109                  strncmp(pszName + strlen(pszName) - (strlen(pszType) - 4),
1110                          pszType,
1111                          strlen(pszType) - 4) == 0 )        {
1112         }
1113 
1114         else if( !EQUALN(pszType, pszName, strlen(pszName))
1115             || !(EQUAL(pszType + strlen(pszName), "_Type") ||
1116                     EQUAL(pszType + strlen(pszName), "Type") ||
1117                     EQUAL(pszType + strlen(pszName), "FeatureType")) )
1118         {
1119             continue;
1120         }
1121 
1122         // CanVec .xsd contains weird types that are not used in the related
1123         // GML.
1124         if (STARTS_WITH(pszName, "XyZz") ||
1125             STARTS_WITH(pszName, "XyZ1") ||
1126             STARTS_WITH(pszName, "XyZ2"))
1127             continue;
1128 
1129         GMLFeatureClass *poClass =
1130             GMLParseFeatureType(psSchemaNode, pszName, pszType);
1131         if (poClass)
1132             aosClasses.push_back(poClass);
1133         else
1134             bFullyUnderstood = false;
1135     }
1136 
1137     CPLDestroyXMLNode(psXSDTree);
1138 
1139     // Attach unique constraints to fields
1140     for( const auto& typeFieldPair: oSetUniqueConstraints )
1141     {
1142         for( const auto* poClass: aosClasses )
1143         {
1144             if( poClass->GetName() == typeFieldPair.first )
1145             {
1146                 auto poProperty = poClass->GetProperty(typeFieldPair.second.c_str());
1147                 if( poProperty )
1148                 {
1149                     poProperty->SetUnique(true);
1150                 }
1151                 break;
1152             }
1153         }
1154     }
1155 
1156     return !aosClasses.empty();
1157 }
1158