1 /******************************************************************************
2 *
3 * Project: GML Reader
4 * Purpose: Implementation of GMLParseXSD()
5 * Author: Frank Warmerdam, warmerdam@pobox.com
6 *
7 ******************************************************************************
8 * Copyright (c) 2005, Frank Warmerdam
9 * Copyright (c) 2010-2014, Even Rouault <even dot rouault at spatialys.com>
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included
19 * in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 * DEALINGS IN THE SOFTWARE.
28 ****************************************************************************/
29
30 #include "cpl_port.h"
31 #include "parsexsd.h"
32
33 #include <cstdlib>
34 #include <cstring>
35 #include <set>
36 #include <string>
37 #include <utility>
38
39 #include "cpl_conv.h"
40 #include "cpl_error.h"
41 #include "cpl_http.h"
42 #include "cpl_minixml.h"
43 #include "cpl_string.h"
44 #include "ogr_core.h"
45
46 CPL_CVSID("$Id: parsexsd.cpp 7c24d0c095ca05eba45ed40003d37e698bbee248 2020-09-24 14:51:48 +0200 Even Rouault $")
47
48 /************************************************************************/
49 /* StripNS() */
50 /* */
51 /* Return potentially shortened form of string with namespace */
52 /* stripped off if there is one. Returns pointer into */
53 /* original string. */
54 /************************************************************************/
static const char *StripNS(const char *pszFullValue)
{
    // Everything up to and including the first ':' is treated as the
    // namespace prefix. The result aliases the input buffer: nothing is
    // copied or allocated.
    const char *pszSeparator = strchr(pszFullValue, ':');
    return pszSeparator ? pszSeparator + 1 : pszFullValue;
}
64
65 /************************************************************************/
66 /* GetSimpleTypeProperties() */
67 /************************************************************************/
68
69 static
GetSimpleTypeProperties(CPLXMLNode * psTypeNode,GMLPropertyType * pGMLType,int * pnWidth,int * pnPrecision)70 bool GetSimpleTypeProperties(CPLXMLNode *psTypeNode,
71 GMLPropertyType *pGMLType,
72 int *pnWidth,
73 int *pnPrecision)
74 {
75 const char *pszBase =
76 StripNS(CPLGetXMLValue(psTypeNode, "restriction.base", ""));
77
78 if( EQUAL(pszBase, "decimal") )
79 {
80 *pGMLType = GMLPT_Real;
81 const char *pszWidth =
82 CPLGetXMLValue(psTypeNode, "restriction.totalDigits.value", "0");
83 const char *pszPrecision =
84 CPLGetXMLValue(psTypeNode, "restriction.fractionDigits.value", "0");
85 *pnWidth = atoi(pszWidth);
86 *pnPrecision = atoi(pszPrecision);
87 return true;
88 }
89
90 else if( EQUAL(pszBase, "float") )
91 {
92 *pGMLType = GMLPT_Float;
93 return true;
94 }
95
96 else if( EQUAL(pszBase, "double") )
97 {
98 *pGMLType = GMLPT_Real;
99 return true;
100 }
101
102 else if( EQUAL(pszBase, "integer") )
103 {
104 *pGMLType = GMLPT_Integer;
105 const char *pszWidth =
106 CPLGetXMLValue(psTypeNode, "restriction.totalDigits.value", "0");
107 *pnWidth = atoi(pszWidth);
108 return true;
109 }
110
111 else if( EQUAL(pszBase, "long") )
112 {
113 *pGMLType = GMLPT_Integer64;
114 const char *pszWidth =
115 CPLGetXMLValue(psTypeNode, "restriction.totalDigits.value", "0");
116 *pnWidth = atoi(pszWidth);
117 return true;
118 }
119
120 else if( EQUAL(pszBase, "unsignedLong") )
121 {
122 // Optimistically map to signed integer...
123 *pGMLType = GMLPT_Integer64;
124 const char *pszWidth =
125 CPLGetXMLValue(psTypeNode, "restriction.totalDigits.value", "0");
126 *pnWidth = atoi(pszWidth);
127 return true;
128 }
129
130 else if( EQUAL(pszBase, "string") )
131 {
132 *pGMLType = GMLPT_String;
133 const char *pszWidth =
134 CPLGetXMLValue(psTypeNode, "restriction.maxLength.value", "0");
135 *pnWidth = atoi(pszWidth);
136 return true;
137 }
138
139 else if( EQUAL(pszBase, "date") )
140 {
141 *pGMLType = GMLPT_Date;
142 return true;
143 }
144
145 else if( EQUAL(pszBase, "time") )
146 {
147 *pGMLType = GMLPT_Time;
148 return true;
149 }
150
151 else if( EQUAL(pszBase, "dateTime") )
152 {
153 *pGMLType = GMLPT_DateTime;
154 return true;
155 }
156
157 else if( EQUAL(pszBase, "boolean") )
158 {
159 *pGMLType = GMLPT_Boolean;
160 return true;
161 }
162
163 else if( EQUAL(pszBase, "short") )
164 {
165 *pGMLType = GMLPT_Short;
166 return true;
167 }
168
169 return false;
170 }
171
172 /************************************************************************/
173 /* LookForSimpleType() */
174 /************************************************************************/
175
176 static
LookForSimpleType(CPLXMLNode * psSchemaNode,const char * pszStrippedNSType,GMLPropertyType * pGMLType,int * pnWidth,int * pnPrecision)177 bool LookForSimpleType(CPLXMLNode *psSchemaNode,
178 const char* pszStrippedNSType,
179 GMLPropertyType *pGMLType,
180 int *pnWidth,
181 int *pnPrecision)
182 {
183 CPLXMLNode *psThis = psSchemaNode->psChild;
184 for( ; psThis != nullptr; psThis = psThis->psNext )
185 {
186 if( psThis->eType == CXT_Element &&
187 EQUAL(psThis->pszValue, "simpleType") &&
188 EQUAL(CPLGetXMLValue(psThis, "name", ""), pszStrippedNSType) )
189 {
190 break;
191 }
192 }
193 if( psThis == nullptr )
194 return false;
195
196 return GetSimpleTypeProperties(psThis, pGMLType, pnWidth, pnPrecision);
197 }
198
199 /************************************************************************/
200 /* GetSingleChildElement() */
201 /************************************************************************/
202
203 /* Returns the child element whose name is pszExpectedValue only if */
204 /* there is only one child that is an element. */
205 static
GetSingleChildElement(CPLXMLNode * psNode,const char * pszExpectedValue)206 CPLXMLNode *GetSingleChildElement(CPLXMLNode *psNode,
207 const char *pszExpectedValue)
208 {
209 if( psNode == nullptr )
210 return nullptr;
211
212 CPLXMLNode *psIter = psNode->psChild;
213 if( psIter == nullptr )
214 return nullptr;
215
216 CPLXMLNode *psChild = nullptr;
217 while( psIter != nullptr )
218 {
219 if( psIter->eType == CXT_Element )
220 {
221 if( psChild != nullptr )
222 return nullptr;
223 if( pszExpectedValue != nullptr &&
224 strcmp(psIter->pszValue, pszExpectedValue) != 0 )
225 return nullptr;
226 psChild = psIter;
227 }
228 psIter = psIter->psNext;
229 }
230 return psChild;
231 }
232
233 /************************************************************************/
234 /* CheckMinMaxOccursCardinality() */
235 /************************************************************************/
236
CheckMinMaxOccursCardinality(CPLXMLNode * psNode)237 static int CheckMinMaxOccursCardinality(CPLXMLNode *psNode)
238 {
239 const char *pszMinOccurs = CPLGetXMLValue(psNode, "minOccurs", nullptr);
240 const char *pszMaxOccurs = CPLGetXMLValue(psNode, "maxOccurs", nullptr);
241 return (pszMinOccurs == nullptr || EQUAL(pszMinOccurs, "0") ||
242 EQUAL(pszMinOccurs, "1")) &&
243 (pszMaxOccurs == nullptr || EQUAL(pszMaxOccurs, "1"));
244 }
245
246 /************************************************************************/
247 /* GetListTypeFromSingleType() */
248 /************************************************************************/
249
GetListTypeFromSingleType(GMLPropertyType eType)250 static GMLPropertyType GetListTypeFromSingleType(GMLPropertyType eType)
251 {
252 if( eType == GMLPT_String )
253 return GMLPT_StringList;
254 if( eType == GMLPT_Integer || eType == GMLPT_Short )
255 return GMLPT_IntegerList;
256 if( eType == GMLPT_Integer64 )
257 return GMLPT_Integer64List;
258 if( eType == GMLPT_Real || eType == GMLPT_Float )
259 return GMLPT_RealList;
260 if( eType == GMLPT_Boolean )
261 return GMLPT_BooleanList;
262 if( eType == GMLPT_FeatureProperty )
263 return GMLPT_FeaturePropertyList;
264 return eType;
265 }
266
267 /************************************************************************/
268 /* ParseFeatureType() */
269 /************************************************************************/
270
/* Associates a name (without its "gml:" prefix) with an OGR geometry type. */
typedef struct
{
    const char *pszName;       /* name to match; nullptr terminates a table */
    OGRwkbGeometryType eType;  /* geometry type reported for that name */
} AssocNameType;

/* Geometry property types. GMLParseFeatureType() compares each name, as a */
/* prefix, against the part of an element's "type" attribute that follows */
/* "gml:"; the first matching entry is used. */
static const AssocNameType apsPropertyTypes[] =
{
    {"GeometryPropertyType", wkbUnknown},
    {"PointPropertyType", wkbPoint},
    {"LineStringPropertyType", wkbLineString},
    {"CurvePropertyType", wkbCompoundCurve},
    {"PolygonPropertyType", wkbPolygon},
    {"SurfacePropertyType", wkbCurvePolygon},
    {"MultiPointPropertyType", wkbMultiPoint},
    {"MultiLineStringPropertyType", wkbMultiLineString},
    {"MultiCurvePropertyType", wkbMultiCurve},
    {"MultiPolygonPropertyType", wkbMultiPolygon},
    {"MultiSurfacePropertyType", wkbMultiSurface},
    {"MultiGeometryPropertyType", wkbGeometryCollection},
    {"GeometryAssociationType", wkbUnknown},
    {nullptr, wkbUnknown},
};

/* Found in FME .xsd (e.g. <element ref="gml:curveProperty" minOccurs="0"/>) */
/* Matched the same way, against the part of the "ref" attribute that */
/* follows "gml:". */
static const AssocNameType apsRefTypes[] =
{
    {"pointProperty", wkbPoint},
    {"curveProperty", wkbLineString}, // Should we promote to wkbCompoundCurve?
    {"surfaceProperty", wkbPolygon}, // Should we promote to wkbCurvePolygon?
    {"multiPointProperty", wkbMultiPoint},
    {"multiCurveProperty", wkbMultiLineString},
    // Should we promote to wkbMultiSurface?
    {"multiSurfaceProperty", wkbMultiPolygon},
    {nullptr, wkbUnknown},
};
307
/* Forward declaration of the overload that parses an already located */
/* <complexType> node; it is called by the type-name overload below. */
static
GMLFeatureClass *GMLParseFeatureType(CPLXMLNode *psSchemaNode,
                                     const char *pszName,
                                     CPLXMLNode *psThis);
312
313 static
GMLParseFeatureType(CPLXMLNode * psSchemaNode,const char * pszName,const char * pszType)314 GMLFeatureClass *GMLParseFeatureType(CPLXMLNode *psSchemaNode,
315 const char *pszName,
316 const char *pszType)
317 {
318 CPLXMLNode *psThis = psSchemaNode->psChild;
319 for( ; psThis != nullptr; psThis = psThis->psNext )
320 {
321 if( psThis->eType == CXT_Element &&
322 EQUAL(psThis->pszValue, "complexType") &&
323 EQUAL(CPLGetXMLValue(psThis, "name", ""), pszType) )
324 {
325 break;
326 }
327 }
328 if( psThis == nullptr )
329 return nullptr;
330
331 return GMLParseFeatureType(psSchemaNode, pszName, psThis);
332 }
333
334 static
GMLParseFeatureType(CPLXMLNode * psSchemaNode,const char * pszName,CPLXMLNode * psComplexType)335 GMLFeatureClass *GMLParseFeatureType(CPLXMLNode *psSchemaNode,
336 const char *pszName,
337 CPLXMLNode *psComplexType)
338 {
339
340 /* -------------------------------------------------------------------- */
341 /* Grab the sequence of extensions greatgrandchild. */
342 /* -------------------------------------------------------------------- */
343 CPLXMLNode *psAttrSeq =
344 CPLGetXMLNode(psComplexType, "complexContent.extension.sequence");
345
346 if( psAttrSeq == nullptr )
347 {
348 return nullptr;
349 }
350
351 /* -------------------------------------------------------------------- */
352 /* We are pretty sure this going to be a valid Feature class */
353 /* now, so create it. */
354 /* -------------------------------------------------------------------- */
355 GMLFeatureClass *poClass = new GMLFeatureClass(pszName);
356
357 /* -------------------------------------------------------------------- */
358 /* Loop over each of the attribute elements being defined for */
359 /* this feature class. */
360 /* -------------------------------------------------------------------- */
361 int nAttributeIndex = 0;
362
363 bool bGotUnrecognizedType = false;
364
365 CPLXMLNode *psAttrDef = psAttrSeq->psChild;
366 for( ; psAttrDef != nullptr; psAttrDef = psAttrDef->psNext )
367 {
368 if( strcmp(psAttrDef->pszValue, "group") == 0 )
369 {
370 /* Too complex schema for us. Aborts parsing */
371 delete poClass;
372 return nullptr;
373 }
374
375 /* Parse stuff like:
376 <xs:choice>
377 <xs:element ref="gml:polygonProperty"/>
378 <xs:element ref="gml:multiPolygonProperty"/>
379 </xs:choice>
380 as found in https://downloadagiv.blob.core.windows.net/overstromingsgebieden-en-oeverzones/2014_01/Overstromingsgebieden_en_oeverzones_2014_01_GML.zip
381 */
382 if( strcmp(psAttrDef->pszValue, "choice") == 0 )
383 {
384 CPLXMLNode *psChild = psAttrDef->psChild;
385 bool bPolygon = false;
386 bool bMultiPolygon = false;
387 for( ; psChild; psChild = psChild->psNext )
388 {
389 if( psChild->eType != CXT_Element )
390 continue;
391 if( strcmp(psChild->pszValue, "element") == 0 )
392 {
393 const char *pszRef = CPLGetXMLValue(psChild, "ref", nullptr);
394 if( pszRef != nullptr )
395 {
396 if( strcmp(pszRef, "gml:polygonProperty") == 0 )
397 {
398 bPolygon = true;
399 }
400 else if( strcmp(pszRef, "gml:multiPolygonProperty") == 0 )
401 {
402 bMultiPolygon = true;
403 }
404 else
405 {
406 delete poClass;
407 return nullptr;
408 }
409 }
410 else
411 {
412 delete poClass;
413 return nullptr;
414 }
415 }
416 }
417 if( bPolygon && bMultiPolygon )
418 {
419 poClass->AddGeometryProperty(new GMLGeometryPropertyDefn(
420 "", "", wkbMultiPolygon, nAttributeIndex, true));
421
422 nAttributeIndex++;
423 }
424 continue;
425 }
426
427 if( !EQUAL(psAttrDef->pszValue, "element") )
428 continue;
429
430 // MapServer WFS writes element type as an attribute of element
431 // not as a simpleType definition.
432 const char *pszType = CPLGetXMLValue(psAttrDef, "type", nullptr);
433 const char *pszElementName = CPLGetXMLValue(psAttrDef, "name", nullptr);
434 bool bNullable =
435 EQUAL(CPLGetXMLValue(psAttrDef, "minOccurs", "1"), "0");
436 const char *pszMaxOccurs = CPLGetXMLValue(psAttrDef, "maxOccurs", nullptr);
437 if (pszType != nullptr)
438 {
439 const char *pszStrippedNSType = StripNS(pszType);
440 int nWidth = 0;
441 int nPrecision = 0;
442
443 GMLPropertyType gmlType = GMLPT_Untyped;
444 if (EQUAL(pszStrippedNSType, "string") ||
445 EQUAL(pszStrippedNSType, "Character"))
446 gmlType = GMLPT_String;
447 else if (EQUAL(pszStrippedNSType, "date"))
448 gmlType = GMLPT_Date;
449 else if (EQUAL(pszStrippedNSType, "time"))
450 gmlType = GMLPT_Time;
451 else if (EQUAL(pszStrippedNSType, "dateTime"))
452 gmlType = GMLPT_DateTime;
453 else if (EQUAL(pszStrippedNSType, "real") ||
454 EQUAL(pszStrippedNSType, "double") ||
455 EQUAL(pszStrippedNSType, "decimal"))
456 gmlType = GMLPT_Real;
457 else if (EQUAL(pszStrippedNSType, "float") )
458 gmlType = GMLPT_Float;
459 else if (EQUAL(pszStrippedNSType, "int") ||
460 EQUAL(pszStrippedNSType, "integer"))
461 gmlType = GMLPT_Integer;
462 else if (EQUAL(pszStrippedNSType, "long"))
463 gmlType = GMLPT_Integer64;
464 else if (EQUAL(pszStrippedNSType, "unsignedLong"))
465 {
466 // Optimistically map to signed integer
467 gmlType = GMLPT_Integer64;
468 }
469 else if (EQUAL(pszStrippedNSType, "short") )
470 gmlType = GMLPT_Short;
471 else if (EQUAL(pszStrippedNSType, "boolean") )
472 gmlType = GMLPT_Boolean;
473 // TODO: Would be nice to have a binary type.
474 else if (EQUAL(pszStrippedNSType, "hexBinary"))
475 gmlType = GMLPT_String;
476 else if (strcmp(pszType, "gml:FeaturePropertyType") == 0 )
477 {
478 gmlType = GMLPT_FeatureProperty;
479 }
480 else if (STARTS_WITH(pszType, "gml:"))
481 {
482 const AssocNameType *psIter = apsPropertyTypes;
483 while(psIter->pszName)
484 {
485 if (strncmp(pszType + 4, psIter->pszName,
486 strlen(psIter->pszName)) == 0)
487 {
488 OGRwkbGeometryType eType = psIter->eType;
489
490 // Look if there's a comment restricting to subclasses.
491 if( psAttrDef->psNext != nullptr &&
492 psAttrDef->psNext->eType == CXT_Comment )
493 {
494 if( strstr(psAttrDef->psNext->pszValue,
495 "restricted to Polygon") )
496 eType = wkbPolygon;
497 else if( strstr(psAttrDef->psNext->pszValue,
498 "restricted to LineString") )
499 eType = wkbLineString;
500 else if( strstr(psAttrDef->psNext->pszValue,
501 "restricted to MultiPolygon") )
502 eType = wkbMultiPolygon;
503 else if( strstr(psAttrDef->psNext->pszValue,
504 "restricted to MultiLineString") )
505 eType = wkbMultiLineString;
506 }
507
508 GMLGeometryPropertyDefn* poDefn =
509 new GMLGeometryPropertyDefn(
510 pszElementName, pszElementName, eType,
511 nAttributeIndex, bNullable);
512
513 if( poClass->AddGeometryProperty(poDefn) < 0 )
514 delete poDefn;
515 else
516 nAttributeIndex++;
517
518 break;
519 }
520
521 psIter++;
522 }
523
524 if (psIter->pszName == nullptr)
525 {
526 // Can be a non geometry gml type.
527 // Too complex schema for us. Aborts parsing.
528 delete poClass;
529 return nullptr;
530 }
531
532 if (poClass->GetGeometryPropertyCount() == 0)
533 bGotUnrecognizedType = true;
534
535 continue;
536 }
537
538 /* Integraph stuff */
539 else if (strcmp(pszType, "G:Point_MultiPointPropertyType") == 0 ||
540 strcmp(pszType, "gmgml:Point_MultiPointPropertyType") == 0)
541 {
542 GMLGeometryPropertyDefn* poDefn =
543 new GMLGeometryPropertyDefn(
544 pszElementName, pszElementName, wkbMultiPoint,
545 nAttributeIndex, bNullable);
546
547 if( poClass->AddGeometryProperty(poDefn) < 0 )
548 delete poDefn;
549 else
550 nAttributeIndex++;
551
552 continue;
553 }
554 else if (strcmp(pszType,
555 "G:LineString_MultiLineStringPropertyType") == 0 ||
556 strcmp(pszType,
557 "gmgml:LineString_MultiLineStringPropertyType") == 0)
558 {
559 GMLGeometryPropertyDefn* poDefn =
560 new GMLGeometryPropertyDefn(
561 pszElementName, pszElementName, wkbMultiLineString,
562 nAttributeIndex, bNullable);
563
564 if( poClass->AddGeometryProperty(poDefn) < 0 )
565 delete poDefn;
566 else
567 nAttributeIndex++;
568
569 continue;
570 }
571 else if (strcmp(pszType,
572 "G:Polygon_MultiPolygonPropertyType") == 0 ||
573 strcmp(pszType,
574 "gmgml:Polygon_MultiPolygonPropertyType") == 0 ||
575 strcmp(pszType,
576 "gmgml:Polygon_Surface_MultiSurface_CompositeSurfacePropertyType") == 0)
577 {
578 GMLGeometryPropertyDefn* poDefn =
579 new GMLGeometryPropertyDefn(
580 pszElementName, pszElementName, wkbMultiPolygon,
581 nAttributeIndex, bNullable);
582
583 if( poClass->AddGeometryProperty(poDefn) < 0 )
584 delete poDefn;
585 else
586 nAttributeIndex++;
587
588 continue;
589 }
590
591 // ERDAS Apollo stufflike in
592 // http://apollo.erdas.com/erdas-apollo/vector/WORLDWIDE?SERVICE=WFS&VERSION=1.0.0&REQUEST=DescribeFeatureType&TYPENAME=wfs:cntry98)
593 else if (strcmp(pszType, "wfs:MixedPolygonPropertyType") == 0)
594 {
595 GMLGeometryPropertyDefn* poDefn =
596 new GMLGeometryPropertyDefn(
597 pszElementName, pszElementName, wkbMultiPolygon,
598 nAttributeIndex, bNullable);
599
600 if( poClass->AddGeometryProperty(poDefn) < 0 )
601 delete poDefn;
602 else
603 nAttributeIndex++;
604
605 continue;
606 }
607
608 else
609 {
610 gmlType = GMLPT_Untyped;
611 if ( !LookForSimpleType(psSchemaNode, pszStrippedNSType,
612 &gmlType, &nWidth, &nPrecision) )
613 {
614 // Too complex schema for us. Aborts parsing.
615 delete poClass;
616 return nullptr;
617 }
618 }
619
620 if (pszElementName == nullptr)
621 pszElementName = "unnamed";
622 const char *pszPropertyName = pszElementName;
623 if( gmlType == GMLPT_FeatureProperty )
624 {
625 pszPropertyName = CPLSPrintf("%s_href", pszElementName);
626 }
627 GMLPropertyDefn *poProp =
628 new GMLPropertyDefn(pszPropertyName, pszElementName);
629
630 if( pszMaxOccurs != nullptr && strcmp(pszMaxOccurs, "1") != 0 )
631 gmlType = GetListTypeFromSingleType(gmlType);
632
633 poProp->SetType(gmlType);
634 poProp->SetWidth(nWidth);
635 poProp->SetPrecision(nPrecision);
636 poProp->SetNullable(bNullable);
637
638 if (poClass->AddProperty( poProp ) < 0)
639 delete poProp;
640 else
641 nAttributeIndex++;
642
643 continue;
644 }
645
646 // For now we skip geometries. Fixup later.
647 CPLXMLNode *psSimpleType = CPLGetXMLNode(psAttrDef, "simpleType");
648 if( psSimpleType == nullptr )
649 {
650 const char *pszRef = CPLGetXMLValue(psAttrDef, "ref", nullptr);
651
652 // FME .xsd
653 if (pszRef != nullptr && STARTS_WITH(pszRef, "gml:"))
654 {
655 const AssocNameType *psIter = apsRefTypes;
656 while(psIter->pszName)
657 {
658 if (strncmp(pszRef + 4, psIter->pszName,
659 strlen(psIter->pszName)) == 0)
660 {
661 if (poClass->GetGeometryPropertyCount() > 0)
662 {
663 OGRwkbGeometryType eNewType = psIter->eType;
664 OGRwkbGeometryType eOldType =
665 (OGRwkbGeometryType)poClass
666 ->GetGeometryProperty(0)
667 ->GetType();
668
669 if ((eNewType == wkbMultiPoint &&
670 eOldType == wkbPoint) ||
671 (eNewType == wkbMultiLineString &&
672 eOldType == wkbLineString) ||
673 (eNewType == wkbMultiPolygon &&
674 eOldType == wkbPolygon))
675 {
676 poClass->GetGeometryProperty(0)->SetType(
677 eNewType);
678 }
679 else
680 {
681 CPLDebug(
682 "GML",
683 "Geometry field already found ! "
684 "Ignoring the following ones");
685 }
686 }
687 else
688 {
689 GMLGeometryPropertyDefn* poDefn =
690 new GMLGeometryPropertyDefn(
691 pszElementName, pszElementName,
692 psIter->eType, nAttributeIndex, true);
693
694 if( poClass->AddGeometryProperty(poDefn) < 0 )
695 delete poDefn;
696 else
697 nAttributeIndex++;
698 }
699
700 break;
701 }
702
703 psIter++;
704 }
705
706 if (psIter->pszName == nullptr)
707 {
708 // Can be a non geometry gml type .
709 // Too complex schema for us. Aborts parsing.
710 delete poClass;
711 return nullptr;
712 }
713
714 if (poClass->GetGeometryPropertyCount() == 0)
715 bGotUnrecognizedType = true;
716
717 continue;
718 }
719
720 /* Parse stuff like the following found in http://199.29.1.81:8181/miwfs/GetFeature.ashx?REQUEST=GetFeature&MAXFEATURES=1&SERVICE=WFS&VERSION=1.0.0&TYPENAME=miwfs:World :
721 <xs:element name="Obj" minOccurs="0" maxOccurs="1">
722 <xs:complexType>
723 <xs:sequence>
724 <xs:element ref="gml:_Geometry"/>
725 </xs:sequence>
726 </xs:complexType>
727 </xs:element>
728 */
729 CPLXMLNode *l_psComplexType =
730 GetSingleChildElement(psAttrDef, "complexType");
731 CPLXMLNode *psComplexTypeSequence =
732 GetSingleChildElement(l_psComplexType, "sequence");
733 CPLXMLNode *psComplexTypeSequenceElement =
734 GetSingleChildElement(psComplexTypeSequence, "element");
735
736 if( pszElementName != nullptr &&
737 CheckMinMaxOccursCardinality(psAttrDef) &&
738 psComplexTypeSequenceElement != nullptr &&
739 CheckMinMaxOccursCardinality(psComplexTypeSequence) &&
740 strcmp(CPLGetXMLValue(psComplexTypeSequenceElement, "ref", ""),
741 "gml:_Geometry") == 0 )
742 {
743 GMLGeometryPropertyDefn* poDefn =
744 new GMLGeometryPropertyDefn(
745 pszElementName, pszElementName, wkbUnknown, nAttributeIndex,
746 bNullable);
747
748 if( poClass->AddGeometryProperty(poDefn) < 0 )
749 delete poDefn;
750 else
751 nAttributeIndex++;
752
753 continue;
754 }
755 else
756 {
757 // Too complex schema for us. Aborts parsing.
758 delete poClass;
759 return nullptr;
760 }
761 }
762
763 if (pszElementName == nullptr)
764 pszElementName = "unnamed";
765 GMLPropertyDefn *poProp =
766 new GMLPropertyDefn(pszElementName, pszElementName);
767
768 GMLPropertyType eType = GMLPT_Untyped;
769 int nWidth = 0;
770 int nPrecision = 0;
771 GetSimpleTypeProperties(psSimpleType, &eType, &nWidth, &nPrecision);
772
773 if( pszMaxOccurs != nullptr && strcmp(pszMaxOccurs, "1") != 0 )
774 eType = GetListTypeFromSingleType(eType);
775
776 poProp->SetType(eType);
777 poProp->SetWidth(nWidth);
778 poProp->SetPrecision(nPrecision);
779 poProp->SetNullable(bNullable);
780
781 if (poClass->AddProperty(poProp) < 0)
782 delete poProp;
783 else
784 nAttributeIndex++;
785 }
786
787 // If we have found an unknown types, let's be on the side of caution and
788 // create a geometry field.
789 if( poClass->GetGeometryPropertyCount() == 0 &&
790 bGotUnrecognizedType )
791 {
792 poClass->AddGeometryProperty(
793 new GMLGeometryPropertyDefn("", "", wkbUnknown, -1, true));
794 }
795
796 /* -------------------------------------------------------------------- */
797 /* Class complete, add to reader class list. */
798 /* -------------------------------------------------------------------- */
799 poClass->SetSchemaLocked(true);
800
801 return poClass;
802 }
803
804 /************************************************************************/
805 /* GMLParseXMLFile() */
806 /************************************************************************/
807
GMLParseXMLFile(const char * pszFilename)808 static CPLXMLNode *GMLParseXMLFile(const char *pszFilename)
809 {
810 if( STARTS_WITH(pszFilename, "http://") ||
811 STARTS_WITH(pszFilename, "https://") )
812 {
813 CPLXMLNode *psRet = nullptr;
814 CPLHTTPResult *psResult = CPLHTTPFetch(pszFilename, nullptr);
815 if( psResult != nullptr )
816 {
817 if( psResult->pabyData != nullptr )
818 {
819 psRet = CPLParseXMLString((const char *)psResult->pabyData);
820 }
821 CPLHTTPDestroyResult(psResult);
822 }
823 return psRet;
824 }
825 else
826 {
827 return CPLParseXMLFile(pszFilename);
828 }
829 }
830
831 /************************************************************************/
832 /* CPLGetFirstChildNode() */
833 /************************************************************************/
834
CPLGetFirstChildNode(CPLXMLNode * psNode)835 static CPLXMLNode *CPLGetFirstChildNode(CPLXMLNode *psNode)
836 {
837 if( psNode == nullptr )
838 return nullptr;
839 CPLXMLNode *psIter = psNode->psChild;
840 while( psIter != nullptr )
841 {
842 if( psIter->eType == CXT_Element )
843 return psIter;
844 psIter = psIter->psNext;
845 }
846 return nullptr;
847 }
848
849 /************************************************************************/
850 /* CPLGetLastNode() */
851 /************************************************************************/
852
CPLGetLastNode(CPLXMLNode * psNode)853 static CPLXMLNode *CPLGetLastNode(CPLXMLNode *psNode)
854 {
855 CPLXMLNode *psIter = psNode;
856 while( psIter->psNext != nullptr )
857 psIter = psIter->psNext;
858 return psIter;
859 }
860
861 /************************************************************************/
862 /* CPLXMLSchemaResolveInclude() */
863 /************************************************************************/
864
/**
 * Replace, in place, the <include> children of psSchemaNode by the content
 * of the schemas they reference.
 *
 * @param pszMainSchemaLocation  location of the main schema; relative
 *                               schemaLocation values are resolved against
 *                               its directory.
 * @param psSchemaNode           schema node whose child list is edited.
 *
 * Each distinct schemaLocation attribute value is processed at most once.
 * An <include> whose target cannot be parsed, has no "schema" node or no
 * child element is left in the tree. When the GML_SCHEMA_OUTPUT_NAME
 * configuration option is set, the resulting tree is serialized to that
 * file.
 */
static
void CPLXMLSchemaResolveInclude( const char *pszMainSchemaLocation,
                                 CPLXMLNode *psSchemaNode )
{
    // Raw schemaLocation values already handled (recorded before any path
    // resolution), so that a given include is expanded only once.
    std::set<CPLString> osAlreadyIncluded;

    bool bTryAgain;
    do
    {
        // Previous sibling of psThis, needed to splice into the list.
        CPLXMLNode *psLast = nullptr;
        bTryAgain = false;

        CPLXMLNode *psThis = psSchemaNode->psChild;
        for( ; psThis != nullptr; psThis = psThis->psNext )
        {
            if( psThis->eType == CXT_Element &&
                EQUAL(psThis->pszValue, "include") )
            {
                const char *pszSchemaLocation =
                    CPLGetXMLValue(psThis, "schemaLocation", nullptr);
                if( pszSchemaLocation != nullptr &&
                    osAlreadyIncluded.count( pszSchemaLocation) == 0 )
                {
                    osAlreadyIncluded.insert( pszSchemaLocation );

                    // Relative, non-URL locations are taken relative to
                    // the directory of the main schema.
                    if( !STARTS_WITH(pszSchemaLocation, "http://") &&
                        !STARTS_WITH(pszSchemaLocation, "https://") &&
                        CPLIsFilenameRelative(pszSchemaLocation) )
                    {
                        pszSchemaLocation =
                            CPLFormFilename(CPLGetPath(pszMainSchemaLocation),
                                            pszSchemaLocation, nullptr);
                    }

                    CPLXMLNode *psIncludedXSDTree =
                        GMLParseXMLFile( pszSchemaLocation );
                    if( psIncludedXSDTree != nullptr )
                    {
                        CPLStripXMLNamespace(psIncludedXSDTree, nullptr, TRUE);
                        CPLXMLNode *psIncludedSchemaNode =
                            CPLGetXMLNode(psIncludedXSDTree, "=schema");
                        if( psIncludedSchemaNode != nullptr )
                        {
                            // Substitute the <include> node by its content.
                            CPLXMLNode *psFirstChildElement =
                                CPLGetFirstChildNode(psIncludedSchemaNode);
                            if( psFirstChildElement != nullptr )
                            {
                                // Link the copy in place of the <include>
                                // node: after the previous sibling, or as
                                // first child when there is none.
                                CPLXMLNode *psCopy =
                                    CPLCloneXMLTree(psFirstChildElement);
                                if( psLast != nullptr )
                                    psLast->psNext = psCopy;
                                else
                                    psSchemaNode->psChild = psCopy;
                                // Detach the <include> node from its
                                // followers before destroying it, then
                                // re-attach the followers to the end of
                                // the copy. psThis now designates that
                                // last copied node, so the loop resumes
                                // after the inserted content.
                                CPLXMLNode *psNext = psThis->psNext;
                                psThis->psNext = nullptr;
                                CPLDestroyXMLNode(psThis);
                                psThis = CPLGetLastNode(psCopy);
                                psThis->psNext = psNext;

                                // In case the included schema also contains
                                // includes.
                                bTryAgain = true;
                            }
                        }
                        CPLDestroyXMLNode(psIncludedXSDTree);
                    }
                }
            }

            psLast = psThis;
        }
    } while( bTryAgain );

    // Optional dump of the schema once the include pass is finished.
    const char *pszSchemaOutputName =
        CPLGetConfigOption("GML_SCHEMA_OUTPUT_NAME", nullptr);
    if( pszSchemaOutputName != nullptr )
    {
        CPLSerializeXMLTreeToFile(psSchemaNode, pszSchemaOutputName);
    }
}
946
947 /************************************************************************/
948 /* GetUniqueConstraints() */
949 /************************************************************************/
950
951 static std::set<std::pair<std::string, std::string>>
GetUniqueConstraints(const CPLXMLNode * psNode)952 GetUniqueConstraints(const CPLXMLNode* psNode)
953 {
954 /* Parse
955 <xs:unique name="uniqueConstraintpolyeas_id">
956 <xs:selector xpath="ogr:featureMember/ogr:poly"/>
957 <xs:field xpath="ogr:eas_id"/>
958 </xs:unique>
959 */
960 std::set<std::pair<std::string, std::string>> oSet;
961 for( const auto* psIter= psNode->psChild; psIter != nullptr; psIter = psIter->psNext )
962 {
963 if( psIter->eType == CXT_Element &&
964 EQUAL(psIter->pszValue, "unique") )
965 {
966 const char* pszSelector = CPLGetXMLValue(psIter, "selector.xpath", nullptr);
967 const char* pszField = CPLGetXMLValue(psIter, "field.xpath", nullptr);
968 if( pszSelector && pszField && pszField[0] != '@' )
969 {
970 const char* pszSlash = strchr(pszSelector, '/');
971 if( pszSlash )
972 {
973 oSet.insert(std::pair<std::string,std::string>(
974 StripNS(pszSlash+1), StripNS(pszField)));
975 }
976 }
977 }
978 }
979 return oSet;
980 }
981
982 /************************************************************************/
983 /* GMLParseXSD() */
984 /************************************************************************/
985
/*
 * Parse the XSD schema file pszFile and append one GMLFeatureClass to
 * aosClasses for every top-level <element> recognised as a feature type.
 *
 * pszFile          Schema file to load; a null pointer makes the call fail.
 * aosClasses       Receives the feature classes built from the schema.
 *                  Entries already present are left in place.
 * bFullyUnderstood Set to false on entry.  Set to true once the <schema>
 *                  root has been found, then reset to false if building
 *                  any candidate feature type returns a null class.
 *
 * Returns true when aosClasses is not empty on exit.  Returns false when
 * pszFile is null, when the file cannot be loaded, when no <schema> root
 * exists, or when aosClasses ends up empty.
 */
bool GMLParseXSD( const char *pszFile,
                  std::vector<GMLFeatureClass*> &aosClasses,
                  bool &bFullyUnderstood )
{
    bFullyUnderstood = false;

    if( pszFile == nullptr )
        return false;

    /* ---- Load the raw XML document. ---- */
    CPLXMLNode * const psRoot = GMLParseXMLFile(pszFile);
    if( psRoot == nullptr )
        return false;

    /* ---- Remove namespace qualifiers from the whole tree. ---- */
    CPLStripXMLNamespace(psRoot, nullptr, TRUE);

    /* ---- Locate the <schema> root element. ---- */
    CPLXMLNode * const psSchema = CPLGetXMLNode(psRoot, "=schema");
    if( psSchema == nullptr )
    {
        CPLDestroyXMLNode(psRoot);
        return false;
    }

    /* ---- Process the include directives of the schema. ---- */
    CPLXMLSchemaResolveInclude(pszFile, psSchema);

    // Debugging aid:
    // CPLSerializeXMLTreeToFile(psSchema, "/vsistdout/");

    bFullyUnderstood = true;

    // Stores a successfully built class, or records that part of the schema
    // was not understood when the builder returned a null pointer.
    const auto addClass =
        [&aosClasses, &bFullyUnderstood]( GMLFeatureClass *poNewClass )
    {
        if( poNewClass != nullptr )
            aosClasses.push_back(poNewClass);
        else
            bFullyUnderstood = false;
    };

    // (class name, field name) pairs reported by GetUniqueConstraints().
    std::set<std::pair<std::string, std::string>> oUniqueFields;

    /* ==== Walk the direct children of <schema>. ==== */
    for( CPLXMLNode *psNode = psSchema->psChild;
         psNode != nullptr;
         psNode = psNode->psNext )
    {
        // Only <element> nodes are of interest.
        if( psNode->eType != CXT_Element ||
            !EQUAL(psNode->pszValue, "element") )
        {
            continue;
        }

        // An element without a name cannot describe a feature class.
        const char * const pszElemName =
            CPLGetXMLValue(psNode, "name", nullptr);
        if( pszElemName == nullptr )
            continue;

        // Substitution group with its namespace prefix removed.
        const char * const pszSubstGroup =
            StripNS(CPLGetXMLValue(psNode, "substitutionGroup", ""));
        // AbstractFeature is the group used by GML 3.2.
        const bool bSubstAbstractFeature =
            EQUAL(pszSubstGroup, "AbstractFeature");

        // The FeatureCollection element is not a feature class itself; it
        // is only consulted for its unique constraints.
        if( EQUAL(pszElemName, "FeatureCollection") &&
            (EQUAL(pszSubstGroup, "_FeatureCollection") ||
             EQUAL(pszSubstGroup, "_GML") ||
             bSubstAbstractFeature) )
        {
            oUniqueFields = GetUniqueConstraints(psNode);
            continue;
        }

        if( !(EQUAL(pszSubstGroup, "_Feature") || bSubstAbstractFeature) )
            continue;

        /* ---- Resolve the type of the element. ---- */
        const char *pszTypeName = CPLGetXMLValue(psNode, "type", nullptr);
        if( pszTypeName == nullptr )
        {
            // No "type" attribute: fall back on a <complexType> child of
            // the element, if there is one.
            CPLXMLNode * const psInlineType =
                CPLGetXMLNode(psNode, "complexType");
            if( psInlineType != nullptr )
            {
                addClass(
                    GMLParseFeatureType(psSchema, pszElemName, psInlineType));
            }
            continue;
        }

        // Skip past the first ':' of the type name, when present.
        const char * const pszColon = strstr(pszTypeName, ":");
        if( pszColon != nullptr )
            pszTypeName = pszColon + 1;

        /* ---- Verify the relationship between type and element name. ---- */
        const size_t nNameLen = strlen(pszElemName);
        const size_t nTypeLen = strlen(pszTypeName);

        // Case 1: a few WFS servers return a type name which is the element
        // name without any _Type or Type suffix, e.g.:
        // http://apollo.erdas.com/erdas-apollo/vector/Cherokee?SERVICE=WFS&VERSION=1.0.0&REQUEST=DescribeFeatureType&TYPENAME=iwfs:Air
        bool bTypeMatchesName = EQUAL(pszTypeName, pszElemName);

        // Case 2: the type is "<stem>Type" and the element name is longer
        // than <stem> and ends with it, e.g.:
        // <element name="RekisteriyksikonPalstanTietoja" type="ktjkiiwfs:PalstanTietojaType" substitutionGroup="gml:_Feature" />
        if( !bTypeMatchesName &&
            nTypeLen > 4 &&
            strcmp(pszTypeName + nTypeLen - 4, "Type") == 0 )
        {
            const size_t nStemLen = nTypeLen - 4;
            bTypeMatchesName =
                nNameLen > nStemLen &&
                strncmp(pszElemName + nNameLen - nStemLen,
                        pszTypeName, nStemLen) == 0;
        }

        // Case 3: the type is the element name followed by "_Type", "Type"
        // or "FeatureType" (compared case-insensitively).
        if( !bTypeMatchesName &&
            EQUALN(pszTypeName, pszElemName, nNameLen) )
        {
            const char * const pszSuffix = pszTypeName + nNameLen;
            bTypeMatchesName = EQUAL(pszSuffix, "_Type") ||
                               EQUAL(pszSuffix, "Type") ||
                               EQUAL(pszSuffix, "FeatureType");
        }

        if( !bTypeMatchesName )
            continue;

        // CanVec .xsd contains weird types that are not used in the related
        // GML.
        if( STARTS_WITH(pszElemName, "XyZz") ||
            STARTS_WITH(pszElemName, "XyZ1") ||
            STARTS_WITH(pszElemName, "XyZ2") )
        {
            continue;
        }

        addClass(GMLParseFeatureType(psSchema, pszElemName, pszTypeName));
    }

    CPLDestroyXMLNode(psRoot);

    /* ---- Flag the properties named by the unique constraints. ---- */
    for( const auto &oClassAndField : oUniqueFields )
    {
        const std::string &osClassName = oClassAndField.first;
        const std::string &osFieldName = oClassAndField.second;

        for( const auto *poCandidate : aosClasses )
        {
            if( poCandidate->GetName() == osClassName )
            {
                // Only the first class carrying this name is considered.
                auto poField = poCandidate->GetProperty(osFieldName.c_str());
                if( poField )
                    poField->SetUnique(true);
                break;
            }
        }
    }

    return !aosClasses.empty();
}
1158