1 /***************************************************************************
2                              qgsmetadatautils.cpp
3                              -------------------
4     begin                : April 2021
5     copyright            : (C) 2021 by Nyall Dawson
6     email                : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  *                                                                         *
11  *   This program is free software; you can redistribute it and/or modify  *
12  *   it under the terms of the GNU General Public License as published by  *
13  *   the Free Software Foundation; either version 2 of the License, or     *
14  *   (at your option) any later version.                                   *
15  *                                                                         *
16  ***************************************************************************/
17 
18 #include "qgsmetadatautils.h"
19 #include "qgslayermetadata.h"
20 
21 #include <QDomDocument>
22 #include <QTextDocumentFragment>
23 
convertFromEsri(const QDomDocument & document)24 QgsLayerMetadata QgsMetadataUtils::convertFromEsri( const QDomDocument &document )
25 {
26   QgsLayerMetadata metadata;
27   const QDomElement metadataElem = document.firstChildElement( QStringLiteral( "metadata" ) );
28 
29   const QDomElement esri = metadataElem.firstChildElement( QStringLiteral( "Esri" ) );
30   const QDomElement dataProperties = esri.firstChildElement( QStringLiteral( "DataProperties" ) );
31   const QDomElement itemProps = dataProperties.firstChildElement( QStringLiteral( "itemProps" ) );
32   metadata.setIdentifier( itemProps.firstChildElement( QStringLiteral( "itemName" ) ).text() );
33 
34   const QDomElement dataIdInfo = metadataElem.firstChildElement( QStringLiteral( "dataIdInfo" ) );
35   const QDomElement idInfo = metadataElem.firstChildElement( QStringLiteral( "idinfo" ) );
36 
37   // title
38   const QDomElement idCitation = dataIdInfo.firstChildElement( QStringLiteral( "idCitation" ) );
39   const QString title = idCitation.firstChildElement( QStringLiteral( "resTitle" ) ).text();
40   metadata.setTitle( title );
41 
42   // if no explicit identifier we use the title
43   if ( metadata.identifier().isEmpty()  && !title.isEmpty() )
44     metadata.setIdentifier( title );
45 
46   // abstract
47   const QDomElement idAbs = dataIdInfo.firstChildElement( QStringLiteral( "idAbs" ) );
48   const QString abstractPlainText = QTextDocumentFragment::fromHtml( idAbs.text() ).toPlainText();
49   metadata.setAbstract( abstractPlainText );
50 
51   // purpose
52   const QDomElement idPurp = dataIdInfo.firstChildElement( QStringLiteral( "idPurp" ) );
53   const QString purposePlainText = QTextDocumentFragment::fromHtml( idPurp.text() ).toPlainText();
54   if ( !metadata.abstract().isEmpty() )
55     metadata.setAbstract( metadata.abstract() + QStringLiteral( "\n\n" ) + purposePlainText );
56   else
57     metadata.setAbstract( purposePlainText );
58 
59   // older metadata format used "descript" element instead
60   const QDomElement descript = idInfo.firstChildElement( QStringLiteral( "descript" ) );
61   if ( !descript.isNull() )
62   {
63     const QDomElement abstract = descript.firstChildElement( QStringLiteral( "abstract" ) );
64     const QString abstractPlainText = QTextDocumentFragment::fromHtml( abstract.text() ).toPlainText();
65     if ( !abstractPlainText.isEmpty() )
66     {
67       if ( !metadata.abstract().isEmpty() )
68         metadata.setAbstract( metadata.abstract() + QStringLiteral( "\n\n" ) + abstractPlainText );
69       else
70         metadata.setAbstract( abstractPlainText );
71     }
72 
73     const QDomElement purpose = descript.firstChildElement( QStringLiteral( "purpose" ) );
74     const QString purposePlainText = QTextDocumentFragment::fromHtml( purpose.text() ).toPlainText();
75     if ( !purposePlainText.isEmpty() )
76     {
77       if ( !metadata.abstract().isEmpty() )
78         metadata.setAbstract( metadata.abstract() + QStringLiteral( "\n\n" ) + purposePlainText );
79       else
80         metadata.setAbstract( purposePlainText );
81     }
82 
83     const QDomElement supplinf = descript.firstChildElement( QStringLiteral( "supplinf" ) );
84     const QString supplinfPlainText = QTextDocumentFragment::fromHtml( supplinf.text() ).toPlainText();
85     if ( !supplinfPlainText.isEmpty() )
86     {
87       if ( !metadata.abstract().isEmpty() )
88         metadata.setAbstract( metadata.abstract() + QStringLiteral( "\n\n" ) + supplinfPlainText );
89       else
90         metadata.setAbstract( supplinfPlainText );
91     }
92   }
93 
94   // supplementary info
95   const QDomElement suppInfo = dataIdInfo.firstChildElement( QStringLiteral( "suppInfo" ) );
96   const QString suppInfoPlainText = QTextDocumentFragment::fromHtml( suppInfo.text() ).toPlainText();
97   if ( !suppInfoPlainText.isEmpty() )
98   {
99     if ( !metadata.abstract().isEmpty() )
100       metadata.setAbstract( metadata.abstract() + QStringLiteral( "\n\n" ) + suppInfoPlainText );
101     else
102       metadata.setAbstract( suppInfoPlainText );
103   }
104 
105   // language
106   const QDomElement dataLang = dataIdInfo.firstChildElement( QStringLiteral( "dataLang" ) );
107   const QDomElement languageCode = dataLang.firstChildElement( QStringLiteral( "languageCode" ) );
108   const QString language = languageCode.attribute( QStringLiteral( "value" ) ).toUpper();
109   metadata.setLanguage( language );
110 
111   // keywords
112   QDomElement searchKeys = dataIdInfo.firstChildElement( QStringLiteral( "searchKeys" ) );
113   QStringList keywords;
114   while ( !searchKeys.isNull() )
115   {
116     QDomElement keyword = searchKeys.firstChildElement( QStringLiteral( "keyword" ) );
117     while ( !keyword.isNull() )
118     {
119       keywords << keyword.text();
120       keyword = keyword.nextSiblingElement( QStringLiteral( "keyword" ) );
121     }
122 
123     searchKeys = searchKeys.nextSiblingElement( QStringLiteral( "searchKeys" ) );
124   }
125 
126   // categories
127   QDomElement themeKeys = dataIdInfo.firstChildElement( QStringLiteral( "themeKeys" ) );
128   QStringList categories;
129   while ( !themeKeys.isNull() )
130   {
131     const QDomElement thesaName = themeKeys.firstChildElement( QStringLiteral( "thesaName" ) );
132     const QString thesaTitle = thesaName.firstChildElement( QStringLiteral( "resTitle" ) ).text();
133 
134     const bool isSearchKeyWord = thesaTitle.compare( QLatin1String( "Common Search Terms" ), Qt::CaseInsensitive ) == 0;
135 
136     QDomElement themeKeyword = themeKeys.firstChildElement( QStringLiteral( "keyword" ) );
137     while ( !themeKeyword.isNull() )
138     {
139       if ( isSearchKeyWord )
140       {
141         keywords.append( themeKeyword.text().split( ',' ) );
142       }
143       else
144       {
145         categories << themeKeyword.text();
146       }
147       themeKeyword = themeKeyword.nextSiblingElement( QStringLiteral( "keyword" ) );
148     }
149     themeKeys = themeKeys.nextSiblingElement( QStringLiteral( "themeKeys" ) );
150   }
151 
152   // older xml format
153   QDomElement keywordsElem = idInfo.firstChildElement( QStringLiteral( "keywords" ) );
154   while ( !keywordsElem.isNull() )
155   {
156     QDomElement theme = keywordsElem.firstChildElement( QStringLiteral( "theme" ) );
157     while ( !theme.isNull() )
158     {
159       categories << theme.firstChildElement( QStringLiteral( "themekey" ) ).text();
160       theme = theme.nextSiblingElement( QStringLiteral( "theme" ) );
161     }
162 
163     keywordsElem = keywordsElem.nextSiblingElement( QStringLiteral( "keywords" ) );
164   }
165 
166   if ( !categories.isEmpty() )
167     metadata.setCategories( categories );
168 
169   if ( !keywords.empty() )
170     metadata.addKeywords( QObject::tr( "Search keys" ), keywords );
171 
172   QgsLayerMetadata::Extent extent;
173 
174   // pubDate
175   const QDomElement date = idCitation.firstChildElement( QStringLiteral( "date" ) );
176   const QString pubDate = date.firstChildElement( QStringLiteral( "pubDate" ) ).text();
177   const QDateTime publicationDate = QDateTime::fromString( pubDate, Qt::ISODate );
178   if ( publicationDate.isValid() )
179   {
180     extent.setTemporalExtents( { publicationDate, QDateTime() } );
181   }
182   else
183   {
184     // older XML format
185     QDomElement timeperd = idInfo.firstChildElement( QStringLiteral( "timeperd" ) );
186     while ( !timeperd.isNull() )
187     {
188       if ( timeperd.firstChildElement( QStringLiteral( "current" ) ).text().compare( QLatin1String( "publication date" ) ) == 0 )
189       {
190         const QDomElement timeinfo = timeperd.firstChildElement( QStringLiteral( "timeinfo" ) );
191         const QDomElement sngdate = timeinfo.firstChildElement( QStringLiteral( "sngdate" ) );
192         if ( !sngdate.isNull() )
193         {
194           const QDomElement caldate = sngdate.firstChildElement( QStringLiteral( "caldate" ) );
195           const QString caldateString = caldate.text();
196           const QDateTime publicationDate = QDateTime::fromString( caldateString, QStringLiteral( "MMMM yyyy" ) );
197           if ( publicationDate.isValid() )
198           {
199             extent.setTemporalExtents( { publicationDate, QDateTime() } );
200             break;
201           }
202         }
203         const QDomElement rngdates = timeinfo.firstChildElement( QStringLiteral( "rngdates" ) );
204         if ( !rngdates.isNull() )
205         {
206           const QDomElement begdate = rngdates.firstChildElement( QStringLiteral( "begdate" ) );
207           const QDomElement enddate = rngdates.firstChildElement( QStringLiteral( "enddate" ) );
208           const QString begdateString = begdate.text();
209           const QString enddateString = enddate.text();
210           QDateTime begin;
211           QDateTime end;
212           for ( const QString format : { "yyyy-MM-dd", "dd/MM/yyyy" } )
213           {
214             if ( !begin.isValid() )
215               begin = QDateTime::fromString( begdateString, format );
216             if ( !end.isValid() )
217               end = QDateTime::fromString( enddateString, format );
218           }
219 
220           if ( begin.isValid() || end.isValid() )
221           {
222             extent.setTemporalExtents( {QgsDateTimeRange{ begin, end } } );
223             break;
224           }
225         }
226       }
227 
228       timeperd = timeperd.nextSiblingElement( QStringLiteral( "timeperd" ) );
229     }
230   }
231 
232   //crs
233   QgsCoordinateReferenceSystem crs;
234   QDomElement refSysInfo = metadataElem.firstChildElement( QStringLiteral( "refSysInfo" ) );
235   while ( !refSysInfo.isNull() )
236   {
237     const QDomElement refSystem = refSysInfo.firstChildElement( QStringLiteral( "RefSystem" ) );
238     const QDomElement refSysID = refSystem.firstChildElement( QStringLiteral( "refSysID" ) );
239     const QDomElement identAuth = refSysID.firstChildElement( QStringLiteral( "identAuth" ) );
240     if ( !identAuth.isNull() )
241     {
242       if ( identAuth.firstChildElement( QStringLiteral( "resTitle" ) ).text().compare( QLatin1String( "EPSG Geodetic Parameter Dataset" ) ) == 0 )
243       {
244         const QString code = refSysID.firstChildElement( QStringLiteral( "identCode" ) ).attribute( QStringLiteral( "code" ) );
245         crs = QgsCoordinateReferenceSystem( code );
246       }
247     }
248     else
249     {
250       const QString code = refSysID.firstChildElement( QStringLiteral( "identCode" ) ).attribute( QStringLiteral( "code" ) );
251       const QString auth = refSysID.firstChildElement( QStringLiteral( "idCodeSpace" ) ).text();
252       crs = QgsCoordinateReferenceSystem( QStringLiteral( "%1:%2" ).arg( auth, code ) );
253     }
254 
255     if ( crs.isValid() )
256     {
257       metadata.setCrs( crs );
258       break;
259     }
260     refSysInfo = refSysInfo.nextSiblingElement( QStringLiteral( "refSysInfo" ) );
261   }
262 
263   // extent
264   QDomElement dataExt = dataIdInfo.firstChildElement( QStringLiteral( "dataExt" ) );
265   while ( !dataExt.isNull() )
266   {
267     const QDomElement geoEle = dataExt.firstChildElement( QStringLiteral( "geoEle" ) );
268     if ( !geoEle.isNull() )
269     {
270       const QDomElement geoBndBox = geoEle.firstChildElement( QStringLiteral( "GeoBndBox" ) );
271       const double west = geoBndBox.firstChildElement( QStringLiteral( "westBL" ) ).text().toDouble();
272       const double east = geoBndBox.firstChildElement( QStringLiteral( "eastBL" ) ).text().toDouble();
273       const double south = geoBndBox.firstChildElement( QStringLiteral( "northBL" ) ).text().toDouble();
274       const double north = geoBndBox.firstChildElement( QStringLiteral( "southBL" ) ).text().toDouble();
275 
276       QgsLayerMetadata::SpatialExtent spatialExtent;
277       spatialExtent.extentCrs = crs.isValid() ? crs : QgsCoordinateReferenceSystem( QStringLiteral( "EPSG:4326" ) );
278       spatialExtent.bounds = QgsBox3d( west, south, 0, east, north, 0 );
279 
280       extent.setSpatialExtents( { spatialExtent } );
281       break;
282     }
283     dataExt = dataExt.nextSiblingElement( QStringLiteral( "dataExt" ) );
284   }
285 
286   metadata.setExtent( extent );
287 
288   // licenses, constraints
289   QStringList licenses;
290   QStringList rights;
291   QgsLayerMetadata::ConstraintList constraints;
292   QDomElement resConst = dataIdInfo.firstChildElement( QStringLiteral( "resConst" ) );
293   while ( !resConst.isNull() )
294   {
295     QDomElement legConsts = resConst.firstChildElement( QStringLiteral( "LegConsts" ) );
296     while ( !legConsts.isNull() )
297     {
298       const QString restrictCd = legConsts.firstChildElement( QStringLiteral( "useConsts" ) ).firstChildElement( QStringLiteral( "RestrictCd" ) ).attribute( QStringLiteral( "value" ) );
299 
300       if ( restrictCd.compare( QLatin1String( "005" ) ) == 0 )
301       {
302         licenses << QTextDocumentFragment::fromHtml( legConsts.firstChildElement( QStringLiteral( "useLimit" ) ).text() ).toPlainText();
303       }
304       else if ( restrictCd.compare( QLatin1String( "006" ) ) == 0 )
305       {
306         rights << QTextDocumentFragment::fromHtml( legConsts.firstChildElement( QStringLiteral( "useLimit" ) ).text() ).toPlainText();
307       }
308       legConsts = legConsts.nextSiblingElement( QStringLiteral( "LegConsts" ) );
309     }
310 
311     QDomElement secConsts = resConst.firstChildElement( QStringLiteral( "SecConsts" ) );
312     while ( !secConsts.isNull() )
313     {
314       QgsLayerMetadata::Constraint constraint;
315       constraint.type = QObject::tr( "Security constraints" );
316       constraint.constraint = QTextDocumentFragment::fromHtml( secConsts.firstChildElement( QStringLiteral( "userNote" ) ).text() ).toPlainText();
317       constraints << constraint;
318       secConsts = secConsts.nextSiblingElement( QStringLiteral( "SecConsts" ) );
319     }
320 
321     QDomElement consts = resConst.firstChildElement( QStringLiteral( "Consts" ) );
322     while ( !consts.isNull() )
323     {
324       QgsLayerMetadata::Constraint constraint;
325       constraint.type = QObject::tr( "Limitations of use" );
326       constraint.constraint = QTextDocumentFragment::fromHtml( consts.firstChildElement( QStringLiteral( "useLimit" ) ).text() ).toPlainText();
327       constraints << constraint;
328       consts = consts.nextSiblingElement( QStringLiteral( "Consts" ) );
329     }
330 
331     resConst = resConst.nextSiblingElement( QStringLiteral( "resConst" ) );
332   }
333 
334   const QDomElement idCredit = dataIdInfo.firstChildElement( QStringLiteral( "idCredit" ) );
335   const QString credit = idCredit.text();
336   if ( !credit.isEmpty() )
337     rights << credit;
338 
339   // older xml format
340   QDomElement accconst = idInfo.firstChildElement( QStringLiteral( "accconst" ) );
341   while ( !accconst.isNull() )
342   {
343     QgsLayerMetadata::Constraint constraint;
344     constraint.type = QObject::tr( "Access" );
345     constraint.constraint = QTextDocumentFragment::fromHtml( accconst.text() ).toPlainText();
346     constraints << constraint;
347 
348     accconst = accconst.nextSiblingElement( QStringLiteral( "accconst" ) );
349   }
350   QDomElement useconst = idInfo.firstChildElement( QStringLiteral( "useconst" ) );
351   while ( !useconst.isNull() )
352   {
353     rights << QTextDocumentFragment::fromHtml( useconst.text() ).toPlainText();
354     useconst = useconst.nextSiblingElement( QStringLiteral( "useconst" ) );
355   }
356 
357   metadata.setLicenses( licenses );
358   metadata.setRights( rights );
359   metadata.setConstraints( constraints );
360 
361   // links
362   const QDomElement distInfo = metadataElem.firstChildElement( QStringLiteral( "distInfo" ) );
363   const QDomElement distributor = distInfo.firstChildElement( QStringLiteral( "distributor" ) );
364 
365   QDomElement distorTran = distributor.firstChildElement( QStringLiteral( "distorTran" ) );
366   while ( !distorTran.isNull() )
367   {
368     const QDomElement onLineSrc = distorTran.firstChildElement( QStringLiteral( "onLineSrc" ) );
369     if ( !onLineSrc.isNull() )
370     {
371       QgsAbstractMetadataBase::Link link;
372       link.url = onLineSrc.firstChildElement( QStringLiteral( "linkage" ) ).text();
373 
374       const QDomElement distorFormat = distributor.firstChildElement( QStringLiteral( "distorFormat" ) );
375       link.name = distorFormat.firstChildElement( QStringLiteral( "formatName" ) ).text();
376       link.type = distorFormat.firstChildElement( QStringLiteral( "formatSpec" ) ).text();
377 
378       if ( link.type.isEmpty() )
379       {
380         // older xml format
381         link.type = onLineSrc.firstChildElement( QStringLiteral( "protocol" ) ).text();
382       }
383       metadata.addLink( link );
384     }
385 
386     distorTran = distorTran.nextSiblingElement( QStringLiteral( "distorTran" ) );
387   }
388 
389   // lineage
390   const QDomElement dqInfo = metadataElem.firstChildElement( QStringLiteral( "dqInfo" ) );
391   const QDomElement dataLineage = dqInfo.firstChildElement( QStringLiteral( "dataLineage" ) );
392   const QString statement = QTextDocumentFragment::fromHtml( dataLineage.firstChildElement( QStringLiteral( "statement" ) ).text() ).toPlainText();
393   if ( !statement.isEmpty() )
394     metadata.addHistoryItem( statement );
395 
396   QDomElement dataSource = dataLineage.firstChildElement( QStringLiteral( "dataSource" ) );
397   while ( !dataSource.isNull() )
398   {
399     metadata.addHistoryItem( QObject::tr( "Data source: %1" ).arg( QTextDocumentFragment::fromHtml( dataSource.firstChildElement( QStringLiteral( "srcDesc" ) ).text() ).toPlainText() ) );
400     dataSource = dataSource.nextSiblingElement( QStringLiteral( "dataSource" ) );
401   }
402 
403   // contacts
404   const QDomElement mdContact = metadataElem.firstChildElement( QStringLiteral( "mdContact" ) );
405   if ( !mdContact.isNull() )
406   {
407     QgsAbstractMetadataBase::Contact contact;
408     contact.name = mdContact.firstChildElement( QStringLiteral( "rpIndName" ) ).text();
409     contact.organization = mdContact.firstChildElement( QStringLiteral( "rpOrgName" ) ).text();
410     contact.position = mdContact.firstChildElement( QStringLiteral( "rpPosName" ) ).text();
411 
412     const QString role = mdContact.firstChildElement( QStringLiteral( "role" ) ).firstChildElement( QStringLiteral( "RoleCd" ) ).attribute( QStringLiteral( "value" ) );
413     if ( role == QLatin1String( "007" ) )
414       contact.role = QObject::tr( "Point of contact" );
415 
416     const QDomElement rpCntInfo = mdContact.firstChildElement( QStringLiteral( "rpCntInfo" ) );
417     contact.email = rpCntInfo.firstChildElement( QStringLiteral( "cntAddress" ) ).firstChildElement( QStringLiteral( "eMailAdd" ) ).text();
418     contact.voice = rpCntInfo.firstChildElement( QStringLiteral( "cntPhone" ) ).firstChildElement( QStringLiteral( "voiceNum" ) ).text();
419 
420     QDomElement cntAddress = rpCntInfo.firstChildElement( QStringLiteral( "cntAddress" ) );
421     while ( !cntAddress.isNull() )
422     {
423       QgsAbstractMetadataBase::Address address;
424 
425       address.type = cntAddress.attribute( QStringLiteral( "addressType" ) );
426       address.address = cntAddress.firstChildElement( QStringLiteral( "delPoint" ) ).text();
427       address.city = cntAddress.firstChildElement( QStringLiteral( "city" ) ).text();
428       address.administrativeArea = cntAddress.firstChildElement( QStringLiteral( "adminArea" ) ).text();
429       address.postalCode = cntAddress.firstChildElement( QStringLiteral( "postCode" ) ).text();
430       address.country = cntAddress.firstChildElement( QStringLiteral( "country" ) ).text();
431 
432       contact.addresses.append( address );
433 
434       cntAddress = cntAddress.nextSiblingElement( QStringLiteral( "cntAddress" ) );
435     }
436 
437 
438     metadata.addContact( contact );
439   }
440 
441   // older xml format
442   const QDomElement ptcontac = idInfo.firstChildElement( QStringLiteral( "ptcontac" ) );
443   const QDomElement cntinfo = ptcontac.firstChildElement( QStringLiteral( "cntinfo" ) );
444   if ( !cntinfo.isNull() )
445   {
446     QgsAbstractMetadataBase::Contact contact;
447     const QDomElement cntorgp = cntinfo.firstChildElement( QStringLiteral( "cntorgp" ) );
448     const QString org = cntorgp.firstChildElement( QStringLiteral( "cntorg" ) ).text();
449 
450     contact.name = org;
451     contact.organization = org;
452     contact.role = QObject::tr( "Point of contact" );
453 
454     const QDomElement rpCntInfo = mdContact.firstChildElement( QStringLiteral( "rpCntInfo" ) );
455     contact.email = cntinfo.firstChildElement( QStringLiteral( "cntemail" ) ).text();
456     contact.fax = cntinfo.firstChildElement( QStringLiteral( "cntfax" ) ).text();
457     contact.voice = cntinfo.firstChildElement( QStringLiteral( "cntvoice" ) ).text();
458 
459     QDomElement cntaddr = cntinfo.firstChildElement( QStringLiteral( "cntaddr" ) );
460     while ( !cntaddr.isNull() )
461     {
462       QgsAbstractMetadataBase::Address address;
463 
464       QDomElement addressElem = cntaddr.firstChildElement( QStringLiteral( "address" ) );
465       while ( !addressElem.isNull() )
466       {
467         const QString addressPart = addressElem.text();
468         address.address = address.address.isEmpty() ? addressPart : address.address + '\n' + addressPart;
469         addressElem = addressElem.nextSiblingElement( QStringLiteral( "address" ) );
470       }
471       address.type = cntaddr.firstChildElement( QStringLiteral( "addrtype" ) ).text();
472       address.city = cntaddr.firstChildElement( QStringLiteral( "city" ) ).text();
473       address.administrativeArea = cntaddr.firstChildElement( QStringLiteral( "state" ) ).text();
474       address.postalCode = cntaddr.firstChildElement( QStringLiteral( "postal" ) ).text();
475       address.country = cntaddr.firstChildElement( QStringLiteral( "country" ) ).text();
476 
477       contact.addresses.append( address );
478 
479       cntaddr = cntaddr.nextSiblingElement( QStringLiteral( "cntaddr" ) );
480     }
481 
482     metadata.addContact( contact );
483   }
484 
485   return metadata;
486 }
487