1 /******************************************************************************
2  *
3  * Project:  GeoRSS Translator
4  * Purpose:  Implements OGRGeoRSSDataSource class
5  * Author:   Even Rouault, even dot rouault at spatialys.com
6  *
7  ******************************************************************************
8  * Copyright (c) 2008-2011, Even Rouault <even dot rouault at spatialys.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  ****************************************************************************/
28 
29 #include "cpl_port.h"
30 #include "ogr_georss.h"
31 
32 #include <cstdio>
33 #include <cstring>
34 
35 #include "cpl_conv.h"
36 #include "cpl_csv.h"
37 #include "cpl_error.h"
38 #include "cpl_string.h"
39 #include "cpl_vsi.h"
40 #ifdef HAVE_EXPAT
41 #  include "expat.h"
42 #endif
43 #include "ogr_core.h"
44 #include "ogr_expat.h"
45 #include "ogr_spatialref.h"
46 #include "ogrsf_frmts.h"
47 
48 CPL_CVSID("$Id: ogrgeorssdatasource.cpp 355b41831cd2685c85d1aabe5b95665a2c6e99b7 2019-06-19 17:07:04 +0200 Even Rouault $")
49 
50 /************************************************************************/
51 /*                          OGRGeoRSSDataSource()                          */
52 /************************************************************************/
53 
OGRGeoRSSDataSource()54 OGRGeoRSSDataSource::OGRGeoRSSDataSource() :
55     pszName(nullptr),
56     papoLayers(nullptr),
57     nLayers(0),
58     fpOutput(nullptr),
59 #ifdef HAVE_EXPAT
60     validity(GEORSS_VALIDITY_UNKNOWN),
61 #endif
62     eFormat(GEORSS_RSS),
63     eGeomDialect(GEORSS_SIMPLE),
64     bUseExtensions(false),
65     bWriteHeaderAndFooter(true)
66 #ifdef HAVE_EXPAT
67     ,
68     oCurrentParser(nullptr),
69     nDataHandlerCounter(0)
70 #endif
71 {}
72 
73 /************************************************************************/
74 /*                         ~OGRGeoRSSDataSource()                          */
75 /************************************************************************/
76 
~OGRGeoRSSDataSource()77 OGRGeoRSSDataSource::~OGRGeoRSSDataSource()
78 
79 {
80     if( fpOutput != nullptr )
81     {
82         if( bWriteHeaderAndFooter )
83         {
84             if( eFormat == GEORSS_RSS )
85             {
86                 VSIFPrintfL(fpOutput, "  </channel>\n");
87                 VSIFPrintfL(fpOutput, "</rss>\n");
88             }
89             else
90             {
91                 VSIFPrintfL(fpOutput, "</feed>\n");
92             }
93         }
94         VSIFCloseL( fpOutput);
95     }
96 
97     for( int i = 0; i < nLayers; i++ )
98         delete papoLayers[i];
99     CPLFree(papoLayers);
100     CPLFree(pszName);
101 }
102 
103 /************************************************************************/
104 /*                           TestCapability()                           */
105 /************************************************************************/
106 
TestCapability(const char * pszCap)107 int OGRGeoRSSDataSource::TestCapability( const char * pszCap )
108 
109 {
110     if( EQUAL(pszCap, ODsCCreateLayer) )
111         return TRUE;
112     // else if( EQUAL(pszCap,ODsCDeleteLayer) )
113     //    return FALSE;
114 
115     return FALSE;
116 }
117 
118 /************************************************************************/
119 /*                              GetLayer()                              */
120 /************************************************************************/
121 
GetLayer(int iLayer)122 OGRLayer *OGRGeoRSSDataSource::GetLayer( int iLayer )
123 
124 {
125     if( iLayer < 0 || iLayer >= nLayers )
126         return nullptr;
127 
128     return papoLayers[iLayer];
129 }
130 
131 /************************************************************************/
132 /*                           ICreateLayer()                             */
133 /************************************************************************/
134 
ICreateLayer(const char * pszLayerName,OGRSpatialReference * poSRS,OGRwkbGeometryType,char **)135 OGRLayer * OGRGeoRSSDataSource::ICreateLayer(
136     const char * pszLayerName,
137     OGRSpatialReference *poSRS,
138     OGRwkbGeometryType /* eType */,
139     char ** /* papszOptions */ )
140 {
141     if (fpOutput == nullptr)
142         return nullptr;
143 
144     if (poSRS != nullptr && eGeomDialect != GEORSS_GML)
145     {
146         OGRSpatialReference oSRS;
147         oSRS.SetWellKnownGeogCS("WGS84");
148         oSRS.SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
149         const char* const apszOptions[] = {
150                 "IGNORE_DATA_AXIS_TO_SRS_AXIS_MAPPING=YES", nullptr };
151         if( !poSRS->IsSame(&oSRS, apszOptions) )
152         {
153             CPLError(CE_Failure, CPLE_NotSupported,
154                      "For a non GML dialect, only WGS84 SRS is supported");
155             return nullptr;
156         }
157     }
158 
159     nLayers++;
160     papoLayers = static_cast<OGRGeoRSSLayer **>(
161         CPLRealloc(papoLayers, nLayers * sizeof(OGRGeoRSSLayer*)));
162     auto poSRSClone = poSRS;
163     if( poSRSClone )
164     {
165         poSRSClone = poSRSClone->Clone();
166         poSRSClone->SetAxisMappingStrategy(OAMS_TRADITIONAL_GIS_ORDER);
167     }
168     papoLayers[nLayers-1] =
169         new OGRGeoRSSLayer(pszName, pszLayerName, this, poSRSClone, TRUE);
170     if( poSRSClone )
171         poSRSClone->Release();
172 
173     return papoLayers[nLayers-1];
174 }
175 
176 #ifdef HAVE_EXPAT
177 /************************************************************************/
178 /*                startElementValidateCbk()                             */
179 /************************************************************************/
180 
startElementValidateCbk(const char * pszNameIn,const char ** ppszAttr)181 void OGRGeoRSSDataSource::startElementValidateCbk( const char *pszNameIn,
182                                                    const char **ppszAttr )
183 {
184     if( validity == GEORSS_VALIDITY_UNKNOWN )
185     {
186         if( strcmp(pszNameIn, "rss") == 0 )
187         {
188             validity = GEORSS_VALIDITY_VALID;
189             eFormat = GEORSS_RSS;
190         }
191         else if( strcmp(pszNameIn, "feed") == 0 ||
192                  strcmp(pszNameIn, "atom:feed") == 0 )
193         {
194             validity = GEORSS_VALIDITY_VALID;
195             eFormat = GEORSS_ATOM;
196         }
197         else if( strcmp(pszNameIn, "rdf:RDF") == 0 )
198         {
199             const char** ppszIter = ppszAttr;
200             while( *ppszIter )
201             {
202                 if( strcmp(*ppszIter, "xmlns:georss") == 0 )
203                 {
204                     validity = GEORSS_VALIDITY_VALID;
205                     eFormat = GEORSS_RSS_RDF;
206                 }
207                 ppszIter += 2;
208             }
209         }
210         else
211         {
212             validity = GEORSS_VALIDITY_INVALID;
213         }
214     }
215 }
216 
217 /************************************************************************/
218 /*                      dataHandlerValidateCbk()                        */
219 /************************************************************************/
220 
dataHandlerValidateCbk(const char *,int)221 void OGRGeoRSSDataSource::dataHandlerValidateCbk( const char * /* data */,
222                                                   int /* nLen */)
223 {
224     nDataHandlerCounter++;
225     if( nDataHandlerCounter >= BUFSIZ )
226     {
227         CPLError(CE_Failure, CPLE_AppDefined,
228                  "File probably corrupted (million laugh pattern)");
229         XML_StopParser(oCurrentParser, XML_FALSE);
230     }
231 }
232 
startElementValidateCbk(void * pUserData,const char * pszName,const char ** ppszAttr)233 static void XMLCALL startElementValidateCbk( void *pUserData,
234                                              const char *pszName,
235                                              const char **ppszAttr )
236 {
237     OGRGeoRSSDataSource* poDS = static_cast<OGRGeoRSSDataSource *>(pUserData);
238     poDS->startElementValidateCbk(pszName, ppszAttr);
239 }
240 
dataHandlerValidateCbk(void * pUserData,const char * data,int nLen)241 static void XMLCALL dataHandlerValidateCbk( void *pUserData, const char *data,
242                                             int nLen )
243 {
244     OGRGeoRSSDataSource* poDS = static_cast<OGRGeoRSSDataSource *>(pUserData);
245     poDS->dataHandlerValidateCbk(data, nLen);
246 }
247 #endif
248 
249 /************************************************************************/
250 /*                                Open()                                */
251 /************************************************************************/
252 
Open(const char * pszFilename,int bUpdateIn)253 int OGRGeoRSSDataSource::Open( const char * pszFilename, int bUpdateIn)
254 
255 {
256     if( bUpdateIn )
257     {
258         CPLError(CE_Failure, CPLE_NotSupported,
259                  "OGR/GeoRSS driver does not support opening a file "
260                  "in update mode");
261         return FALSE;
262     }
263 #ifdef HAVE_EXPAT
264     pszName = CPLStrdup(pszFilename);
265 
266     // Try to open the file.
267     VSILFILE* fp = VSIFOpenL(pszFilename, "r");
268     if( fp == nullptr )
269         return FALSE;
270 
271     validity = GEORSS_VALIDITY_UNKNOWN;
272 
273     XML_Parser oParser = OGRCreateExpatXMLParser();
274     XML_SetUserData(oParser, this);
275     XML_SetElementHandler(oParser, ::startElementValidateCbk, nullptr);
276     XML_SetCharacterDataHandler(oParser, ::dataHandlerValidateCbk);
277     oCurrentParser = oParser;
278 
279     char aBuf[BUFSIZ];
280     int nDone = 0;
281     unsigned int nLen = 0;
282     int nCount = 0;
283 
284     // Begin to parse the file and look for the <rss> or <feed> element.
285     // It *MUST* be the first element of an XML file.
286     // Once we have read the first element, we know if we can
287     // handle the file or not with that driver.
288     do
289     {
290         nDataHandlerCounter = 0;
291         nLen = static_cast<unsigned int>(VSIFReadL(aBuf, 1, sizeof(aBuf), fp));
292         nDone = VSIFEofL(fp);
293         if( XML_Parse(oParser, aBuf, nLen, nDone) == XML_STATUS_ERROR )
294         {
295             if( nLen <= BUFSIZ - 1 )
296                 aBuf[nLen] = 0;
297             else
298                 aBuf[BUFSIZ-1] = 0;
299 
300             if( strstr(aBuf, "<?xml") &&
301                 (strstr(aBuf, "<rss") ||
302                  strstr(aBuf, "<feed") ||
303                  strstr(aBuf, "<atom:feed")) )
304             {
305                 CPLError(CE_Failure, CPLE_AppDefined,
306                          "XML parsing of GeoRSS file failed: "
307                          "%s at line %d, column %d",
308                          XML_ErrorString(XML_GetErrorCode(oParser)),
309                          static_cast<int>(XML_GetCurrentLineNumber(oParser)),
310                          static_cast<int>(XML_GetCurrentColumnNumber(oParser)));
311             }
312             validity = GEORSS_VALIDITY_INVALID;
313             break;
314         }
315         if (validity == GEORSS_VALIDITY_INVALID)
316         {
317             break;
318         }
319         else if (validity == GEORSS_VALIDITY_VALID)
320         {
321             break;
322         }
323         else
324         {
325             // After reading 50 * BUFSIZ bytes, and not finding whether the file
326             // is GeoRSS or not, we give up and fail silently.
327             nCount++;
328             if( nCount == 50 )
329                 break;
330         }
331     } while( !nDone && nLen > 0 );
332 
333     XML_ParserFree(oParser);
334 
335     VSIFCloseL(fp);
336 
337     if( validity == GEORSS_VALIDITY_VALID )
338     {
339         CPLDebug("GeoRSS", "%s seems to be a GeoRSS file.", pszFilename);
340 
341         nLayers = 1;
342         papoLayers = static_cast<OGRGeoRSSLayer **>(
343             CPLRealloc(papoLayers, nLayers * sizeof(OGRGeoRSSLayer*)));
344         papoLayers[0] =
345             new OGRGeoRSSLayer(pszName, "georss", this, nullptr, FALSE);
346     }
347 
348     return validity == GEORSS_VALIDITY_VALID;
349 #else
350     VSILFILE* fp = VSIFOpenL(pszFilename, "r");
351     if (fp)
352     {
353         char aBuf[256];
354         const unsigned int nLen =
355             static_cast<unsigned int>(VSIFReadL(aBuf, 1, 255, fp));
356         aBuf[nLen] = '\0';
357         if( strstr(aBuf, "<?xml") &&
358             (strstr(aBuf, "<rss") ||
359              strstr(aBuf, "<atom:feed") ||
360              strstr(aBuf, "<feed")) )
361         {
362             CPLError(CE_Failure, CPLE_NotSupported,
363                      "OGR/GeoRSS driver has not been built with read support. "
364                      "Expat library required");
365         }
366         VSIFCloseL(fp);
367     }
368     return FALSE;
369 #endif
370 }
371 
372 /************************************************************************/
373 /*                               Create()                               */
374 /************************************************************************/
375 
Create(const char * pszFilename,char ** papszOptions)376 int OGRGeoRSSDataSource::Create( const char *pszFilename,
377                                  char **papszOptions )
378 {
379     if( fpOutput != nullptr )
380     {
381         CPLAssert(false);
382         return FALSE;
383     }
384 
385     if( strcmp(pszFilename, "/dev/stdout") == 0 )
386         pszFilename = "/vsistdout/";
387 
388 /* -------------------------------------------------------------------- */
389 /*     Do not override exiting file.                                    */
390 /* -------------------------------------------------------------------- */
391     VSIStatBufL sStatBuf;
392 
393     if( VSIStatL(pszFilename, &sStatBuf) == 0 )
394     {
395         CPLError(CE_Failure, CPLE_NotSupported,
396                  "You have to delete %s before being able to create it "
397                  "with the GeoRSS driver",
398                  pszFilename);
399         return FALSE;
400     }
401 
402 /* -------------------------------------------------------------------- */
403 /*      Create the output file.                                         */
404 /* -------------------------------------------------------------------- */
405     pszName = CPLStrdup(pszFilename);
406 
407     fpOutput = VSIFOpenL(pszFilename, "w");
408     if( fpOutput == nullptr )
409     {
410         CPLError(CE_Failure, CPLE_OpenFailed,
411                  "Failed to create GeoRSS file %s.",
412                  pszFilename);
413         return FALSE;
414     }
415 
416     const char* pszFormat = CSLFetchNameValue(papszOptions, "FORMAT");
417     if( pszFormat )
418     {
419         if( EQUAL(pszFormat, "RSS") )
420             eFormat = GEORSS_RSS;
421         else if (EQUAL(pszFormat, "ATOM"))
422             eFormat = GEORSS_ATOM;
423         else
424             CPLError(CE_Warning, CPLE_NotSupported,
425                      "Unsupported value for %s : %s", "FORMAT", pszFormat);
426     }
427 
428     const char* pszGeomDialect =
429         CSLFetchNameValue(papszOptions, "GEOM_DIALECT");
430     if (pszGeomDialect)
431     {
432         if( EQUAL(pszGeomDialect, "GML") )
433             eGeomDialect = GEORSS_GML;
434         else if (EQUAL(pszGeomDialect, "SIMPLE"))
435             eGeomDialect = GEORSS_SIMPLE;
436         else if (EQUAL(pszGeomDialect, "W3C_GEO"))
437             eGeomDialect = GEORSS_W3C_GEO;
438         else
439             CPLError(CE_Warning, CPLE_NotSupported,
440                      "Unsupported value for %s : %s", "GEOM_DIALECT",
441                      pszGeomDialect);
442     }
443 
444     const char* pszWriteHeaderAndFooter =
445         CSLFetchNameValue(papszOptions, "WRITE_HEADER_AND_FOOTER");
446     if( pszWriteHeaderAndFooter && !CPLTestBool(pszWriteHeaderAndFooter) )
447     {
448         bWriteHeaderAndFooter = false;
449         return TRUE;
450     }
451 
452     const char* pszTitle = nullptr;
453     const char* pszDescription = nullptr;
454     const char* pszLink = nullptr;
455     const char* pszUpdated = nullptr;
456     const char* pszAuthorName = nullptr;
457     const char* pszId = nullptr;
458 
459     const char* pszHeader = CSLFetchNameValue(papszOptions, "HEADER");
460 
461     if( eFormat == GEORSS_RSS && pszHeader == nullptr )
462     {
463         pszTitle = CSLFetchNameValue(papszOptions, "TITLE");
464         if( pszTitle == nullptr )
465             pszTitle = "title";
466 
467         pszDescription = CSLFetchNameValue(papszOptions, "DESCRIPTION");
468         if( pszDescription == nullptr )
469             pszDescription = "channel_description";
470 
471         pszLink = CSLFetchNameValue(papszOptions, "LINK");
472         if( pszLink == nullptr )
473             pszLink = "channel_link";
474     }
475     else if( eFormat == GEORSS_ATOM && pszHeader == nullptr )
476     {
477         pszTitle = CSLFetchNameValue(papszOptions, "TITLE");
478         if( pszTitle == nullptr )
479             pszTitle = "title";
480 
481         pszUpdated = CSLFetchNameValue(papszOptions, "UPDATED");
482         if( pszUpdated == nullptr )
483             pszUpdated = "2009-01-01T00:00:00Z";
484 
485         pszAuthorName = CSLFetchNameValue(papszOptions, "AUTHOR_NAME");
486         if( pszAuthorName == nullptr )
487             pszAuthorName = "author";
488 
489         pszId = CSLFetchNameValue(papszOptions, "ID");
490         if( pszId == nullptr )
491             pszId = "id";
492     }
493 
494     const char* pszUseExtensions =
495         CSLFetchNameValue( papszOptions, "USE_EXTENSIONS");
496     bUseExtensions = pszUseExtensions && CPLTestBool(pszUseExtensions);
497 
498 /* -------------------------------------------------------------------- */
499 /*     Output header of GeoRSS file.                                       */
500 /* -------------------------------------------------------------------- */
501     VSIFPrintfL(fpOutput, "<?xml version=\"1.0\"?>\n");
502     if( eFormat == GEORSS_RSS )
503     {
504         VSIFPrintfL(fpOutput, "<rss version=\"2.0\" ");
505         if( eGeomDialect == GEORSS_GML )
506             VSIFPrintfL(fpOutput, "xmlns:georss=\"http://www.georss.org/georss\" xmlns:gml=\"http://www.opengis.net/gml\"");
507         else if (eGeomDialect == GEORSS_SIMPLE)
508             VSIFPrintfL(fpOutput, "xmlns:georss=\"http://www.georss.org/georss\"");
509         else
510             VSIFPrintfL(fpOutput, "xmlns:geo=\"http://www.w3.org/2003/01/geo/wgs84_pos#\"");
511         VSIFPrintfL(fpOutput, ">\n");
512         VSIFPrintfL(fpOutput, "  <channel>\n");
513         if( pszHeader )
514         {
515             VSIFPrintfL(fpOutput, "%s", pszHeader);
516         }
517         else
518         {
519             VSIFPrintfL(fpOutput, "    <title>%s</title>\n", pszTitle);
520             VSIFPrintfL(fpOutput, "    <description>%s</description>\n",
521                         pszDescription);
522             VSIFPrintfL(fpOutput, "    <link>%s</link>\n", pszLink);
523         }
524     }
525     else
526     {
527         VSIFPrintfL(fpOutput, "<feed xmlns=\"http://www.w3.org/2005/Atom\" ");
528         if( eGeomDialect == GEORSS_GML )
529             VSIFPrintfL(fpOutput, "xmlns:gml=\"http://www.opengis.net/gml\"");
530         else if( eGeomDialect == GEORSS_SIMPLE )
531             VSIFPrintfL(fpOutput, "xmlns:georss=\"http://www.georss.org/georss\"");
532         else
533             VSIFPrintfL(fpOutput, "xmlns:geo=\"http://www.w3.org/2003/01/geo/wgs84_pos#\"");
534         VSIFPrintfL(fpOutput, ">\n");
535         if( pszHeader )
536         {
537             VSIFPrintfL(fpOutput, "%s", pszHeader);
538         }
539         else
540         {
541             VSIFPrintfL(fpOutput, "  <title>%s</title>\n", pszTitle);
542             VSIFPrintfL(fpOutput, "  <updated>%s</updated>\n", pszUpdated);
543             VSIFPrintfL(fpOutput, "  <author><name>%s</name></author>\n",
544                         pszAuthorName);
545             VSIFPrintfL(fpOutput, "  <id>%s</id>\n", pszId);
546         }
547     }
548 
549     return TRUE;
550 }
551