1 /******************************************************************************
2  *
3  * Project:  CSV Translator
4  * Purpose:  Implements OGRCSVDriver.
5  * Author:   Frank Warmerdam, warmerdam@pobox.com
6  *
7  ******************************************************************************
8  * Copyright (c) 2004, Frank Warmerdam <warmerdam@pobox.com>
9  * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 #include "cpl_port.h"
31 #include "ogr_csv.h"
32 
33 #include <cerrno>
34 #include <cstring>
35 #include <map>
36 #include <string>
37 #include <utility>
38 
39 #include "cpl_conv.h"
40 #include "cpl_error.h"
41 #include "cpl_multiproc.h"
42 #include "cpl_string.h"
43 #include "cpl_vsi.h"
44 #include "gdal.h"
45 #include "gdal_priv.h"
46 
47 CPL_CVSID("$Id: ogrcsvdriver.cpp 1761acd90777d5bcc49eddbc13c193098f0ed40b 2020-10-01 12:12:00 +0200 Even Rouault $")
48 
49 static CPLMutex *hMutex = nullptr;
50 static std::map<CPLString, GDALDataset *> *poMap = nullptr;
51 
52 /************************************************************************/
53 /*                         OGRCSVDriverIdentify()                       */
54 /************************************************************************/
55 
OGRCSVDriverIdentify(GDALOpenInfo * poOpenInfo)56 static int OGRCSVDriverIdentify( GDALOpenInfo *poOpenInfo )
57 
58 {
59     if( poOpenInfo->fpL != nullptr )
60     {
61         const CPLString osBaseFilename =
62             CPLGetFilename(poOpenInfo->pszFilename);
63         const CPLString osExt =
64             OGRCSVDataSource::GetRealExtension(poOpenInfo->pszFilename);
65 
66         if( EQUAL(osBaseFilename, "NfdcFacilities.xls") ||
67             EQUAL(osBaseFilename, "NfdcRunways.xls") ||
68             EQUAL(osBaseFilename, "NfdcRemarks.xls") ||
69             EQUAL(osBaseFilename, "NfdcSchedules.xls") )
70         {
71             return TRUE;
72         }
73         else if( (STARTS_WITH_CI(osBaseFilename, "NationalFile_") ||
74                   STARTS_WITH_CI(osBaseFilename, "POP_PLACES_") ||
75                   STARTS_WITH_CI(osBaseFilename, "HIST_FEATURES_") ||
76                   STARTS_WITH_CI(osBaseFilename, "US_CONCISE_") ||
77                   STARTS_WITH_CI(osBaseFilename, "AllNames_") ||
78                   STARTS_WITH_CI(osBaseFilename,
79                                  "Feature_Description_History_") ||
80                   STARTS_WITH_CI(osBaseFilename, "ANTARCTICA_") ||
81                   STARTS_WITH_CI(osBaseFilename, "GOVT_UNITS_") ||
82                   STARTS_WITH_CI(osBaseFilename, "NationalFedCodes_") ||
83                   STARTS_WITH_CI(osBaseFilename, "AllStates_") ||
84                   STARTS_WITH_CI(osBaseFilename, "AllStatesFedCodes_") ||
85                   (osBaseFilename.size() > 2 &&
86                    STARTS_WITH_CI(osBaseFilename + 2, "_Features_")) ||
87                   (osBaseFilename.size() > 2 &&
88                    STARTS_WITH_CI(osBaseFilename + 2, "_FedCodes_"))) &&
89                  (EQUAL(osExt, "txt") || EQUAL(osExt, "zip")) )
90         {
91             return TRUE;
92         }
93         else if( EQUAL(osBaseFilename, "allCountries.txt") ||
94                  EQUAL(osBaseFilename, "allCountries.zip") )
95         {
96             return TRUE;
97         }
98         else if( EQUAL(osExt, "csv") || EQUAL(osExt, "tsv") )
99         {
100             return TRUE;
101         }
102         else if( STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") &&
103                  EQUAL(osExt, "zip") )
104         {
105             return -1;  // Unsure.
106         }
107         else
108         {
109             return FALSE;
110         }
111     }
112     else if( STARTS_WITH_CI(poOpenInfo->pszFilename, "CSV:") )
113     {
114         return TRUE;
115     }
116     else if( poOpenInfo->bIsDirectory )
117     {
118         return -1;  // Unsure.
119     }
120 
121     return FALSE;
122 }
123 
124 /************************************************************************/
125 /*                        OGRCSVDriverRemoveFromMap()                   */
126 /************************************************************************/
127 
OGRCSVDriverRemoveFromMap(const char * pszName,GDALDataset * poDS)128 void OGRCSVDriverRemoveFromMap(const char *pszName, GDALDataset *poDS)
129 {
130     if( poMap == nullptr )
131         return;
132     CPLMutexHolderD(&hMutex);
133     std::map<CPLString, GDALDataset *>::iterator oIter = poMap->find(pszName);
134     if( oIter != poMap->end() )
135     {
136         GDALDataset *poOtherDS = oIter->second;
137         if( poDS == poOtherDS )
138             poMap->erase(oIter);
139     }
140 }
141 
142 /************************************************************************/
143 /*                                Open()                                */
144 /************************************************************************/
145 
OGRCSVDriverOpen(GDALOpenInfo * poOpenInfo)146 static GDALDataset *OGRCSVDriverOpen( GDALOpenInfo *poOpenInfo )
147 
148 {
149     if( !OGRCSVDriverIdentify(poOpenInfo) )
150         return nullptr;
151 
152     if( poMap != nullptr )
153     {
154         CPLMutexHolderD(&hMutex);
155         std::map<CPLString, GDALDataset *>::iterator oIter =
156             poMap->find(poOpenInfo->pszFilename);
157         if( oIter != poMap->end() )
158         {
159             GDALDataset *poOtherDS = oIter->second;
160             poOtherDS->FlushCache();
161         }
162     }
163 
164     OGRCSVDataSource *poDS = new OGRCSVDataSource();
165 
166     if( !poDS->Open(poOpenInfo->pszFilename, poOpenInfo->eAccess == GA_Update,
167                     FALSE, poOpenInfo->papszOpenOptions) )
168     {
169         delete poDS;
170         poDS = nullptr;
171     }
172 
173     if( poOpenInfo->eAccess == GA_Update && poDS != nullptr )
174     {
175         CPLMutexHolderD(&hMutex);
176         if( poMap == nullptr )
177             poMap = new std::map<CPLString, GDALDataset *>();
178         if( poMap->find(poOpenInfo->pszFilename) == poMap->end() )
179         {
180             (*poMap)[poOpenInfo->pszFilename] = poDS;
181         }
182     }
183 
184     return poDS;
185 }
186 
187 /************************************************************************/
188 /*                               Create()                               */
189 /************************************************************************/
190 
OGRCSVDriverCreate(const char * pszName,CPL_UNUSED int nBands,CPL_UNUSED int nXSize,CPL_UNUSED int nYSize,CPL_UNUSED GDALDataType eDT,char ** papszOptions)191 static GDALDataset *OGRCSVDriverCreate( const char *pszName,
192                                         CPL_UNUSED int nBands,
193                                         CPL_UNUSED int nXSize,
194                                         CPL_UNUSED int nYSize,
195                                         CPL_UNUSED GDALDataType eDT,
196                                         char **papszOptions )
197 {
198     // First, ensure there isn't any such file yet.
199     VSIStatBufL sStatBuf;
200 
201     if (strcmp(pszName, "/dev/stdout") == 0)
202         pszName = "/vsistdout/";
203 
204     if( VSIStatL(pszName, &sStatBuf) == 0 )
205     {
206         CPLError(CE_Failure, CPLE_AppDefined,
207                  "It seems a file system object called '%s' already exists.",
208                  pszName);
209 
210         return nullptr;
211     }
212 
213     // If the target is not a simple .csv then create it as a directory.
214     CPLString osDirName;
215 
216     if( EQUAL(CPLGetExtension(pszName), "csv") )
217     {
218         osDirName = CPLGetPath(pszName);
219         if( osDirName == "" )
220             osDirName = ".";
221 
222         // HACK: CPLGetPath("/vsimem/foo.csv") = "/vsimem", but this is not
223         // recognized afterwards as a valid directory name.
224         if( osDirName == "/vsimem" )
225             osDirName = "/vsimem/";
226     }
227     else
228     {
229         if( STARTS_WITH(pszName, "/vsizip/"))
230         {
231             // Do nothing.
232         }
233         else if( !EQUAL(pszName, "/vsistdout/") &&
234                  VSIMkdir(pszName, 0755) != 0 )
235         {
236             CPLError(CE_Failure, CPLE_AppDefined,
237                      "Failed to create directory %s:\n%s",
238                      pszName, VSIStrerror(errno));
239             return nullptr;
240         }
241         osDirName = pszName;
242     }
243 
244     // Force it to open as a datasource.
245     OGRCSVDataSource *poDS = new OGRCSVDataSource();
246 
247     if( EQUAL(CPLGetExtension(pszName), "csv") )
248     {
249         poDS->CreateForSingleFile(osDirName, pszName);
250     }
251     else if( !poDS->Open(osDirName, TRUE, TRUE) )
252     {
253         delete poDS;
254         return nullptr;
255     }
256 
257     const char *pszGeometry = CSLFetchNameValue(papszOptions, "GEOMETRY");
258     if( pszGeometry != nullptr && EQUAL(pszGeometry, "AS_WKT") )
259         poDS->EnableGeometryFields();
260 
261     return poDS;
262 }
263 
264 /************************************************************************/
265 /*                              Delete()                                */
266 /************************************************************************/
267 
OGRCSVDriverDelete(const char * pszFilename)268 static CPLErr OGRCSVDriverDelete( const char *pszFilename )
269 
270 {
271     return CPLUnlinkTree(pszFilename) == 0 ? CE_None : CE_Failure;
272 }
273 
274 /************************************************************************/
275 /*                           OGRCSVDriverUnload()                       */
276 /************************************************************************/
277 
OGRCSVDriverUnload(GDALDriver *)278 static void OGRCSVDriverUnload( GDALDriver * )
279 {
280     if( hMutex != nullptr )
281         CPLDestroyMutex(hMutex);
282     hMutex = nullptr;
283     delete poMap;
284     poMap = nullptr;
285 }
286 
287 /************************************************************************/
288 /*                           RegisterOGRCSV()                           */
289 /************************************************************************/
290 
RegisterOGRCSV()291 void RegisterOGRCSV()
292 
293 {
294     if( GDALGetDriverByName("CSV") != nullptr )
295         return;
296 
297     GDALDriver *poDriver = new GDALDriver();
298 
299     poDriver->SetDescription("CSV");
300     poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
301     poDriver->SetMetadataItem(GDAL_DMD_LONGNAME,
302                               "Comma Separated Value (.csv)");
303     poDriver->SetMetadataItem(GDAL_DMD_EXTENSION, "csv");
304     poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/csv.html");
305 
306     poDriver->SetMetadataItem(GDAL_DMD_CREATIONOPTIONLIST,
307 "<CreationOptionList>"
308 "  <Option name='GEOMETRY' type='string-select' description='how to encode geometry fields'>"
309 "    <Value>AS_WKT</Value>"
310 "  </Option>"
311 "</CreationOptionList>");
312 
313     poDriver->SetMetadataItem(GDAL_DS_LAYER_CREATIONOPTIONLIST,
314 "<LayerCreationOptionList>"
315 "  <Option name='SEPARATOR' type='string-select' description='field separator' default='COMMA'>"
316 "    <Value>COMMA</Value>"
317 "    <Value>SEMICOLON</Value>"
318 "    <Value>TAB</Value>"
319 "    <Value>SPACE</Value>"
320 "  </Option>"
321 #ifdef WIN32
322 "  <Option name='LINEFORMAT' type='string-select' description='end-of-line sequence' default='CRLF'>"
323 #else
324 "  <Option name='LINEFORMAT' type='string-select' description='end-of-line sequence' default='LF'>"
325 #endif
326 "    <Value>CRLF</Value>"
327 "    <Value>LF</Value>"
328 "  </Option>"
329 "  <Option name='GEOMETRY' type='string-select' description='how to encode geometry fields'>"
330 "    <Value>AS_WKT</Value>"
331 "    <Value>AS_XYZ</Value>"
332 "    <Value>AS_XY</Value>"
333 "    <Value>AS_YX</Value>"
334 "  </Option>"
335 "  <Option name='CREATE_CSVT' type='boolean' description='whether to create a .csvt file' default='NO'/>"
336 "  <Option name='WRITE_BOM' type='boolean' description='whether to write a UTF-8 BOM prefix' default='NO'/>"
337 "  <Option name='GEOMETRY_NAME' type='string' description='Name of geometry column. Only used if GEOMETRY=AS_WKT' default='WKT'/>"
338 "  <Option name='STRING_QUOTING' type='string-select' description='whether to double-quote strings. IF_AMBIGUOUS means that string values that look like numbers will be quoted (it also implies IF_NEEDED).' default='IF_AMBIGUOUS'>"
339 "    <Value>IF_NEEDED</Value>"
340 "    <Value>IF_AMBIGUOUS</Value>"
341 "    <Value>ALWAYS</Value>"
342 "  </Option>"
343 "</LayerCreationOptionList>");
344 
345     poDriver->SetMetadataItem(GDAL_DMD_OPENOPTIONLIST,
346 "<OpenOptionList>"
347 #if 0
348 "  <Option name='SEPARATOR' type='string-select' description='field separator' default='AUTO'>"
349 "    <Value>AUTO</Value>"
350 "    <Value>COMMA</Value>"
351 "    <Value>SEMICOLON</Value>"
352 "    <Value>TAB</Value>"
353 "    <Value>SPACE</Value>"
354 "  </Option>"
355 #endif
356 "  <Option name='MERGE_SEPARATOR' type='boolean' description='whether to merge consecutive separators' default='NO'/>"
357 "  <Option name='AUTODETECT_TYPE' type='boolean' description='whether to guess data type from first bytes of the file' default='NO'/>"
358 "  <Option name='KEEP_SOURCE_COLUMNS' type='boolean' description='whether to add original columns whose guessed data type is not String. Only used if AUTODETECT_TYPE=YES' default='NO'/>"
359 "  <Option name='AUTODETECT_WIDTH' type='string-select' description='whether to auto-detect width/precision. Only used if AUTODETECT_TYPE=YES' default='NO'>"
360 "    <Value>YES</Value>"
361 "    <Value>NO</Value>"
362 "    <Value>STRING_ONLY</Value>"
363 "  </Option>"
364 "  <Option name='AUTODETECT_SIZE_LIMIT' type='int' description='number of bytes to inspect for auto-detection of data type. Only used if AUTODETECT_TYPE=YES' default='1000000'/>"
365 "  <Option name='QUOTED_FIELDS_AS_STRING' type='boolean' description='Only used if AUTODETECT_TYPE=YES. Whether to enforce quoted fields as string fields.' default='NO'/>"
366 "  <Option name='X_POSSIBLE_NAMES' type='string' description='Comma separated list of possible names for X/longitude coordinate of a point.'/>"
367 "  <Option name='Y_POSSIBLE_NAMES' type='string' description='Comma separated list of possible names for Y/latitude coordinate of a point.'/>"
368 "  <Option name='Z_POSSIBLE_NAMES' type='string' description='Comma separated list of possible names for Z/elevation coordinate of a point.'/>"
369 "  <Option name='GEOM_POSSIBLE_NAMES' type='string' description='Comma separated list of possible names for geometry columns.' default='WKT'/>"
370 "  <Option name='KEEP_GEOM_COLUMNS' type='boolean' description='whether to add original x/y/geometry columns as regular fields.' default='YES'/>"
371 "  <Option name='HEADERS' type='string-select' description='Whether the first line of the file contains column names or not' default='AUTO'>"
372 "    <Value>YES</Value>"
373 "    <Value>NO</Value>"
374 "    <Value>AUTO</Value>"
375 "  </Option>"
376 "  <Option name='EMPTY_STRING_AS_NULL' type='boolean' description='Whether to consider empty strings as null fields on reading' default='NO'/>"
377 "</OpenOptionList>");
378 
379     poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
380     poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES,
381                               "Integer Integer64 Real String Date DateTime "
382                               "Time IntegerList Integer64List RealList "
383                               "StringList");
384     poDriver->SetMetadataItem( GDAL_DMD_CREATIONFIELDDATASUBTYPES, "Boolean Int16 Float32" );
385 
386     poDriver->pfnOpen = OGRCSVDriverOpen;
387     poDriver->pfnIdentify = OGRCSVDriverIdentify;
388     poDriver->pfnCreate = OGRCSVDriverCreate;
389     poDriver->pfnDelete = OGRCSVDriverDelete;
390     poDriver->pfnUnloadDriver = OGRCSVDriverUnload;
391 
392     GetGDALDriverManager()->RegisterDriver(poDriver);
393 }
394