1 /******************************************************************************
2 *
3 * Project: CSV Translator
4 * Purpose: Implements OGRCSVDriver.
5 * Author: Frank Warmerdam, warmerdam@pobox.com
6 *
7 ******************************************************************************
8 * Copyright (c) 2004, Frank Warmerdam <warmerdam@pobox.com>
9 * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included
19 * in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 * DEALINGS IN THE SOFTWARE.
28 ****************************************************************************/
29
30 #include "cpl_port.h"
31 #include "ogr_csv.h"
32
33 #include <cerrno>
34 #include <cstring>
35 #include <map>
36 #include <string>
37 #include <utility>
38
39 #include "cpl_conv.h"
40 #include "cpl_error.h"
41 #include "cpl_multiproc.h"
42 #include "cpl_string.h"
43 #include "cpl_vsi.h"
44 #include "gdal.h"
45 #include "gdal_priv.h"
46
47 CPL_CVSID("$Id: ogrcsvdriver.cpp 1761acd90777d5bcc49eddbc13c193098f0ed40b 2020-10-01 12:12:00 +0200 Even Rouault $")
48
49 static CPLMutex *hMutex = nullptr;
50 static std::map<CPLString, GDALDataset *> *poMap = nullptr;
51
52 /************************************************************************/
53 /* OGRCSVDriverIdentify() */
54 /************************************************************************/
55
OGRCSVDriverIdentify(GDALOpenInfo * poOpenInfo)56 static int OGRCSVDriverIdentify( GDALOpenInfo *poOpenInfo )
57
58 {
59 if( poOpenInfo->fpL != nullptr )
60 {
61 const CPLString osBaseFilename =
62 CPLGetFilename(poOpenInfo->pszFilename);
63 const CPLString osExt =
64 OGRCSVDataSource::GetRealExtension(poOpenInfo->pszFilename);
65
66 if( EQUAL(osBaseFilename, "NfdcFacilities.xls") ||
67 EQUAL(osBaseFilename, "NfdcRunways.xls") ||
68 EQUAL(osBaseFilename, "NfdcRemarks.xls") ||
69 EQUAL(osBaseFilename, "NfdcSchedules.xls") )
70 {
71 return TRUE;
72 }
73 else if( (STARTS_WITH_CI(osBaseFilename, "NationalFile_") ||
74 STARTS_WITH_CI(osBaseFilename, "POP_PLACES_") ||
75 STARTS_WITH_CI(osBaseFilename, "HIST_FEATURES_") ||
76 STARTS_WITH_CI(osBaseFilename, "US_CONCISE_") ||
77 STARTS_WITH_CI(osBaseFilename, "AllNames_") ||
78 STARTS_WITH_CI(osBaseFilename,
79 "Feature_Description_History_") ||
80 STARTS_WITH_CI(osBaseFilename, "ANTARCTICA_") ||
81 STARTS_WITH_CI(osBaseFilename, "GOVT_UNITS_") ||
82 STARTS_WITH_CI(osBaseFilename, "NationalFedCodes_") ||
83 STARTS_WITH_CI(osBaseFilename, "AllStates_") ||
84 STARTS_WITH_CI(osBaseFilename, "AllStatesFedCodes_") ||
85 (osBaseFilename.size() > 2 &&
86 STARTS_WITH_CI(osBaseFilename + 2, "_Features_")) ||
87 (osBaseFilename.size() > 2 &&
88 STARTS_WITH_CI(osBaseFilename + 2, "_FedCodes_"))) &&
89 (EQUAL(osExt, "txt") || EQUAL(osExt, "zip")) )
90 {
91 return TRUE;
92 }
93 else if( EQUAL(osBaseFilename, "allCountries.txt") ||
94 EQUAL(osBaseFilename, "allCountries.zip") )
95 {
96 return TRUE;
97 }
98 else if( EQUAL(osExt, "csv") || EQUAL(osExt, "tsv") )
99 {
100 return TRUE;
101 }
102 else if( STARTS_WITH(poOpenInfo->pszFilename, "/vsizip/") &&
103 EQUAL(osExt, "zip") )
104 {
105 return -1; // Unsure.
106 }
107 else
108 {
109 return FALSE;
110 }
111 }
112 else if( STARTS_WITH_CI(poOpenInfo->pszFilename, "CSV:") )
113 {
114 return TRUE;
115 }
116 else if( poOpenInfo->bIsDirectory )
117 {
118 return -1; // Unsure.
119 }
120
121 return FALSE;
122 }
123
124 /************************************************************************/
125 /* OGRCSVDriverRemoveFromMap() */
126 /************************************************************************/
127
OGRCSVDriverRemoveFromMap(const char * pszName,GDALDataset * poDS)128 void OGRCSVDriverRemoveFromMap(const char *pszName, GDALDataset *poDS)
129 {
130 if( poMap == nullptr )
131 return;
132 CPLMutexHolderD(&hMutex);
133 std::map<CPLString, GDALDataset *>::iterator oIter = poMap->find(pszName);
134 if( oIter != poMap->end() )
135 {
136 GDALDataset *poOtherDS = oIter->second;
137 if( poDS == poOtherDS )
138 poMap->erase(oIter);
139 }
140 }
141
142 /************************************************************************/
/* OGRCSVDriverOpen() */
144 /************************************************************************/
145
OGRCSVDriverOpen(GDALOpenInfo * poOpenInfo)146 static GDALDataset *OGRCSVDriverOpen( GDALOpenInfo *poOpenInfo )
147
148 {
149 if( !OGRCSVDriverIdentify(poOpenInfo) )
150 return nullptr;
151
152 if( poMap != nullptr )
153 {
154 CPLMutexHolderD(&hMutex);
155 std::map<CPLString, GDALDataset *>::iterator oIter =
156 poMap->find(poOpenInfo->pszFilename);
157 if( oIter != poMap->end() )
158 {
159 GDALDataset *poOtherDS = oIter->second;
160 poOtherDS->FlushCache();
161 }
162 }
163
164 OGRCSVDataSource *poDS = new OGRCSVDataSource();
165
166 if( !poDS->Open(poOpenInfo->pszFilename, poOpenInfo->eAccess == GA_Update,
167 FALSE, poOpenInfo->papszOpenOptions) )
168 {
169 delete poDS;
170 poDS = nullptr;
171 }
172
173 if( poOpenInfo->eAccess == GA_Update && poDS != nullptr )
174 {
175 CPLMutexHolderD(&hMutex);
176 if( poMap == nullptr )
177 poMap = new std::map<CPLString, GDALDataset *>();
178 if( poMap->find(poOpenInfo->pszFilename) == poMap->end() )
179 {
180 (*poMap)[poOpenInfo->pszFilename] = poDS;
181 }
182 }
183
184 return poDS;
185 }
186
187 /************************************************************************/
/* OGRCSVDriverCreate() */
189 /************************************************************************/
190
OGRCSVDriverCreate(const char * pszName,CPL_UNUSED int nBands,CPL_UNUSED int nXSize,CPL_UNUSED int nYSize,CPL_UNUSED GDALDataType eDT,char ** papszOptions)191 static GDALDataset *OGRCSVDriverCreate( const char *pszName,
192 CPL_UNUSED int nBands,
193 CPL_UNUSED int nXSize,
194 CPL_UNUSED int nYSize,
195 CPL_UNUSED GDALDataType eDT,
196 char **papszOptions )
197 {
198 // First, ensure there isn't any such file yet.
199 VSIStatBufL sStatBuf;
200
201 if (strcmp(pszName, "/dev/stdout") == 0)
202 pszName = "/vsistdout/";
203
204 if( VSIStatL(pszName, &sStatBuf) == 0 )
205 {
206 CPLError(CE_Failure, CPLE_AppDefined,
207 "It seems a file system object called '%s' already exists.",
208 pszName);
209
210 return nullptr;
211 }
212
213 // If the target is not a simple .csv then create it as a directory.
214 CPLString osDirName;
215
216 if( EQUAL(CPLGetExtension(pszName), "csv") )
217 {
218 osDirName = CPLGetPath(pszName);
219 if( osDirName == "" )
220 osDirName = ".";
221
222 // HACK: CPLGetPath("/vsimem/foo.csv") = "/vsimem", but this is not
223 // recognized afterwards as a valid directory name.
224 if( osDirName == "/vsimem" )
225 osDirName = "/vsimem/";
226 }
227 else
228 {
229 if( STARTS_WITH(pszName, "/vsizip/"))
230 {
231 // Do nothing.
232 }
233 else if( !EQUAL(pszName, "/vsistdout/") &&
234 VSIMkdir(pszName, 0755) != 0 )
235 {
236 CPLError(CE_Failure, CPLE_AppDefined,
237 "Failed to create directory %s:\n%s",
238 pszName, VSIStrerror(errno));
239 return nullptr;
240 }
241 osDirName = pszName;
242 }
243
244 // Force it to open as a datasource.
245 OGRCSVDataSource *poDS = new OGRCSVDataSource();
246
247 if( EQUAL(CPLGetExtension(pszName), "csv") )
248 {
249 poDS->CreateForSingleFile(osDirName, pszName);
250 }
251 else if( !poDS->Open(osDirName, TRUE, TRUE) )
252 {
253 delete poDS;
254 return nullptr;
255 }
256
257 const char *pszGeometry = CSLFetchNameValue(papszOptions, "GEOMETRY");
258 if( pszGeometry != nullptr && EQUAL(pszGeometry, "AS_WKT") )
259 poDS->EnableGeometryFields();
260
261 return poDS;
262 }
263
264 /************************************************************************/
/* OGRCSVDriverDelete() */
266 /************************************************************************/
267
OGRCSVDriverDelete(const char * pszFilename)268 static CPLErr OGRCSVDriverDelete( const char *pszFilename )
269
270 {
271 return CPLUnlinkTree(pszFilename) == 0 ? CE_None : CE_Failure;
272 }
273
274 /************************************************************************/
275 /* OGRCSVDriverUnload() */
276 /************************************************************************/
277
OGRCSVDriverUnload(GDALDriver *)278 static void OGRCSVDriverUnload( GDALDriver * )
279 {
280 if( hMutex != nullptr )
281 CPLDestroyMutex(hMutex);
282 hMutex = nullptr;
283 delete poMap;
284 poMap = nullptr;
285 }
286
287 /************************************************************************/
288 /* RegisterOGRCSV() */
289 /************************************************************************/
290
RegisterOGRCSV()291 void RegisterOGRCSV()
292
293 {
294 if( GDALGetDriverByName("CSV") != nullptr )
295 return;
296
297 GDALDriver *poDriver = new GDALDriver();
298
299 poDriver->SetDescription("CSV");
300 poDriver->SetMetadataItem(GDAL_DCAP_VECTOR, "YES");
301 poDriver->SetMetadataItem(GDAL_DMD_LONGNAME,
302 "Comma Separated Value (.csv)");
303 poDriver->SetMetadataItem(GDAL_DMD_EXTENSION, "csv");
304 poDriver->SetMetadataItem(GDAL_DMD_HELPTOPIC, "drivers/vector/csv.html");
305
306 poDriver->SetMetadataItem(GDAL_DMD_CREATIONOPTIONLIST,
307 "<CreationOptionList>"
308 " <Option name='GEOMETRY' type='string-select' description='how to encode geometry fields'>"
309 " <Value>AS_WKT</Value>"
310 " </Option>"
311 "</CreationOptionList>");
312
313 poDriver->SetMetadataItem(GDAL_DS_LAYER_CREATIONOPTIONLIST,
314 "<LayerCreationOptionList>"
315 " <Option name='SEPARATOR' type='string-select' description='field separator' default='COMMA'>"
316 " <Value>COMMA</Value>"
317 " <Value>SEMICOLON</Value>"
318 " <Value>TAB</Value>"
319 " <Value>SPACE</Value>"
320 " </Option>"
321 #ifdef WIN32
322 " <Option name='LINEFORMAT' type='string-select' description='end-of-line sequence' default='CRLF'>"
323 #else
324 " <Option name='LINEFORMAT' type='string-select' description='end-of-line sequence' default='LF'>"
325 #endif
326 " <Value>CRLF</Value>"
327 " <Value>LF</Value>"
328 " </Option>"
329 " <Option name='GEOMETRY' type='string-select' description='how to encode geometry fields'>"
330 " <Value>AS_WKT</Value>"
331 " <Value>AS_XYZ</Value>"
332 " <Value>AS_XY</Value>"
333 " <Value>AS_YX</Value>"
334 " </Option>"
335 " <Option name='CREATE_CSVT' type='boolean' description='whether to create a .csvt file' default='NO'/>"
336 " <Option name='WRITE_BOM' type='boolean' description='whether to write a UTF-8 BOM prefix' default='NO'/>"
337 " <Option name='GEOMETRY_NAME' type='string' description='Name of geometry column. Only used if GEOMETRY=AS_WKT' default='WKT'/>"
338 " <Option name='STRING_QUOTING' type='string-select' description='whether to double-quote strings. IF_AMBIGUOUS means that string values that look like numbers will be quoted (it also implies IF_NEEDED).' default='IF_AMBIGUOUS'>"
339 " <Value>IF_NEEDED</Value>"
340 " <Value>IF_AMBIGUOUS</Value>"
341 " <Value>ALWAYS</Value>"
342 " </Option>"
343 "</LayerCreationOptionList>");
344
345 poDriver->SetMetadataItem(GDAL_DMD_OPENOPTIONLIST,
346 "<OpenOptionList>"
347 #if 0
348 " <Option name='SEPARATOR' type='string-select' description='field separator' default='AUTO'>"
349 " <Value>AUTO</Value>"
350 " <Value>COMMA</Value>"
351 " <Value>SEMICOLON</Value>"
352 " <Value>TAB</Value>"
353 " <Value>SPACE</Value>"
354 " </Option>"
355 #endif
356 " <Option name='MERGE_SEPARATOR' type='boolean' description='whether to merge consecutive separators' default='NO'/>"
357 " <Option name='AUTODETECT_TYPE' type='boolean' description='whether to guess data type from first bytes of the file' default='NO'/>"
358 " <Option name='KEEP_SOURCE_COLUMNS' type='boolean' description='whether to add original columns whose guessed data type is not String. Only used if AUTODETECT_TYPE=YES' default='NO'/>"
359 " <Option name='AUTODETECT_WIDTH' type='string-select' description='whether to auto-detect width/precision. Only used if AUTODETECT_TYPE=YES' default='NO'>"
360 " <Value>YES</Value>"
361 " <Value>NO</Value>"
362 " <Value>STRING_ONLY</Value>"
363 " </Option>"
364 " <Option name='AUTODETECT_SIZE_LIMIT' type='int' description='number of bytes to inspect for auto-detection of data type. Only used if AUTODETECT_TYPE=YES' default='1000000'/>"
365 " <Option name='QUOTED_FIELDS_AS_STRING' type='boolean' description='Only used if AUTODETECT_TYPE=YES. Whether to enforce quoted fields as string fields.' default='NO'/>"
366 " <Option name='X_POSSIBLE_NAMES' type='string' description='Comma separated list of possible names for X/longitude coordinate of a point.'/>"
367 " <Option name='Y_POSSIBLE_NAMES' type='string' description='Comma separated list of possible names for Y/latitude coordinate of a point.'/>"
368 " <Option name='Z_POSSIBLE_NAMES' type='string' description='Comma separated list of possible names for Z/elevation coordinate of a point.'/>"
369 " <Option name='GEOM_POSSIBLE_NAMES' type='string' description='Comma separated list of possible names for geometry columns.' default='WKT'/>"
370 " <Option name='KEEP_GEOM_COLUMNS' type='boolean' description='whether to add original x/y/geometry columns as regular fields.' default='YES'/>"
371 " <Option name='HEADERS' type='string-select' description='Whether the first line of the file contains column names or not' default='AUTO'>"
372 " <Value>YES</Value>"
373 " <Value>NO</Value>"
374 " <Value>AUTO</Value>"
375 " </Option>"
376 " <Option name='EMPTY_STRING_AS_NULL' type='boolean' description='Whether to consider empty strings as null fields on reading' default='NO'/>"
377 "</OpenOptionList>");
378
379 poDriver->SetMetadataItem(GDAL_DCAP_VIRTUALIO, "YES");
380 poDriver->SetMetadataItem(GDAL_DMD_CREATIONFIELDDATATYPES,
381 "Integer Integer64 Real String Date DateTime "
382 "Time IntegerList Integer64List RealList "
383 "StringList");
384 poDriver->SetMetadataItem( GDAL_DMD_CREATIONFIELDDATASUBTYPES, "Boolean Int16 Float32" );
385
386 poDriver->pfnOpen = OGRCSVDriverOpen;
387 poDriver->pfnIdentify = OGRCSVDriverIdentify;
388 poDriver->pfnCreate = OGRCSVDriverCreate;
389 poDriver->pfnDelete = OGRCSVDriverDelete;
390 poDriver->pfnUnloadDriver = OGRCSVDriverUnload;
391
392 GetGDALDriverManager()->RegisterDriver(poDriver);
393 }
394