1 /******************************************************************************
2 *
3 * Project: GDAL Core
4 * Purpose: Implementation of GDALOpenInfo class.
5 * Author: Frank Warmerdam, warmerdam@pobox.com
6 *
7 **********************************************************************
8 * Copyright (c) 2002, Frank Warmerdam
9 * Copyright (c) 2008-2012, Even Rouault <even dot rouault at spatialys.com>
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included
19 * in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 * DEALINGS IN THE SOFTWARE.
28 ****************************************************************************/
29
30 #include "gdal_priv.h" // Must be included first for mingw VSIStatBufL.
31 #include "cpl_port.h"
32
33 #include <cstdlib>
34 #include <cstring>
35 #ifdef HAVE_UNISTD_H
36 #include <unistd.h>
37 #endif
38
39 #include <algorithm>
40 #include <map>
41 #include <mutex>
42 #include <vector>
43
44 #include "cpl_config.h"
45 #include "cpl_conv.h"
46 #include "cpl_error.h"
47 #include "cpl_string.h"
48 #include "cpl_vsi.h"
49 #include "gdal.h"
50
51 CPL_CVSID("$Id: gdalopeninfo.cpp 2dead8e944b4b88b137dc4a51c7efa241fd784cc 2021-09-02 08:53:32 +0200 Even Rouault $")
52
// Keep the prototypes of these two functions in sync between
// gdalopeninfo.cpp, ogrsqlitedatasource.cpp and ogrgeopackagedatasource.cpp.
55 void GDALOpenInfoDeclareFileNotToOpen(const char* pszFilename,
56 const GByte* pabyHeader,
57 int nHeaderBytes);
58 void GDALOpenInfoUnDeclareFileNotToOpen(const char* pszFilename);
59
60 /************************************************************************/
61
62 /* This whole section helps for SQLite/GPKG, especially with write-ahead
63 * log enabled. The issue is that sqlite3 relies on POSIX advisory locks to
64 * properly work and decide when to create/delete the wal related files.
65 * One issue with POSIX advisory locks is that if within the same process
66 * you do
67 * f1 = open('somefile')
68 * set locks on f1
69 * f2 = open('somefile')
70 * close(f2)
71 * The close(f2) will cancel the locks set on f1. The work on f1 is done by
72 * libsqlite3 whereas the work on f2 is done by GDALOpenInfo.
73 * So as soon as sqlite3 has opened a file we should make sure not to re-open
74 * it (actually close it) ourselves.
75 */
76
77 namespace {
78 struct FileNotToOpen
79 {
80 CPLString osFilename{};
81 int nRefCount{};
82 GByte *pabyHeader{nullptr};
83 int nHeaderBytes{0};
84 };
85 }
86
87 static std::mutex sFNTOMutex;
88 static std::map<CPLString, FileNotToOpen>* pMapFNTO = nullptr;
89
GDALOpenInfoDeclareFileNotToOpen(const char * pszFilename,const GByte * pabyHeader,int nHeaderBytes)90 void GDALOpenInfoDeclareFileNotToOpen(const char* pszFilename,
91 const GByte* pabyHeader,
92 int nHeaderBytes)
93 {
94 std::lock_guard<std::mutex> oLock(sFNTOMutex);
95 if( pMapFNTO == nullptr )
96 pMapFNTO = new std::map<CPLString, FileNotToOpen>();
97 auto oIter = pMapFNTO->find(pszFilename);
98 if( oIter != pMapFNTO->end() )
99 {
100 oIter->second.nRefCount ++;
101 }
102 else
103 {
104 FileNotToOpen fnto;
105 fnto.osFilename = pszFilename;
106 fnto.nRefCount = 1;
107 fnto.pabyHeader = static_cast<GByte*>(CPLMalloc(nHeaderBytes + 1));
108 memcpy(fnto.pabyHeader, pabyHeader, nHeaderBytes);
109 fnto.pabyHeader[nHeaderBytes] = 0;
110 fnto.nHeaderBytes = nHeaderBytes;
111 (*pMapFNTO)[pszFilename] = fnto;
112 }
113 }
114
GDALOpenInfoUnDeclareFileNotToOpen(const char * pszFilename)115 void GDALOpenInfoUnDeclareFileNotToOpen(const char* pszFilename)
116 {
117 std::lock_guard<std::mutex> oLock(sFNTOMutex);
118 CPLAssert(pMapFNTO);
119 auto oIter = pMapFNTO->find(pszFilename);
120 CPLAssert( oIter != pMapFNTO->end() );
121 oIter->second.nRefCount --;
122 if( oIter->second.nRefCount == 0 )
123 {
124 CPLFree(oIter->second.pabyHeader);
125 pMapFNTO->erase(oIter);
126 }
127 if( pMapFNTO->empty() )
128 {
129 delete pMapFNTO;
130 pMapFNTO = nullptr;
131 }
132 }
133
GDALOpenInfoGetFileNotToOpen(const char * pszFilename,int * pnHeaderBytes)134 static GByte* GDALOpenInfoGetFileNotToOpen(const char* pszFilename,
135 int* pnHeaderBytes)
136 {
137 std::lock_guard<std::mutex> oLock(sFNTOMutex);
138 *pnHeaderBytes = 0;
139 if( pMapFNTO == nullptr )
140 {
141 return nullptr;
142 }
143 auto oIter = pMapFNTO->find(pszFilename);
144 if( oIter == pMapFNTO->end() )
145 {
146 return nullptr;
147 }
148 *pnHeaderBytes = oIter->second.nHeaderBytes;
149 GByte* pabyHeader = static_cast<GByte*>(CPLMalloc(*pnHeaderBytes + 1));
150 memcpy(pabyHeader, oIter->second.pabyHeader, *pnHeaderBytes);
151 pabyHeader[*pnHeaderBytes] = 0;
152 return pabyHeader;
153 }
154
155 /************************************************************************/
156 /* ==================================================================== */
157 /* GDALOpenInfo */
158 /* ==================================================================== */
159 /************************************************************************/
160
161 /************************************************************************/
162 /* GDALOpenInfo() */
163 /************************************************************************/
164
165 /** Constructor/
166 * @param pszFilenameIn filename
167 * @param nOpenFlagsIn open flags
168 * @param papszSiblingsIn list of sibling files, or NULL.
169 */
GDALOpenInfo(const char * pszFilenameIn,int nOpenFlagsIn,const char * const * papszSiblingsIn)170 GDALOpenInfo::GDALOpenInfo( const char * pszFilenameIn, int nOpenFlagsIn,
171 const char * const * papszSiblingsIn ) :
172 bHasGotSiblingFiles(false),
173 papszSiblingFiles(nullptr),
174 nHeaderBytesTried(0),
175 pszFilename(CPLStrdup(pszFilenameIn)),
176 papszOpenOptions(nullptr),
177 eAccess(nOpenFlagsIn & GDAL_OF_UPDATE ? GA_Update : GA_ReadOnly),
178 nOpenFlags(nOpenFlagsIn),
179 bStatOK(FALSE),
180 bIsDirectory(FALSE),
181 fpL(nullptr),
182 nHeaderBytes(0),
183 pabyHeader(nullptr),
184 papszAllowedDrivers(nullptr)
185 {
186 if( STARTS_WITH(pszFilename, "MVT:/vsi") )
187 return;
188
189 /* -------------------------------------------------------------------- */
190 /* Ensure that C: is treated as C:\ so we can stat it on */
191 /* Windows. Similar to what is done in CPLStat(). */
192 /* -------------------------------------------------------------------- */
193 #ifdef WIN32
194 if( strlen(pszFilenameIn) == 2 && pszFilenameIn[1] == ':' )
195 {
196 char szAltPath[10];
197
198 strcpy( szAltPath, pszFilenameIn );
199 strcat( szAltPath, "\\" );
200 CPLFree( pszFilename );
201 pszFilename = CPLStrdup( szAltPath );
202 }
203 #endif // WIN32
204
205 /* -------------------------------------------------------------------- */
206 /* Collect information about the file. */
207 /* -------------------------------------------------------------------- */
208
209 #ifdef HAVE_READLINK
210 bool bHasRetried = false;
211
212 retry: // TODO(schwehr): Stop using goto.
213
214 #endif // HAVE_READLINK
215
216 #if !(defined(_WIN32) || defined(__linux__) || defined(__ANDROID__) || (defined(__MACH__) && defined(__APPLE__)))
217 /* On BSDs, fread() on a directory returns non zero, so we have to */
218 /* do a stat() before to check the nature of pszFilename. */
219 bool bPotentialDirectory = (eAccess == GA_ReadOnly);
220 #else
221 bool bPotentialDirectory = false;
222 #endif
223
224 /* Check if the filename might be a directory of a special virtual file system */
225 if( STARTS_WITH(pszFilename, "/vsizip/") ||
226 STARTS_WITH(pszFilename, "/vsitar/") )
227 {
228 const char* pszExt = CPLGetExtension(pszFilename);
229 if( EQUAL(pszExt, "zip") || EQUAL(pszExt, "tar") || EQUAL(pszExt, "gz")
230 || pszFilename[strlen(pszFilename)-1] == '}'
231 #ifdef DEBUG
232 // For AFL, so that .cur_input is detected as the archive filename.
233 || EQUAL( CPLGetFilename(pszFilename), ".cur_input" )
234 #endif // DEBUG
235 )
236 {
237 bPotentialDirectory = true;
238 }
239 }
240 else if( STARTS_WITH(pszFilename, "/vsicurl/") )
241 {
242 bPotentialDirectory = true;
243 }
244
245 if( bPotentialDirectory )
246 {
247 int nStatFlags = VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG;
248 if(nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR)
249 nStatFlags |= VSI_STAT_SET_ERROR_FLAG;
250
251 // For those special files, opening them with VSIFOpenL() might result
252 // in content, even if they should be considered as directories, so
253 // use stat.
254 VSIStatBufL sStat;
255
256 if(VSIStatExL( pszFilename, &sStat, nStatFlags) == 0) {
257 bStatOK = TRUE;
258 if( VSI_ISDIR( sStat.st_mode ) )
259 bIsDirectory = TRUE;
260 }
261 }
262
263 pabyHeader = GDALOpenInfoGetFileNotToOpen(pszFilename, &nHeaderBytes);
264
265 if( !bIsDirectory && pabyHeader == nullptr ) {
266 fpL = VSIFOpenExL( pszFilename, (eAccess == GA_Update) ? "r+b" : "rb", (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR) > 0);
267 }
268 if( pabyHeader )
269 {
270 bStatOK = TRUE;
271 nHeaderBytesTried = nHeaderBytes;
272 }
273 else if( fpL != nullptr )
274 {
275 bStatOK = TRUE;
276 int nBufSize =
277 atoi(CPLGetConfigOption("GDAL_INGESTED_BYTES_AT_OPEN", "1024"));
278 if( nBufSize < 1024 )
279 nBufSize = 1024;
280 else if( nBufSize > 10 * 1024 * 1024)
281 nBufSize = 10 * 1024 * 1024;
282 pabyHeader = static_cast<GByte *>( CPLCalloc(nBufSize+1, 1) );
283 nHeaderBytesTried = nBufSize;
284 nHeaderBytes = static_cast<int>(
285 VSIFReadL( pabyHeader, 1, nHeaderBytesTried, fpL ) );
286 VSIRewindL( fpL );
287
288 /* If we cannot read anything, check if it is not a directory instead */
289 VSIStatBufL sStat;
290 if( nHeaderBytes == 0 &&
291 VSIStatExL( pszFilename, &sStat,
292 VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG ) == 0 &&
293 VSI_ISDIR( sStat.st_mode ) )
294 {
295 CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
296 fpL = nullptr;
297 CPLFree(pabyHeader);
298 pabyHeader = nullptr;
299 bIsDirectory = TRUE;
300 }
301 }
302 else if( !bStatOK )
303 {
304 VSIStatBufL sStat;
305 if( !bPotentialDirectory && VSIStatExL( pszFilename, &sStat,
306 VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG ) == 0 )
307 {
308 bStatOK = TRUE;
309 if( VSI_ISDIR( sStat.st_mode ) )
310 bIsDirectory = TRUE;
311 }
312 #ifdef HAVE_READLINK
313 else if ( !bHasRetried && !STARTS_WITH(pszFilename, "/vsi") )
314 {
315 // If someone creates a file with "ln -sf
316 // /vsicurl/http://download.osgeo.org/gdal/data/gtiff/utm.tif
317 // my_remote_utm.tif" we will be able to open it by passing
318 // my_remote_utm.tif. This helps a lot for GDAL based readers that
319 // only provide file explorers to open datasets.
320 const int nBufSize = 2048;
321 std::vector<char> oFilename(nBufSize);
322 char *szPointerFilename = &oFilename[0];
323 int nBytes = static_cast<int>(
324 readlink( pszFilename, szPointerFilename, nBufSize ) );
325 if (nBytes != -1)
326 {
327 szPointerFilename[std::min(nBytes, nBufSize - 1)] = 0;
328 CPLFree(pszFilename);
329 pszFilename = CPLStrdup(szPointerFilename);
330 papszSiblingsIn = nullptr;
331 bHasRetried = true;
332 goto retry;
333 }
334 }
335 #endif // HAVE_READLINK
336 }
337
338 /* -------------------------------------------------------------------- */
339 /* Capture sibling list either from passed in values, or by */
340 /* scanning for them only if requested through GetSiblingFiles(). */
341 /* -------------------------------------------------------------------- */
342 if( papszSiblingsIn != nullptr )
343 {
344 papszSiblingFiles = CSLDuplicate( papszSiblingsIn );
345 bHasGotSiblingFiles = true;
346 }
347 else if( bStatOK && !bIsDirectory )
348 {
349 papszSiblingFiles = VSISiblingFiles(pszFilename);
350 if (papszSiblingFiles != nullptr)
351 {
352 bHasGotSiblingFiles = true;
353 }
354 else
355 {
356 const char* pszOptionVal =
357 CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
358 if (EQUAL(pszOptionVal, "EMPTY_DIR"))
359 {
360 papszSiblingFiles =
361 CSLAddString( nullptr, CPLGetFilename(pszFilename) );
362 bHasGotSiblingFiles = true;
363 }
364 else if( CPLTestBool(pszOptionVal) )
365 {
366 /* skip reading the directory */
367 papszSiblingFiles = nullptr;
368 bHasGotSiblingFiles = true;
369 }
370 else
371 {
372 /* will be lazy loaded */
373 papszSiblingFiles = nullptr;
374 bHasGotSiblingFiles = false;
375 }
376 }
377 }
378 else
379 {
380 papszSiblingFiles = nullptr;
381 bHasGotSiblingFiles = true;
382 }
383 }
384
385 /************************************************************************/
386 /* ~GDALOpenInfo() */
387 /************************************************************************/
388
~GDALOpenInfo()389 GDALOpenInfo::~GDALOpenInfo()
390
391 {
392 VSIFree( pabyHeader );
393 CPLFree( pszFilename );
394
395 if( fpL != nullptr )
396 CPL_IGNORE_RET_VAL(VSIFCloseL( fpL ));
397 CSLDestroy( papszSiblingFiles );
398 }
399
400 /************************************************************************/
401 /* GetSiblingFiles() */
402 /************************************************************************/
403
404 /** Return sibling files.
405 * @return sibling files. Ownership below to the object.
406 */
GetSiblingFiles()407 char** GDALOpenInfo::GetSiblingFiles()
408 {
409 if( bHasGotSiblingFiles )
410 return papszSiblingFiles;
411 bHasGotSiblingFiles = true;
412
413 papszSiblingFiles = VSISiblingFiles( pszFilename );
414 if ( papszSiblingFiles != nullptr ) {
415 return papszSiblingFiles;
416 }
417
418 CPLString osDir = CPLGetDirname( pszFilename );
419 const int nMaxFiles =
420 atoi(CPLGetConfigOption("GDAL_READDIR_LIMIT_ON_OPEN", "1000"));
421 papszSiblingFiles = VSIReadDirEx( osDir, nMaxFiles );
422 if( nMaxFiles > 0 && CSLCount(papszSiblingFiles) > nMaxFiles )
423 {
424 CPLDebug("GDAL", "GDAL_READDIR_LIMIT_ON_OPEN reached on %s",
425 osDir.c_str());
426 CSLDestroy(papszSiblingFiles);
427 papszSiblingFiles = nullptr;
428 }
429
430
431 return papszSiblingFiles;
432 }
433
434 /************************************************************************/
435 /* StealSiblingFiles() */
436 /* */
/*      Same as GetSiblingFiles() except that the list is stolen        */
438 /* (ie ownership transferred to the caller) and the associated */
439 /* member variable is set to NULL. */
440 /************************************************************************/
441
442 /** Return sibling files and steal reference
443 * @return sibling files. Ownership below to the caller (must be freed with CSLDestroy)
444 */
StealSiblingFiles()445 char** GDALOpenInfo::StealSiblingFiles()
446 {
447 char** papszRet = GetSiblingFiles();
448 papszSiblingFiles = nullptr;
449 return papszRet;
450 }
451
452 /************************************************************************/
453 /* AreSiblingFilesLoaded() */
454 /************************************************************************/
455
456 /** Return whether sibling files have been loaded.
457 * @return true or false.
458 */
AreSiblingFilesLoaded() const459 bool GDALOpenInfo::AreSiblingFilesLoaded() const
460 {
461 return bHasGotSiblingFiles;
462 }
463
464 /************************************************************************/
465 /* TryToIngest() */
466 /************************************************************************/
467
468 /** Ingest bytes from the file.
469 * @param nBytes number of bytes to ingest.
470 * @return TRUE if successful
471 */
TryToIngest(int nBytes)472 int GDALOpenInfo::TryToIngest(int nBytes)
473 {
474 if( fpL == nullptr )
475 return FALSE;
476 if( nHeaderBytes < nHeaderBytesTried )
477 return TRUE;
478 pabyHeader = static_cast<GByte *>( CPLRealloc(pabyHeader, nBytes + 1) );
479 memset(pabyHeader, 0, nBytes + 1);
480 VSIRewindL(fpL);
481 nHeaderBytesTried = nBytes;
482 nHeaderBytes = static_cast<int>( VSIFReadL(pabyHeader, 1, nBytes, fpL) );
483 VSIRewindL(fpL);
484
485 return TRUE;
486 }
487