1 /******************************************************************************
2  *
3  * Project:  GDAL Core
4  * Purpose:  Implementation of GDALOpenInfo class.
5  * Author:   Frank Warmerdam, warmerdam@pobox.com
6  *
7  **********************************************************************
8  * Copyright (c) 2002, Frank Warmerdam
9  * Copyright (c) 2008-2012, Even Rouault <even dot rouault at spatialys.com>
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 #include "gdal_priv.h"  // Must be included first for mingw VSIStatBufL.
31 #include "cpl_port.h"
32 
33 #include <cstdlib>
34 #include <cstring>
35 #ifdef HAVE_UNISTD_H
36 #include <unistd.h>
37 #endif
38 
39 #include <algorithm>
40 #include <map>
41 #include <mutex>
42 #include <vector>
43 
44 #include "cpl_config.h"
45 #include "cpl_conv.h"
46 #include "cpl_error.h"
47 #include "cpl_string.h"
48 #include "cpl_vsi.h"
49 #include "gdal.h"
50 
51 CPL_CVSID("$Id: gdalopeninfo.cpp 2dead8e944b4b88b137dc4a51c7efa241fd784cc 2021-09-02 08:53:32 +0200 Even Rouault $")
52 
53 // Keep in sync prototype of those 2 functions between gdalopeninfo.cpp,
54 // ogrsqlitedatasource.cpp and ogrgeopackagedatasource.cpp
55 void GDALOpenInfoDeclareFileNotToOpen(const char* pszFilename,
56                                        const GByte* pabyHeader,
57                                        int nHeaderBytes);
58 void GDALOpenInfoUnDeclareFileNotToOpen(const char* pszFilename);
59 
60 /************************************************************************/
61 
62 /* This whole section helps for SQLite/GPKG, especially with write-ahead
63  * log enabled. The issue is that sqlite3 relies on POSIX advisory locks to
64  * properly work and decide when to create/delete the wal related files.
65  * One issue with POSIX advisory locks is that if within the same process
66  * you do
67  * f1 = open('somefile')
68  * set locks on f1
69  * f2 = open('somefile')
70  * close(f2)
71  * The close(f2) will cancel the locks set on f1. The work on f1 is done by
72  * libsqlite3 whereas the work on f2 is done by GDALOpenInfo.
73  * So as soon as sqlite3 has opened a file we should make sure not to re-open
74  * it (actually close it) ourselves.
75  */
76 
77 namespace {
78 struct FileNotToOpen
79 {
80     CPLString osFilename{};
81     int       nRefCount{};
82     GByte    *pabyHeader{nullptr};
83     int       nHeaderBytes{0};
84 };
85 }
86 
87 static std::mutex sFNTOMutex;
88 static std::map<CPLString, FileNotToOpen>* pMapFNTO = nullptr;
89 
GDALOpenInfoDeclareFileNotToOpen(const char * pszFilename,const GByte * pabyHeader,int nHeaderBytes)90 void GDALOpenInfoDeclareFileNotToOpen(const char* pszFilename,
91                                        const GByte* pabyHeader,
92                                        int nHeaderBytes)
93 {
94     std::lock_guard<std::mutex> oLock(sFNTOMutex);
95     if( pMapFNTO == nullptr )
96         pMapFNTO = new std::map<CPLString, FileNotToOpen>();
97     auto oIter = pMapFNTO->find(pszFilename);
98     if( oIter != pMapFNTO->end() )
99     {
100         oIter->second.nRefCount ++;
101     }
102     else
103     {
104         FileNotToOpen fnto;
105         fnto.osFilename = pszFilename;
106         fnto.nRefCount = 1;
107         fnto.pabyHeader = static_cast<GByte*>(CPLMalloc(nHeaderBytes + 1));
108         memcpy(fnto.pabyHeader, pabyHeader, nHeaderBytes);
109         fnto.pabyHeader[nHeaderBytes] = 0;
110         fnto.nHeaderBytes = nHeaderBytes;
111         (*pMapFNTO)[pszFilename] = fnto;
112     }
113 }
114 
GDALOpenInfoUnDeclareFileNotToOpen(const char * pszFilename)115 void GDALOpenInfoUnDeclareFileNotToOpen(const char* pszFilename)
116 {
117     std::lock_guard<std::mutex> oLock(sFNTOMutex);
118     CPLAssert(pMapFNTO);
119     auto oIter = pMapFNTO->find(pszFilename);
120     CPLAssert( oIter != pMapFNTO->end() );
121     oIter->second.nRefCount --;
122     if( oIter->second.nRefCount == 0 )
123     {
124         CPLFree(oIter->second.pabyHeader);
125         pMapFNTO->erase(oIter);
126     }
127     if( pMapFNTO->empty() )
128     {
129         delete pMapFNTO;
130         pMapFNTO = nullptr;
131     }
132 }
133 
GDALOpenInfoGetFileNotToOpen(const char * pszFilename,int * pnHeaderBytes)134 static GByte* GDALOpenInfoGetFileNotToOpen(const char* pszFilename,
135                                            int* pnHeaderBytes)
136 {
137     std::lock_guard<std::mutex> oLock(sFNTOMutex);
138     *pnHeaderBytes = 0;
139     if( pMapFNTO == nullptr )
140     {
141         return nullptr;
142     }
143     auto oIter = pMapFNTO->find(pszFilename);
144     if( oIter == pMapFNTO->end() )
145     {
146         return nullptr;
147     }
148     *pnHeaderBytes = oIter->second.nHeaderBytes;
149     GByte* pabyHeader = static_cast<GByte*>(CPLMalloc(*pnHeaderBytes + 1));
150     memcpy(pabyHeader, oIter->second.pabyHeader, *pnHeaderBytes);
151     pabyHeader[*pnHeaderBytes] = 0;
152     return pabyHeader;
153 }
154 
155 /************************************************************************/
156 /* ==================================================================== */
157 /*                             GDALOpenInfo                             */
158 /* ==================================================================== */
159 /************************************************************************/
160 
161 /************************************************************************/
162 /*                            GDALOpenInfo()                            */
163 /************************************************************************/
164 
165 /** Constructor/
166  * @param pszFilenameIn filename
167  * @param nOpenFlagsIn open flags
168  * @param papszSiblingsIn list of sibling files, or NULL.
169  */
GDALOpenInfo(const char * pszFilenameIn,int nOpenFlagsIn,const char * const * papszSiblingsIn)170 GDALOpenInfo::GDALOpenInfo( const char * pszFilenameIn, int nOpenFlagsIn,
171                             const char * const * papszSiblingsIn ) :
172     bHasGotSiblingFiles(false),
173     papszSiblingFiles(nullptr),
174     nHeaderBytesTried(0),
175     pszFilename(CPLStrdup(pszFilenameIn)),
176     papszOpenOptions(nullptr),
177     eAccess(nOpenFlagsIn & GDAL_OF_UPDATE ? GA_Update : GA_ReadOnly),
178     nOpenFlags(nOpenFlagsIn),
179     bStatOK(FALSE),
180     bIsDirectory(FALSE),
181     fpL(nullptr),
182     nHeaderBytes(0),
183     pabyHeader(nullptr),
184     papszAllowedDrivers(nullptr)
185 {
186     if( STARTS_WITH(pszFilename, "MVT:/vsi") )
187         return;
188 
189 /* -------------------------------------------------------------------- */
190 /*      Ensure that C: is treated as C:\ so we can stat it on           */
191 /*      Windows.  Similar to what is done in CPLStat().                 */
192 /* -------------------------------------------------------------------- */
193 #ifdef WIN32
194     if( strlen(pszFilenameIn) == 2 && pszFilenameIn[1] == ':' )
195     {
196         char    szAltPath[10];
197 
198         strcpy( szAltPath, pszFilenameIn );
199         strcat( szAltPath, "\\" );
200         CPLFree( pszFilename );
201         pszFilename = CPLStrdup( szAltPath );
202     }
203 #endif  // WIN32
204 
205 /* -------------------------------------------------------------------- */
206 /*      Collect information about the file.                             */
207 /* -------------------------------------------------------------------- */
208 
209 #ifdef HAVE_READLINK
210     bool bHasRetried = false;
211 
212 retry:  // TODO(schwehr): Stop using goto.
213 
214 #endif  // HAVE_READLINK
215 
216 #if !(defined(_WIN32) || defined(__linux__) || defined(__ANDROID__) || (defined(__MACH__) && defined(__APPLE__)))
217     /* On BSDs, fread() on a directory returns non zero, so we have to */
218     /* do a stat() before to check the nature of pszFilename. */
219     bool bPotentialDirectory = (eAccess == GA_ReadOnly);
220 #else
221     bool bPotentialDirectory = false;
222 #endif
223 
224     /* Check if the filename might be a directory of a special virtual file system */
225     if( STARTS_WITH(pszFilename, "/vsizip/") ||
226         STARTS_WITH(pszFilename, "/vsitar/") )
227     {
228         const char* pszExt = CPLGetExtension(pszFilename);
229         if( EQUAL(pszExt, "zip") || EQUAL(pszExt, "tar") || EQUAL(pszExt, "gz")
230             || pszFilename[strlen(pszFilename)-1] == '}'
231 #ifdef DEBUG
232             // For AFL, so that .cur_input is detected as the archive filename.
233             || EQUAL( CPLGetFilename(pszFilename), ".cur_input" )
234 #endif  // DEBUG
235           )
236         {
237             bPotentialDirectory = true;
238         }
239     }
240     else if( STARTS_WITH(pszFilename, "/vsicurl/") )
241     {
242         bPotentialDirectory = true;
243     }
244 
245     if( bPotentialDirectory )
246     {
247         int nStatFlags = VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG;
248         if(nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR)
249             nStatFlags |= VSI_STAT_SET_ERROR_FLAG;
250 
251         // For those special files, opening them with VSIFOpenL() might result
252         // in content, even if they should be considered as directories, so
253         // use stat.
254         VSIStatBufL sStat;
255 
256         if(VSIStatExL( pszFilename, &sStat, nStatFlags) == 0) {
257             bStatOK = TRUE;
258             if( VSI_ISDIR( sStat.st_mode ) )
259                 bIsDirectory = TRUE;
260         }
261     }
262 
263     pabyHeader = GDALOpenInfoGetFileNotToOpen(pszFilename, &nHeaderBytes);
264 
265     if( !bIsDirectory && pabyHeader == nullptr ) {
266         fpL = VSIFOpenExL( pszFilename, (eAccess == GA_Update) ? "r+b" : "rb", (nOpenFlagsIn & GDAL_OF_VERBOSE_ERROR) > 0);
267     }
268     if( pabyHeader )
269     {
270         bStatOK = TRUE;
271         nHeaderBytesTried = nHeaderBytes;
272     }
273     else if( fpL != nullptr )
274     {
275         bStatOK = TRUE;
276         int nBufSize =
277             atoi(CPLGetConfigOption("GDAL_INGESTED_BYTES_AT_OPEN", "1024"));
278         if( nBufSize < 1024 )
279             nBufSize = 1024;
280         else if( nBufSize > 10 * 1024 * 1024)
281             nBufSize = 10 * 1024 * 1024;
282         pabyHeader = static_cast<GByte *>( CPLCalloc(nBufSize+1, 1) );
283         nHeaderBytesTried = nBufSize;
284         nHeaderBytes = static_cast<int>(
285             VSIFReadL( pabyHeader, 1, nHeaderBytesTried, fpL ) );
286         VSIRewindL( fpL );
287 
288         /* If we cannot read anything, check if it is not a directory instead */
289         VSIStatBufL sStat;
290         if( nHeaderBytes == 0 &&
291             VSIStatExL( pszFilename, &sStat,
292                         VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG ) == 0 &&
293             VSI_ISDIR( sStat.st_mode ) )
294         {
295             CPL_IGNORE_RET_VAL(VSIFCloseL(fpL));
296             fpL = nullptr;
297             CPLFree(pabyHeader);
298             pabyHeader = nullptr;
299             bIsDirectory = TRUE;
300         }
301     }
302     else if( !bStatOK )
303     {
304         VSIStatBufL sStat;
305         if( !bPotentialDirectory && VSIStatExL( pszFilename, &sStat,
306                         VSI_STAT_EXISTS_FLAG | VSI_STAT_NATURE_FLAG ) == 0 )
307         {
308             bStatOK = TRUE;
309             if( VSI_ISDIR( sStat.st_mode ) )
310                 bIsDirectory = TRUE;
311         }
312 #ifdef HAVE_READLINK
313         else if ( !bHasRetried && !STARTS_WITH(pszFilename, "/vsi") )
314         {
315             // If someone creates a file with "ln -sf
316             // /vsicurl/http://download.osgeo.org/gdal/data/gtiff/utm.tif
317             // my_remote_utm.tif" we will be able to open it by passing
318             // my_remote_utm.tif.  This helps a lot for GDAL based readers that
319             // only provide file explorers to open datasets.
320             const int nBufSize = 2048;
321             std::vector<char> oFilename(nBufSize);
322             char *szPointerFilename = &oFilename[0];
323             int nBytes = static_cast<int>(
324                 readlink( pszFilename, szPointerFilename, nBufSize ) );
325             if (nBytes != -1)
326             {
327                 szPointerFilename[std::min(nBytes, nBufSize - 1)] = 0;
328                 CPLFree(pszFilename);
329                 pszFilename = CPLStrdup(szPointerFilename);
330                 papszSiblingsIn = nullptr;
331                 bHasRetried = true;
332                 goto retry;
333             }
334         }
335 #endif  // HAVE_READLINK
336     }
337 
338 /* -------------------------------------------------------------------- */
339 /*      Capture sibling list either from passed in values, or by        */
340 /*      scanning for them only if requested through GetSiblingFiles().  */
341 /* -------------------------------------------------------------------- */
342     if( papszSiblingsIn != nullptr )
343     {
344         papszSiblingFiles = CSLDuplicate( papszSiblingsIn );
345         bHasGotSiblingFiles = true;
346     }
347     else if( bStatOK && !bIsDirectory )
348     {
349         papszSiblingFiles = VSISiblingFiles(pszFilename);
350         if (papszSiblingFiles != nullptr)
351         {
352             bHasGotSiblingFiles = true;
353         }
354         else
355         {
356             const char* pszOptionVal =
357                 CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
358             if (EQUAL(pszOptionVal, "EMPTY_DIR"))
359             {
360                 papszSiblingFiles =
361                     CSLAddString( nullptr, CPLGetFilename(pszFilename) );
362                 bHasGotSiblingFiles = true;
363             }
364             else if( CPLTestBool(pszOptionVal) )
365             {
366                 /* skip reading the directory */
367                 papszSiblingFiles = nullptr;
368                 bHasGotSiblingFiles = true;
369             }
370             else
371             {
372                 /* will be lazy loaded */
373                 papszSiblingFiles = nullptr;
374                 bHasGotSiblingFiles = false;
375             }
376         }
377     }
378     else
379     {
380         papszSiblingFiles = nullptr;
381         bHasGotSiblingFiles = true;
382     }
383 }
384 
385 /************************************************************************/
386 /*                           ~GDALOpenInfo()                            */
387 /************************************************************************/
388 
~GDALOpenInfo()389 GDALOpenInfo::~GDALOpenInfo()
390 
391 {
392     VSIFree( pabyHeader );
393     CPLFree( pszFilename );
394 
395     if( fpL != nullptr )
396         CPL_IGNORE_RET_VAL(VSIFCloseL( fpL ));
397     CSLDestroy( papszSiblingFiles );
398 }
399 
400 /************************************************************************/
401 /*                         GetSiblingFiles()                            */
402 /************************************************************************/
403 
404 /** Return sibling files.
405  * @return sibling files. Ownership below to the object.
406  */
GetSiblingFiles()407 char** GDALOpenInfo::GetSiblingFiles()
408 {
409     if( bHasGotSiblingFiles )
410         return papszSiblingFiles;
411     bHasGotSiblingFiles = true;
412 
413     papszSiblingFiles = VSISiblingFiles( pszFilename );
414     if ( papszSiblingFiles != nullptr ) {
415         return papszSiblingFiles;
416     }
417 
418     CPLString osDir = CPLGetDirname( pszFilename );
419     const int nMaxFiles =
420         atoi(CPLGetConfigOption("GDAL_READDIR_LIMIT_ON_OPEN", "1000"));
421     papszSiblingFiles = VSIReadDirEx( osDir, nMaxFiles );
422     if( nMaxFiles > 0 && CSLCount(papszSiblingFiles) > nMaxFiles )
423     {
424         CPLDebug("GDAL", "GDAL_READDIR_LIMIT_ON_OPEN reached on %s",
425                  osDir.c_str());
426         CSLDestroy(papszSiblingFiles);
427         papszSiblingFiles = nullptr;
428     }
429 
430 
431     return papszSiblingFiles;
432 }
433 
434 /************************************************************************/
435 /*                         StealSiblingFiles()                          */
436 /*                                                                      */
/*      Same as GetSiblingFiles() except that the list is stolen        */
/*      (i.e. ownership is transferred to the caller) and the           */
/*      associated member variable is set to NULL.                      */
440 /************************************************************************/
441 
442 /** Return sibling files and steal reference
443  * @return sibling files. Ownership below to the caller (must be freed with CSLDestroy)
444  */
StealSiblingFiles()445 char** GDALOpenInfo::StealSiblingFiles()
446 {
447     char** papszRet = GetSiblingFiles();
448     papszSiblingFiles = nullptr;
449     return papszRet;
450 }
451 
452 /************************************************************************/
453 /*                        AreSiblingFilesLoaded()                       */
454 /************************************************************************/
455 
456 /** Return whether sibling files have been loaded.
457  * @return true or false.
458  */
AreSiblingFilesLoaded() const459 bool GDALOpenInfo::AreSiblingFilesLoaded() const
460 {
461     return bHasGotSiblingFiles;
462 }
463 
464 /************************************************************************/
465 /*                           TryToIngest()                              */
466 /************************************************************************/
467 
468 /** Ingest bytes from the file.
469  * @param nBytes number of bytes to ingest.
470  * @return TRUE if successful
471  */
TryToIngest(int nBytes)472 int GDALOpenInfo::TryToIngest(int nBytes)
473 {
474     if( fpL == nullptr )
475         return FALSE;
476     if( nHeaderBytes < nHeaderBytesTried )
477         return TRUE;
478     pabyHeader = static_cast<GByte *>( CPLRealloc(pabyHeader, nBytes + 1) );
479     memset(pabyHeader, 0, nBytes + 1);
480     VSIRewindL(fpL);
481     nHeaderBytesTried = nBytes;
482     nHeaderBytes = static_cast<int>( VSIFReadL(pabyHeader, 1, nBytes, fpL) );
483     VSIRewindL(fpL);
484 
485     return TRUE;
486 }
487