1 /******************************************************************************
2  * $Id: cpl_vsil_curl.cpp 28798 2015-03-27 19:37:50Z rouault $
3  *
4  * Project:  CPL - Common Portability Library
5  * Purpose:  Implement VSI large file api for HTTP/FTP files
6  * Author:   Even Rouault, even.rouault at mines-paris.org
7  *
8  ******************************************************************************
9  * Copyright (c) 2010-2013, Even Rouault <even dot rouault at mines-paris dot org>
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 #include "cpl_vsi_virtual.h"
31 #include "cpl_string.h"
32 #include "cpl_multiproc.h"
33 #include "cpl_hash_set.h"
34 #include "cpl_time.h"
35 #include "cpl_vsil_curl_priv.h"
36 
37 CPL_CVSID("$Id: cpl_vsil_curl.cpp 28798 2015-03-27 19:37:50Z rouault $");
38 
39 #ifndef HAVE_CURL
40 
/* Variant compiled when HAVE_CURL is not defined: intentionally does nothing. */
void VSIInstallCurlFileHandler(void)
{
    /* libcurl support is not compiled in */
}
45 
46 /************************************************************************/
47 /*                      VSICurlInstallReadCbk()                         */
48 /************************************************************************/
49 
VSICurlInstallReadCbk(CPL_UNUSED VSILFILE * fp,CPL_UNUSED VSICurlReadCbkFunc pfnReadCbk,CPL_UNUSED void * pfnUserData,CPL_UNUSED int bStopOnInterrruptUntilUninstall)50 int VSICurlInstallReadCbk (CPL_UNUSED VSILFILE* fp,
51                            CPL_UNUSED VSICurlReadCbkFunc pfnReadCbk,
52                            CPL_UNUSED void* pfnUserData,
53                            CPL_UNUSED int bStopOnInterrruptUntilUninstall)
54 {
55     return FALSE;
56 }
57 
58 
59 /************************************************************************/
60 /*                    VSICurlUninstallReadCbk()                         */
61 /************************************************************************/
62 
VSICurlUninstallReadCbk(CPL_UNUSED VSILFILE * fp)63 int VSICurlUninstallReadCbk(CPL_UNUSED VSILFILE* fp)
64 {
65     return FALSE;
66 }
67 
68 #else
69 
70 #include <curl/curl.h>
71 
72 void CPLHTTPSetOptions(CURL *http_handle, char** papszOptions);
73 void VSICurlSetOptions(CURL* hCurlHandle, const char* pszURL);
74 
75 #include <map>
76 
77 #define ENABLE_DEBUG 1
78 
79 #define N_MAX_REGIONS       1000
80 
81 #define DOWNLOAD_CHUNCK_SIZE    16384
82 
/* Tri-state recording what is known about the existence of a remote */
/* resource. VSICurlHandle::Exists() triggers a probe (GetFileSize()) */
/* while the state is still EXIST_UNKNOWN. */
typedef enum
{
    EXIST_UNKNOWN = -1, /* not probed yet */
    EXIST_NO,           /* value 0 */
    EXIST_YES,          /* value 1 */
} ExistStatus;
89 
/* Properties of one URL, obtained through */
/* VSICurlFilesystemHandler::GetCachedFileProp(). VSICurlHandle copies */
/* them at construction and writes them back in GetFileSize(). */
typedef struct
{
    ExistStatus     eExists;               /* see ExistStatus */
    int             bHastComputedFileSize; /* TRUE once GetFileSize() has filled fileSize / eExists */
    vsi_l_offset    fileSize;              /* size as reported by the server (0 when unknown or missing) */
    int             bIsDirectory;          /* TRUE when the probe concluded the URL is a directory */
    time_t          mTime;                 /* NOTE(review): presumably the modification time; not written in the code visible here */
} CachedFileProp;
98 
/* Entry type of VSICurlFilesystemHandler::cacheDirList. */
/* NOTE(review): the code that fills it is outside this view; field */
/* meanings below are taken from the names and should be confirmed. */
typedef struct
{
    int             bGotFileList;  /* presumably TRUE when a listing could be obtained - TODO confirm */
    char**          papszFileList; /* only file name without path */
} CachedDirList;
104 
/* One cached chunk of a remote file, as handled by */
/* VSICurlFilesystemHandler::GetRegion() / AddRegion(). */
/* NOTE(review): field meanings are inferred from the names; the code */
/* that fills this structure is outside this view. */
typedef struct
{
    unsigned long   pszURLHash;       /* presumably a hash of the URL (despite the psz prefix) - TODO confirm */
    vsi_l_offset    nFileOffsetStart; /* presumably offset of the first cached byte in the file */
    size_t          nSize;            /* presumably number of bytes in pData */
    char           *pData;            /* cached bytes */
} CachedRegion;
112 
113 
/* Returns the constant file name "gdal_vsicurl_cache.bin". */
/* The returned pointer refers to static storage and must not be freed. */
static const char* VSICurlGetCacheFileName()
{
    static const char szCacheFileName[] = "gdal_vsicurl_cache.bin";
    return szCacheFileName;
}
118 
119 /************************************************************************/
120 /*          VSICurlFindStringSensitiveExceptEscapeSequences()           */
121 /************************************************************************/
122 
VSICurlFindStringSensitiveExceptEscapeSequences(char ** papszList,const char * pszTarget)123 static int VSICurlFindStringSensitiveExceptEscapeSequences( char ** papszList,
124                                                             const char * pszTarget )
125 
126 {
127     int         i;
128 
129     if( papszList == NULL )
130         return -1;
131 
132     for( i = 0; papszList[i] != NULL; i++ )
133     {
134         const char* pszIter1 = papszList[i];
135         const char* pszIter2 = pszTarget;
136         char ch1, ch2;
137         /* The comparison is case-sensitive, escape for escaped */
138         /* sequences where letters of the hexadecimal sequence */
139         /* can be uppercase or lowercase depending on the quoting algorithm */
140         while(TRUE)
141         {
142             ch1 = *pszIter1;
143             ch2 = *pszIter2;
144             if (ch1 == '\0' || ch2 == '\0')
145                 break;
146             if (ch1 == '%' && ch2 == '%' &&
147                 pszIter1[1] != '\0' && pszIter1[2] != '\0' &&
148                 pszIter2[1] != '\0' && pszIter2[2] != '\0')
149             {
150                 if (!EQUALN(pszIter1+1, pszIter2+1, 2))
151                     break;
152                 pszIter1 += 2;
153                 pszIter2 += 2;
154             }
155             if (ch1 != ch2)
156                 break;
157             pszIter1 ++;
158             pszIter2 ++;
159         }
160         if (ch1 == ch2 && ch1 == '\0')
161             return i;
162     }
163 
164     return -1;
165 }
166 
167 /************************************************************************/
168 /*                      VSICurlIsFileInList()                           */
169 /************************************************************************/
170 
VSICurlIsFileInList(char ** papszList,const char * pszTarget)171 static int VSICurlIsFileInList( char ** papszList, const char * pszTarget )
172 {
173     int nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszTarget);
174     if (nRet >= 0)
175         return nRet;
176 
177     /* If we didn't find anything, try to URL-escape the target filename */
178     char* pszEscaped = CPLEscapeString(pszTarget, -1, CPLES_URL);
179     if (strcmp(pszTarget, pszEscaped) != 0)
180     {
181         nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszEscaped);
182     }
183     CPLFree(pszEscaped);
184     return nRet;
185 }
186 
187 /************************************************************************/
188 /*                     VSICurlFilesystemHandler                         */
189 /************************************************************************/
190 
/* Entry type of VSICurlFilesystemHandler::mapConnections (the */
/* per-thread curl connection cache). */
typedef struct
{
    CPLString       osURL;       /* NOTE(review): presumably the URL the handle was last set up for - confirm in GetCurlHandleFor() */
    CURL           *hCurlHandle; /* curl easy handle kept for reuse */
} CachedConnection;
196 
197 
/* VSIFilesystemHandler implementation backed by libcurl. */
/* In addition to the VSIFilesystemHandler overrides it exposes the */
/* services VSICurlHandle relies on: cached file properties */
/* (GetCachedFileProp), cached regions (GetRegion / AddRegion) and curl */
/* handles (GetCurlHandleFor). */
/* Method implementations are outside this declaration; comments on */
/* members describe only what the declarations themselves show. */
class VSICurlFilesystemHandler : public VSIFilesystemHandler
{
    /* NOTE(review): presumably protects the caches below - confirm in the implementation */
    CPLMutex       *hMutex;

    /* Cached regions; nRegions is presumably the number of entries in papsRegions */
    CachedRegion  **papsRegions;
    int             nRegions;

    /* CachedFileProp and CachedDirList entries, keyed by a CPLString */
    std::map<CPLString, CachedFileProp*>   cacheFileSize;
    std::map<CPLString, CachedDirList*>        cacheDirList;

    int             bUseCacheDisk;

    /* Per-thread Curl connection cache */
    std::map<GIntBig, CachedConnection*> mapConnections;

    char** GetFileList(const char *pszFilename, int* pbGotFileList);

    char**              ParseHTMLFileList(const char* pszFilename,
                                          char* pszData,
                                          int* pbGotFileList);
public:
    VSICurlFilesystemHandler();
    ~VSICurlFilesystemHandler();

    /* VSIFilesystemHandler interface */
    virtual VSIVirtualHandle *Open( const char *pszFilename,
                                    const char *pszAccess);
    virtual int      Stat( const char *pszFilename, VSIStatBufL *pStatBuf, int nFlags );
    virtual int      Unlink( const char *pszFilename );
    virtual int      Rename( const char *oldpath, const char *newpath );
    virtual int      Mkdir( const char *pszDirname, long nMode );
    virtual int      Rmdir( const char *pszDirname );
    virtual char   **ReadDir( const char *pszDirname );
    virtual char   **ReadDir( const char *pszDirname, int* pbGotFileList );


    /* Region cache access, keyed by URL and start offset */
    const CachedRegion* GetRegion(const char*     pszURL,
                                  vsi_l_offset    nFileOffsetStart);

    void                AddRegion(const char*     pszURL,
                                  vsi_l_offset    nFileOffsetStart,
                                  size_t          nSize,
                                  const char     *pData);

    /* Returns the property record for pszURL; VSICurlHandle both reads */
    /* and updates the returned structure */
    CachedFileProp*     GetCachedFileProp(const char*     pszURL);

    /* Disk-backed counterparts of the region cache (see bUseCacheDisk) */
    void                AddRegionToCacheDisk(CachedRegion* psRegion);
    const CachedRegion* GetRegionFromCacheDisk(const char*     pszURL,
                                               vsi_l_offset nFileOffsetStart);

    /* Returns the curl handle to use for osURL */
    CURL               *GetCurlHandleFor(CPLString osURL);
};
249 
250 /************************************************************************/
251 /*                           VSICurlHandle                              */
252 /************************************************************************/
253 
/* VSIVirtualHandle giving access to one URL through libcurl. */
/* File properties (existence, size, directory flag, mtime) are copied */
/* from the filesystem handler's cache at construction and refreshed by */
/* GetFileSize(). An optional read callback can be installed to observe */
/* downloaded data and interrupt a transfer. */
class VSICurlHandle : public VSIVirtualHandle
{
  private:
    VSICurlFilesystemHandler* poFS;      /* owning filesystem handler (not owned by this object) */

    char*           pszURL;              /* private copy of the URL, released in the destructor */

    vsi_l_offset    curOffset;           /* current position, as returned by Tell() */
    vsi_l_offset    fileSize;
    int             bHastComputedFileSize; /* TRUE once fileSize / eExists are known */
    ExistStatus     eExists;
    int             bIsDirectory;
    time_t          mTime;

    vsi_l_offset    lastDownloadedOffset; /* initialised to (vsi_l_offset)-1 by the constructor */
    int             nBlocksToDownload;    /* initialised to 1 by the constructor */
    int             bEOF;                 /* cleared by Seek() */

    int             DownloadRegion(vsi_l_offset startOffset, int nBlocks);

    /* Read callback state, managed by InstallReadCbk() / UninstallReadCbk() */
    VSICurlReadCbkFunc  pfnReadCbk;
    void               *pReadCbkUserData;
    int                 bStopOnInterrruptUntilUninstall;
    int                 bInterrupted;

  public:

    VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL);
    ~VSICurlHandle();

    /* VSIVirtualHandle interface */
    virtual int          Seek( vsi_l_offset nOffset, int nWhence );
    virtual vsi_l_offset Tell();
    virtual size_t       Read( void *pBuffer, size_t nSize, size_t nMemb );
    virtual int          ReadMultiRange( int nRanges, void ** ppData,
                                         const vsi_l_offset* panOffsets, const size_t* panSizes );
    virtual size_t       Write( const void *pBuffer, size_t nSize, size_t nMemb );
    virtual int          Eof();
    virtual int          Flush();
    virtual int          Close();

    /* Non-zero when the size has already been determined */
    int                  IsKnownFileSize() const { return bHastComputedFileSize; }
    /* Returns the size, probing the server on first use */
    vsi_l_offset         GetFileSize();
    /* Non-zero when the resource exists, probing the server if unknown */
    int                  Exists();
    int                  IsDirectory() const { return bIsDirectory; }
    time_t               GetMTime() const { return mTime; }

    /* Both return TRUE on success and FALSE when the request does not */
    /* apply (callback already installed / no callback installed) */
    int                  InstallReadCbk(VSICurlReadCbkFunc pfnReadCbk,
                                        void* pfnUserData,
                                        int bStopOnInterrruptUntilUninstall);
    int                  UninstallReadCbk();
};
305 
306 /************************************************************************/
307 /*                           VSICurlHandle()                            */
308 /************************************************************************/
309 
VSICurlHandle(VSICurlFilesystemHandler * poFS,const char * pszURL)310 VSICurlHandle::VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL)
311 {
312     this->poFS = poFS;
313     this->pszURL = CPLStrdup(pszURL);
314 
315     curOffset = 0;
316 
317     CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
318     eExists = cachedFileProp->eExists;
319     fileSize = cachedFileProp->fileSize;
320     bHastComputedFileSize = cachedFileProp->bHastComputedFileSize;
321     bIsDirectory = cachedFileProp->bIsDirectory;
322     mTime = cachedFileProp->mTime;
323 
324     lastDownloadedOffset = -1;
325     nBlocksToDownload = 1;
326     bEOF = FALSE;
327 
328     pfnReadCbk = NULL;
329     pReadCbkUserData = NULL;
330     bStopOnInterrruptUntilUninstall = FALSE;
331     bInterrupted = FALSE;
332 }
333 
334 /************************************************************************/
335 /*                          ~VSICurlHandle()                            */
336 /************************************************************************/
337 
~VSICurlHandle()338 VSICurlHandle::~VSICurlHandle()
339 {
340     CPLFree(pszURL);
341 }
342 
343 /************************************************************************/
344 /*                          InstallReadCbk()                            */
345 /************************************************************************/
346 
InstallReadCbk(VSICurlReadCbkFunc pfnReadCbkIn,void * pfnUserDataIn,int bStopOnInterrruptUntilUninstallIn)347 int   VSICurlHandle::InstallReadCbk(VSICurlReadCbkFunc pfnReadCbkIn,
348                                     void* pfnUserDataIn,
349                                     int bStopOnInterrruptUntilUninstallIn)
350 {
351     if (pfnReadCbk != NULL)
352         return FALSE;
353 
354     pfnReadCbk = pfnReadCbkIn;
355     pReadCbkUserData = pfnUserDataIn;
356     bStopOnInterrruptUntilUninstall = bStopOnInterrruptUntilUninstallIn;
357     bInterrupted = FALSE;
358     return TRUE;
359 }
360 
361 /************************************************************************/
362 /*                         UninstallReadCbk()                           */
363 /************************************************************************/
364 
UninstallReadCbk()365 int VSICurlHandle::UninstallReadCbk()
366 {
367     if (pfnReadCbk == NULL)
368         return FALSE;
369 
370     pfnReadCbk = NULL;
371     pReadCbkUserData = NULL;
372     bStopOnInterrruptUntilUninstall = FALSE;
373     bInterrupted = FALSE;
374     return TRUE;
375 }
376 
377 /************************************************************************/
378 /*                                Seek()                                */
379 /************************************************************************/
380 
Seek(vsi_l_offset nOffset,int nWhence)381 int VSICurlHandle::Seek( vsi_l_offset nOffset, int nWhence )
382 {
383     if (nWhence == SEEK_SET)
384     {
385         curOffset = nOffset;
386     }
387     else if (nWhence == SEEK_CUR)
388     {
389         curOffset = curOffset + nOffset;
390     }
391     else
392     {
393         curOffset = GetFileSize() + nOffset;
394     }
395     bEOF = FALSE;
396     return 0;
397 }
398 
399 /************************************************************************/
400 /*                       VSICurlSetOptions()                            */
401 /************************************************************************/
402 
VSICurlSetOptions(CURL * hCurlHandle,const char * pszURL)403 void VSICurlSetOptions(CURL* hCurlHandle, const char* pszURL)
404 {
405     curl_easy_setopt(hCurlHandle, CURLOPT_URL, pszURL);
406 
407     CPLHTTPSetOptions(hCurlHandle, NULL);
408 
409 /* 7.16 */
410 #if LIBCURL_VERSION_NUM >= 0x071000
411     long option = CURLFTPMETHOD_SINGLECWD;
412     curl_easy_setopt(hCurlHandle, CURLOPT_FTP_FILEMETHOD, option);
413 #endif
414 
415 /* 7.12.3 */
416 #if LIBCURL_VERSION_NUM > 0x070C03
417     /* ftp://ftp2.cits.rncan.gc.ca/pub/cantopo/250k_tif/ doesn't like EPSV command */
418     curl_easy_setopt(hCurlHandle, CURLOPT_FTP_USE_EPSV, 0);
419 #endif
420 
421     curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 0);
422     curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 1);
423     curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 0);
424 
425 /* 7.16.4 */
426 #if LIBCURL_VERSION_NUM <= 0x071004
427     curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 0);
428 #elif LIBCURL_VERSION_NUM > 0x071004
429     curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 0);
430 #endif
431 
432     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
433     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
434 }
435 
436 
/* State shared between a request and VSICurlHandleWriteFunc(), which */
/* is used both as body (write) callback and as header callback. */
/* Initialise with VSICURLInitWriteFuncStruct(); the accumulated */
/* pBuffer is released by the code that issued the request. */
typedef struct
{
    char*           pBuffer;            /* bytes received so far; kept NUL-terminated */
    size_t          nSize;              /* number of bytes in pBuffer, terminator excluded */
    int             bIsHTTP;            /* with bIsInHeader: parse each chunk as an HTTP header line */
    int             bIsInHeader;        /* TRUE until the blank line ending the headers is seen */
    int             bMultiRange;        /* TRUE disables the "range not supported" detection */
    vsi_l_offset    nStartOffset;       /* first byte of the requested range */
    vsi_l_offset    nEndOffset;         /* last byte of the requested range */
    int             nHTTPCode;          /* status parsed from a "HTTP/1.0 " or "HTTP/1.1 " line */
    vsi_l_offset    nContentLength;     /* value of the "Content-Length: " header */
    int             bFoundContentRange; /* TRUE once a "Content-Range: " header was seen */
    int             bError;             /* set when the server ignored the range request */
    int             bDownloadHeaderOnly; /* abort the transfer at the end of the headers (unless 301/302) */

    VSILFILE           *fp;               /* passed as first argument to pfnReadCbk */
    VSICurlReadCbkFunc  pfnReadCbk;       /* optional; called with every body chunk */
    void               *pReadCbkUserData; /* passed as last argument to pfnReadCbk */
    int                 bInterrupted;     /* set when pfnReadCbk returned false */
} WriteFuncStruct;
457 
458 /************************************************************************/
459 /*                    VSICURLInitWriteFuncStruct()                      */
460 /************************************************************************/
461 
VSICURLInitWriteFuncStruct(WriteFuncStruct * psStruct,VSILFILE * fp,VSICurlReadCbkFunc pfnReadCbk,void * pReadCbkUserData)462 static void VSICURLInitWriteFuncStruct(WriteFuncStruct   *psStruct,
463                                        VSILFILE          *fp,
464                                        VSICurlReadCbkFunc pfnReadCbk,
465                                        void              *pReadCbkUserData)
466 {
467     psStruct->pBuffer = NULL;
468     psStruct->nSize = 0;
469     psStruct->bIsHTTP = FALSE;
470     psStruct->bIsInHeader = TRUE;
471     psStruct->bMultiRange = FALSE;
472     psStruct->nStartOffset = 0;
473     psStruct->nEndOffset = 0;
474     psStruct->nHTTPCode = 0;
475     psStruct->nContentLength = 0;
476     psStruct->bFoundContentRange = FALSE;
477     psStruct->bError = FALSE;
478     psStruct->bDownloadHeaderOnly = FALSE;
479 
480     psStruct->fp = fp;
481     psStruct->pfnReadCbk = pfnReadCbk;
482     psStruct->pReadCbkUserData = pReadCbkUserData;
483     psStruct->bInterrupted = FALSE;
484 }
485 
486 /************************************************************************/
487 /*                       VSICurlHandleWriteFunc()                       */
488 /************************************************************************/
489 
VSICurlHandleWriteFunc(void * buffer,size_t count,size_t nmemb,void * req)490 static int VSICurlHandleWriteFunc(void *buffer, size_t count, size_t nmemb, void *req)
491 {
492     WriteFuncStruct* psStruct = (WriteFuncStruct*) req;
493     size_t nSize = count * nmemb;
494 
495     char* pNewBuffer = (char*) VSIRealloc(psStruct->pBuffer,
496                                           psStruct->nSize + nSize + 1);
497     if (pNewBuffer)
498     {
499         psStruct->pBuffer = pNewBuffer;
500         memcpy(psStruct->pBuffer + psStruct->nSize, buffer, nSize);
501         psStruct->pBuffer[psStruct->nSize + nSize] = '\0';
502         if (psStruct->bIsHTTP && psStruct->bIsInHeader)
503         {
504             char* pszLine = psStruct->pBuffer + psStruct->nSize;
505             if (EQUALN(pszLine, "HTTP/1.0 ", 9) ||
506                 EQUALN(pszLine, "HTTP/1.1 ", 9))
507                 psStruct->nHTTPCode = atoi(pszLine + 9);
508             else if (EQUALN(pszLine, "Content-Length: ", 16))
509                 psStruct->nContentLength = CPLScanUIntBig(pszLine + 16,
510                                                           strlen(pszLine + 16));
511             else if (EQUALN(pszLine, "Content-Range: ", 15))
512                 psStruct->bFoundContentRange = TRUE;
513 
514             /*if (nSize > 2 && pszLine[nSize - 2] == '\r' &&
515                 pszLine[nSize - 1] == '\n')
516             {
517                 pszLine[nSize - 2] = 0;
518                 CPLDebug("VSICURL", "%s", pszLine);
519                 pszLine[nSize - 2] = '\r';
520             }*/
521 
522             if (pszLine[0] == '\r' || pszLine[0] == '\n')
523             {
524                 if (psStruct->bDownloadHeaderOnly)
525                 {
526                     /* If moved permanently/temporarily, go on. Otherwise stop now*/
527                     if (!(psStruct->nHTTPCode == 301 || psStruct->nHTTPCode == 302))
528                         return 0;
529                 }
530                 else
531                 {
532                     psStruct->bIsInHeader = FALSE;
533 
534                     /* Detect servers that don't support range downloading */
535                     if (psStruct->nHTTPCode == 200 &&
536                         !psStruct->bMultiRange &&
537                         !psStruct->bFoundContentRange &&
538                         (psStruct->nStartOffset != 0 || psStruct->nContentLength > 10 *
539                             (psStruct->nEndOffset - psStruct->nStartOffset + 1)))
540                     {
541                         CPLError(CE_Failure, CPLE_AppDefined,
542                                 "Range downloading not supported by this server !");
543                         psStruct->bError = TRUE;
544                         return 0;
545                     }
546                 }
547             }
548         }
549         else
550         {
551             if (psStruct->pfnReadCbk)
552             {
553                 if ( ! psStruct->pfnReadCbk(psStruct->fp, buffer, nSize,
554                                             psStruct->pReadCbkUserData) )
555                 {
556                     psStruct->bInterrupted = TRUE;
557                     return 0;
558                 }
559             }
560         }
561         psStruct->nSize += nSize;
562         return nmemb;
563     }
564     else
565     {
566         return 0;
567     }
568 }
569 
570 
571 /************************************************************************/
572 /*                           GetFileSize()                              */
573 /************************************************************************/
574 
GetFileSize()575 vsi_l_offset VSICurlHandle::GetFileSize()
576 {
577     WriteFuncStruct sWriteFuncData;
578     WriteFuncStruct sWriteFuncHeaderData;
579 
580     if (bHastComputedFileSize)
581         return fileSize;
582 
583     bHastComputedFileSize = TRUE;
584 
585     /* Consider that only the files whose extension ends up with one that is */
586     /* listed in CPL_VSIL_CURL_ALLOWED_EXTENSIONS exist on the server */
587     /* This can speeds up dramatically open experience, in case the server */
588     /* cannot return a file list */
589     /* {noext} can be used as a special token to mean file with no extension */
590     /* For example : */
591     /* gdalinfo --config CPL_VSIL_CURL_ALLOWED_EXTENSIONS ".tif" /vsicurl/http://igskmncngs506.cr.usgs.gov/gmted/Global_tiles_GMTED/075darcsec/bln/W030/30N030W_20101117_gmted_bln075.tif */
592     const char* pszAllowedExtensions =
593         CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_EXTENSIONS", NULL);
594     if (pszAllowedExtensions)
595     {
596         char** papszExtensions = CSLTokenizeString2( pszAllowedExtensions, ", ", 0 );
597         int nURLLen = strlen(pszURL);
598         int bFound = FALSE;
599         for(int i=0;papszExtensions[i] != NULL;i++)
600         {
601             int nExtensionLen = strlen(papszExtensions[i]);
602             if( EQUAL(papszExtensions[i], "{noext}") )
603             {
604                 if( nURLLen > 4 && strchr(pszURL + nURLLen - 4, '.') == NULL )
605                 {
606                     bFound = TRUE;
607                     break;
608                 }
609             }
610             else if (nURLLen > nExtensionLen &&
611                 EQUAL(pszURL + nURLLen - nExtensionLen, papszExtensions[i]))
612             {
613                 bFound = TRUE;
614                 break;
615             }
616         }
617 
618         if (!bFound)
619         {
620             eExists = EXIST_NO;
621             fileSize = 0;
622 
623             CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
624             cachedFileProp->bHastComputedFileSize = TRUE;
625             cachedFileProp->fileSize = fileSize;
626             cachedFileProp->eExists = eExists;
627 
628             CSLDestroy(papszExtensions);
629 
630             return 0;
631         }
632 
633         CSLDestroy(papszExtensions);
634     }
635 
636 #if LIBCURL_VERSION_NUM < 0x070B00
637     /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
638     /* previously set, so we have to reinit the connection handle */
639     poFS->GetCurlHandleFor("");
640 #endif
641     CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
642 
643     VSICurlSetOptions(hCurlHandle, pszURL);
644 
645     VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
646 
647     /* HACK for mbtiles driver: proper fix would be to auto-detect servers that don't accept HEAD */
648     /* http://a.tiles.mapbox.com/v3/ doesn't accept HEAD, so let's start a GET */
649     /* and interrupt is as soon as the header is found */
650     if (strstr(pszURL, ".tiles.mapbox.com/") != NULL
651 	|| !CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_USE_HEAD", "YES")))
652     {
653         curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
654         curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
655 
656         sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
657         sWriteFuncHeaderData.bDownloadHeaderOnly = TRUE;
658     }
659     else
660     {
661         curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 1);
662         curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 0);
663         curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 1);
664     }
665 
666     /* We need that otherwise OSGEO4W's libcurl issue a dummy range request */
667     /* when doing a HEAD when recycling connections */
668     curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
669 
670     /* Bug with older curl versions (<=7.16.4) and FTP. See http://curl.haxx.se/mail/lib-2007-08/0312.html */
671     VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
672     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
673     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
674 
675     char szCurlErrBuf[CURL_ERROR_SIZE+1];
676     szCurlErrBuf[0] = '\0';
677     curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
678 
679     double dfSize = 0;
680     curl_easy_perform(hCurlHandle);
681 
682     eExists = EXIST_UNKNOWN;
683 
684     if (strncmp(pszURL, "ftp", 3) == 0)
685     {
686         if (sWriteFuncData.pBuffer != NULL &&
687             strncmp(sWriteFuncData.pBuffer, "Content-Length: ", strlen( "Content-Length: ")) == 0)
688         {
689             const char* pszBuffer = sWriteFuncData.pBuffer + strlen("Content-Length: ");
690             eExists = EXIST_YES;
691             fileSize = CPLScanUIntBig(pszBuffer, sWriteFuncData.nSize - strlen("Content-Length: "));
692             if (ENABLE_DEBUG)
693                 CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB,
694                         pszURL, fileSize);
695         }
696     }
697 
698     if (eExists != EXIST_YES)
699     {
700         CURLcode code = curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &dfSize );
701         if (code == 0)
702         {
703             eExists = EXIST_YES;
704             if (dfSize < 0)
705                 fileSize = 0;
706             else
707                 fileSize = (GUIntBig)dfSize;
708         }
709         else
710         {
711             eExists = EXIST_NO;
712             fileSize = 0;
713             CPLError(CE_Failure, CPLE_AppDefined, "VSICurlHandle::GetFileSize failed");
714         }
715 
716         long response_code = 0;
717         curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
718         if (response_code != 200)
719         {
720             eExists = EXIST_NO;
721             fileSize = 0;
722         }
723 
724         /* Try to guess if this is a directory. Generally if this is a directory, */
725         /* curl will retry with an URL with slash added */
726         char *pszEffectiveURL = NULL;
727         curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL, &pszEffectiveURL);
728         if (pszEffectiveURL != NULL && strncmp(pszURL, pszEffectiveURL, strlen(pszURL)) == 0 &&
729             pszEffectiveURL[strlen(pszURL)] == '/')
730         {
731             eExists = EXIST_YES;
732             fileSize = 0;
733             bIsDirectory = TRUE;
734         }
735 
736         if (ENABLE_DEBUG)
737             CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB "  response_code=%d",
738                     pszURL, fileSize, (int)response_code);
739     }
740 
741     CPLFree(sWriteFuncData.pBuffer);
742     CPLFree(sWriteFuncHeaderData.pBuffer);
743 
744     CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
745     cachedFileProp->bHastComputedFileSize = TRUE;
746     cachedFileProp->fileSize = fileSize;
747     cachedFileProp->eExists = eExists;
748     cachedFileProp->bIsDirectory = bIsDirectory;
749 
750     return fileSize;
751 }
752 
753 /************************************************************************/
754 /*                                 Exists()                             */
755 /************************************************************************/
756 
Exists()757 int VSICurlHandle::Exists()
758 {
759     if (eExists == EXIST_UNKNOWN)
760         GetFileSize();
761     return eExists == EXIST_YES;
762 }
763 
764 /************************************************************************/
765 /*                                  Tell()                              */
766 /************************************************************************/
767 
Tell()768 vsi_l_offset VSICurlHandle::Tell()
769 {
770     return curOffset;
771 }
772 
773 /************************************************************************/
774 /*                          DownloadRegion()                            */
775 /************************************************************************/
776 
DownloadRegion(vsi_l_offset startOffset,int nBlocks)777 int VSICurlHandle::DownloadRegion(vsi_l_offset startOffset, int nBlocks)
778 {
779     WriteFuncStruct sWriteFuncData;
780     WriteFuncStruct sWriteFuncHeaderData;
781 
782     if (bInterrupted && bStopOnInterrruptUntilUninstall)
783         return FALSE;
784 
785     CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
786     if (cachedFileProp->eExists == EXIST_NO)
787         return FALSE;
788 
789     CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
790     VSICurlSetOptions(hCurlHandle, pszURL);
791 
792     VSICURLInitWriteFuncStruct(&sWriteFuncData, (VSILFILE*)this, pfnReadCbk, pReadCbkUserData);
793     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
794     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
795 
796     VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
797     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
798     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
799     sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
800     sWriteFuncHeaderData.nStartOffset = startOffset;
801     sWriteFuncHeaderData.nEndOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE - 1;
802     /* Some servers don't like we try to read after end-of-file (#5786) */
803     if( cachedFileProp->bHastComputedFileSize &&
804         sWriteFuncHeaderData.nEndOffset >= cachedFileProp->fileSize )
805     {
806         sWriteFuncHeaderData.nEndOffset = cachedFileProp->fileSize - 1;
807     }
808 
809     char rangeStr[512];
810     sprintf(rangeStr, CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, startOffset,
811             sWriteFuncHeaderData.nEndOffset);
812 
813     if (ENABLE_DEBUG)
814         CPLDebug("VSICURL", "Downloading %s (%s)...", rangeStr, pszURL);
815 
816     curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr);
817 
818     char szCurlErrBuf[CURL_ERROR_SIZE+1];
819     szCurlErrBuf[0] = '\0';
820     curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
821 
822     curl_easy_perform(hCurlHandle);
823 
824     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
825     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
826     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
827     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
828 
829     if (sWriteFuncData.bInterrupted)
830     {
831         bInterrupted = TRUE;
832 
833         CPLFree(sWriteFuncData.pBuffer);
834         CPLFree(sWriteFuncHeaderData.pBuffer);
835 
836         return FALSE;
837     }
838 
839     long response_code = 0;
840     curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
841 
842     char *content_type = 0;
843     curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
844 
845     if (ENABLE_DEBUG)
846         CPLDebug("VSICURL", "Got reponse_code=%ld", response_code);
847 
848     if ((response_code != 200 && response_code != 206 &&
849          response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
850     {
851         if (response_code >= 400 && szCurlErrBuf[0] != '\0')
852         {
853             if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
854                 CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
855                          (int)response_code, szCurlErrBuf,
856                          "Range downloading not supported by this server !");
857             else
858                 CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
859         }
860         if (!bHastComputedFileSize && startOffset == 0)
861         {
862             cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
863             cachedFileProp->fileSize = fileSize = 0;
864             cachedFileProp->eExists = eExists = EXIST_NO;
865         }
866         CPLFree(sWriteFuncData.pBuffer);
867         CPLFree(sWriteFuncHeaderData.pBuffer);
868         return FALSE;
869     }
870 
871     if (!bHastComputedFileSize && sWriteFuncHeaderData.pBuffer)
872     {
873         /* Try to retrieve the filesize from the HTTP headers */
874         /* if in the form : "Content-Range: bytes x-y/filesize" */
875         char* pszContentRange = strstr(sWriteFuncHeaderData.pBuffer, "Content-Range: bytes ");
876         if (pszContentRange)
877         {
878             char* pszEOL = strchr(pszContentRange, '\n');
879             if (pszEOL)
880             {
881                 *pszEOL = 0;
882                 pszEOL = strchr(pszContentRange, '\r');
883                 if (pszEOL)
884                     *pszEOL = 0;
885                 char* pszSlash = strchr(pszContentRange, '/');
886                 if (pszSlash)
887                 {
888                     pszSlash ++;
889                     fileSize = CPLScanUIntBig(pszSlash, strlen(pszSlash));
890                 }
891             }
892         }
893         else if (strncmp(pszURL, "ftp", 3) == 0)
894         {
895             /* Parse 213 answer for FTP protocol */
896             char* pszSize = strstr(sWriteFuncHeaderData.pBuffer, "213 ");
897             if (pszSize)
898             {
899                 pszSize += 4;
900                 char* pszEOL = strchr(pszSize, '\n');
901                 if (pszEOL)
902                 {
903                     *pszEOL = 0;
904                     pszEOL = strchr(pszSize, '\r');
905                     if (pszEOL)
906                         *pszEOL = 0;
907 
908                     fileSize = CPLScanUIntBig(pszSize, strlen(pszSize));
909                 }
910             }
911         }
912 
913         if (fileSize != 0)
914         {
915             eExists = EXIST_YES;
916 
917             if (ENABLE_DEBUG)
918                 CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB "  response_code=%d",
919                         pszURL, fileSize, (int)response_code);
920 
921             bHastComputedFileSize = cachedFileProp->bHastComputedFileSize = TRUE;
922             cachedFileProp->fileSize = fileSize;
923             cachedFileProp->eExists = eExists;
924         }
925     }
926 
927     lastDownloadedOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE;
928 
929     char* pBuffer = sWriteFuncData.pBuffer;
930     int nSize = sWriteFuncData.nSize;
931 
932     if (nSize > nBlocks * DOWNLOAD_CHUNCK_SIZE)
933     {
934         if (ENABLE_DEBUG)
935             CPLDebug("VSICURL", "Got more data than expected : %d instead of %d",
936                      nSize, nBlocks * DOWNLOAD_CHUNCK_SIZE);
937     }
938 
939     while(nSize > 0)
940     {
941         //if (ENABLE_DEBUG)
942         //    CPLDebug("VSICURL", "Add region %d - %d", startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize));
943         poFS->AddRegion(pszURL, startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize), pBuffer);
944         startOffset += DOWNLOAD_CHUNCK_SIZE;
945         pBuffer += DOWNLOAD_CHUNCK_SIZE;
946         nSize -= DOWNLOAD_CHUNCK_SIZE;
947     }
948 
949     CPLFree(sWriteFuncData.pBuffer);
950     CPLFree(sWriteFuncHeaderData.pBuffer);
951 
952     return TRUE;
953 }
954 
955 /************************************************************************/
956 /*                                Read()                                */
957 /************************************************************************/
958 
Read(void * pBuffer,size_t nSize,size_t nMemb)959 size_t VSICurlHandle::Read( void *pBuffer, size_t nSize, size_t nMemb )
960 {
961     size_t nBufferRequestSize = nSize * nMemb;
962     if (nBufferRequestSize == 0)
963         return 0;
964 
965     //CPLDebug("VSICURL", "offset=%d, size=%d", (int)curOffset, (int)nBufferRequestSize);
966 
967     vsi_l_offset iterOffset = curOffset;
968     while (nBufferRequestSize)
969     {
970         const CachedRegion* psRegion = poFS->GetRegion(pszURL, iterOffset);
971         if (psRegion == NULL)
972         {
973             vsi_l_offset nOffsetToDownload =
974                 (iterOffset / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
975 
976             if (nOffsetToDownload == lastDownloadedOffset)
977             {
978                 /* In case of consecutive reads (of small size), we use a */
979                 /* heuristic that we will read the file sequentially, so */
980                 /* we double the requested size to decrease the number of */
981                 /* client/server roundtrips. */
982                 if (nBlocksToDownload < 100)
983                     nBlocksToDownload *= 2;
984             }
985             else
986             {
987                 /* Random reads. Cancel the above heuristics */
988                 nBlocksToDownload = 1;
989             }
990 
991             /* Ensure that we will request at least the number of blocks */
992             /* to satisfy the remaining buffer size to read */
993             vsi_l_offset nEndOffsetToDownload =
994                 ((iterOffset + nBufferRequestSize) / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
995             int nMinBlocksToDownload = 1 + (int)
996                 ((nEndOffsetToDownload - nOffsetToDownload) / DOWNLOAD_CHUNCK_SIZE);
997             if (nBlocksToDownload < nMinBlocksToDownload)
998                 nBlocksToDownload = nMinBlocksToDownload;
999 
1000             int i;
1001             /* Avoid reading already cached data */
1002             for(i=1;i<nBlocksToDownload;i++)
1003             {
1004                 if (poFS->GetRegion(pszURL, nOffsetToDownload + i * DOWNLOAD_CHUNCK_SIZE) != NULL)
1005                 {
1006                     nBlocksToDownload = i;
1007                     break;
1008                 }
1009             }
1010 
1011             if( nBlocksToDownload > N_MAX_REGIONS )
1012                 nBlocksToDownload = N_MAX_REGIONS;
1013 
1014             if (DownloadRegion(nOffsetToDownload, nBlocksToDownload) == FALSE)
1015             {
1016                 if (!bInterrupted)
1017                     bEOF = TRUE;
1018                 return 0;
1019             }
1020             psRegion = poFS->GetRegion(pszURL, iterOffset);
1021         }
1022         if (psRegion == NULL || psRegion->pData == NULL)
1023         {
1024             bEOF = TRUE;
1025             return 0;
1026         }
1027         int nToCopy = (int) MIN(nBufferRequestSize, psRegion->nSize - (iterOffset - psRegion->nFileOffsetStart));
1028         memcpy(pBuffer, psRegion->pData + iterOffset - psRegion->nFileOffsetStart,
1029                 nToCopy);
1030         pBuffer = (char*) pBuffer + nToCopy;
1031         iterOffset += nToCopy;
1032         nBufferRequestSize -= nToCopy;
1033         if (psRegion->nSize != DOWNLOAD_CHUNCK_SIZE && nBufferRequestSize != 0)
1034         {
1035             break;
1036         }
1037     }
1038 
1039     size_t ret = (size_t) ((iterOffset - curOffset) / nSize);
1040     if (ret != nMemb)
1041         bEOF = TRUE;
1042 
1043     curOffset = iterOffset;
1044 
1045     return ret;
1046 }
1047 
1048 
1049 /************************************************************************/
1050 /*                           ReadMultiRange()                           */
1051 /************************************************************************/
1052 
ReadMultiRange(int nRanges,void ** ppData,const vsi_l_offset * panOffsets,const size_t * panSizes)1053 int VSICurlHandle::ReadMultiRange( int nRanges, void ** ppData,
1054                                    const vsi_l_offset* panOffsets,
1055                                    const size_t* panSizes )
1056 {
1057     WriteFuncStruct sWriteFuncData;
1058     WriteFuncStruct sWriteFuncHeaderData;
1059 
1060     if (bInterrupted && bStopOnInterrruptUntilUninstall)
1061         return FALSE;
1062 
1063     CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
1064     if (cachedFileProp->eExists == EXIST_NO)
1065         return -1;
1066 
1067     CPLString osRanges, osFirstRange, osLastRange;
1068     int i;
1069     int nMergedRanges = 0;
1070     vsi_l_offset nTotalReqSize = 0;
1071     for(i=0;i<nRanges;i++)
1072     {
1073         CPLString osCurRange;
1074         if (i != 0)
1075             osRanges.append(",");
1076         osCurRange = CPLSPrintf(CPL_FRMT_GUIB "-", panOffsets[i]);
1077         while (i + 1 < nRanges && panOffsets[i] + panSizes[i] == panOffsets[i+1])
1078         {
1079             nTotalReqSize += panSizes[i];
1080             i ++;
1081         }
1082         nTotalReqSize += panSizes[i];
1083         osCurRange.append(CPLSPrintf(CPL_FRMT_GUIB, panOffsets[i] + panSizes[i]-1));
1084         nMergedRanges ++;
1085 
1086         osRanges += osCurRange;
1087 
1088         if (nMergedRanges == 1)
1089             osFirstRange = osCurRange;
1090         osLastRange = osCurRange;
1091     }
1092 
1093     const char* pszMaxRanges = CPLGetConfigOption("CPL_VSIL_CURL_MAX_RANGES", "250");
1094     int nMaxRanges = atoi(pszMaxRanges);
1095     if (nMaxRanges <= 0)
1096         nMaxRanges = 250;
1097     if (nMergedRanges > nMaxRanges)
1098     {
1099         int nHalf = nRanges / 2;
1100         int nRet = ReadMultiRange(nHalf, ppData, panOffsets, panSizes);
1101         if (nRet != 0)
1102             return nRet;
1103         return ReadMultiRange(nRanges - nHalf, ppData + nHalf, panOffsets + nHalf, panSizes + nHalf);
1104     }
1105 
1106     CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
1107     VSICurlSetOptions(hCurlHandle, pszURL);
1108 
1109     VSICURLInitWriteFuncStruct(&sWriteFuncData, (VSILFILE*)this, pfnReadCbk, pReadCbkUserData);
1110     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
1111     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
1112 
1113     VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
1114     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
1115     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
1116     sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
1117     sWriteFuncHeaderData.bMultiRange = nMergedRanges > 1;
1118     if (nMergedRanges == 1)
1119     {
1120         sWriteFuncHeaderData.nStartOffset = panOffsets[0];
1121         sWriteFuncHeaderData.nEndOffset = panOffsets[0] + nTotalReqSize-1;
1122     }
1123 
1124     if (ENABLE_DEBUG)
1125     {
1126         if (nMergedRanges == 1)
1127             CPLDebug("VSICURL", "Downloading %s (%s)...", osRanges.c_str(), pszURL);
1128         else
1129             CPLDebug("VSICURL", "Downloading %s, ..., %s (" CPL_FRMT_GUIB " bytes, %s)...",
1130                      osFirstRange.c_str(), osLastRange.c_str(), (GUIntBig)nTotalReqSize, pszURL);
1131     }
1132 
1133     curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, osRanges.c_str());
1134 
1135     char szCurlErrBuf[CURL_ERROR_SIZE+1];
1136     szCurlErrBuf[0] = '\0';
1137     curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
1138 
1139     curl_easy_perform(hCurlHandle);
1140 
1141     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
1142     curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
1143     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
1144     curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
1145 
1146     if (sWriteFuncData.bInterrupted)
1147     {
1148         bInterrupted = TRUE;
1149 
1150         CPLFree(sWriteFuncData.pBuffer);
1151         CPLFree(sWriteFuncHeaderData.pBuffer);
1152 
1153         return -1;
1154     }
1155 
1156     long response_code = 0;
1157     curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
1158 
1159     char *content_type = 0;
1160     curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
1161 
1162     if ((response_code != 200 && response_code != 206 &&
1163          response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
1164     {
1165         if (response_code >= 400 && szCurlErrBuf[0] != '\0')
1166         {
1167             if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
1168                 CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
1169                          (int)response_code, szCurlErrBuf,
1170                          "Range downloading not supported by this server !");
1171             else
1172                 CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
1173         }
1174         /*
1175         if (!bHastComputedFileSize && startOffset == 0)
1176         {
1177             cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
1178             cachedFileProp->fileSize = fileSize = 0;
1179             cachedFileProp->eExists = eExists = EXIST_NO;
1180         }
1181         */
1182         CPLFree(sWriteFuncData.pBuffer);
1183         CPLFree(sWriteFuncHeaderData.pBuffer);
1184         return -1;
1185     }
1186 
1187     char* pBuffer = sWriteFuncData.pBuffer;
1188     int nSize = sWriteFuncData.nSize;
1189 
1190     int nRet = -1;
1191     char* pszBoundary;
1192     CPLString osBoundary;
1193     char *pszNext;
1194     int iRange = 0;
1195     int iPart = 0;
1196     char* pszEOL;
1197 
1198 /* -------------------------------------------------------------------- */
1199 /*      No multipart if a single range has been requested               */
1200 /* -------------------------------------------------------------------- */
1201 
1202     if (nMergedRanges == 1)
1203     {
1204         int nAccSize = 0;
1205         if ((vsi_l_offset)nSize < nTotalReqSize)
1206             goto end;
1207 
1208         for(i=0;i<nRanges;i++)
1209         {
1210             memcpy(ppData[i], pBuffer + nAccSize, panSizes[i]);
1211             nAccSize += panSizes[i];
1212         }
1213 
1214         nRet = 0;
1215         goto end;
1216     }
1217 
1218 /* -------------------------------------------------------------------- */
1219 /*      Extract boundary name                                           */
1220 /* -------------------------------------------------------------------- */
1221 
1222     pszBoundary = strstr(sWriteFuncHeaderData.pBuffer,
1223                          "Content-Type: multipart/byteranges; boundary=");
1224     if( pszBoundary == NULL )
1225     {
1226         CPLError( CE_Failure, CPLE_AppDefined, "Could not find '%s'",
1227                   "Content-Type: multipart/byteranges; boundary=" );
1228         goto end;
1229     }
1230 
1231     pszBoundary += strlen( "Content-Type: multipart/byteranges; boundary=" );
1232 
1233     pszEOL = strchr(pszBoundary, '\r');
1234     if (pszEOL)
1235         *pszEOL = 0;
1236     pszEOL = strchr(pszBoundary, '\n');
1237     if (pszEOL)
1238         *pszEOL = 0;
1239 
1240     /* Remove optional double-quote character around boundary name */
1241     if (pszBoundary[0] == '"')
1242     {
1243         pszBoundary ++;
1244         char* pszLastDoubleQuote = strrchr(pszBoundary, '"');
1245         if (pszLastDoubleQuote)
1246             *pszLastDoubleQuote = 0;
1247     }
1248 
1249     osBoundary = "--";
1250     osBoundary += pszBoundary;
1251 
1252 /* -------------------------------------------------------------------- */
1253 /*      Find the start of the first chunk.                              */
1254 /* -------------------------------------------------------------------- */
1255     pszNext = strstr(pBuffer,osBoundary.c_str());
1256     if( pszNext == NULL )
1257     {
1258         CPLError( CE_Failure, CPLE_AppDefined, "No parts found." );
1259         goto end;
1260     }
1261 
1262     pszNext += strlen(osBoundary);
1263     while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1264         pszNext++;
1265     if( *pszNext == '\r' )
1266         pszNext++;
1267     if( *pszNext == '\n' )
1268         pszNext++;
1269 
1270 /* -------------------------------------------------------------------- */
1271 /*      Loop over parts...                                              */
1272 /* -------------------------------------------------------------------- */
1273     while( iPart < nRanges )
1274     {
1275 /* -------------------------------------------------------------------- */
1276 /*      Collect headers.                                                */
1277 /* -------------------------------------------------------------------- */
1278         int bExpectedRange = FALSE;
1279 
1280         while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1281         {
1282             char *pszEOL = strstr(pszNext,"\n");
1283 
1284             if( pszEOL == NULL )
1285             {
1286                 CPLError(CE_Failure, CPLE_AppDefined,
1287                          "Error while parsing multipart content (at line %d)", __LINE__);
1288                 goto end;
1289             }
1290 
1291             *pszEOL = '\0';
1292             int bRestoreAntislashR = FALSE;
1293             if (pszEOL - pszNext > 1 && pszEOL[-1] == '\r')
1294             {
1295                 bRestoreAntislashR = TRUE;
1296                 pszEOL[-1] = '\0';
1297             }
1298 
1299             if (EQUALN(pszNext, "Content-Range: bytes ", strlen("Content-Range: bytes ")))
1300             {
1301                 bExpectedRange = TRUE; /* FIXME */
1302             }
1303 
1304             if (bRestoreAntislashR)
1305                 pszEOL[-1] = '\r';
1306             *pszEOL = '\n';
1307 
1308             pszNext = pszEOL + 1;
1309         }
1310 
1311         if (!bExpectedRange)
1312         {
1313             CPLError(CE_Failure, CPLE_AppDefined,
1314                         "Error while parsing multipart content (at line %d)", __LINE__);
1315             goto end;
1316         }
1317 
1318         if( *pszNext == '\r' )
1319             pszNext++;
1320         if( *pszNext == '\n' )
1321             pszNext++;
1322 
1323 /* -------------------------------------------------------------------- */
1324 /*      Work out the data block size.                                   */
1325 /* -------------------------------------------------------------------- */
1326         size_t nBytesAvail = nSize - (pszNext - pBuffer);
1327 
1328         while(TRUE)
1329         {
1330             if (nBytesAvail < panSizes[iRange])
1331             {
1332                 CPLError(CE_Failure, CPLE_AppDefined,
1333                             "Error while parsing multipart content (at line %d)", __LINE__);
1334                 goto end;
1335             }
1336 
1337             memcpy(ppData[iRange], pszNext, panSizes[iRange]);
1338             pszNext += panSizes[iRange];
1339             nBytesAvail -= panSizes[iRange];
1340             if( iRange + 1 < nRanges &&
1341                 panOffsets[iRange] + panSizes[iRange] == panOffsets[iRange + 1] )
1342             {
1343                 iRange++;
1344             }
1345             else
1346                 break;
1347         }
1348 
1349         iPart ++;
1350         iRange ++;
1351 
1352         while( nBytesAvail > 0
1353                && (*pszNext != '-'
1354                    || strncmp(pszNext,osBoundary,strlen(osBoundary)) != 0) )
1355         {
1356             pszNext++;
1357             nBytesAvail--;
1358         }
1359 
1360         if( nBytesAvail == 0 )
1361         {
1362             CPLError(CE_Failure, CPLE_AppDefined,
1363                         "Error while parsing multipart content (at line %d)", __LINE__);
1364             goto end;
1365         }
1366 
1367         pszNext += strlen(osBoundary);
1368         if( strncmp(pszNext,"--",2) == 0 )
1369         {
1370             /* End of multipart */
1371             break;
1372         }
1373 
1374         if( *pszNext == '\r' )
1375             pszNext++;
1376         if( *pszNext == '\n' )
1377             pszNext++;
1378         else
1379         {
1380             CPLError(CE_Failure, CPLE_AppDefined,
1381                         "Error while parsing multipart content (at line %d)", __LINE__);
1382             goto end;
1383         }
1384     }
1385 
1386     if (iPart == nMergedRanges)
1387         nRet = 0;
1388     else
1389         CPLError(CE_Failure, CPLE_AppDefined,
1390                  "Got only %d parts, where %d were expected", iPart, nMergedRanges);
1391 
1392 end:
1393     CPLFree(sWriteFuncData.pBuffer);
1394     CPLFree(sWriteFuncHeaderData.pBuffer);
1395 
1396     return nRet;
1397 }
1398 
1399 /************************************************************************/
1400 /*                               Write()                                */
1401 /************************************************************************/
1402 
Write(CPL_UNUSED const void * pBuffer,CPL_UNUSED size_t nSize,CPL_UNUSED size_t nMemb)1403 size_t VSICurlHandle::Write( CPL_UNUSED const void *pBuffer,
1404                              CPL_UNUSED size_t nSize,
1405                              CPL_UNUSED size_t nMemb )
1406 {
1407     return 0;
1408 }
1409 
1410 /************************************************************************/
1411 /*                                 Eof()                                */
1412 /************************************************************************/
1413 
1414 
Eof()1415 int       VSICurlHandle::Eof()
1416 {
1417     return bEOF;
1418 }
1419 
1420 /************************************************************************/
1421 /*                                 Flush()                              */
1422 /************************************************************************/
1423 
Flush()1424 int       VSICurlHandle::Flush()
1425 {
1426     return 0;
1427 }
1428 
1429 /************************************************************************/
1430 /*                                  Close()                             */
1431 /************************************************************************/
1432 
Close()1433 int       VSICurlHandle::Close()
1434 {
1435     return 0;
1436 }
1437 
1438 
1439 
1440 
1441 /************************************************************************/
1442 /*                   VSICurlFilesystemHandler()                         */
1443 /************************************************************************/
1444 
VSICurlFilesystemHandler()1445 VSICurlFilesystemHandler::VSICurlFilesystemHandler()
1446 {
1447     hMutex = NULL;
1448     papsRegions = NULL;
1449     nRegions = 0;
1450     bUseCacheDisk = CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_USE_CACHE", "NO"));
1451 }
1452 
1453 /************************************************************************/
1454 /*                  ~VSICurlFilesystemHandler()                         */
1455 /************************************************************************/
1456 
~VSICurlFilesystemHandler()1457 VSICurlFilesystemHandler::~VSICurlFilesystemHandler()
1458 {
1459     int i;
1460     for(i=0;i<nRegions;i++)
1461     {
1462         CPLFree(papsRegions[i]->pData);
1463         CPLFree(papsRegions[i]);
1464     }
1465     CPLFree(papsRegions);
1466 
1467     std::map<CPLString, CachedFileProp*>::const_iterator iterCacheFileSize;
1468 
1469     for( iterCacheFileSize = cacheFileSize.begin(); iterCacheFileSize != cacheFileSize.end(); iterCacheFileSize++ )
1470     {
1471         CPLFree(iterCacheFileSize->second);
1472     }
1473 
1474     std::map<CPLString, CachedDirList*>::const_iterator iterCacheDirList;
1475 
1476     for( iterCacheDirList = cacheDirList.begin(); iterCacheDirList != cacheDirList.end(); iterCacheDirList++ )
1477     {
1478         CSLDestroy(iterCacheDirList->second->papszFileList);
1479         CPLFree(iterCacheDirList->second);
1480     }
1481 
1482     std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1483     for( iterConnections = mapConnections.begin(); iterConnections != mapConnections.end(); iterConnections++ )
1484     {
1485         curl_easy_cleanup(iterConnections->second->hCurlHandle);
1486         delete iterConnections->second;
1487     }
1488 
1489     if( hMutex != NULL )
1490         CPLDestroyMutex( hMutex );
1491     hMutex = NULL;
1492 }
1493 
1494 /************************************************************************/
1495 /*                      GetCurlHandleFor()                              */
1496 /************************************************************************/
1497 
GetCurlHandleFor(CPLString osURL)1498 CURL* VSICurlFilesystemHandler::GetCurlHandleFor(CPLString osURL)
1499 {
1500     CPLMutexHolder oHolder( &hMutex );
1501 
1502     std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1503 
1504     iterConnections = mapConnections.find(CPLGetPID());
1505     if (iterConnections == mapConnections.end())
1506     {
1507         CURL* hCurlHandle = curl_easy_init();
1508         CachedConnection* psCachedConnection = new CachedConnection;
1509         psCachedConnection->osURL = osURL;
1510         psCachedConnection->hCurlHandle = hCurlHandle;
1511         mapConnections[CPLGetPID()] = psCachedConnection;
1512         return hCurlHandle;
1513     }
1514     else
1515     {
1516         CachedConnection* psCachedConnection = iterConnections->second;
1517         if (osURL == psCachedConnection->osURL)
1518             return psCachedConnection->hCurlHandle;
1519 
1520         const char* pszURL = osURL.c_str();
1521         const char* pszEndOfServ = strchr(pszURL, '.');
1522         if (pszEndOfServ != NULL)
1523             pszEndOfServ = strchr(pszEndOfServ, '/');
1524         if (pszEndOfServ == NULL)
1525             pszURL = pszURL + strlen(pszURL);
1526         int bReinitConnection = strncmp(psCachedConnection->osURL,
1527                                         pszURL, pszEndOfServ-pszURL) != 0;
1528 
1529         if (bReinitConnection)
1530         {
1531             if (psCachedConnection->hCurlHandle)
1532                 curl_easy_cleanup(psCachedConnection->hCurlHandle);
1533             psCachedConnection->hCurlHandle = curl_easy_init();
1534         }
1535         psCachedConnection->osURL = osURL;
1536 
1537         return psCachedConnection->hCurlHandle;
1538     }
1539 }
1540 
1541 
1542 /************************************************************************/
1543 /*                   GetRegionFromCacheDisk()                           */
1544 /************************************************************************/
1545 
1546 const CachedRegion*
GetRegionFromCacheDisk(const char * pszURL,vsi_l_offset nFileOffsetStart)1547 VSICurlFilesystemHandler::GetRegionFromCacheDisk(const char* pszURL,
1548                                                  vsi_l_offset nFileOffsetStart)
1549 {
1550     nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1551     VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "rb");
1552     if (fp)
1553     {
1554         unsigned long   pszURLHash = CPLHashSetHashStr(pszURL);
1555         unsigned long   pszURLHashCached;
1556         vsi_l_offset    nFileOffsetStartCached;
1557         size_t          nSizeCached;
1558         while(TRUE)
1559         {
1560             if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1561                 break;
1562             VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1563             VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1564             if (pszURLHash == pszURLHashCached &&
1565                 nFileOffsetStart == nFileOffsetStartCached)
1566             {
1567                 if (ENABLE_DEBUG)
1568                     CPLDebug("VSICURL", "Got data at offset " CPL_FRMT_GUIB " from disk" , nFileOffsetStart);
1569                 if (nSizeCached)
1570                 {
1571                     char* pBuffer = (char*) CPLMalloc(nSizeCached);
1572                     VSIFReadL(pBuffer, 1, nSizeCached, fp);
1573                     AddRegion(pszURL, nFileOffsetStart, nSizeCached, pBuffer);
1574                     CPLFree(pBuffer);
1575                 }
1576                 else
1577                 {
1578                     AddRegion(pszURL, nFileOffsetStart, 0, NULL);
1579                 }
1580                 VSIFCloseL(fp);
1581                 return GetRegion(pszURL, nFileOffsetStart);
1582             }
1583             else
1584             {
1585                 VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1586             }
1587         }
1588         VSIFCloseL(fp);
1589     }
1590     return NULL;
1591 }
1592 
1593 
1594 /************************************************************************/
1595 /*                  AddRegionToCacheDisk()                                */
1596 /************************************************************************/
1597 
AddRegionToCacheDisk(CachedRegion * psRegion)1598 void VSICurlFilesystemHandler::AddRegionToCacheDisk(CachedRegion* psRegion)
1599 {
1600     VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "r+b");
1601     if (fp)
1602     {
1603         unsigned long   pszURLHashCached;
1604         vsi_l_offset    nFileOffsetStartCached;
1605         size_t          nSizeCached;
1606         while(TRUE)
1607         {
1608             if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1609                 break;
1610             VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1611             VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1612             if (psRegion->pszURLHash == pszURLHashCached &&
1613                 psRegion->nFileOffsetStart == nFileOffsetStartCached)
1614             {
1615                 CPLAssert(psRegion->nSize == nSizeCached);
1616                 VSIFCloseL(fp);
1617                 return;
1618             }
1619             else
1620             {
1621                 VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1622             }
1623         }
1624     }
1625     else
1626     {
1627         fp = VSIFOpenL(VSICurlGetCacheFileName(), "wb");
1628     }
1629     if (fp)
1630     {
1631         if (ENABLE_DEBUG)
1632              CPLDebug("VSICURL", "Write data at offset " CPL_FRMT_GUIB " to disk" , psRegion->nFileOffsetStart);
1633         VSIFWriteL(&psRegion->pszURLHash, 1, sizeof(unsigned long), fp);
1634         VSIFWriteL(&psRegion->nFileOffsetStart, 1, sizeof(vsi_l_offset), fp);
1635         VSIFWriteL(&psRegion->nSize, 1, sizeof(size_t), fp);
1636         if (psRegion->nSize)
1637             VSIFWriteL(psRegion->pData, 1, psRegion->nSize, fp);
1638 
1639         VSIFCloseL(fp);
1640     }
1641     return;
1642 }
1643 
1644 
1645 /************************************************************************/
1646 /*                          GetRegion()                                 */
1647 /************************************************************************/
1648 
GetRegion(const char * pszURL,vsi_l_offset nFileOffsetStart)1649 const CachedRegion* VSICurlFilesystemHandler::GetRegion(const char* pszURL,
1650                                                         vsi_l_offset nFileOffsetStart)
1651 {
1652     CPLMutexHolder oHolder( &hMutex );
1653 
1654     unsigned long   pszURLHash = CPLHashSetHashStr(pszURL);
1655 
1656     nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1657     int i;
1658     for(i=0;i<nRegions;i++)
1659     {
1660         CachedRegion* psRegion = papsRegions[i];
1661         if (psRegion->pszURLHash == pszURLHash &&
1662             nFileOffsetStart == psRegion->nFileOffsetStart)
1663         {
1664             memmove(papsRegions + 1, papsRegions, i * sizeof(CachedRegion*));
1665             papsRegions[0] = psRegion;
1666             return psRegion;
1667         }
1668     }
1669     if (bUseCacheDisk)
1670         return GetRegionFromCacheDisk(pszURL, nFileOffsetStart);
1671     return NULL;
1672 }
1673 
1674 /************************************************************************/
1675 /*                          AddRegion()                                 */
1676 /************************************************************************/
1677 
AddRegion(const char * pszURL,vsi_l_offset nFileOffsetStart,size_t nSize,const char * pData)1678 void  VSICurlFilesystemHandler::AddRegion(const char* pszURL,
1679                                           vsi_l_offset    nFileOffsetStart,
1680                                           size_t          nSize,
1681                                           const char     *pData)
1682 {
1683     CPLMutexHolder oHolder( &hMutex );
1684 
1685     unsigned long   pszURLHash = CPLHashSetHashStr(pszURL);
1686 
1687     CachedRegion* psRegion;
1688     if (nRegions == N_MAX_REGIONS)
1689     {
1690         psRegion = papsRegions[N_MAX_REGIONS-1];
1691         memmove(papsRegions + 1, papsRegions, (N_MAX_REGIONS-1) * sizeof(CachedRegion*));
1692         papsRegions[0] = psRegion;
1693         CPLFree(psRegion->pData);
1694     }
1695     else
1696     {
1697         papsRegions = (CachedRegion**) CPLRealloc(papsRegions, (nRegions + 1) * sizeof(CachedRegion*));
1698         if (nRegions)
1699             memmove(papsRegions + 1, papsRegions, nRegions * sizeof(CachedRegion*));
1700         nRegions ++;
1701         papsRegions[0] = psRegion = (CachedRegion*) CPLMalloc(sizeof(CachedRegion));
1702     }
1703 
1704     psRegion->pszURLHash = pszURLHash;
1705     psRegion->nFileOffsetStart = nFileOffsetStart;
1706     psRegion->nSize = nSize;
1707     psRegion->pData = (nSize) ? (char*) CPLMalloc(nSize) : NULL;
1708     if (nSize)
1709         memcpy(psRegion->pData, pData, nSize);
1710 
1711     if (bUseCacheDisk)
1712         AddRegionToCacheDisk(psRegion);
1713 }
1714 
1715 /************************************************************************/
1716 /*                         GetCachedFileProp()                          */
1717 /************************************************************************/
1718 
GetCachedFileProp(const char * pszURL)1719 CachedFileProp*  VSICurlFilesystemHandler::GetCachedFileProp(const char* pszURL)
1720 {
1721     CPLMutexHolder oHolder( &hMutex );
1722 
1723     CachedFileProp* cachedFileProp = cacheFileSize[pszURL];
1724     if (cachedFileProp == NULL)
1725     {
1726         cachedFileProp = (CachedFileProp*) CPLMalloc(sizeof(CachedFileProp));
1727         cachedFileProp->eExists = EXIST_UNKNOWN;
1728         cachedFileProp->bHastComputedFileSize = FALSE;
1729         cachedFileProp->fileSize = 0;
1730         cachedFileProp->bIsDirectory = FALSE;
1731         cacheFileSize[pszURL] = cachedFileProp;
1732     }
1733 
1734     return cachedFileProp;
1735 }
1736 
1737 /************************************************************************/
1738 /*                                Open()                                */
1739 /************************************************************************/
1740 
Open(const char * pszFilename,const char * pszAccess)1741 VSIVirtualHandle* VSICurlFilesystemHandler::Open( const char *pszFilename,
1742                                                   const char *pszAccess)
1743 {
1744     if (strchr(pszAccess, 'w') != NULL ||
1745         strchr(pszAccess, '+') != NULL)
1746     {
1747         CPLError(CE_Failure, CPLE_AppDefined,
1748                  "Only read-only mode is supported for /vsicurl");
1749         return NULL;
1750     }
1751 
1752     const char* pszOptionVal =
1753         CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
1754     int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
1755                        CSLTestBoolean(pszOptionVal);
1756 
1757     CPLString osFilename(pszFilename);
1758     int bGotFileList = TRUE;
1759     if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
1760         strncmp(CPLGetExtension(osFilename), "zip", 3) != 0 && !bSkipReadDir)
1761     {
1762         char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
1763         int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
1764         CSLDestroy(papszFileList);
1765         if (bGotFileList && !bFound)
1766         {
1767             return NULL;
1768         }
1769     }
1770 
1771     VSICurlHandle* poHandle = new VSICurlHandle( this, osFilename + strlen("/vsicurl/"));
1772     if (!bGotFileList)
1773     {
1774         /* If we didn't get a filelist, check that the file really exists */
1775         if (!poHandle->Exists())
1776         {
1777             delete poHandle;
1778             poHandle = NULL;
1779         }
1780     }
1781 
1782     if( CSLTestBoolean( CPLGetConfigOption( "VSI_CACHE", "FALSE" ) ) )
1783         return VSICreateCachedFile( poHandle );
1784     else
1785         return poHandle;
1786 }
1787 
1788 /************************************************************************/
1789 /*                        VSICurlParserFindEOL()                        */
1790 /*                                                                      */
/*      Small helper function for ParseHTMLFileList() to find           */
1792 /*      the end of a line in the directory listing.  Either a <br>      */
1793 /*      or newline.                                                     */
1794 /************************************************************************/
1795 
VSICurlParserFindEOL(char * pszData)1796 static char *VSICurlParserFindEOL( char *pszData )
1797 
1798 {
1799     while( *pszData != '\0' && *pszData != '\n' && !EQUALN(pszData,"<br>",4) )
1800         pszData++;
1801 
1802     if( *pszData == '\0' )
1803         return NULL;
1804     else
1805         return pszData;
1806 }
1807 
1808 
1809 /************************************************************************/
1810 /*                   VSICurlParseHTMLDateTimeFileSize()                 */
1811 /************************************************************************/
1812 
/* English month names, indexed by zero-based month number (January = 0). */
/* The listing parsers match either the full name or only its first three */
/* letters against the text of a directory listing.                       */
static const char* const apszMonths[] = { "January", "February", "March",
                                          "April", "May", "June", "July",
                                          "August", "September", "October",
                                          "November", "December" };
1817 
VSICurlParseHTMLDateTimeFileSize(const char * pszStr,struct tm & brokendowntime,GUIntBig & nFileSize,GIntBig & mTime)1818 static int VSICurlParseHTMLDateTimeFileSize(const char* pszStr,
1819                                             struct tm& brokendowntime,
1820                                             GUIntBig& nFileSize,
1821                                             GIntBig& mTime)
1822 {
1823     int iMonth;
1824     for(iMonth=0;iMonth<12;iMonth++)
1825     {
1826         char szMonth[32];
1827         szMonth[0] = '-';
1828         memcpy(szMonth + 1, apszMonths[iMonth], 3);
1829         szMonth[4] = '-';
1830         szMonth[5] = '\0';
1831         const char* pszMonthFound = strstr(pszStr, szMonth);
1832         if (pszMonthFound)
1833         {
1834             /* Format of Apache, like in http://download.osgeo.org/gdal/data/gtiff/ */
1835             /* "17-May-2010 12:26" */
1836             if (pszMonthFound - pszStr > 2 && strlen(pszMonthFound) > 15 &&
1837                 pszMonthFound[-2 + 11] == ' ' && pszMonthFound[-2 + 14] == ':')
1838             {
1839                 pszMonthFound -= 2;
1840                 int nDay = atoi(pszMonthFound);
1841                 int nYear = atoi(pszMonthFound + 7);
1842                 int nHour = atoi(pszMonthFound + 12);
1843                 int nMin = atoi(pszMonthFound + 15);
1844                 if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1845                     nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1846                 {
1847                     brokendowntime.tm_year = nYear - 1900;
1848                     brokendowntime.tm_mon = iMonth;
1849                     brokendowntime.tm_mday = nDay;
1850                     brokendowntime.tm_hour = nHour;
1851                     brokendowntime.tm_min = nMin;
1852                     mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1853 
1854                     return TRUE;
1855                 }
1856             }
1857             return FALSE;
1858         }
1859 
1860         /* Microsoft IIS */
1861         szMonth[0] = ' ';
1862         strcpy(szMonth + 1, apszMonths[iMonth]);
1863         strcat(szMonth, " ");
1864         pszMonthFound = strstr(pszStr, szMonth);
1865         if (pszMonthFound)
1866         {
1867             int nLenMonth = strlen(apszMonths[iMonth]);
1868             if (pszMonthFound - pszStr > 2 &&
1869                 pszMonthFound[-1] != ',' &&
1870                 pszMonthFound[-2] != ' ' &&
1871                 (int)strlen(pszMonthFound-2) > 2 + 1 + nLenMonth + 1 + 4 + 1 + 5 + 1 + 4)
1872             {
1873                 /* Format of http://ortho.linz.govt.nz/tifs/1994_95/ */
1874                 /* "        Friday, 21 April 2006 12:05 p.m.     48062343 m35a_fy_94_95.tif" */
1875                 pszMonthFound -= 2;
1876                     int nDay = atoi(pszMonthFound);
1877                 int nCurOffset = 2 + 1 + nLenMonth + 1;
1878                 int nYear = atoi(pszMonthFound + nCurOffset);
1879                 nCurOffset += 4 + 1;
1880                 int nHour = atoi(pszMonthFound + nCurOffset);
1881                 if (nHour < 10)
1882                     nCurOffset += 1 + 1;
1883                 else
1884                     nCurOffset += 2 + 1;
1885                 int nMin = atoi(pszMonthFound + nCurOffset);
1886                 nCurOffset += 2 + 1;
1887                 if (strncmp(pszMonthFound + nCurOffset, "p.m.", 4) == 0)
1888                     nHour += 12;
1889                 else if (strncmp(pszMonthFound + nCurOffset, "a.m.", 4) != 0)
1890                     nHour = -1;
1891                 nCurOffset += 4;
1892 
1893                 const char* pszFilesize = pszMonthFound + nCurOffset;
1894                 while(*pszFilesize == ' ')
1895                     pszFilesize ++;
1896                 if (*pszFilesize >= '1' && *pszFilesize <= '9')
1897                     nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1898 
1899                 if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1900                     nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1901                 {
1902                     brokendowntime.tm_year = nYear - 1900;
1903                     brokendowntime.tm_mon = iMonth;
1904                     brokendowntime.tm_mday = nDay;
1905                     brokendowntime.tm_hour = nHour;
1906                     brokendowntime.tm_min = nMin;
1907                     mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1908 
1909                     return TRUE;
1910                 }
1911                 nFileSize = 0;
1912             }
1913             else if (pszMonthFound - pszStr > 1 &&
1914                         pszMonthFound[-1] == ',' &&
1915                         (int)strlen(pszMonthFound) > 1 + nLenMonth + 1 + 2 + 1 + 1 + 4 + 1 + 5 + 1 + 2)
1916             {
1917                 /* Format of http://publicfiles.dep.state.fl.us/dear/BWR_GIS/2007NWFLULC/ */
1918                 /* "        Sunday, June 20, 2010  6:46 PM    233170905 NWF2007LULCForSDE.zip" */
1919                 pszMonthFound += 1;
1920                 int nCurOffset = nLenMonth + 1;
1921                 int nDay = atoi(pszMonthFound + nCurOffset);
1922                 nCurOffset += 2 + 1 + 1;
1923                 int nYear = atoi(pszMonthFound + nCurOffset);
1924                 nCurOffset += 4 + 1;
1925                 int nHour = atoi(pszMonthFound + nCurOffset);
1926                 nCurOffset += 2 + 1;
1927                 int nMin = atoi(pszMonthFound + nCurOffset);
1928                 nCurOffset += 2 + 1;
1929                 if (strncmp(pszMonthFound + nCurOffset, "PM", 2) == 0)
1930                     nHour += 12;
1931                 else if (strncmp(pszMonthFound + nCurOffset, "AM", 2) != 0)
1932                     nHour = -1;
1933                 nCurOffset += 2;
1934 
1935                 const char* pszFilesize = pszMonthFound + nCurOffset;
1936                 while(*pszFilesize == ' ')
1937                     pszFilesize ++;
1938                 if (*pszFilesize >= '1' && *pszFilesize <= '9')
1939                     nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1940 
1941                 if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1942                     nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1943                 {
1944                     brokendowntime.tm_year = nYear - 1900;
1945                     brokendowntime.tm_mon = iMonth;
1946                     brokendowntime.tm_mday = nDay;
1947                     brokendowntime.tm_hour = nHour;
1948                     brokendowntime.tm_min = nMin;
1949                     mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1950 
1951                     return TRUE;
1952                 }
1953                 nFileSize = 0;
1954             }
1955             return FALSE;
1956         }
1957     }
1958 
1959     return FALSE;
1960 }
1961 
1962 /************************************************************************/
1963 /*                          ParseHTMLFileList()                         */
1964 /*                                                                      */
1965 /*      Parse a file list document and return all the components.       */
1966 /************************************************************************/
1967 
/* Extract the file names of an HTML (or FTP-as-HTML) directory listing.   */
/*                                                                          */
/* pszFilename    "/vsicurl/..." name of the directory that was listed.     */
/* pszData        NUL-terminated text of the listing. It is MODIFIED in     */
/*                place: end-of-line markers and some parts of lines are    */
/*                overwritten with NUL characters while parsing.            */
/* pbGotFileList  set to TRUE once a line identifying the page as a         */
/*                listing of that directory has been seen, FALSE otherwise. */
/*                                                                          */
/* Returns the list taken from oFileList (may be NULL/empty), or NULL as    */
/* soon as a second "<table" is met. For every entry kept, the cached file  */
/* properties (existence, directory flag, time, size) are updated through   */
/* GetCachedFileProp().                                                     */
/* NOTE(review): callers presumably own the returned list and release it    */
/* with CSLDestroy() - confirm against CPLStringList::StealList().          */
char** VSICurlFilesystemHandler::ParseHTMLFileList(const char* pszFilename,
                                       char* pszData,
                                       int* pbGotFileList)
{
    CPLStringList oFileList;
    char* pszLine = pszData;
    char* c;
    int nCount = 0;
    int bIsHTMLDirList = FALSE;
    CPLString osExpectedString;
    CPLString osExpectedString2;
    CPLString osExpectedString3;
    CPLString osExpectedString4;
    CPLString osExpectedString_unescaped;

    *pbGotFileList = FALSE;

    /* pszDir is the part of the URL starting at the first '/' that */
    /* follows the scheme and host, or "" when there is none */
    const char* pszDir;
    if (EQUALN(pszFilename, "/vsicurl/http://", strlen("/vsicurl/http://")))
        pszDir = strchr(pszFilename + strlen("/vsicurl/http://"), '/');
    else if (EQUALN(pszFilename, "/vsicurl/https://", strlen("/vsicurl/https://")))
        pszDir = strchr(pszFilename + strlen("/vsicurl/https://"), '/');
    else
        pszDir = strchr(pszFilename + strlen("/vsicurl/ftp://"), '/');
    if (pszDir == NULL)
        pszDir = "";
    /* Apache */
    osExpectedString = "<title>Index of ";
    osExpectedString += pszDir;
    osExpectedString += "</title>";
    /* shttpd */
    osExpectedString2 = "<title>Index of ";
    osExpectedString2 += pszDir;
    osExpectedString2 += "/</title>";
    /* FTP */
    osExpectedString3 = "FTP Listing of ";
    osExpectedString3 += pszDir;
    osExpectedString3 += "/";
    /* Apache 1.3.33 */
    osExpectedString4 = "<TITLE>Index of ";
    osExpectedString4 += pszDir;
    osExpectedString4 += "</TITLE>";

    /* The listing of http://dds.cr.usgs.gov/srtm/SRTM_image_sample/picture%20examples/ */
    /* has "<title>Index of /srtm/SRTM_image_sample/picture examples</title>" so we must */
    /* try unescaped %20 also */
    /* Similar with http://datalib.usask.ca/gis/Data/Central_America_goodbutdoweown%3f/ */
    if (strchr(pszDir, '%'))
    {
        char* pszUnescapedDir = CPLUnescapeString(pszDir, NULL, CPLES_URL);
        osExpectedString_unescaped = "<title>Index of ";
        osExpectedString_unescaped += pszUnescapedDir;
        osExpectedString_unescaped += "</title>";
        CPLFree(pszUnescapedDir);
    }

    int nCountTable = 0;

    /* Process the document one line at a time. Text located after the */
    /* last end-of-line marker is not examined. */
    while( (c = VSICurlParserFindEOL( pszLine )) != NULL )
    {
        /* Terminate the current line in place */
        *c = 0;

        /* To avoid false positive on pages such as http://www.ngs.noaa.gov/PC_PROD/USGG2009BETA */
        /* This is a heuristic, but normal HTML listings of files have no more than one table */
        if (strstr(pszLine, "<table"))
        {
            nCountTable ++;
            if (nCountTable == 2)
            {
                *pbGotFileList = FALSE;
                return NULL;
            }
        }

        /* Step 1: until the page is identified as a listing of pszDir, */
        /* look for one of the expected titles */
        if (!bIsHTMLDirList &&
            (strstr(pszLine, osExpectedString.c_str()) ||
             strstr(pszLine, osExpectedString2.c_str()) ||
             strstr(pszLine, osExpectedString3.c_str()) ||
             strstr(pszLine, osExpectedString4.c_str()) ||
             (osExpectedString_unescaped.size() != 0 && strstr(pszLine, osExpectedString_unescaped.c_str()))))
        {
            bIsHTMLDirList = TRUE;
            *pbGotFileList = TRUE;
        }
        /* Subversion HTTP listing */
        /* or Microsoft-IIS/6.0 listing (e.g. http://ortho.linz.govt.nz/tifs/2005_06/) */
        else if (!bIsHTMLDirList && strstr(pszLine, "<title>"))
        {
            /* Detect something like : <html><head><title>gdal - Revision 20739: /trunk/autotest/gcore/data</title></head> */
            /* The annoying thing is that what is after ': ' is a subpart of what is after http://server/ */
            char* pszSubDir = strstr(pszLine, ": ");
            if (pszSubDir == NULL)
                /* or <title>ortho.linz.govt.nz - /tifs/2005_06/</title> */
                pszSubDir = strstr(pszLine, "- ");
            if (pszSubDir)
            {
                pszSubDir += 2;
                char* pszTmp = strstr(pszSubDir, "</title>");
                if (pszTmp)
                {
                    /* Cut the title at "</title>", dropping a trailing */
                    /* slash if there is one */
                    if (pszTmp[-1] == '/')
                        pszTmp[-1] = 0;
                    else
                        *pszTmp = 0;
                    if (strstr(pszDir, pszSubDir))
                    {
                        bIsHTMLDirList = TRUE;
                        *pbGotFileList = TRUE;
                    }
                }
            }
        }
        /* Step 2: once in a listing, every line holding a link is a */
        /* candidate file or directory entry */
        else if (bIsHTMLDirList &&
                 (strstr(pszLine, "<a href=\"") != NULL || strstr(pszLine, "<A HREF=\"") != NULL) &&
                 strstr(pszLine, "<a href=\"http://") == NULL && /* exclude absolute links, like to subversion home */
                 strstr(pszLine, "Parent Directory") == NULL /* exclude parent directory */)
        {
            char *beginFilename = strstr(pszLine, "<a href=\"");
            if (beginFilename == NULL)
                beginFilename = strstr(pszLine, "<A HREF=\"");
            beginFilename += strlen("<a href=\"");
            char *endQuote = strchr(beginFilename, '"');
            /* Links starting with "?C=" or "?N=" are skipped */
            if (endQuote && strncmp(beginFilename, "?C=", 3) != 0 && strncmp(beginFilename, "?N=", 3) != 0)
            {
                struct tm brokendowntime;
                memset(&brokendowntime, 0, sizeof(brokendowntime));
                GUIntBig nFileSize = 0;
                GIntBig mTime = 0;

                /* Must be done before the line gets truncated at endQuote */
                /* below, as the whole line is searched for a date. The */
                /* return value is ignored: on failure mTime and nFileSize */
                /* keep the 0 they were initialised with. */
                VSICurlParseHTMLDateTimeFileSize(pszLine,
                                                 brokendowntime,
                                                 nFileSize,
                                                 mTime);

                *endQuote = '\0';

                /* Remove trailing slash, that are returned for directories by */
                /* Apache */
                int bIsDirectory = FALSE;
                if (endQuote[-1] == '/')
                {
                    bIsDirectory = TRUE;
                    endQuote[-1] = 0;
                }

                /* shttpd links include slashes from the root directory. Skip them */
                while(strchr(beginFilename, '/'))
                    beginFilename = strchr(beginFilename, '/') + 1;

                if (strcmp(beginFilename, ".") != 0 &&
                    strcmp(beginFilename, "..") != 0)
                {
                    /* Remember what the listing told us about this entry, */
                    /* keyed by its URL without the "/vsicurl/" prefix */
                    CPLString osCachedFilename =
                        CPLSPrintf("%s/%s", pszFilename + strlen("/vsicurl/"), beginFilename);
                    CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
                    cachedFileProp->eExists = EXIST_YES;
                    cachedFileProp->bIsDirectory = bIsDirectory;
                    cachedFileProp->mTime = mTime;
                    cachedFileProp->bHastComputedFileSize = nFileSize > 0;
                    cachedFileProp->fileSize = nFileSize;

                    oFileList.AddString( beginFilename );
                    if (ENABLE_DEBUG)
                        CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
                                nCount, beginFilename, bIsDirectory, nFileSize,
                                brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
                                brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
                    nCount ++;
                }
            }
        }
        /* Resume just after the character that was overwritten */
        pszLine = c + 1;
    }

    return oFileList.StealList();
}
2144 
2145 
2146 /************************************************************************/
2147 /*                         VSICurlGetToken()                            */
2148 /************************************************************************/
2149 
/* Extract the next space-delimited token of pszCurPtr.                    */
/*                                                                          */
/* Leading spaces are skipped. The token is NUL-terminated in place and     */
/* returned. *ppszNextToken receives the position following the token and   */
/* any spaces after it, or NULL if the token reaches the end of the string. */
/* NULL is returned, and *ppszNextToken left untouched, when pszCurPtr is   */
/* NULL or holds nothing but spaces.                                        */
static char* VSICurlGetToken(char* pszCurPtr, char** ppszNextToken)
{
    if (pszCurPtr == NULL)
        return NULL;

    /* Start of the token: first character that is not a space */
    char* pszToken = pszCurPtr + strspn(pszCurPtr, " ");
    if (*pszToken == '\0')
        return NULL;

    /* End of the token: next space, or end of the string */
    char* pszEnd = pszToken + strcspn(pszToken, " ");
    if (*pszEnd == '\0')
    {
        *ppszNextToken = NULL;
        return pszToken;
    }

    *pszEnd = '\0';
    pszEnd ++;
    *ppszNextToken = pszEnd + strspn(pszEnd, " ");

    return pszToken;
}
2176 
2177 /************************************************************************/
2178 /*                    VSICurlParseFullFTPLine()                         */
2179 /************************************************************************/
2180 
2181 /* Parse lines like the following ones :
2182 -rw-r--r--    1 10003    100           430 Jul 04  2008 COPYING
2183 lrwxrwxrwx    1 ftp      ftp            28 Jun 14 14:13 MPlayer -> mirrors/mplayerhq.hu/MPlayer
2184 -rw-r--r--    1 ftp      ftp      725614592 May 13 20:13 Fedora-15-x86_64-Live-KDE.iso
2185 drwxr-xr-x  280 1003  1003  6656 Aug 26 04:17 gnu
2186 */
2187 
VSICurlParseFullFTPLine(char * pszLine,char * & pszFilename,int & bSizeValid,GUIntBig & nSize,int & bIsDirectory,GIntBig & nUnixTime)2188 static int VSICurlParseFullFTPLine(char* pszLine,
2189                                    char*& pszFilename,
2190                                    int& bSizeValid,
2191                                    GUIntBig& nSize,
2192                                    int& bIsDirectory,
2193                                    GIntBig& nUnixTime)
2194 {
2195     char* pszNextToken = pszLine;
2196     char* pszPermissions = VSICurlGetToken(pszNextToken, &pszNextToken);
2197     if (pszPermissions == NULL || strlen(pszPermissions) != 10)
2198         return FALSE;
2199     bIsDirectory = (pszPermissions[0] == 'd');
2200 
2201     int i;
2202     for(i = 0; i < 3; i++)
2203     {
2204         if (VSICurlGetToken(pszNextToken, &pszNextToken) == NULL)
2205             return FALSE;
2206     }
2207 
2208     char* pszSize = VSICurlGetToken(pszNextToken, &pszNextToken);
2209     if (pszSize == NULL)
2210         return FALSE;
2211 
2212     if (pszPermissions[0] == '-')
2213     {
2214         /* Regular file */
2215         bSizeValid = TRUE;
2216         nSize = CPLScanUIntBig(pszSize, strlen(pszSize));
2217     }
2218 
2219     struct tm brokendowntime;
2220     memset(&brokendowntime, 0, sizeof(brokendowntime));
2221     int bBrokenDownTimeValid = TRUE;
2222 
2223     char* pszMonth = VSICurlGetToken(pszNextToken, &pszNextToken);
2224     if (pszMonth == NULL || strlen(pszMonth) != 3)
2225         return FALSE;
2226 
2227     for(i = 0; i < 12; i++)
2228     {
2229         if (EQUALN(pszMonth, apszMonths[i], 3))
2230             break;
2231     }
2232     if (i < 12)
2233         brokendowntime.tm_mon = i;
2234     else
2235         bBrokenDownTimeValid = FALSE;
2236 
2237     char* pszDay = VSICurlGetToken(pszNextToken, &pszNextToken);
2238     if (pszDay == NULL || (strlen(pszDay) != 1 && strlen(pszDay) != 2))
2239         return FALSE;
2240     int nDay = atoi(pszDay);
2241     if (nDay >= 1 && nDay <= 31)
2242         brokendowntime.tm_mday = nDay;
2243     else
2244         bBrokenDownTimeValid = FALSE;
2245 
2246     char* pszHourOrYear = VSICurlGetToken(pszNextToken, &pszNextToken);
2247     if (pszHourOrYear == NULL || (strlen(pszHourOrYear) != 4 && strlen(pszHourOrYear) != 5))
2248         return FALSE;
2249     if (strlen(pszHourOrYear) == 4)
2250     {
2251         brokendowntime.tm_year = atoi(pszHourOrYear) - 1900;
2252     }
2253     else
2254     {
2255         time_t sTime;
2256         time(&sTime);
2257         struct tm currentBrokendowntime;
2258         CPLUnixTimeToYMDHMS((GIntBig)sTime, &currentBrokendowntime);
2259         brokendowntime.tm_year = currentBrokendowntime.tm_year;
2260         brokendowntime.tm_hour = atoi(pszHourOrYear);
2261         brokendowntime.tm_min = atoi(pszHourOrYear + 3);
2262     }
2263 
2264     if (bBrokenDownTimeValid)
2265         nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime);
2266     else
2267         nUnixTime = 0;
2268 
2269     if (pszNextToken == NULL)
2270         return FALSE;
2271 
2272     pszFilename = pszNextToken;
2273 
2274     char* pszCurPtr = pszFilename;
2275     while( *pszCurPtr != '\0')
2276     {
2277         /* In case of a link, stop before the pointed part of the link */
2278         if (pszPermissions[0] == 'l' && strncmp(pszCurPtr, " -> ", 4) == 0)
2279         {
2280             break;
2281         }
2282         pszCurPtr ++;
2283     }
2284     *pszCurPtr = '\0';
2285 
2286     return TRUE;
2287 }
2288 
2289 /************************************************************************/
2290 /*                          GetFileList()                               */
2291 /************************************************************************/
2292 
GetFileList(const char * pszDirname,int * pbGotFileList)2293 char** VSICurlFilesystemHandler::GetFileList(const char *pszDirname, int* pbGotFileList)
2294 {
2295     if (ENABLE_DEBUG)
2296         CPLDebug("VSICURL", "GetFileList(%s)" , pszDirname);
2297 
2298     *pbGotFileList = FALSE;
2299 
2300     /* HACK (optimization in fact) for MBTiles driver */
2301     if (strstr(pszDirname, ".tiles.mapbox.com") != NULL)
2302         return NULL;
2303 
2304     if (strncmp(pszDirname, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0)
2305     {
2306         WriteFuncStruct sWriteFuncData;
2307         sWriteFuncData.pBuffer = NULL;
2308 
2309         CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2310         osDirname += '/';
2311 
2312         char** papszFileList = NULL;
2313 
2314         for(int iTry=0;iTry<2;iTry++)
2315         {
2316             CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2317             VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2318 
2319             /* On the first pass, we want to try fetching all the possible */
2320             /* informations (filename, file/directory, size). If that */
2321             /* does not work, then try again with CURLOPT_DIRLISTONLY set */
2322             if (iTry == 1)
2323             {
2324         /* 7.16.4 */
2325         #if LIBCURL_VERSION_NUM <= 0x071004
2326                 curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 1);
2327         #elif LIBCURL_VERSION_NUM > 0x071004
2328                 curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 1);
2329         #endif
2330             }
2331 
2332             VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
2333             curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2334             curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2335 
2336             char szCurlErrBuf[CURL_ERROR_SIZE+1];
2337             szCurlErrBuf[0] = '\0';
2338             curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2339 
2340             curl_easy_perform(hCurlHandle);
2341 
2342             if (sWriteFuncData.pBuffer == NULL)
2343                 return NULL;
2344 
2345             char* pszLine = sWriteFuncData.pBuffer;
2346             char* c;
2347             int nCount = 0;
2348 
2349             if (EQUALN(pszLine, "<!DOCTYPE HTML", strlen("<!DOCTYPE HTML")) ||
2350                 EQUALN(pszLine, "<HTML>", 6))
2351             {
2352                 papszFileList = ParseHTMLFileList(pszDirname,
2353                                                   sWriteFuncData.pBuffer,
2354                                                   pbGotFileList);
2355                 break;
2356             }
2357             else if (iTry == 0)
2358             {
2359                 CPLStringList oFileList;
2360                 *pbGotFileList = TRUE;
2361 
2362                 while( (c = strchr(pszLine, '\n')) != NULL)
2363                 {
2364                     *c = 0;
2365                     if (c - pszLine > 0 && c[-1] == '\r')
2366                         c[-1] = 0;
2367 
2368                     char* pszFilename = NULL;
2369                     int bSizeValid = FALSE;
2370                     GUIntBig nFileSize = 0;
2371                     int bIsDirectory = FALSE;
2372                     GIntBig mUnixTime = 0;
2373                     if (!VSICurlParseFullFTPLine(pszLine, pszFilename,
2374                                                  bSizeValid, nFileSize,
2375                                                  bIsDirectory, mUnixTime))
2376                         break;
2377 
2378                     if (strcmp(pszFilename, ".") != 0 &&
2379                         strcmp(pszFilename, "..") != 0)
2380                     {
2381                         CPLString osCachedFilename =
2382                             CPLSPrintf("%s/%s", pszDirname + strlen("/vsicurl/"), pszFilename);
2383                         CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
2384                         cachedFileProp->eExists = EXIST_YES;
2385                         cachedFileProp->bHastComputedFileSize = bSizeValid;
2386                         cachedFileProp->fileSize = nFileSize;
2387                         cachedFileProp->bIsDirectory = bIsDirectory;
2388                         cachedFileProp->mTime = mUnixTime;
2389 
2390                         oFileList.AddString(pszFilename);
2391                         if (ENABLE_DEBUG)
2392                         {
2393                             struct tm brokendowntime;
2394                             CPLUnixTimeToYMDHMS(mUnixTime, &brokendowntime);
2395                             CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
2396                                     nCount, pszFilename, bIsDirectory, nFileSize,
2397                                     brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
2398                                     brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
2399                         }
2400 
2401                         nCount ++;
2402                     }
2403 
2404                     pszLine = c + 1;
2405                 }
2406 
2407                 if (c == NULL)
2408                 {
2409                     papszFileList = oFileList.StealList();
2410                     break;
2411                 }
2412             }
2413             else
2414             {
2415                 CPLStringList oFileList;
2416                 *pbGotFileList = TRUE;
2417 
2418                 while( (c = strchr(pszLine, '\n')) != NULL)
2419                 {
2420                     *c = 0;
2421                     if (c - pszLine > 0 && c[-1] == '\r')
2422                         c[-1] = 0;
2423 
2424                     if (strcmp(pszLine, ".") != 0 &&
2425                         strcmp(pszLine, "..") != 0)
2426                     {
2427                         oFileList.AddString(pszLine);
2428                         if (ENABLE_DEBUG)
2429                             CPLDebug("VSICURL", "File[%d] = %s", nCount, pszLine);
2430                         nCount ++;
2431                     }
2432 
2433                     pszLine = c + 1;
2434                 }
2435 
2436                 papszFileList = oFileList.StealList();
2437             }
2438 
2439             CPLFree(sWriteFuncData.pBuffer);
2440             sWriteFuncData.pBuffer = NULL;
2441         }
2442 
2443         CPLFree(sWriteFuncData.pBuffer);
2444 
2445         return papszFileList;
2446     }
2447 
2448     /* Try to recognize HTML pages that list the content of a directory */
2449     /* Currently this supports what Apache and shttpd can return */
2450     else if (strncmp(pszDirname, "/vsicurl/http://", strlen("/vsicurl/http://")) == 0 ||
2451              strncmp(pszDirname, "/vsicurl/https://", strlen("/vsicurl/https://")) == 0)
2452     {
2453         WriteFuncStruct sWriteFuncData;
2454 
2455         CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2456         osDirname += '/';
2457 
2458     #if LIBCURL_VERSION_NUM < 0x070B00
2459         /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
2460         /* previously set, so we have to reinit the connection handle */
2461         GetCurlHandleFor("");
2462     #endif
2463 
2464         CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2465         VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2466 
2467         curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
2468 
2469         VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
2470         curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2471         curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2472 
2473         char szCurlErrBuf[CURL_ERROR_SIZE+1];
2474         szCurlErrBuf[0] = '\0';
2475         curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2476 
2477         curl_easy_perform(hCurlHandle);
2478 
2479         if (sWriteFuncData.pBuffer == NULL)
2480             return NULL;
2481 
2482         char** papszFileList = ParseHTMLFileList(pszDirname,
2483                                                  sWriteFuncData.pBuffer,
2484                                                  pbGotFileList);
2485 
2486         CPLFree(sWriteFuncData.pBuffer);
2487         return papszFileList;
2488     }
2489 
2490     return NULL;
2491 }
2492 
2493 /************************************************************************/
2494 /*                                Stat()                                */
2495 /************************************************************************/
2496 
Stat(const char * pszFilename,VSIStatBufL * pStatBuf,int nFlags)2497 int VSICurlFilesystemHandler::Stat( const char *pszFilename, VSIStatBufL *pStatBuf,
2498                                     int nFlags )
2499 {
2500     CPLString osFilename(pszFilename);
2501 
2502     memset(pStatBuf, 0, sizeof(VSIStatBufL));
2503 
2504     const char* pszOptionVal =
2505         CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
2506     int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
2507                        CSLTestBoolean(pszOptionVal);
2508 
2509     /* Does it look like a FTP directory ? */
2510     if (strncmp(osFilename, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0 &&
2511         pszFilename[strlen(osFilename) - 1] == '/' && !bSkipReadDir)
2512     {
2513         char** papszFileList = ReadDir(osFilename);
2514         if (papszFileList)
2515         {
2516             pStatBuf->st_mode = S_IFDIR;
2517             pStatBuf->st_size = 0;
2518 
2519             CSLDestroy(papszFileList);
2520 
2521             return 0;
2522         }
2523         return -1;
2524     }
2525     else if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
2526              !EQUALN(CPLGetExtension(osFilename), "zip", 3) &&
2527              strstr(osFilename, ".zip.") != NULL &&
2528              strstr(osFilename, ".ZIP.") != NULL &&
2529              !bSkipReadDir)
2530     {
2531         int bGotFileList;
2532         char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
2533         int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
2534         CSLDestroy(papszFileList);
2535         if (bGotFileList && !bFound)
2536         {
2537             return -1;
2538         }
2539     }
2540 
2541     VSICurlHandle oHandle( this, osFilename + strlen("/vsicurl/"));
2542 
2543     if ( oHandle.IsKnownFileSize() ||
2544          ((nFlags & VSI_STAT_SIZE_FLAG) && !oHandle.IsDirectory() &&
2545            CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_SLOW_GET_SIZE", "YES"))) )
2546         pStatBuf->st_size = oHandle.GetFileSize();
2547 
2548     int nRet = (oHandle.Exists()) ? 0 : -1;
2549     pStatBuf->st_mtime = oHandle.GetMTime();
2550     pStatBuf->st_mode = oHandle.IsDirectory() ? S_IFDIR : S_IFREG;
2551     return nRet;
2552 }
2553 
2554 /************************************************************************/
2555 /*                               Unlink()                               */
2556 /************************************************************************/
2557 
Unlink(CPL_UNUSED const char * pszFilename)2558 int VSICurlFilesystemHandler::Unlink( CPL_UNUSED const char *pszFilename )
2559 {
2560     return -1;
2561 }
2562 
2563 /************************************************************************/
2564 /*                               Rename()                               */
2565 /************************************************************************/
2566 
Rename(CPL_UNUSED const char * oldpath,CPL_UNUSED const char * newpath)2567 int VSICurlFilesystemHandler::Rename( CPL_UNUSED const char *oldpath,
2568                                       CPL_UNUSED const char *newpath )
2569 {
2570     return -1;
2571 }
2572 
2573 /************************************************************************/
2574 /*                               Mkdir()                                */
2575 /************************************************************************/
2576 
Mkdir(CPL_UNUSED const char * pszDirname,CPL_UNUSED long nMode)2577 int VSICurlFilesystemHandler::Mkdir( CPL_UNUSED const char *pszDirname,
2578                                      CPL_UNUSED long nMode )
2579 {
2580     return -1;
2581 }
2582 /************************************************************************/
2583 /*                               Rmdir()                                */
2584 /************************************************************************/
2585 
Rmdir(CPL_UNUSED const char * pszDirname)2586 int VSICurlFilesystemHandler::Rmdir( CPL_UNUSED const char *pszDirname )
2587 {
2588     return -1;
2589 }
2590 
2591 /************************************************************************/
2592 /*                             ReadDir()                                */
2593 /************************************************************************/
2594 
ReadDir(const char * pszDirname,int * pbGotFileList)2595 char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname, int* pbGotFileList )
2596 {
2597     CPLString osDirname(pszDirname);
2598     while (osDirname[strlen(osDirname) - 1] == '/')
2599         osDirname.erase(strlen(osDirname) - 1);
2600 
2601     const char* pszUpDir = strstr(osDirname, "/..");
2602     if (pszUpDir != NULL)
2603     {
2604         int pos = pszUpDir - osDirname.c_str() - 1;
2605         while(pos >= 0 && osDirname[pos] != '/')
2606             pos --;
2607         if (pos >= 1)
2608         {
2609             osDirname = osDirname.substr(0, pos) + CPLString(pszUpDir + 3);
2610         }
2611     }
2612 
2613     CPLMutexHolder oHolder( &hMutex );
2614 
2615     /* If we know the file exists and is not a directory, then don't try to list its content */
2616     CachedFileProp* cachedFileProp = GetCachedFileProp(osDirname.c_str() + strlen("/vsicurl/"));
2617     if (cachedFileProp->eExists == EXIST_YES && !cachedFileProp->bIsDirectory)
2618     {
2619         if (pbGotFileList)
2620             *pbGotFileList = TRUE;
2621         return NULL;
2622     }
2623 
2624     CachedDirList* psCachedDirList = cacheDirList[osDirname];
2625     if (psCachedDirList == NULL)
2626     {
2627         psCachedDirList = (CachedDirList*) CPLMalloc(sizeof(CachedDirList));
2628         psCachedDirList->papszFileList = GetFileList(osDirname, &psCachedDirList->bGotFileList);
2629         cacheDirList[osDirname] = psCachedDirList;
2630     }
2631 
2632     if (pbGotFileList)
2633         *pbGotFileList = psCachedDirList->bGotFileList;
2634 
2635     return CSLDuplicate(psCachedDirList->papszFileList);
2636 }
2637 
2638 /************************************************************************/
2639 /*                             ReadDir()                                */
2640 /************************************************************************/
2641 
ReadDir(const char * pszDirname)2642 char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname )
2643 {
2644     return ReadDir(pszDirname, NULL);
2645 }
2646 
2647 /************************************************************************/
2648 /*                   VSIInstallCurlFileHandler()                        */
2649 /************************************************************************/
2650 
2651 /**
2652  * \brief Install /vsicurl/ HTTP/FTP file system handler (requires libcurl)
2653  *
2654  * A special file handler is installed that allows reading on-the-fly of files
2655  * available through HTTP/FTP web protocols, without downloading the entire file.
2656  *
2657  * Recognized filenames are of the form /vsicurl/http://path/to/remote/resource or
2658  * /vsicurl/ftp://path/to/remote/resource where path/to/remote/resource is the
2659  * URL of a remote resource.
2660  *
2661  * Partial downloads (requires the HTTP server to support random reading) are done
2662  * with a 16 KB granularity by default. If the driver detects sequential reading
2663  * it will progressively increase the chunk size up to 2 MB to improve download
2664  * performance.
2665  *
2666  * The GDAL_HTTP_PROXY, GDAL_HTTP_PROXYUSERPWD and GDAL_PROXY_AUTH configuration options can be
2667  * used to define a proxy server. The syntax to use is the one of Curl CURLOPT_PROXY,
2668  * CURLOPT_PROXYUSERPWD and CURLOPT_PROXYAUTH options.
2669  *
2670  * Starting with GDAL 1.10, the file can be cached in RAM by setting the configuration option
2671  * VSI_CACHE to TRUE. The cache size defaults to 25 MB, but can be modified by setting
2672  * the configuration option VSI_CACHE_SIZE (in bytes).
2673  *
 * VSIStatL() will return the size in the st_size member and the file
 * nature - file or directory - in the st_mode member (the latter only reliable with FTP
 * resources for now).
2677  *
2678  * VSIReadDir() should be able to parse the HTML directory listing returned by the
2679  * most popular web servers, such as Apache or Microsoft IIS.
2680  *
2681  * This special file handler can be combined with other virtual filesystems handlers,
2682  * such as /vsizip. For example, /vsizip//vsicurl/path/to/remote/file.zip/path/inside/zip
2683  *
2684  * @since GDAL 1.8.0
2685  */
VSIInstallCurlFileHandler(void)2686 void VSIInstallCurlFileHandler(void)
2687 {
2688     VSIFileManager::InstallHandler( "/vsicurl/", new VSICurlFilesystemHandler );
2689 }
2690 
2691 /************************************************************************/
2692 /*                      VSICurlInstallReadCbk()                         */
2693 /************************************************************************/
2694 
VSICurlInstallReadCbk(VSILFILE * fp,VSICurlReadCbkFunc pfnReadCbk,void * pfnUserData,int bStopOnInterrruptUntilUninstall)2695 int VSICurlInstallReadCbk (VSILFILE* fp,
2696                            VSICurlReadCbkFunc pfnReadCbk,
2697                            void* pfnUserData,
2698                            int bStopOnInterrruptUntilUninstall)
2699 {
2700     return ((VSICurlHandle*)fp)->InstallReadCbk(pfnReadCbk, pfnUserData,
2701                                                 bStopOnInterrruptUntilUninstall);
2702 }
2703 
2704 
2705 /************************************************************************/
2706 /*                    VSICurlUninstallReadCbk()                         */
2707 /************************************************************************/
2708 
VSICurlUninstallReadCbk(VSILFILE * fp)2709 int VSICurlUninstallReadCbk(VSILFILE* fp)
2710 {
2711     return ((VSICurlHandle*)fp)->UninstallReadCbk();
2712 }
2713 
2714 #endif /* HAVE_CURL */
2715