1 /******************************************************************************
2 * $Id: cpl_vsil_curl.cpp 28798 2015-03-27 19:37:50Z rouault $
3 *
4 * Project: CPL - Common Portability Library
5 * Purpose: Implement VSI large file api for HTTP/FTP files
6 * Author: Even Rouault, even.rouault at mines-paris.org
7 *
8 ******************************************************************************
9 * Copyright (c) 2010-2013, Even Rouault <even dot rouault at mines-paris dot org>
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included
19 * in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 * DEALINGS IN THE SOFTWARE.
28 ****************************************************************************/
29
30 #include "cpl_vsi_virtual.h"
31 #include "cpl_string.h"
32 #include "cpl_multiproc.h"
33 #include "cpl_hash_set.h"
34 #include "cpl_time.h"
35 #include "cpl_vsil_curl_priv.h"
36
37 CPL_CVSID("$Id: cpl_vsil_curl.cpp 28798 2015-03-27 19:37:50Z rouault $");
38
39 #ifndef HAVE_CURL
40
/* Variant compiled when HAVE_CURL is not defined: there is no handler */
/* to register, so the call does nothing.                               */
void VSIInstallCurlFileHandler(void)
{
    /* not supported */
}
45
46 /************************************************************************/
47 /* VSICurlInstallReadCbk() */
48 /************************************************************************/
49
VSICurlInstallReadCbk(CPL_UNUSED VSILFILE * fp,CPL_UNUSED VSICurlReadCbkFunc pfnReadCbk,CPL_UNUSED void * pfnUserData,CPL_UNUSED int bStopOnInterrruptUntilUninstall)50 int VSICurlInstallReadCbk (CPL_UNUSED VSILFILE* fp,
51 CPL_UNUSED VSICurlReadCbkFunc pfnReadCbk,
52 CPL_UNUSED void* pfnUserData,
53 CPL_UNUSED int bStopOnInterrruptUntilUninstall)
54 {
55 return FALSE;
56 }
57
58
59 /************************************************************************/
60 /* VSICurlUninstallReadCbk() */
61 /************************************************************************/
62
VSICurlUninstallReadCbk(CPL_UNUSED VSILFILE * fp)63 int VSICurlUninstallReadCbk(CPL_UNUSED VSILFILE* fp)
64 {
65 return FALSE;
66 }
67
68 #else
69
70 #include <curl/curl.h>
71
72 void CPLHTTPSetOptions(CURL *http_handle, char** papszOptions);
73 void VSICurlSetOptions(CURL* hCurlHandle, const char* pszURL);
74
75 #include <map>
76
77 #define ENABLE_DEBUG 1
78
79 #define N_MAX_REGIONS 1000
80
81 #define DOWNLOAD_CHUNCK_SIZE 16384
82
/* Tri-state answer to "does this remote resource exist?". */
typedef enum
{
    EXIST_UNKNOWN = -1,   /* not determined yet */
    EXIST_NO,             /* determined to be missing */
    EXIST_YES,            /* determined to exist */
} ExistStatus;
89
90 typedef struct
91 {
92 ExistStatus eExists;
93 int bHastComputedFileSize;
94 vsi_l_offset fileSize;
95 int bIsDirectory;
96 time_t mTime;
97 } CachedFileProp;
98
/* Directory listing remembered for one directory URL. */
typedef struct
{
    int             bGotFileList;   /* presumably TRUE when a listing could be obtained - confirm against the users */
    char**          papszFileList;  /* only file name without path */
} CachedDirList;
104
105 typedef struct
106 {
107 unsigned long pszURLHash;
108 vsi_l_offset nFileOffsetStart;
109 size_t nSize;
110 char *pData;
111 } CachedRegion;
112
113
/* Returns the fixed file name used for the on-disk cache. The returned */
/* string is a literal and must not be freed.                            */
static const char* VSICurlGetCacheFileName()
{
    static const char* const pszCacheFileName = "gdal_vsicurl_cache.bin";
    return pszCacheFileName;
}
118
119 /************************************************************************/
120 /* VSICurlFindStringSensitiveExceptEscapeSequences() */
121 /************************************************************************/
122
VSICurlFindStringSensitiveExceptEscapeSequences(char ** papszList,const char * pszTarget)123 static int VSICurlFindStringSensitiveExceptEscapeSequences( char ** papszList,
124 const char * pszTarget )
125
126 {
127 int i;
128
129 if( papszList == NULL )
130 return -1;
131
132 for( i = 0; papszList[i] != NULL; i++ )
133 {
134 const char* pszIter1 = papszList[i];
135 const char* pszIter2 = pszTarget;
136 char ch1, ch2;
137 /* The comparison is case-sensitive, escape for escaped */
138 /* sequences where letters of the hexadecimal sequence */
139 /* can be uppercase or lowercase depending on the quoting algorithm */
140 while(TRUE)
141 {
142 ch1 = *pszIter1;
143 ch2 = *pszIter2;
144 if (ch1 == '\0' || ch2 == '\0')
145 break;
146 if (ch1 == '%' && ch2 == '%' &&
147 pszIter1[1] != '\0' && pszIter1[2] != '\0' &&
148 pszIter2[1] != '\0' && pszIter2[2] != '\0')
149 {
150 if (!EQUALN(pszIter1+1, pszIter2+1, 2))
151 break;
152 pszIter1 += 2;
153 pszIter2 += 2;
154 }
155 if (ch1 != ch2)
156 break;
157 pszIter1 ++;
158 pszIter2 ++;
159 }
160 if (ch1 == ch2 && ch1 == '\0')
161 return i;
162 }
163
164 return -1;
165 }
166
167 /************************************************************************/
168 /* VSICurlIsFileInList() */
169 /************************************************************************/
170
VSICurlIsFileInList(char ** papszList,const char * pszTarget)171 static int VSICurlIsFileInList( char ** papszList, const char * pszTarget )
172 {
173 int nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszTarget);
174 if (nRet >= 0)
175 return nRet;
176
177 /* If we didn't find anything, try to URL-escape the target filename */
178 char* pszEscaped = CPLEscapeString(pszTarget, -1, CPLES_URL);
179 if (strcmp(pszTarget, pszEscaped) != 0)
180 {
181 nRet = VSICurlFindStringSensitiveExceptEscapeSequences(papszList, pszEscaped);
182 }
183 CPLFree(pszEscaped);
184 return nRet;
185 }
186
187 /************************************************************************/
188 /* VSICurlFilesystemHandler */
189 /************************************************************************/
190
191 typedef struct
192 {
193 CPLString osURL;
194 CURL *hCurlHandle;
195 } CachedConnection;
196
197
198 class VSICurlFilesystemHandler : public VSIFilesystemHandler
199 {
200 CPLMutex *hMutex;
201
202 CachedRegion **papsRegions;
203 int nRegions;
204
205 std::map<CPLString, CachedFileProp*> cacheFileSize;
206 std::map<CPLString, CachedDirList*> cacheDirList;
207
208 int bUseCacheDisk;
209
210 /* Per-thread Curl connection cache */
211 std::map<GIntBig, CachedConnection*> mapConnections;
212
213 char** GetFileList(const char *pszFilename, int* pbGotFileList);
214
215 char** ParseHTMLFileList(const char* pszFilename,
216 char* pszData,
217 int* pbGotFileList);
218 public:
219 VSICurlFilesystemHandler();
220 ~VSICurlFilesystemHandler();
221
222 virtual VSIVirtualHandle *Open( const char *pszFilename,
223 const char *pszAccess);
224 virtual int Stat( const char *pszFilename, VSIStatBufL *pStatBuf, int nFlags );
225 virtual int Unlink( const char *pszFilename );
226 virtual int Rename( const char *oldpath, const char *newpath );
227 virtual int Mkdir( const char *pszDirname, long nMode );
228 virtual int Rmdir( const char *pszDirname );
229 virtual char **ReadDir( const char *pszDirname );
230 virtual char **ReadDir( const char *pszDirname, int* pbGotFileList );
231
232
233 const CachedRegion* GetRegion(const char* pszURL,
234 vsi_l_offset nFileOffsetStart);
235
236 void AddRegion(const char* pszURL,
237 vsi_l_offset nFileOffsetStart,
238 size_t nSize,
239 const char *pData);
240
241 CachedFileProp* GetCachedFileProp(const char* pszURL);
242
243 void AddRegionToCacheDisk(CachedRegion* psRegion);
244 const CachedRegion* GetRegionFromCacheDisk(const char* pszURL,
245 vsi_l_offset nFileOffsetStart);
246
247 CURL *GetCurlHandleFor(CPLString osURL);
248 };
249
250 /************************************************************************/
251 /* VSICurlHandle */
252 /************************************************************************/
253
254 class VSICurlHandle : public VSIVirtualHandle
255 {
256 private:
257 VSICurlFilesystemHandler* poFS;
258
259 char* pszURL;
260
261 vsi_l_offset curOffset;
262 vsi_l_offset fileSize;
263 int bHastComputedFileSize;
264 ExistStatus eExists;
265 int bIsDirectory;
266 time_t mTime;
267
268 vsi_l_offset lastDownloadedOffset;
269 int nBlocksToDownload;
270 int bEOF;
271
272 int DownloadRegion(vsi_l_offset startOffset, int nBlocks);
273
274 VSICurlReadCbkFunc pfnReadCbk;
275 void *pReadCbkUserData;
276 int bStopOnInterrruptUntilUninstall;
277 int bInterrupted;
278
279 public:
280
281 VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL);
282 ~VSICurlHandle();
283
284 virtual int Seek( vsi_l_offset nOffset, int nWhence );
285 virtual vsi_l_offset Tell();
286 virtual size_t Read( void *pBuffer, size_t nSize, size_t nMemb );
287 virtual int ReadMultiRange( int nRanges, void ** ppData,
288 const vsi_l_offset* panOffsets, const size_t* panSizes );
289 virtual size_t Write( const void *pBuffer, size_t nSize, size_t nMemb );
290 virtual int Eof();
291 virtual int Flush();
292 virtual int Close();
293
IsKnownFileSize() const294 int IsKnownFileSize() const { return bHastComputedFileSize; }
295 vsi_l_offset GetFileSize();
296 int Exists();
IsDirectory() const297 int IsDirectory() const { return bIsDirectory; }
GetMTime() const298 time_t GetMTime() const { return mTime; }
299
300 int InstallReadCbk(VSICurlReadCbkFunc pfnReadCbk,
301 void* pfnUserData,
302 int bStopOnInterrruptUntilUninstall);
303 int UninstallReadCbk();
304 };
305
306 /************************************************************************/
307 /* VSICurlHandle() */
308 /************************************************************************/
309
VSICurlHandle(VSICurlFilesystemHandler * poFS,const char * pszURL)310 VSICurlHandle::VSICurlHandle(VSICurlFilesystemHandler* poFS, const char* pszURL)
311 {
312 this->poFS = poFS;
313 this->pszURL = CPLStrdup(pszURL);
314
315 curOffset = 0;
316
317 CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
318 eExists = cachedFileProp->eExists;
319 fileSize = cachedFileProp->fileSize;
320 bHastComputedFileSize = cachedFileProp->bHastComputedFileSize;
321 bIsDirectory = cachedFileProp->bIsDirectory;
322 mTime = cachedFileProp->mTime;
323
324 lastDownloadedOffset = -1;
325 nBlocksToDownload = 1;
326 bEOF = FALSE;
327
328 pfnReadCbk = NULL;
329 pReadCbkUserData = NULL;
330 bStopOnInterrruptUntilUninstall = FALSE;
331 bInterrupted = FALSE;
332 }
333
334 /************************************************************************/
335 /* ~VSICurlHandle() */
336 /************************************************************************/
337
~VSICurlHandle()338 VSICurlHandle::~VSICurlHandle()
339 {
340 CPLFree(pszURL);
341 }
342
343 /************************************************************************/
344 /* InstallReadCbk() */
345 /************************************************************************/
346
InstallReadCbk(VSICurlReadCbkFunc pfnReadCbkIn,void * pfnUserDataIn,int bStopOnInterrruptUntilUninstallIn)347 int VSICurlHandle::InstallReadCbk(VSICurlReadCbkFunc pfnReadCbkIn,
348 void* pfnUserDataIn,
349 int bStopOnInterrruptUntilUninstallIn)
350 {
351 if (pfnReadCbk != NULL)
352 return FALSE;
353
354 pfnReadCbk = pfnReadCbkIn;
355 pReadCbkUserData = pfnUserDataIn;
356 bStopOnInterrruptUntilUninstall = bStopOnInterrruptUntilUninstallIn;
357 bInterrupted = FALSE;
358 return TRUE;
359 }
360
361 /************************************************************************/
362 /* UninstallReadCbk() */
363 /************************************************************************/
364
UninstallReadCbk()365 int VSICurlHandle::UninstallReadCbk()
366 {
367 if (pfnReadCbk == NULL)
368 return FALSE;
369
370 pfnReadCbk = NULL;
371 pReadCbkUserData = NULL;
372 bStopOnInterrruptUntilUninstall = FALSE;
373 bInterrupted = FALSE;
374 return TRUE;
375 }
376
377 /************************************************************************/
378 /* Seek() */
379 /************************************************************************/
380
Seek(vsi_l_offset nOffset,int nWhence)381 int VSICurlHandle::Seek( vsi_l_offset nOffset, int nWhence )
382 {
383 if (nWhence == SEEK_SET)
384 {
385 curOffset = nOffset;
386 }
387 else if (nWhence == SEEK_CUR)
388 {
389 curOffset = curOffset + nOffset;
390 }
391 else
392 {
393 curOffset = GetFileSize() + nOffset;
394 }
395 bEOF = FALSE;
396 return 0;
397 }
398
399 /************************************************************************/
400 /* VSICurlSetOptions() */
401 /************************************************************************/
402
VSICurlSetOptions(CURL * hCurlHandle,const char * pszURL)403 void VSICurlSetOptions(CURL* hCurlHandle, const char* pszURL)
404 {
405 curl_easy_setopt(hCurlHandle, CURLOPT_URL, pszURL);
406
407 CPLHTTPSetOptions(hCurlHandle, NULL);
408
409 /* 7.16 */
410 #if LIBCURL_VERSION_NUM >= 0x071000
411 long option = CURLFTPMETHOD_SINGLECWD;
412 curl_easy_setopt(hCurlHandle, CURLOPT_FTP_FILEMETHOD, option);
413 #endif
414
415 /* 7.12.3 */
416 #if LIBCURL_VERSION_NUM > 0x070C03
417 /* ftp://ftp2.cits.rncan.gc.ca/pub/cantopo/250k_tif/ doesn't like EPSV command */
418 curl_easy_setopt(hCurlHandle, CURLOPT_FTP_USE_EPSV, 0);
419 #endif
420
421 curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 0);
422 curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 1);
423 curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 0);
424
425 /* 7.16.4 */
426 #if LIBCURL_VERSION_NUM <= 0x071004
427 curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 0);
428 #elif LIBCURL_VERSION_NUM > 0x071004
429 curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 0);
430 #endif
431
432 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
433 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
434 }
435
436
437 typedef struct
438 {
439 char* pBuffer;
440 size_t nSize;
441 int bIsHTTP;
442 int bIsInHeader;
443 int bMultiRange;
444 vsi_l_offset nStartOffset;
445 vsi_l_offset nEndOffset;
446 int nHTTPCode;
447 vsi_l_offset nContentLength;
448 int bFoundContentRange;
449 int bError;
450 int bDownloadHeaderOnly;
451
452 VSILFILE *fp;
453 VSICurlReadCbkFunc pfnReadCbk;
454 void *pReadCbkUserData;
455 int bInterrupted;
456 } WriteFuncStruct;
457
458 /************************************************************************/
459 /* VSICURLInitWriteFuncStruct() */
460 /************************************************************************/
461
VSICURLInitWriteFuncStruct(WriteFuncStruct * psStruct,VSILFILE * fp,VSICurlReadCbkFunc pfnReadCbk,void * pReadCbkUserData)462 static void VSICURLInitWriteFuncStruct(WriteFuncStruct *psStruct,
463 VSILFILE *fp,
464 VSICurlReadCbkFunc pfnReadCbk,
465 void *pReadCbkUserData)
466 {
467 psStruct->pBuffer = NULL;
468 psStruct->nSize = 0;
469 psStruct->bIsHTTP = FALSE;
470 psStruct->bIsInHeader = TRUE;
471 psStruct->bMultiRange = FALSE;
472 psStruct->nStartOffset = 0;
473 psStruct->nEndOffset = 0;
474 psStruct->nHTTPCode = 0;
475 psStruct->nContentLength = 0;
476 psStruct->bFoundContentRange = FALSE;
477 psStruct->bError = FALSE;
478 psStruct->bDownloadHeaderOnly = FALSE;
479
480 psStruct->fp = fp;
481 psStruct->pfnReadCbk = pfnReadCbk;
482 psStruct->pReadCbkUserData = pReadCbkUserData;
483 psStruct->bInterrupted = FALSE;
484 }
485
486 /************************************************************************/
487 /* VSICurlHandleWriteFunc() */
488 /************************************************************************/
489
VSICurlHandleWriteFunc(void * buffer,size_t count,size_t nmemb,void * req)490 static int VSICurlHandleWriteFunc(void *buffer, size_t count, size_t nmemb, void *req)
491 {
492 WriteFuncStruct* psStruct = (WriteFuncStruct*) req;
493 size_t nSize = count * nmemb;
494
495 char* pNewBuffer = (char*) VSIRealloc(psStruct->pBuffer,
496 psStruct->nSize + nSize + 1);
497 if (pNewBuffer)
498 {
499 psStruct->pBuffer = pNewBuffer;
500 memcpy(psStruct->pBuffer + psStruct->nSize, buffer, nSize);
501 psStruct->pBuffer[psStruct->nSize + nSize] = '\0';
502 if (psStruct->bIsHTTP && psStruct->bIsInHeader)
503 {
504 char* pszLine = psStruct->pBuffer + psStruct->nSize;
505 if (EQUALN(pszLine, "HTTP/1.0 ", 9) ||
506 EQUALN(pszLine, "HTTP/1.1 ", 9))
507 psStruct->nHTTPCode = atoi(pszLine + 9);
508 else if (EQUALN(pszLine, "Content-Length: ", 16))
509 psStruct->nContentLength = CPLScanUIntBig(pszLine + 16,
510 strlen(pszLine + 16));
511 else if (EQUALN(pszLine, "Content-Range: ", 15))
512 psStruct->bFoundContentRange = TRUE;
513
514 /*if (nSize > 2 && pszLine[nSize - 2] == '\r' &&
515 pszLine[nSize - 1] == '\n')
516 {
517 pszLine[nSize - 2] = 0;
518 CPLDebug("VSICURL", "%s", pszLine);
519 pszLine[nSize - 2] = '\r';
520 }*/
521
522 if (pszLine[0] == '\r' || pszLine[0] == '\n')
523 {
524 if (psStruct->bDownloadHeaderOnly)
525 {
526 /* If moved permanently/temporarily, go on. Otherwise stop now*/
527 if (!(psStruct->nHTTPCode == 301 || psStruct->nHTTPCode == 302))
528 return 0;
529 }
530 else
531 {
532 psStruct->bIsInHeader = FALSE;
533
534 /* Detect servers that don't support range downloading */
535 if (psStruct->nHTTPCode == 200 &&
536 !psStruct->bMultiRange &&
537 !psStruct->bFoundContentRange &&
538 (psStruct->nStartOffset != 0 || psStruct->nContentLength > 10 *
539 (psStruct->nEndOffset - psStruct->nStartOffset + 1)))
540 {
541 CPLError(CE_Failure, CPLE_AppDefined,
542 "Range downloading not supported by this server !");
543 psStruct->bError = TRUE;
544 return 0;
545 }
546 }
547 }
548 }
549 else
550 {
551 if (psStruct->pfnReadCbk)
552 {
553 if ( ! psStruct->pfnReadCbk(psStruct->fp, buffer, nSize,
554 psStruct->pReadCbkUserData) )
555 {
556 psStruct->bInterrupted = TRUE;
557 return 0;
558 }
559 }
560 }
561 psStruct->nSize += nSize;
562 return nmemb;
563 }
564 else
565 {
566 return 0;
567 }
568 }
569
570
571 /************************************************************************/
572 /* GetFileSize() */
573 /************************************************************************/
574
GetFileSize()575 vsi_l_offset VSICurlHandle::GetFileSize()
576 {
577 WriteFuncStruct sWriteFuncData;
578 WriteFuncStruct sWriteFuncHeaderData;
579
580 if (bHastComputedFileSize)
581 return fileSize;
582
583 bHastComputedFileSize = TRUE;
584
585 /* Consider that only the files whose extension ends up with one that is */
586 /* listed in CPL_VSIL_CURL_ALLOWED_EXTENSIONS exist on the server */
587 /* This can speeds up dramatically open experience, in case the server */
588 /* cannot return a file list */
589 /* {noext} can be used as a special token to mean file with no extension */
590 /* For example : */
591 /* gdalinfo --config CPL_VSIL_CURL_ALLOWED_EXTENSIONS ".tif" /vsicurl/http://igskmncngs506.cr.usgs.gov/gmted/Global_tiles_GMTED/075darcsec/bln/W030/30N030W_20101117_gmted_bln075.tif */
592 const char* pszAllowedExtensions =
593 CPLGetConfigOption("CPL_VSIL_CURL_ALLOWED_EXTENSIONS", NULL);
594 if (pszAllowedExtensions)
595 {
596 char** papszExtensions = CSLTokenizeString2( pszAllowedExtensions, ", ", 0 );
597 int nURLLen = strlen(pszURL);
598 int bFound = FALSE;
599 for(int i=0;papszExtensions[i] != NULL;i++)
600 {
601 int nExtensionLen = strlen(papszExtensions[i]);
602 if( EQUAL(papszExtensions[i], "{noext}") )
603 {
604 if( nURLLen > 4 && strchr(pszURL + nURLLen - 4, '.') == NULL )
605 {
606 bFound = TRUE;
607 break;
608 }
609 }
610 else if (nURLLen > nExtensionLen &&
611 EQUAL(pszURL + nURLLen - nExtensionLen, papszExtensions[i]))
612 {
613 bFound = TRUE;
614 break;
615 }
616 }
617
618 if (!bFound)
619 {
620 eExists = EXIST_NO;
621 fileSize = 0;
622
623 CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
624 cachedFileProp->bHastComputedFileSize = TRUE;
625 cachedFileProp->fileSize = fileSize;
626 cachedFileProp->eExists = eExists;
627
628 CSLDestroy(papszExtensions);
629
630 return 0;
631 }
632
633 CSLDestroy(papszExtensions);
634 }
635
636 #if LIBCURL_VERSION_NUM < 0x070B00
637 /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
638 /* previously set, so we have to reinit the connection handle */
639 poFS->GetCurlHandleFor("");
640 #endif
641 CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
642
643 VSICurlSetOptions(hCurlHandle, pszURL);
644
645 VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
646
647 /* HACK for mbtiles driver: proper fix would be to auto-detect servers that don't accept HEAD */
648 /* http://a.tiles.mapbox.com/v3/ doesn't accept HEAD, so let's start a GET */
649 /* and interrupt is as soon as the header is found */
650 if (strstr(pszURL, ".tiles.mapbox.com/") != NULL
651 || !CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_USE_HEAD", "YES")))
652 {
653 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
654 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
655
656 sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
657 sWriteFuncHeaderData.bDownloadHeaderOnly = TRUE;
658 }
659 else
660 {
661 curl_easy_setopt(hCurlHandle, CURLOPT_NOBODY, 1);
662 curl_easy_setopt(hCurlHandle, CURLOPT_HTTPGET, 0);
663 curl_easy_setopt(hCurlHandle, CURLOPT_HEADER, 1);
664 }
665
666 /* We need that otherwise OSGEO4W's libcurl issue a dummy range request */
667 /* when doing a HEAD when recycling connections */
668 curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
669
670 /* Bug with older curl versions (<=7.16.4) and FTP. See http://curl.haxx.se/mail/lib-2007-08/0312.html */
671 VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
672 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
673 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
674
675 char szCurlErrBuf[CURL_ERROR_SIZE+1];
676 szCurlErrBuf[0] = '\0';
677 curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
678
679 double dfSize = 0;
680 curl_easy_perform(hCurlHandle);
681
682 eExists = EXIST_UNKNOWN;
683
684 if (strncmp(pszURL, "ftp", 3) == 0)
685 {
686 if (sWriteFuncData.pBuffer != NULL &&
687 strncmp(sWriteFuncData.pBuffer, "Content-Length: ", strlen( "Content-Length: ")) == 0)
688 {
689 const char* pszBuffer = sWriteFuncData.pBuffer + strlen("Content-Length: ");
690 eExists = EXIST_YES;
691 fileSize = CPLScanUIntBig(pszBuffer, sWriteFuncData.nSize - strlen("Content-Length: "));
692 if (ENABLE_DEBUG)
693 CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB,
694 pszURL, fileSize);
695 }
696 }
697
698 if (eExists != EXIST_YES)
699 {
700 CURLcode code = curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &dfSize );
701 if (code == 0)
702 {
703 eExists = EXIST_YES;
704 if (dfSize < 0)
705 fileSize = 0;
706 else
707 fileSize = (GUIntBig)dfSize;
708 }
709 else
710 {
711 eExists = EXIST_NO;
712 fileSize = 0;
713 CPLError(CE_Failure, CPLE_AppDefined, "VSICurlHandle::GetFileSize failed");
714 }
715
716 long response_code = 0;
717 curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
718 if (response_code != 200)
719 {
720 eExists = EXIST_NO;
721 fileSize = 0;
722 }
723
724 /* Try to guess if this is a directory. Generally if this is a directory, */
725 /* curl will retry with an URL with slash added */
726 char *pszEffectiveURL = NULL;
727 curl_easy_getinfo(hCurlHandle, CURLINFO_EFFECTIVE_URL, &pszEffectiveURL);
728 if (pszEffectiveURL != NULL && strncmp(pszURL, pszEffectiveURL, strlen(pszURL)) == 0 &&
729 pszEffectiveURL[strlen(pszURL)] == '/')
730 {
731 eExists = EXIST_YES;
732 fileSize = 0;
733 bIsDirectory = TRUE;
734 }
735
736 if (ENABLE_DEBUG)
737 CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d",
738 pszURL, fileSize, (int)response_code);
739 }
740
741 CPLFree(sWriteFuncData.pBuffer);
742 CPLFree(sWriteFuncHeaderData.pBuffer);
743
744 CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
745 cachedFileProp->bHastComputedFileSize = TRUE;
746 cachedFileProp->fileSize = fileSize;
747 cachedFileProp->eExists = eExists;
748 cachedFileProp->bIsDirectory = bIsDirectory;
749
750 return fileSize;
751 }
752
753 /************************************************************************/
754 /* Exists() */
755 /************************************************************************/
756
Exists()757 int VSICurlHandle::Exists()
758 {
759 if (eExists == EXIST_UNKNOWN)
760 GetFileSize();
761 return eExists == EXIST_YES;
762 }
763
764 /************************************************************************/
765 /* Tell() */
766 /************************************************************************/
767
Tell()768 vsi_l_offset VSICurlHandle::Tell()
769 {
770 return curOffset;
771 }
772
773 /************************************************************************/
774 /* DownloadRegion() */
775 /************************************************************************/
776
DownloadRegion(vsi_l_offset startOffset,int nBlocks)777 int VSICurlHandle::DownloadRegion(vsi_l_offset startOffset, int nBlocks)
778 {
779 WriteFuncStruct sWriteFuncData;
780 WriteFuncStruct sWriteFuncHeaderData;
781
782 if (bInterrupted && bStopOnInterrruptUntilUninstall)
783 return FALSE;
784
785 CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
786 if (cachedFileProp->eExists == EXIST_NO)
787 return FALSE;
788
789 CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
790 VSICurlSetOptions(hCurlHandle, pszURL);
791
792 VSICURLInitWriteFuncStruct(&sWriteFuncData, (VSILFILE*)this, pfnReadCbk, pReadCbkUserData);
793 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
794 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
795
796 VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
797 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
798 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
799 sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
800 sWriteFuncHeaderData.nStartOffset = startOffset;
801 sWriteFuncHeaderData.nEndOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE - 1;
802 /* Some servers don't like we try to read after end-of-file (#5786) */
803 if( cachedFileProp->bHastComputedFileSize &&
804 sWriteFuncHeaderData.nEndOffset >= cachedFileProp->fileSize )
805 {
806 sWriteFuncHeaderData.nEndOffset = cachedFileProp->fileSize - 1;
807 }
808
809 char rangeStr[512];
810 sprintf(rangeStr, CPL_FRMT_GUIB "-" CPL_FRMT_GUIB, startOffset,
811 sWriteFuncHeaderData.nEndOffset);
812
813 if (ENABLE_DEBUG)
814 CPLDebug("VSICURL", "Downloading %s (%s)...", rangeStr, pszURL);
815
816 curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, rangeStr);
817
818 char szCurlErrBuf[CURL_ERROR_SIZE+1];
819 szCurlErrBuf[0] = '\0';
820 curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
821
822 curl_easy_perform(hCurlHandle);
823
824 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
825 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
826 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
827 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
828
829 if (sWriteFuncData.bInterrupted)
830 {
831 bInterrupted = TRUE;
832
833 CPLFree(sWriteFuncData.pBuffer);
834 CPLFree(sWriteFuncHeaderData.pBuffer);
835
836 return FALSE;
837 }
838
839 long response_code = 0;
840 curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
841
842 char *content_type = 0;
843 curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
844
845 if (ENABLE_DEBUG)
846 CPLDebug("VSICURL", "Got reponse_code=%ld", response_code);
847
848 if ((response_code != 200 && response_code != 206 &&
849 response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
850 {
851 if (response_code >= 400 && szCurlErrBuf[0] != '\0')
852 {
853 if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
854 CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
855 (int)response_code, szCurlErrBuf,
856 "Range downloading not supported by this server !");
857 else
858 CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
859 }
860 if (!bHastComputedFileSize && startOffset == 0)
861 {
862 cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
863 cachedFileProp->fileSize = fileSize = 0;
864 cachedFileProp->eExists = eExists = EXIST_NO;
865 }
866 CPLFree(sWriteFuncData.pBuffer);
867 CPLFree(sWriteFuncHeaderData.pBuffer);
868 return FALSE;
869 }
870
871 if (!bHastComputedFileSize && sWriteFuncHeaderData.pBuffer)
872 {
873 /* Try to retrieve the filesize from the HTTP headers */
874 /* if in the form : "Content-Range: bytes x-y/filesize" */
875 char* pszContentRange = strstr(sWriteFuncHeaderData.pBuffer, "Content-Range: bytes ");
876 if (pszContentRange)
877 {
878 char* pszEOL = strchr(pszContentRange, '\n');
879 if (pszEOL)
880 {
881 *pszEOL = 0;
882 pszEOL = strchr(pszContentRange, '\r');
883 if (pszEOL)
884 *pszEOL = 0;
885 char* pszSlash = strchr(pszContentRange, '/');
886 if (pszSlash)
887 {
888 pszSlash ++;
889 fileSize = CPLScanUIntBig(pszSlash, strlen(pszSlash));
890 }
891 }
892 }
893 else if (strncmp(pszURL, "ftp", 3) == 0)
894 {
895 /* Parse 213 answer for FTP protocol */
896 char* pszSize = strstr(sWriteFuncHeaderData.pBuffer, "213 ");
897 if (pszSize)
898 {
899 pszSize += 4;
900 char* pszEOL = strchr(pszSize, '\n');
901 if (pszEOL)
902 {
903 *pszEOL = 0;
904 pszEOL = strchr(pszSize, '\r');
905 if (pszEOL)
906 *pszEOL = 0;
907
908 fileSize = CPLScanUIntBig(pszSize, strlen(pszSize));
909 }
910 }
911 }
912
913 if (fileSize != 0)
914 {
915 eExists = EXIST_YES;
916
917 if (ENABLE_DEBUG)
918 CPLDebug("VSICURL", "GetFileSize(%s)=" CPL_FRMT_GUIB " response_code=%d",
919 pszURL, fileSize, (int)response_code);
920
921 bHastComputedFileSize = cachedFileProp->bHastComputedFileSize = TRUE;
922 cachedFileProp->fileSize = fileSize;
923 cachedFileProp->eExists = eExists;
924 }
925 }
926
927 lastDownloadedOffset = startOffset + nBlocks * DOWNLOAD_CHUNCK_SIZE;
928
929 char* pBuffer = sWriteFuncData.pBuffer;
930 int nSize = sWriteFuncData.nSize;
931
932 if (nSize > nBlocks * DOWNLOAD_CHUNCK_SIZE)
933 {
934 if (ENABLE_DEBUG)
935 CPLDebug("VSICURL", "Got more data than expected : %d instead of %d",
936 nSize, nBlocks * DOWNLOAD_CHUNCK_SIZE);
937 }
938
939 while(nSize > 0)
940 {
941 //if (ENABLE_DEBUG)
942 // CPLDebug("VSICURL", "Add region %d - %d", startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize));
943 poFS->AddRegion(pszURL, startOffset, MIN(DOWNLOAD_CHUNCK_SIZE, nSize), pBuffer);
944 startOffset += DOWNLOAD_CHUNCK_SIZE;
945 pBuffer += DOWNLOAD_CHUNCK_SIZE;
946 nSize -= DOWNLOAD_CHUNCK_SIZE;
947 }
948
949 CPLFree(sWriteFuncData.pBuffer);
950 CPLFree(sWriteFuncHeaderData.pBuffer);
951
952 return TRUE;
953 }
954
955 /************************************************************************/
956 /* Read() */
957 /************************************************************************/
958
Read(void * pBuffer,size_t nSize,size_t nMemb)959 size_t VSICurlHandle::Read( void *pBuffer, size_t nSize, size_t nMemb )
960 {
961 size_t nBufferRequestSize = nSize * nMemb;
962 if (nBufferRequestSize == 0)
963 return 0;
964
965 //CPLDebug("VSICURL", "offset=%d, size=%d", (int)curOffset, (int)nBufferRequestSize);
966
967 vsi_l_offset iterOffset = curOffset;
968 while (nBufferRequestSize)
969 {
970 const CachedRegion* psRegion = poFS->GetRegion(pszURL, iterOffset);
971 if (psRegion == NULL)
972 {
973 vsi_l_offset nOffsetToDownload =
974 (iterOffset / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
975
976 if (nOffsetToDownload == lastDownloadedOffset)
977 {
978 /* In case of consecutive reads (of small size), we use a */
979 /* heuristic that we will read the file sequentially, so */
980 /* we double the requested size to decrease the number of */
981 /* client/server roundtrips. */
982 if (nBlocksToDownload < 100)
983 nBlocksToDownload *= 2;
984 }
985 else
986 {
987 /* Random reads. Cancel the above heuristics */
988 nBlocksToDownload = 1;
989 }
990
991 /* Ensure that we will request at least the number of blocks */
992 /* to satisfy the remaining buffer size to read */
993 vsi_l_offset nEndOffsetToDownload =
994 ((iterOffset + nBufferRequestSize) / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
995 int nMinBlocksToDownload = 1 + (int)
996 ((nEndOffsetToDownload - nOffsetToDownload) / DOWNLOAD_CHUNCK_SIZE);
997 if (nBlocksToDownload < nMinBlocksToDownload)
998 nBlocksToDownload = nMinBlocksToDownload;
999
1000 int i;
1001 /* Avoid reading already cached data */
1002 for(i=1;i<nBlocksToDownload;i++)
1003 {
1004 if (poFS->GetRegion(pszURL, nOffsetToDownload + i * DOWNLOAD_CHUNCK_SIZE) != NULL)
1005 {
1006 nBlocksToDownload = i;
1007 break;
1008 }
1009 }
1010
1011 if( nBlocksToDownload > N_MAX_REGIONS )
1012 nBlocksToDownload = N_MAX_REGIONS;
1013
1014 if (DownloadRegion(nOffsetToDownload, nBlocksToDownload) == FALSE)
1015 {
1016 if (!bInterrupted)
1017 bEOF = TRUE;
1018 return 0;
1019 }
1020 psRegion = poFS->GetRegion(pszURL, iterOffset);
1021 }
1022 if (psRegion == NULL || psRegion->pData == NULL)
1023 {
1024 bEOF = TRUE;
1025 return 0;
1026 }
1027 int nToCopy = (int) MIN(nBufferRequestSize, psRegion->nSize - (iterOffset - psRegion->nFileOffsetStart));
1028 memcpy(pBuffer, psRegion->pData + iterOffset - psRegion->nFileOffsetStart,
1029 nToCopy);
1030 pBuffer = (char*) pBuffer + nToCopy;
1031 iterOffset += nToCopy;
1032 nBufferRequestSize -= nToCopy;
1033 if (psRegion->nSize != DOWNLOAD_CHUNCK_SIZE && nBufferRequestSize != 0)
1034 {
1035 break;
1036 }
1037 }
1038
1039 size_t ret = (size_t) ((iterOffset - curOffset) / nSize);
1040 if (ret != nMemb)
1041 bEOF = TRUE;
1042
1043 curOffset = iterOffset;
1044
1045 return ret;
1046 }
1047
1048
1049 /************************************************************************/
1050 /* ReadMultiRange() */
1051 /************************************************************************/
1052
ReadMultiRange(int nRanges,void ** ppData,const vsi_l_offset * panOffsets,const size_t * panSizes)1053 int VSICurlHandle::ReadMultiRange( int nRanges, void ** ppData,
1054 const vsi_l_offset* panOffsets,
1055 const size_t* panSizes )
1056 {
1057 WriteFuncStruct sWriteFuncData;
1058 WriteFuncStruct sWriteFuncHeaderData;
1059
1060 if (bInterrupted && bStopOnInterrruptUntilUninstall)
1061 return FALSE;
1062
1063 CachedFileProp* cachedFileProp = poFS->GetCachedFileProp(pszURL);
1064 if (cachedFileProp->eExists == EXIST_NO)
1065 return -1;
1066
1067 CPLString osRanges, osFirstRange, osLastRange;
1068 int i;
1069 int nMergedRanges = 0;
1070 vsi_l_offset nTotalReqSize = 0;
1071 for(i=0;i<nRanges;i++)
1072 {
1073 CPLString osCurRange;
1074 if (i != 0)
1075 osRanges.append(",");
1076 osCurRange = CPLSPrintf(CPL_FRMT_GUIB "-", panOffsets[i]);
1077 while (i + 1 < nRanges && panOffsets[i] + panSizes[i] == panOffsets[i+1])
1078 {
1079 nTotalReqSize += panSizes[i];
1080 i ++;
1081 }
1082 nTotalReqSize += panSizes[i];
1083 osCurRange.append(CPLSPrintf(CPL_FRMT_GUIB, panOffsets[i] + panSizes[i]-1));
1084 nMergedRanges ++;
1085
1086 osRanges += osCurRange;
1087
1088 if (nMergedRanges == 1)
1089 osFirstRange = osCurRange;
1090 osLastRange = osCurRange;
1091 }
1092
1093 const char* pszMaxRanges = CPLGetConfigOption("CPL_VSIL_CURL_MAX_RANGES", "250");
1094 int nMaxRanges = atoi(pszMaxRanges);
1095 if (nMaxRanges <= 0)
1096 nMaxRanges = 250;
1097 if (nMergedRanges > nMaxRanges)
1098 {
1099 int nHalf = nRanges / 2;
1100 int nRet = ReadMultiRange(nHalf, ppData, panOffsets, panSizes);
1101 if (nRet != 0)
1102 return nRet;
1103 return ReadMultiRange(nRanges - nHalf, ppData + nHalf, panOffsets + nHalf, panSizes + nHalf);
1104 }
1105
1106 CURL* hCurlHandle = poFS->GetCurlHandleFor(pszURL);
1107 VSICurlSetOptions(hCurlHandle, pszURL);
1108
1109 VSICURLInitWriteFuncStruct(&sWriteFuncData, (VSILFILE*)this, pfnReadCbk, pReadCbkUserData);
1110 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
1111 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
1112
1113 VSICURLInitWriteFuncStruct(&sWriteFuncHeaderData, NULL, NULL, NULL);
1114 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, &sWriteFuncHeaderData);
1115 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, VSICurlHandleWriteFunc);
1116 sWriteFuncHeaderData.bIsHTTP = strncmp(pszURL, "http", 4) == 0;
1117 sWriteFuncHeaderData.bMultiRange = nMergedRanges > 1;
1118 if (nMergedRanges == 1)
1119 {
1120 sWriteFuncHeaderData.nStartOffset = panOffsets[0];
1121 sWriteFuncHeaderData.nEndOffset = panOffsets[0] + nTotalReqSize-1;
1122 }
1123
1124 if (ENABLE_DEBUG)
1125 {
1126 if (nMergedRanges == 1)
1127 CPLDebug("VSICURL", "Downloading %s (%s)...", osRanges.c_str(), pszURL);
1128 else
1129 CPLDebug("VSICURL", "Downloading %s, ..., %s (" CPL_FRMT_GUIB " bytes, %s)...",
1130 osFirstRange.c_str(), osLastRange.c_str(), (GUIntBig)nTotalReqSize, pszURL);
1131 }
1132
1133 curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, osRanges.c_str());
1134
1135 char szCurlErrBuf[CURL_ERROR_SIZE+1];
1136 szCurlErrBuf[0] = '\0';
1137 curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
1138
1139 curl_easy_perform(hCurlHandle);
1140
1141 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, NULL);
1142 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, NULL);
1143 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERDATA, NULL);
1144 curl_easy_setopt(hCurlHandle, CURLOPT_HEADERFUNCTION, NULL);
1145
1146 if (sWriteFuncData.bInterrupted)
1147 {
1148 bInterrupted = TRUE;
1149
1150 CPLFree(sWriteFuncData.pBuffer);
1151 CPLFree(sWriteFuncHeaderData.pBuffer);
1152
1153 return -1;
1154 }
1155
1156 long response_code = 0;
1157 curl_easy_getinfo(hCurlHandle, CURLINFO_HTTP_CODE, &response_code);
1158
1159 char *content_type = 0;
1160 curl_easy_getinfo(hCurlHandle, CURLINFO_CONTENT_TYPE, &content_type);
1161
1162 if ((response_code != 200 && response_code != 206 &&
1163 response_code != 225 && response_code != 226 && response_code != 426) || sWriteFuncHeaderData.bError)
1164 {
1165 if (response_code >= 400 && szCurlErrBuf[0] != '\0')
1166 {
1167 if (strcmp(szCurlErrBuf, "Couldn't use REST") == 0)
1168 CPLError(CE_Failure, CPLE_AppDefined, "%d: %s, %s",
1169 (int)response_code, szCurlErrBuf,
1170 "Range downloading not supported by this server !");
1171 else
1172 CPLError(CE_Failure, CPLE_AppDefined, "%d: %s", (int)response_code, szCurlErrBuf);
1173 }
1174 /*
1175 if (!bHastComputedFileSize && startOffset == 0)
1176 {
1177 cachedFileProp->bHastComputedFileSize = bHastComputedFileSize = TRUE;
1178 cachedFileProp->fileSize = fileSize = 0;
1179 cachedFileProp->eExists = eExists = EXIST_NO;
1180 }
1181 */
1182 CPLFree(sWriteFuncData.pBuffer);
1183 CPLFree(sWriteFuncHeaderData.pBuffer);
1184 return -1;
1185 }
1186
1187 char* pBuffer = sWriteFuncData.pBuffer;
1188 int nSize = sWriteFuncData.nSize;
1189
1190 int nRet = -1;
1191 char* pszBoundary;
1192 CPLString osBoundary;
1193 char *pszNext;
1194 int iRange = 0;
1195 int iPart = 0;
1196 char* pszEOL;
1197
1198 /* -------------------------------------------------------------------- */
1199 /* No multipart if a single range has been requested */
1200 /* -------------------------------------------------------------------- */
1201
1202 if (nMergedRanges == 1)
1203 {
1204 int nAccSize = 0;
1205 if ((vsi_l_offset)nSize < nTotalReqSize)
1206 goto end;
1207
1208 for(i=0;i<nRanges;i++)
1209 {
1210 memcpy(ppData[i], pBuffer + nAccSize, panSizes[i]);
1211 nAccSize += panSizes[i];
1212 }
1213
1214 nRet = 0;
1215 goto end;
1216 }
1217
1218 /* -------------------------------------------------------------------- */
1219 /* Extract boundary name */
1220 /* -------------------------------------------------------------------- */
1221
1222 pszBoundary = strstr(sWriteFuncHeaderData.pBuffer,
1223 "Content-Type: multipart/byteranges; boundary=");
1224 if( pszBoundary == NULL )
1225 {
1226 CPLError( CE_Failure, CPLE_AppDefined, "Could not find '%s'",
1227 "Content-Type: multipart/byteranges; boundary=" );
1228 goto end;
1229 }
1230
1231 pszBoundary += strlen( "Content-Type: multipart/byteranges; boundary=" );
1232
1233 pszEOL = strchr(pszBoundary, '\r');
1234 if (pszEOL)
1235 *pszEOL = 0;
1236 pszEOL = strchr(pszBoundary, '\n');
1237 if (pszEOL)
1238 *pszEOL = 0;
1239
1240 /* Remove optional double-quote character around boundary name */
1241 if (pszBoundary[0] == '"')
1242 {
1243 pszBoundary ++;
1244 char* pszLastDoubleQuote = strrchr(pszBoundary, '"');
1245 if (pszLastDoubleQuote)
1246 *pszLastDoubleQuote = 0;
1247 }
1248
1249 osBoundary = "--";
1250 osBoundary += pszBoundary;
1251
1252 /* -------------------------------------------------------------------- */
1253 /* Find the start of the first chunk. */
1254 /* -------------------------------------------------------------------- */
1255 pszNext = strstr(pBuffer,osBoundary.c_str());
1256 if( pszNext == NULL )
1257 {
1258 CPLError( CE_Failure, CPLE_AppDefined, "No parts found." );
1259 goto end;
1260 }
1261
1262 pszNext += strlen(osBoundary);
1263 while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1264 pszNext++;
1265 if( *pszNext == '\r' )
1266 pszNext++;
1267 if( *pszNext == '\n' )
1268 pszNext++;
1269
1270 /* -------------------------------------------------------------------- */
1271 /* Loop over parts... */
1272 /* -------------------------------------------------------------------- */
1273 while( iPart < nRanges )
1274 {
1275 /* -------------------------------------------------------------------- */
1276 /* Collect headers. */
1277 /* -------------------------------------------------------------------- */
1278 int bExpectedRange = FALSE;
1279
1280 while( *pszNext != '\n' && *pszNext != '\r' && *pszNext != '\0' )
1281 {
1282 char *pszEOL = strstr(pszNext,"\n");
1283
1284 if( pszEOL == NULL )
1285 {
1286 CPLError(CE_Failure, CPLE_AppDefined,
1287 "Error while parsing multipart content (at line %d)", __LINE__);
1288 goto end;
1289 }
1290
1291 *pszEOL = '\0';
1292 int bRestoreAntislashR = FALSE;
1293 if (pszEOL - pszNext > 1 && pszEOL[-1] == '\r')
1294 {
1295 bRestoreAntislashR = TRUE;
1296 pszEOL[-1] = '\0';
1297 }
1298
1299 if (EQUALN(pszNext, "Content-Range: bytes ", strlen("Content-Range: bytes ")))
1300 {
1301 bExpectedRange = TRUE; /* FIXME */
1302 }
1303
1304 if (bRestoreAntislashR)
1305 pszEOL[-1] = '\r';
1306 *pszEOL = '\n';
1307
1308 pszNext = pszEOL + 1;
1309 }
1310
1311 if (!bExpectedRange)
1312 {
1313 CPLError(CE_Failure, CPLE_AppDefined,
1314 "Error while parsing multipart content (at line %d)", __LINE__);
1315 goto end;
1316 }
1317
1318 if( *pszNext == '\r' )
1319 pszNext++;
1320 if( *pszNext == '\n' )
1321 pszNext++;
1322
1323 /* -------------------------------------------------------------------- */
1324 /* Work out the data block size. */
1325 /* -------------------------------------------------------------------- */
1326 size_t nBytesAvail = nSize - (pszNext - pBuffer);
1327
1328 while(TRUE)
1329 {
1330 if (nBytesAvail < panSizes[iRange])
1331 {
1332 CPLError(CE_Failure, CPLE_AppDefined,
1333 "Error while parsing multipart content (at line %d)", __LINE__);
1334 goto end;
1335 }
1336
1337 memcpy(ppData[iRange], pszNext, panSizes[iRange]);
1338 pszNext += panSizes[iRange];
1339 nBytesAvail -= panSizes[iRange];
1340 if( iRange + 1 < nRanges &&
1341 panOffsets[iRange] + panSizes[iRange] == panOffsets[iRange + 1] )
1342 {
1343 iRange++;
1344 }
1345 else
1346 break;
1347 }
1348
1349 iPart ++;
1350 iRange ++;
1351
1352 while( nBytesAvail > 0
1353 && (*pszNext != '-'
1354 || strncmp(pszNext,osBoundary,strlen(osBoundary)) != 0) )
1355 {
1356 pszNext++;
1357 nBytesAvail--;
1358 }
1359
1360 if( nBytesAvail == 0 )
1361 {
1362 CPLError(CE_Failure, CPLE_AppDefined,
1363 "Error while parsing multipart content (at line %d)", __LINE__);
1364 goto end;
1365 }
1366
1367 pszNext += strlen(osBoundary);
1368 if( strncmp(pszNext,"--",2) == 0 )
1369 {
1370 /* End of multipart */
1371 break;
1372 }
1373
1374 if( *pszNext == '\r' )
1375 pszNext++;
1376 if( *pszNext == '\n' )
1377 pszNext++;
1378 else
1379 {
1380 CPLError(CE_Failure, CPLE_AppDefined,
1381 "Error while parsing multipart content (at line %d)", __LINE__);
1382 goto end;
1383 }
1384 }
1385
1386 if (iPart == nMergedRanges)
1387 nRet = 0;
1388 else
1389 CPLError(CE_Failure, CPLE_AppDefined,
1390 "Got only %d parts, where %d were expected", iPart, nMergedRanges);
1391
1392 end:
1393 CPLFree(sWriteFuncData.pBuffer);
1394 CPLFree(sWriteFuncHeaderData.pBuffer);
1395
1396 return nRet;
1397 }
1398
1399 /************************************************************************/
1400 /* Write() */
1401 /************************************************************************/
1402
Write(CPL_UNUSED const void * pBuffer,CPL_UNUSED size_t nSize,CPL_UNUSED size_t nMemb)1403 size_t VSICurlHandle::Write( CPL_UNUSED const void *pBuffer,
1404 CPL_UNUSED size_t nSize,
1405 CPL_UNUSED size_t nMemb )
1406 {
1407 return 0;
1408 }
1409
1410 /************************************************************************/
1411 /* Eof() */
1412 /************************************************************************/
1413
1414
Eof()1415 int VSICurlHandle::Eof()
1416 {
1417 return bEOF;
1418 }
1419
1420 /************************************************************************/
1421 /* Flush() */
1422 /************************************************************************/
1423
Flush()1424 int VSICurlHandle::Flush()
1425 {
1426 return 0;
1427 }
1428
1429 /************************************************************************/
1430 /* Close() */
1431 /************************************************************************/
1432
Close()1433 int VSICurlHandle::Close()
1434 {
1435 return 0;
1436 }
1437
1438
1439
1440
1441 /************************************************************************/
1442 /* VSICurlFilesystemHandler() */
1443 /************************************************************************/
1444
VSICurlFilesystemHandler()1445 VSICurlFilesystemHandler::VSICurlFilesystemHandler()
1446 {
1447 hMutex = NULL;
1448 papsRegions = NULL;
1449 nRegions = 0;
1450 bUseCacheDisk = CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_USE_CACHE", "NO"));
1451 }
1452
1453 /************************************************************************/
1454 /* ~VSICurlFilesystemHandler() */
1455 /************************************************************************/
1456
~VSICurlFilesystemHandler()1457 VSICurlFilesystemHandler::~VSICurlFilesystemHandler()
1458 {
1459 int i;
1460 for(i=0;i<nRegions;i++)
1461 {
1462 CPLFree(papsRegions[i]->pData);
1463 CPLFree(papsRegions[i]);
1464 }
1465 CPLFree(papsRegions);
1466
1467 std::map<CPLString, CachedFileProp*>::const_iterator iterCacheFileSize;
1468
1469 for( iterCacheFileSize = cacheFileSize.begin(); iterCacheFileSize != cacheFileSize.end(); iterCacheFileSize++ )
1470 {
1471 CPLFree(iterCacheFileSize->second);
1472 }
1473
1474 std::map<CPLString, CachedDirList*>::const_iterator iterCacheDirList;
1475
1476 for( iterCacheDirList = cacheDirList.begin(); iterCacheDirList != cacheDirList.end(); iterCacheDirList++ )
1477 {
1478 CSLDestroy(iterCacheDirList->second->papszFileList);
1479 CPLFree(iterCacheDirList->second);
1480 }
1481
1482 std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1483 for( iterConnections = mapConnections.begin(); iterConnections != mapConnections.end(); iterConnections++ )
1484 {
1485 curl_easy_cleanup(iterConnections->second->hCurlHandle);
1486 delete iterConnections->second;
1487 }
1488
1489 if( hMutex != NULL )
1490 CPLDestroyMutex( hMutex );
1491 hMutex = NULL;
1492 }
1493
1494 /************************************************************************/
1495 /* GetCurlHandleFor() */
1496 /************************************************************************/
1497
GetCurlHandleFor(CPLString osURL)1498 CURL* VSICurlFilesystemHandler::GetCurlHandleFor(CPLString osURL)
1499 {
1500 CPLMutexHolder oHolder( &hMutex );
1501
1502 std::map<GIntBig, CachedConnection*>::const_iterator iterConnections;
1503
1504 iterConnections = mapConnections.find(CPLGetPID());
1505 if (iterConnections == mapConnections.end())
1506 {
1507 CURL* hCurlHandle = curl_easy_init();
1508 CachedConnection* psCachedConnection = new CachedConnection;
1509 psCachedConnection->osURL = osURL;
1510 psCachedConnection->hCurlHandle = hCurlHandle;
1511 mapConnections[CPLGetPID()] = psCachedConnection;
1512 return hCurlHandle;
1513 }
1514 else
1515 {
1516 CachedConnection* psCachedConnection = iterConnections->second;
1517 if (osURL == psCachedConnection->osURL)
1518 return psCachedConnection->hCurlHandle;
1519
1520 const char* pszURL = osURL.c_str();
1521 const char* pszEndOfServ = strchr(pszURL, '.');
1522 if (pszEndOfServ != NULL)
1523 pszEndOfServ = strchr(pszEndOfServ, '/');
1524 if (pszEndOfServ == NULL)
1525 pszURL = pszURL + strlen(pszURL);
1526 int bReinitConnection = strncmp(psCachedConnection->osURL,
1527 pszURL, pszEndOfServ-pszURL) != 0;
1528
1529 if (bReinitConnection)
1530 {
1531 if (psCachedConnection->hCurlHandle)
1532 curl_easy_cleanup(psCachedConnection->hCurlHandle);
1533 psCachedConnection->hCurlHandle = curl_easy_init();
1534 }
1535 psCachedConnection->osURL = osURL;
1536
1537 return psCachedConnection->hCurlHandle;
1538 }
1539 }
1540
1541
1542 /************************************************************************/
1543 /* GetRegionFromCacheDisk() */
1544 /************************************************************************/
1545
1546 const CachedRegion*
GetRegionFromCacheDisk(const char * pszURL,vsi_l_offset nFileOffsetStart)1547 VSICurlFilesystemHandler::GetRegionFromCacheDisk(const char* pszURL,
1548 vsi_l_offset nFileOffsetStart)
1549 {
1550 nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1551 VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "rb");
1552 if (fp)
1553 {
1554 unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1555 unsigned long pszURLHashCached;
1556 vsi_l_offset nFileOffsetStartCached;
1557 size_t nSizeCached;
1558 while(TRUE)
1559 {
1560 if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1561 break;
1562 VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1563 VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1564 if (pszURLHash == pszURLHashCached &&
1565 nFileOffsetStart == nFileOffsetStartCached)
1566 {
1567 if (ENABLE_DEBUG)
1568 CPLDebug("VSICURL", "Got data at offset " CPL_FRMT_GUIB " from disk" , nFileOffsetStart);
1569 if (nSizeCached)
1570 {
1571 char* pBuffer = (char*) CPLMalloc(nSizeCached);
1572 VSIFReadL(pBuffer, 1, nSizeCached, fp);
1573 AddRegion(pszURL, nFileOffsetStart, nSizeCached, pBuffer);
1574 CPLFree(pBuffer);
1575 }
1576 else
1577 {
1578 AddRegion(pszURL, nFileOffsetStart, 0, NULL);
1579 }
1580 VSIFCloseL(fp);
1581 return GetRegion(pszURL, nFileOffsetStart);
1582 }
1583 else
1584 {
1585 VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1586 }
1587 }
1588 VSIFCloseL(fp);
1589 }
1590 return NULL;
1591 }
1592
1593
1594 /************************************************************************/
1595 /* AddRegionToCacheDisk() */
1596 /************************************************************************/
1597
AddRegionToCacheDisk(CachedRegion * psRegion)1598 void VSICurlFilesystemHandler::AddRegionToCacheDisk(CachedRegion* psRegion)
1599 {
1600 VSILFILE* fp = VSIFOpenL(VSICurlGetCacheFileName(), "r+b");
1601 if (fp)
1602 {
1603 unsigned long pszURLHashCached;
1604 vsi_l_offset nFileOffsetStartCached;
1605 size_t nSizeCached;
1606 while(TRUE)
1607 {
1608 if (VSIFReadL(&pszURLHashCached, 1, sizeof(unsigned long), fp) == 0)
1609 break;
1610 VSIFReadL(&nFileOffsetStartCached, 1, sizeof(vsi_l_offset), fp);
1611 VSIFReadL(&nSizeCached, 1, sizeof(size_t), fp);
1612 if (psRegion->pszURLHash == pszURLHashCached &&
1613 psRegion->nFileOffsetStart == nFileOffsetStartCached)
1614 {
1615 CPLAssert(psRegion->nSize == nSizeCached);
1616 VSIFCloseL(fp);
1617 return;
1618 }
1619 else
1620 {
1621 VSIFSeekL(fp, nSizeCached, SEEK_CUR);
1622 }
1623 }
1624 }
1625 else
1626 {
1627 fp = VSIFOpenL(VSICurlGetCacheFileName(), "wb");
1628 }
1629 if (fp)
1630 {
1631 if (ENABLE_DEBUG)
1632 CPLDebug("VSICURL", "Write data at offset " CPL_FRMT_GUIB " to disk" , psRegion->nFileOffsetStart);
1633 VSIFWriteL(&psRegion->pszURLHash, 1, sizeof(unsigned long), fp);
1634 VSIFWriteL(&psRegion->nFileOffsetStart, 1, sizeof(vsi_l_offset), fp);
1635 VSIFWriteL(&psRegion->nSize, 1, sizeof(size_t), fp);
1636 if (psRegion->nSize)
1637 VSIFWriteL(psRegion->pData, 1, psRegion->nSize, fp);
1638
1639 VSIFCloseL(fp);
1640 }
1641 return;
1642 }
1643
1644
1645 /************************************************************************/
1646 /* GetRegion() */
1647 /************************************************************************/
1648
GetRegion(const char * pszURL,vsi_l_offset nFileOffsetStart)1649 const CachedRegion* VSICurlFilesystemHandler::GetRegion(const char* pszURL,
1650 vsi_l_offset nFileOffsetStart)
1651 {
1652 CPLMutexHolder oHolder( &hMutex );
1653
1654 unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1655
1656 nFileOffsetStart = (nFileOffsetStart / DOWNLOAD_CHUNCK_SIZE) * DOWNLOAD_CHUNCK_SIZE;
1657 int i;
1658 for(i=0;i<nRegions;i++)
1659 {
1660 CachedRegion* psRegion = papsRegions[i];
1661 if (psRegion->pszURLHash == pszURLHash &&
1662 nFileOffsetStart == psRegion->nFileOffsetStart)
1663 {
1664 memmove(papsRegions + 1, papsRegions, i * sizeof(CachedRegion*));
1665 papsRegions[0] = psRegion;
1666 return psRegion;
1667 }
1668 }
1669 if (bUseCacheDisk)
1670 return GetRegionFromCacheDisk(pszURL, nFileOffsetStart);
1671 return NULL;
1672 }
1673
1674 /************************************************************************/
1675 /* AddRegion() */
1676 /************************************************************************/
1677
AddRegion(const char * pszURL,vsi_l_offset nFileOffsetStart,size_t nSize,const char * pData)1678 void VSICurlFilesystemHandler::AddRegion(const char* pszURL,
1679 vsi_l_offset nFileOffsetStart,
1680 size_t nSize,
1681 const char *pData)
1682 {
1683 CPLMutexHolder oHolder( &hMutex );
1684
1685 unsigned long pszURLHash = CPLHashSetHashStr(pszURL);
1686
1687 CachedRegion* psRegion;
1688 if (nRegions == N_MAX_REGIONS)
1689 {
1690 psRegion = papsRegions[N_MAX_REGIONS-1];
1691 memmove(papsRegions + 1, papsRegions, (N_MAX_REGIONS-1) * sizeof(CachedRegion*));
1692 papsRegions[0] = psRegion;
1693 CPLFree(psRegion->pData);
1694 }
1695 else
1696 {
1697 papsRegions = (CachedRegion**) CPLRealloc(papsRegions, (nRegions + 1) * sizeof(CachedRegion*));
1698 if (nRegions)
1699 memmove(papsRegions + 1, papsRegions, nRegions * sizeof(CachedRegion*));
1700 nRegions ++;
1701 papsRegions[0] = psRegion = (CachedRegion*) CPLMalloc(sizeof(CachedRegion));
1702 }
1703
1704 psRegion->pszURLHash = pszURLHash;
1705 psRegion->nFileOffsetStart = nFileOffsetStart;
1706 psRegion->nSize = nSize;
1707 psRegion->pData = (nSize) ? (char*) CPLMalloc(nSize) : NULL;
1708 if (nSize)
1709 memcpy(psRegion->pData, pData, nSize);
1710
1711 if (bUseCacheDisk)
1712 AddRegionToCacheDisk(psRegion);
1713 }
1714
1715 /************************************************************************/
1716 /* GetCachedFileProp() */
1717 /************************************************************************/
1718
GetCachedFileProp(const char * pszURL)1719 CachedFileProp* VSICurlFilesystemHandler::GetCachedFileProp(const char* pszURL)
1720 {
1721 CPLMutexHolder oHolder( &hMutex );
1722
1723 CachedFileProp* cachedFileProp = cacheFileSize[pszURL];
1724 if (cachedFileProp == NULL)
1725 {
1726 cachedFileProp = (CachedFileProp*) CPLMalloc(sizeof(CachedFileProp));
1727 cachedFileProp->eExists = EXIST_UNKNOWN;
1728 cachedFileProp->bHastComputedFileSize = FALSE;
1729 cachedFileProp->fileSize = 0;
1730 cachedFileProp->bIsDirectory = FALSE;
1731 cacheFileSize[pszURL] = cachedFileProp;
1732 }
1733
1734 return cachedFileProp;
1735 }
1736
1737 /************************************************************************/
1738 /* Open() */
1739 /************************************************************************/
1740
Open(const char * pszFilename,const char * pszAccess)1741 VSIVirtualHandle* VSICurlFilesystemHandler::Open( const char *pszFilename,
1742 const char *pszAccess)
1743 {
1744 if (strchr(pszAccess, 'w') != NULL ||
1745 strchr(pszAccess, '+') != NULL)
1746 {
1747 CPLError(CE_Failure, CPLE_AppDefined,
1748 "Only read-only mode is supported for /vsicurl");
1749 return NULL;
1750 }
1751
1752 const char* pszOptionVal =
1753 CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
1754 int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
1755 CSLTestBoolean(pszOptionVal);
1756
1757 CPLString osFilename(pszFilename);
1758 int bGotFileList = TRUE;
1759 if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
1760 strncmp(CPLGetExtension(osFilename), "zip", 3) != 0 && !bSkipReadDir)
1761 {
1762 char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
1763 int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
1764 CSLDestroy(papszFileList);
1765 if (bGotFileList && !bFound)
1766 {
1767 return NULL;
1768 }
1769 }
1770
1771 VSICurlHandle* poHandle = new VSICurlHandle( this, osFilename + strlen("/vsicurl/"));
1772 if (!bGotFileList)
1773 {
1774 /* If we didn't get a filelist, check that the file really exists */
1775 if (!poHandle->Exists())
1776 {
1777 delete poHandle;
1778 poHandle = NULL;
1779 }
1780 }
1781
1782 if( CSLTestBoolean( CPLGetConfigOption( "VSI_CACHE", "FALSE" ) ) )
1783 return VSICreateCachedFile( poHandle );
1784 else
1785 return poHandle;
1786 }
1787
1788 /************************************************************************/
1789 /* VSICurlParserFindEOL() */
1790 /* */
/*      Small helper function for ParseHTMLFileList() to find           */
1792 /* the end of a line in the directory listing. Either a <br> */
1793 /* or newline. */
1794 /************************************************************************/
1795
VSICurlParserFindEOL(char * pszData)1796 static char *VSICurlParserFindEOL( char *pszData )
1797
1798 {
1799 while( *pszData != '\0' && *pszData != '\n' && !EQUALN(pszData,"<br>",4) )
1800 pszData++;
1801
1802 if( *pszData == '\0' )
1803 return NULL;
1804 else
1805 return pszData;
1806 }
1807
1808
1809 /************************************************************************/
1810 /* VSICurlParseHTMLDateTimeFileSize() */
1811 /************************************************************************/
1812
1813 static const char* const apszMonths[] = { "January", "February", "March",
1814 "April", "May", "June", "July",
1815 "August", "September", "October",
1816 "November", "December" };
1817
VSICurlParseHTMLDateTimeFileSize(const char * pszStr,struct tm & brokendowntime,GUIntBig & nFileSize,GIntBig & mTime)1818 static int VSICurlParseHTMLDateTimeFileSize(const char* pszStr,
1819 struct tm& brokendowntime,
1820 GUIntBig& nFileSize,
1821 GIntBig& mTime)
1822 {
1823 int iMonth;
1824 for(iMonth=0;iMonth<12;iMonth++)
1825 {
1826 char szMonth[32];
1827 szMonth[0] = '-';
1828 memcpy(szMonth + 1, apszMonths[iMonth], 3);
1829 szMonth[4] = '-';
1830 szMonth[5] = '\0';
1831 const char* pszMonthFound = strstr(pszStr, szMonth);
1832 if (pszMonthFound)
1833 {
1834 /* Format of Apache, like in http://download.osgeo.org/gdal/data/gtiff/ */
1835 /* "17-May-2010 12:26" */
1836 if (pszMonthFound - pszStr > 2 && strlen(pszMonthFound) > 15 &&
1837 pszMonthFound[-2 + 11] == ' ' && pszMonthFound[-2 + 14] == ':')
1838 {
1839 pszMonthFound -= 2;
1840 int nDay = atoi(pszMonthFound);
1841 int nYear = atoi(pszMonthFound + 7);
1842 int nHour = atoi(pszMonthFound + 12);
1843 int nMin = atoi(pszMonthFound + 15);
1844 if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1845 nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1846 {
1847 brokendowntime.tm_year = nYear - 1900;
1848 brokendowntime.tm_mon = iMonth;
1849 brokendowntime.tm_mday = nDay;
1850 brokendowntime.tm_hour = nHour;
1851 brokendowntime.tm_min = nMin;
1852 mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1853
1854 return TRUE;
1855 }
1856 }
1857 return FALSE;
1858 }
1859
1860 /* Microsoft IIS */
1861 szMonth[0] = ' ';
1862 strcpy(szMonth + 1, apszMonths[iMonth]);
1863 strcat(szMonth, " ");
1864 pszMonthFound = strstr(pszStr, szMonth);
1865 if (pszMonthFound)
1866 {
1867 int nLenMonth = strlen(apszMonths[iMonth]);
1868 if (pszMonthFound - pszStr > 2 &&
1869 pszMonthFound[-1] != ',' &&
1870 pszMonthFound[-2] != ' ' &&
1871 (int)strlen(pszMonthFound-2) > 2 + 1 + nLenMonth + 1 + 4 + 1 + 5 + 1 + 4)
1872 {
1873 /* Format of http://ortho.linz.govt.nz/tifs/1994_95/ */
1874 /* " Friday, 21 April 2006 12:05 p.m. 48062343 m35a_fy_94_95.tif" */
1875 pszMonthFound -= 2;
1876 int nDay = atoi(pszMonthFound);
1877 int nCurOffset = 2 + 1 + nLenMonth + 1;
1878 int nYear = atoi(pszMonthFound + nCurOffset);
1879 nCurOffset += 4 + 1;
1880 int nHour = atoi(pszMonthFound + nCurOffset);
1881 if (nHour < 10)
1882 nCurOffset += 1 + 1;
1883 else
1884 nCurOffset += 2 + 1;
1885 int nMin = atoi(pszMonthFound + nCurOffset);
1886 nCurOffset += 2 + 1;
1887 if (strncmp(pszMonthFound + nCurOffset, "p.m.", 4) == 0)
1888 nHour += 12;
1889 else if (strncmp(pszMonthFound + nCurOffset, "a.m.", 4) != 0)
1890 nHour = -1;
1891 nCurOffset += 4;
1892
1893 const char* pszFilesize = pszMonthFound + nCurOffset;
1894 while(*pszFilesize == ' ')
1895 pszFilesize ++;
1896 if (*pszFilesize >= '1' && *pszFilesize <= '9')
1897 nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1898
1899 if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1900 nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1901 {
1902 brokendowntime.tm_year = nYear - 1900;
1903 brokendowntime.tm_mon = iMonth;
1904 brokendowntime.tm_mday = nDay;
1905 brokendowntime.tm_hour = nHour;
1906 brokendowntime.tm_min = nMin;
1907 mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1908
1909 return TRUE;
1910 }
1911 nFileSize = 0;
1912 }
1913 else if (pszMonthFound - pszStr > 1 &&
1914 pszMonthFound[-1] == ',' &&
1915 (int)strlen(pszMonthFound) > 1 + nLenMonth + 1 + 2 + 1 + 1 + 4 + 1 + 5 + 1 + 2)
1916 {
1917 /* Format of http://publicfiles.dep.state.fl.us/dear/BWR_GIS/2007NWFLULC/ */
1918 /* " Sunday, June 20, 2010 6:46 PM 233170905 NWF2007LULCForSDE.zip" */
1919 pszMonthFound += 1;
1920 int nCurOffset = nLenMonth + 1;
1921 int nDay = atoi(pszMonthFound + nCurOffset);
1922 nCurOffset += 2 + 1 + 1;
1923 int nYear = atoi(pszMonthFound + nCurOffset);
1924 nCurOffset += 4 + 1;
1925 int nHour = atoi(pszMonthFound + nCurOffset);
1926 nCurOffset += 2 + 1;
1927 int nMin = atoi(pszMonthFound + nCurOffset);
1928 nCurOffset += 2 + 1;
1929 if (strncmp(pszMonthFound + nCurOffset, "PM", 2) == 0)
1930 nHour += 12;
1931 else if (strncmp(pszMonthFound + nCurOffset, "AM", 2) != 0)
1932 nHour = -1;
1933 nCurOffset += 2;
1934
1935 const char* pszFilesize = pszMonthFound + nCurOffset;
1936 while(*pszFilesize == ' ')
1937 pszFilesize ++;
1938 if (*pszFilesize >= '1' && *pszFilesize <= '9')
1939 nFileSize = CPLScanUIntBig(pszFilesize, strlen(pszFilesize));
1940
1941 if (nDay >= 1 && nDay <= 31 && nYear >= 1900 &&
1942 nHour >= 0 && nHour <= 24 && nMin >= 0 && nMin < 60)
1943 {
1944 brokendowntime.tm_year = nYear - 1900;
1945 brokendowntime.tm_mon = iMonth;
1946 brokendowntime.tm_mday = nDay;
1947 brokendowntime.tm_hour = nHour;
1948 brokendowntime.tm_min = nMin;
1949 mTime = CPLYMDHMSToUnixTime(&brokendowntime);
1950
1951 return TRUE;
1952 }
1953 nFileSize = 0;
1954 }
1955 return FALSE;
1956 }
1957 }
1958
1959 return FALSE;
1960 }
1961
1962 /************************************************************************/
1963 /* ParseHTMLFileList() */
1964 /* */
1965 /* Parse a file list document and return all the components. */
1966 /************************************************************************/
1967
ParseHTMLFileList(const char * pszFilename,char * pszData,int * pbGotFileList)1968 char** VSICurlFilesystemHandler::ParseHTMLFileList(const char* pszFilename,
1969 char* pszData,
1970 int* pbGotFileList)
1971 {
1972 CPLStringList oFileList;
1973 char* pszLine = pszData;
1974 char* c;
1975 int nCount = 0;
1976 int bIsHTMLDirList = FALSE;
1977 CPLString osExpectedString;
1978 CPLString osExpectedString2;
1979 CPLString osExpectedString3;
1980 CPLString osExpectedString4;
1981 CPLString osExpectedString_unescaped;
1982
1983 *pbGotFileList = FALSE;
1984
1985 const char* pszDir;
1986 if (EQUALN(pszFilename, "/vsicurl/http://", strlen("/vsicurl/http://")))
1987 pszDir = strchr(pszFilename + strlen("/vsicurl/http://"), '/');
1988 else if (EQUALN(pszFilename, "/vsicurl/https://", strlen("/vsicurl/https://")))
1989 pszDir = strchr(pszFilename + strlen("/vsicurl/https://"), '/');
1990 else
1991 pszDir = strchr(pszFilename + strlen("/vsicurl/ftp://"), '/');
1992 if (pszDir == NULL)
1993 pszDir = "";
1994 /* Apache */
1995 osExpectedString = "<title>Index of ";
1996 osExpectedString += pszDir;
1997 osExpectedString += "</title>";
1998 /* shttpd */
1999 osExpectedString2 = "<title>Index of ";
2000 osExpectedString2 += pszDir;
2001 osExpectedString2 += "/</title>";
2002 /* FTP */
2003 osExpectedString3 = "FTP Listing of ";
2004 osExpectedString3 += pszDir;
2005 osExpectedString3 += "/";
2006 /* Apache 1.3.33 */
2007 osExpectedString4 = "<TITLE>Index of ";
2008 osExpectedString4 += pszDir;
2009 osExpectedString4 += "</TITLE>";
2010
2011 /* The listing of http://dds.cr.usgs.gov/srtm/SRTM_image_sample/picture%20examples/ */
2012 /* has "<title>Index of /srtm/SRTM_image_sample/picture examples</title>" so we must */
2013 /* try unescaped %20 also */
2014 /* Similar with http://datalib.usask.ca/gis/Data/Central_America_goodbutdoweown%3f/ */
2015 if (strchr(pszDir, '%'))
2016 {
2017 char* pszUnescapedDir = CPLUnescapeString(pszDir, NULL, CPLES_URL);
2018 osExpectedString_unescaped = "<title>Index of ";
2019 osExpectedString_unescaped += pszUnescapedDir;
2020 osExpectedString_unescaped += "</title>";
2021 CPLFree(pszUnescapedDir);
2022 }
2023
2024 int nCountTable = 0;
2025
2026 while( (c = VSICurlParserFindEOL( pszLine )) != NULL )
2027 {
2028 *c = 0;
2029
2030 /* To avoid false positive on pages such as http://www.ngs.noaa.gov/PC_PROD/USGG2009BETA */
2031 /* This is a heuristics, but normal HTML listing of files have not more than one table */
2032 if (strstr(pszLine, "<table"))
2033 {
2034 nCountTable ++;
2035 if (nCountTable == 2)
2036 {
2037 *pbGotFileList = FALSE;
2038 return NULL;
2039 }
2040 }
2041
2042 if (!bIsHTMLDirList &&
2043 (strstr(pszLine, osExpectedString.c_str()) ||
2044 strstr(pszLine, osExpectedString2.c_str()) ||
2045 strstr(pszLine, osExpectedString3.c_str()) ||
2046 strstr(pszLine, osExpectedString4.c_str()) ||
2047 (osExpectedString_unescaped.size() != 0 && strstr(pszLine, osExpectedString_unescaped.c_str()))))
2048 {
2049 bIsHTMLDirList = TRUE;
2050 *pbGotFileList = TRUE;
2051 }
2052 /* Subversion HTTP listing */
2053 /* or Microsoft-IIS/6.0 listing (e.g. http://ortho.linz.govt.nz/tifs/2005_06/) */
2054 else if (!bIsHTMLDirList && strstr(pszLine, "<title>"))
2055 {
2056 /* Detect something like : <html><head><title>gdal - Revision 20739: /trunk/autotest/gcore/data</title></head> */
2057 /* The annoying thing is that what is after ': ' is a subpart of what is after http://server/ */
2058 char* pszSubDir = strstr(pszLine, ": ");
2059 if (pszSubDir == NULL)
2060 /* or <title>ortho.linz.govt.nz - /tifs/2005_06/</title> */
2061 pszSubDir = strstr(pszLine, "- ");
2062 if (pszSubDir)
2063 {
2064 pszSubDir += 2;
2065 char* pszTmp = strstr(pszSubDir, "</title>");
2066 if (pszTmp)
2067 {
2068 if (pszTmp[-1] == '/')
2069 pszTmp[-1] = 0;
2070 else
2071 *pszTmp = 0;
2072 if (strstr(pszDir, pszSubDir))
2073 {
2074 bIsHTMLDirList = TRUE;
2075 *pbGotFileList = TRUE;
2076 }
2077 }
2078 }
2079 }
2080 else if (bIsHTMLDirList &&
2081 (strstr(pszLine, "<a href=\"") != NULL || strstr(pszLine, "<A HREF=\"") != NULL) &&
2082 strstr(pszLine, "<a href=\"http://") == NULL && /* exclude absolute links, like to subversion home */
2083 strstr(pszLine, "Parent Directory") == NULL /* exclude parent directory */)
2084 {
2085 char *beginFilename = strstr(pszLine, "<a href=\"");
2086 if (beginFilename == NULL)
2087 beginFilename = strstr(pszLine, "<A HREF=\"");
2088 beginFilename += strlen("<a href=\"");
2089 char *endQuote = strchr(beginFilename, '"');
2090 if (endQuote && strncmp(beginFilename, "?C=", 3) != 0 && strncmp(beginFilename, "?N=", 3) != 0)
2091 {
2092 struct tm brokendowntime;
2093 memset(&brokendowntime, 0, sizeof(brokendowntime));
2094 GUIntBig nFileSize = 0;
2095 GIntBig mTime = 0;
2096
2097 VSICurlParseHTMLDateTimeFileSize(pszLine,
2098 brokendowntime,
2099 nFileSize,
2100 mTime);
2101
2102 *endQuote = '\0';
2103
2104 /* Remove trailing slash, that are returned for directories by */
2105 /* Apache */
2106 int bIsDirectory = FALSE;
2107 if (endQuote[-1] == '/')
2108 {
2109 bIsDirectory = TRUE;
2110 endQuote[-1] = 0;
2111 }
2112
2113 /* shttpd links include slashes from the root directory. Skip them */
2114 while(strchr(beginFilename, '/'))
2115 beginFilename = strchr(beginFilename, '/') + 1;
2116
2117 if (strcmp(beginFilename, ".") != 0 &&
2118 strcmp(beginFilename, "..") != 0)
2119 {
2120 CPLString osCachedFilename =
2121 CPLSPrintf("%s/%s", pszFilename + strlen("/vsicurl/"), beginFilename);
2122 CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
2123 cachedFileProp->eExists = EXIST_YES;
2124 cachedFileProp->bIsDirectory = bIsDirectory;
2125 cachedFileProp->mTime = mTime;
2126 cachedFileProp->bHastComputedFileSize = nFileSize > 0;
2127 cachedFileProp->fileSize = nFileSize;
2128
2129 oFileList.AddString( beginFilename );
2130 if (ENABLE_DEBUG)
2131 CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
2132 nCount, beginFilename, bIsDirectory, nFileSize,
2133 brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
2134 brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
2135 nCount ++;
2136 }
2137 }
2138 }
2139 pszLine = c + 1;
2140 }
2141
2142 return oFileList.StealList();
2143 }
2144
2145
2146 /************************************************************************/
2147 /* VSICurlGetToken() */
2148 /************************************************************************/
2149
/* Extract, in place, the next space-delimited token of pszCurPtr.          */
/*                                                                          */
/* Returns a pointer to the token, which gets NUL-terminated inside the     */
/* input buffer, or NULL when pszCurPtr is NULL or only holds spaces (in    */
/* which case *ppszNextToken is left untouched).                            */
/* On success *ppszNextToken receives the position at which the following   */
/* token would start (spaces skipped), or NULL when the returned token ran  */
/* up to the end of the string.                                             */
static char* VSICurlGetToken(char* pszCurPtr, char** ppszNextToken)
{
    char* pszStart;
    char* pszEnd;

    if (pszCurPtr == NULL)
        return NULL;

    /* Skip the blanks in front of the token */
    for (pszStart = pszCurPtr; *pszStart == ' '; pszStart++)
    {
    }
    if (*pszStart == '\0')
        return NULL;

    /* Find the first character that does not belong to the token */
    for (pszEnd = pszStart; *pszEnd != ' ' && *pszEnd != '\0'; pszEnd++)
    {
    }

    if (*pszEnd == '\0')
    {
        /* The token is the last thing in the string */
        *ppszNextToken = NULL;
        return pszStart;
    }

    /* Terminate the token and move past the separating blanks */
    *pszEnd = '\0';
    pszEnd++;
    while (*pszEnd == ' ')
        pszEnd++;
    *ppszNextToken = pszEnd;

    return pszStart;
}
2176
2177 /************************************************************************/
2178 /* VSICurlParseFullFTPLine() */
2179 /************************************************************************/
2180
2181 /* Parse lines like the following ones :
2182 -rw-r--r-- 1 10003 100 430 Jul 04 2008 COPYING
2183 lrwxrwxrwx 1 ftp ftp 28 Jun 14 14:13 MPlayer -> mirrors/mplayerhq.hu/MPlayer
2184 -rw-r--r-- 1 ftp ftp 725614592 May 13 20:13 Fedora-15-x86_64-Live-KDE.iso
2185 drwxr-xr-x 280 1003 1003 6656 Aug 26 04:17 gnu
2186 */
2187
VSICurlParseFullFTPLine(char * pszLine,char * & pszFilename,int & bSizeValid,GUIntBig & nSize,int & bIsDirectory,GIntBig & nUnixTime)2188 static int VSICurlParseFullFTPLine(char* pszLine,
2189 char*& pszFilename,
2190 int& bSizeValid,
2191 GUIntBig& nSize,
2192 int& bIsDirectory,
2193 GIntBig& nUnixTime)
2194 {
2195 char* pszNextToken = pszLine;
2196 char* pszPermissions = VSICurlGetToken(pszNextToken, &pszNextToken);
2197 if (pszPermissions == NULL || strlen(pszPermissions) != 10)
2198 return FALSE;
2199 bIsDirectory = (pszPermissions[0] == 'd');
2200
2201 int i;
2202 for(i = 0; i < 3; i++)
2203 {
2204 if (VSICurlGetToken(pszNextToken, &pszNextToken) == NULL)
2205 return FALSE;
2206 }
2207
2208 char* pszSize = VSICurlGetToken(pszNextToken, &pszNextToken);
2209 if (pszSize == NULL)
2210 return FALSE;
2211
2212 if (pszPermissions[0] == '-')
2213 {
2214 /* Regular file */
2215 bSizeValid = TRUE;
2216 nSize = CPLScanUIntBig(pszSize, strlen(pszSize));
2217 }
2218
2219 struct tm brokendowntime;
2220 memset(&brokendowntime, 0, sizeof(brokendowntime));
2221 int bBrokenDownTimeValid = TRUE;
2222
2223 char* pszMonth = VSICurlGetToken(pszNextToken, &pszNextToken);
2224 if (pszMonth == NULL || strlen(pszMonth) != 3)
2225 return FALSE;
2226
2227 for(i = 0; i < 12; i++)
2228 {
2229 if (EQUALN(pszMonth, apszMonths[i], 3))
2230 break;
2231 }
2232 if (i < 12)
2233 brokendowntime.tm_mon = i;
2234 else
2235 bBrokenDownTimeValid = FALSE;
2236
2237 char* pszDay = VSICurlGetToken(pszNextToken, &pszNextToken);
2238 if (pszDay == NULL || (strlen(pszDay) != 1 && strlen(pszDay) != 2))
2239 return FALSE;
2240 int nDay = atoi(pszDay);
2241 if (nDay >= 1 && nDay <= 31)
2242 brokendowntime.tm_mday = nDay;
2243 else
2244 bBrokenDownTimeValid = FALSE;
2245
2246 char* pszHourOrYear = VSICurlGetToken(pszNextToken, &pszNextToken);
2247 if (pszHourOrYear == NULL || (strlen(pszHourOrYear) != 4 && strlen(pszHourOrYear) != 5))
2248 return FALSE;
2249 if (strlen(pszHourOrYear) == 4)
2250 {
2251 brokendowntime.tm_year = atoi(pszHourOrYear) - 1900;
2252 }
2253 else
2254 {
2255 time_t sTime;
2256 time(&sTime);
2257 struct tm currentBrokendowntime;
2258 CPLUnixTimeToYMDHMS((GIntBig)sTime, ¤tBrokendowntime);
2259 brokendowntime.tm_year = currentBrokendowntime.tm_year;
2260 brokendowntime.tm_hour = atoi(pszHourOrYear);
2261 brokendowntime.tm_min = atoi(pszHourOrYear + 3);
2262 }
2263
2264 if (bBrokenDownTimeValid)
2265 nUnixTime = CPLYMDHMSToUnixTime(&brokendowntime);
2266 else
2267 nUnixTime = 0;
2268
2269 if (pszNextToken == NULL)
2270 return FALSE;
2271
2272 pszFilename = pszNextToken;
2273
2274 char* pszCurPtr = pszFilename;
2275 while( *pszCurPtr != '\0')
2276 {
2277 /* In case of a link, stop before the pointed part of the link */
2278 if (pszPermissions[0] == 'l' && strncmp(pszCurPtr, " -> ", 4) == 0)
2279 {
2280 break;
2281 }
2282 pszCurPtr ++;
2283 }
2284 *pszCurPtr = '\0';
2285
2286 return TRUE;
2287 }
2288
2289 /************************************************************************/
2290 /* GetFileList() */
2291 /************************************************************************/
2292
GetFileList(const char * pszDirname,int * pbGotFileList)2293 char** VSICurlFilesystemHandler::GetFileList(const char *pszDirname, int* pbGotFileList)
2294 {
2295 if (ENABLE_DEBUG)
2296 CPLDebug("VSICURL", "GetFileList(%s)" , pszDirname);
2297
2298 *pbGotFileList = FALSE;
2299
2300 /* HACK (optimization in fact) for MBTiles driver */
2301 if (strstr(pszDirname, ".tiles.mapbox.com") != NULL)
2302 return NULL;
2303
2304 if (strncmp(pszDirname, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0)
2305 {
2306 WriteFuncStruct sWriteFuncData;
2307 sWriteFuncData.pBuffer = NULL;
2308
2309 CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2310 osDirname += '/';
2311
2312 char** papszFileList = NULL;
2313
2314 for(int iTry=0;iTry<2;iTry++)
2315 {
2316 CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2317 VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2318
2319 /* On the first pass, we want to try fetching all the possible */
2320 /* informations (filename, file/directory, size). If that */
2321 /* does not work, then try again with CURLOPT_DIRLISTONLY set */
2322 if (iTry == 1)
2323 {
2324 /* 7.16.4 */
2325 #if LIBCURL_VERSION_NUM <= 0x071004
2326 curl_easy_setopt(hCurlHandle, CURLOPT_FTPLISTONLY, 1);
2327 #elif LIBCURL_VERSION_NUM > 0x071004
2328 curl_easy_setopt(hCurlHandle, CURLOPT_DIRLISTONLY, 1);
2329 #endif
2330 }
2331
2332 VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
2333 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2334 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2335
2336 char szCurlErrBuf[CURL_ERROR_SIZE+1];
2337 szCurlErrBuf[0] = '\0';
2338 curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2339
2340 curl_easy_perform(hCurlHandle);
2341
2342 if (sWriteFuncData.pBuffer == NULL)
2343 return NULL;
2344
2345 char* pszLine = sWriteFuncData.pBuffer;
2346 char* c;
2347 int nCount = 0;
2348
2349 if (EQUALN(pszLine, "<!DOCTYPE HTML", strlen("<!DOCTYPE HTML")) ||
2350 EQUALN(pszLine, "<HTML>", 6))
2351 {
2352 papszFileList = ParseHTMLFileList(pszDirname,
2353 sWriteFuncData.pBuffer,
2354 pbGotFileList);
2355 break;
2356 }
2357 else if (iTry == 0)
2358 {
2359 CPLStringList oFileList;
2360 *pbGotFileList = TRUE;
2361
2362 while( (c = strchr(pszLine, '\n')) != NULL)
2363 {
2364 *c = 0;
2365 if (c - pszLine > 0 && c[-1] == '\r')
2366 c[-1] = 0;
2367
2368 char* pszFilename = NULL;
2369 int bSizeValid = FALSE;
2370 GUIntBig nFileSize = 0;
2371 int bIsDirectory = FALSE;
2372 GIntBig mUnixTime = 0;
2373 if (!VSICurlParseFullFTPLine(pszLine, pszFilename,
2374 bSizeValid, nFileSize,
2375 bIsDirectory, mUnixTime))
2376 break;
2377
2378 if (strcmp(pszFilename, ".") != 0 &&
2379 strcmp(pszFilename, "..") != 0)
2380 {
2381 CPLString osCachedFilename =
2382 CPLSPrintf("%s/%s", pszDirname + strlen("/vsicurl/"), pszFilename);
2383 CachedFileProp* cachedFileProp = GetCachedFileProp(osCachedFilename);
2384 cachedFileProp->eExists = EXIST_YES;
2385 cachedFileProp->bHastComputedFileSize = bSizeValid;
2386 cachedFileProp->fileSize = nFileSize;
2387 cachedFileProp->bIsDirectory = bIsDirectory;
2388 cachedFileProp->mTime = mUnixTime;
2389
2390 oFileList.AddString(pszFilename);
2391 if (ENABLE_DEBUG)
2392 {
2393 struct tm brokendowntime;
2394 CPLUnixTimeToYMDHMS(mUnixTime, &brokendowntime);
2395 CPLDebug("VSICURL", "File[%d] = %s, is_dir = %d, size = " CPL_FRMT_GUIB ", time = %04d/%02d/%02d %02d:%02d:%02d",
2396 nCount, pszFilename, bIsDirectory, nFileSize,
2397 brokendowntime.tm_year + 1900, brokendowntime.tm_mon + 1, brokendowntime.tm_mday,
2398 brokendowntime.tm_hour, brokendowntime.tm_min, brokendowntime.tm_sec);
2399 }
2400
2401 nCount ++;
2402 }
2403
2404 pszLine = c + 1;
2405 }
2406
2407 if (c == NULL)
2408 {
2409 papszFileList = oFileList.StealList();
2410 break;
2411 }
2412 }
2413 else
2414 {
2415 CPLStringList oFileList;
2416 *pbGotFileList = TRUE;
2417
2418 while( (c = strchr(pszLine, '\n')) != NULL)
2419 {
2420 *c = 0;
2421 if (c - pszLine > 0 && c[-1] == '\r')
2422 c[-1] = 0;
2423
2424 if (strcmp(pszLine, ".") != 0 &&
2425 strcmp(pszLine, "..") != 0)
2426 {
2427 oFileList.AddString(pszLine);
2428 if (ENABLE_DEBUG)
2429 CPLDebug("VSICURL", "File[%d] = %s", nCount, pszLine);
2430 nCount ++;
2431 }
2432
2433 pszLine = c + 1;
2434 }
2435
2436 papszFileList = oFileList.StealList();
2437 }
2438
2439 CPLFree(sWriteFuncData.pBuffer);
2440 sWriteFuncData.pBuffer = NULL;
2441 }
2442
2443 CPLFree(sWriteFuncData.pBuffer);
2444
2445 return papszFileList;
2446 }
2447
2448 /* Try to recognize HTML pages that list the content of a directory */
2449 /* Currently this supports what Apache and shttpd can return */
2450 else if (strncmp(pszDirname, "/vsicurl/http://", strlen("/vsicurl/http://")) == 0 ||
2451 strncmp(pszDirname, "/vsicurl/https://", strlen("/vsicurl/https://")) == 0)
2452 {
2453 WriteFuncStruct sWriteFuncData;
2454
2455 CPLString osDirname(pszDirname + strlen("/vsicurl/"));
2456 osDirname += '/';
2457
2458 #if LIBCURL_VERSION_NUM < 0x070B00
2459 /* Curl 7.10.X doesn't manage to unset the CURLOPT_RANGE that would have been */
2460 /* previously set, so we have to reinit the connection handle */
2461 GetCurlHandleFor("");
2462 #endif
2463
2464 CURL* hCurlHandle = GetCurlHandleFor(osDirname);
2465 VSICurlSetOptions(hCurlHandle, osDirname.c_str());
2466
2467 curl_easy_setopt(hCurlHandle, CURLOPT_RANGE, NULL);
2468
2469 VSICURLInitWriteFuncStruct(&sWriteFuncData, NULL, NULL, NULL);
2470 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEDATA, &sWriteFuncData);
2471 curl_easy_setopt(hCurlHandle, CURLOPT_WRITEFUNCTION, VSICurlHandleWriteFunc);
2472
2473 char szCurlErrBuf[CURL_ERROR_SIZE+1];
2474 szCurlErrBuf[0] = '\0';
2475 curl_easy_setopt(hCurlHandle, CURLOPT_ERRORBUFFER, szCurlErrBuf );
2476
2477 curl_easy_perform(hCurlHandle);
2478
2479 if (sWriteFuncData.pBuffer == NULL)
2480 return NULL;
2481
2482 char** papszFileList = ParseHTMLFileList(pszDirname,
2483 sWriteFuncData.pBuffer,
2484 pbGotFileList);
2485
2486 CPLFree(sWriteFuncData.pBuffer);
2487 return papszFileList;
2488 }
2489
2490 return NULL;
2491 }
2492
2493 /************************************************************************/
2494 /* Stat() */
2495 /************************************************************************/
2496
Stat(const char * pszFilename,VSIStatBufL * pStatBuf,int nFlags)2497 int VSICurlFilesystemHandler::Stat( const char *pszFilename, VSIStatBufL *pStatBuf,
2498 int nFlags )
2499 {
2500 CPLString osFilename(pszFilename);
2501
2502 memset(pStatBuf, 0, sizeof(VSIStatBufL));
2503
2504 const char* pszOptionVal =
2505 CPLGetConfigOption( "GDAL_DISABLE_READDIR_ON_OPEN", "NO" );
2506 int bSkipReadDir = EQUAL(pszOptionVal, "EMPTY_DIR") ||
2507 CSLTestBoolean(pszOptionVal);
2508
2509 /* Does it look like a FTP directory ? */
2510 if (strncmp(osFilename, "/vsicurl/ftp", strlen("/vsicurl/ftp")) == 0 &&
2511 pszFilename[strlen(osFilename) - 1] == '/' && !bSkipReadDir)
2512 {
2513 char** papszFileList = ReadDir(osFilename);
2514 if (papszFileList)
2515 {
2516 pStatBuf->st_mode = S_IFDIR;
2517 pStatBuf->st_size = 0;
2518
2519 CSLDestroy(papszFileList);
2520
2521 return 0;
2522 }
2523 return -1;
2524 }
2525 else if (strchr(CPLGetFilename(osFilename), '.') != NULL &&
2526 !EQUALN(CPLGetExtension(osFilename), "zip", 3) &&
2527 strstr(osFilename, ".zip.") != NULL &&
2528 strstr(osFilename, ".ZIP.") != NULL &&
2529 !bSkipReadDir)
2530 {
2531 int bGotFileList;
2532 char** papszFileList = ReadDir(CPLGetDirname(osFilename), &bGotFileList);
2533 int bFound = (VSICurlIsFileInList(papszFileList, CPLGetFilename(osFilename)) != -1);
2534 CSLDestroy(papszFileList);
2535 if (bGotFileList && !bFound)
2536 {
2537 return -1;
2538 }
2539 }
2540
2541 VSICurlHandle oHandle( this, osFilename + strlen("/vsicurl/"));
2542
2543 if ( oHandle.IsKnownFileSize() ||
2544 ((nFlags & VSI_STAT_SIZE_FLAG) && !oHandle.IsDirectory() &&
2545 CSLTestBoolean(CPLGetConfigOption("CPL_VSIL_CURL_SLOW_GET_SIZE", "YES"))) )
2546 pStatBuf->st_size = oHandle.GetFileSize();
2547
2548 int nRet = (oHandle.Exists()) ? 0 : -1;
2549 pStatBuf->st_mtime = oHandle.GetMTime();
2550 pStatBuf->st_mode = oHandle.IsDirectory() ? S_IFDIR : S_IFREG;
2551 return nRet;
2552 }
2553
2554 /************************************************************************/
2555 /* Unlink() */
2556 /************************************************************************/
2557
Unlink(CPL_UNUSED const char * pszFilename)2558 int VSICurlFilesystemHandler::Unlink( CPL_UNUSED const char *pszFilename )
2559 {
2560 return -1;
2561 }
2562
2563 /************************************************************************/
2564 /* Rename() */
2565 /************************************************************************/
2566
Rename(CPL_UNUSED const char * oldpath,CPL_UNUSED const char * newpath)2567 int VSICurlFilesystemHandler::Rename( CPL_UNUSED const char *oldpath,
2568 CPL_UNUSED const char *newpath )
2569 {
2570 return -1;
2571 }
2572
2573 /************************************************************************/
2574 /* Mkdir() */
2575 /************************************************************************/
2576
Mkdir(CPL_UNUSED const char * pszDirname,CPL_UNUSED long nMode)2577 int VSICurlFilesystemHandler::Mkdir( CPL_UNUSED const char *pszDirname,
2578 CPL_UNUSED long nMode )
2579 {
2580 return -1;
2581 }
2582 /************************************************************************/
2583 /* Rmdir() */
2584 /************************************************************************/
2585
Rmdir(CPL_UNUSED const char * pszDirname)2586 int VSICurlFilesystemHandler::Rmdir( CPL_UNUSED const char *pszDirname )
2587 {
2588 return -1;
2589 }
2590
2591 /************************************************************************/
2592 /* ReadDir() */
2593 /************************************************************************/
2594
ReadDir(const char * pszDirname,int * pbGotFileList)2595 char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname, int* pbGotFileList )
2596 {
2597 CPLString osDirname(pszDirname);
2598 while (osDirname[strlen(osDirname) - 1] == '/')
2599 osDirname.erase(strlen(osDirname) - 1);
2600
2601 const char* pszUpDir = strstr(osDirname, "/..");
2602 if (pszUpDir != NULL)
2603 {
2604 int pos = pszUpDir - osDirname.c_str() - 1;
2605 while(pos >= 0 && osDirname[pos] != '/')
2606 pos --;
2607 if (pos >= 1)
2608 {
2609 osDirname = osDirname.substr(0, pos) + CPLString(pszUpDir + 3);
2610 }
2611 }
2612
2613 CPLMutexHolder oHolder( &hMutex );
2614
2615 /* If we know the file exists and is not a directory, then don't try to list its content */
2616 CachedFileProp* cachedFileProp = GetCachedFileProp(osDirname.c_str() + strlen("/vsicurl/"));
2617 if (cachedFileProp->eExists == EXIST_YES && !cachedFileProp->bIsDirectory)
2618 {
2619 if (pbGotFileList)
2620 *pbGotFileList = TRUE;
2621 return NULL;
2622 }
2623
2624 CachedDirList* psCachedDirList = cacheDirList[osDirname];
2625 if (psCachedDirList == NULL)
2626 {
2627 psCachedDirList = (CachedDirList*) CPLMalloc(sizeof(CachedDirList));
2628 psCachedDirList->papszFileList = GetFileList(osDirname, &psCachedDirList->bGotFileList);
2629 cacheDirList[osDirname] = psCachedDirList;
2630 }
2631
2632 if (pbGotFileList)
2633 *pbGotFileList = psCachedDirList->bGotFileList;
2634
2635 return CSLDuplicate(psCachedDirList->papszFileList);
2636 }
2637
2638 /************************************************************************/
2639 /* ReadDir() */
2640 /************************************************************************/
2641
ReadDir(const char * pszDirname)2642 char** VSICurlFilesystemHandler::ReadDir( const char *pszDirname )
2643 {
2644 return ReadDir(pszDirname, NULL);
2645 }
2646
2647 /************************************************************************/
2648 /* VSIInstallCurlFileHandler() */
2649 /************************************************************************/
2650
2651 /**
2652 * \brief Install /vsicurl/ HTTP/FTP file system handler (requires libcurl)
2653 *
2654 * A special file handler is installed that allows reading on-the-fly of files
2655 * available through HTTP/FTP web protocols, without downloading the entire file.
2656 *
2657 * Recognized filenames are of the form /vsicurl/http://path/to/remote/resource or
2658 * /vsicurl/ftp://path/to/remote/resource where path/to/remote/resource is the
2659 * URL of a remote resource.
2660 *
2661 * Partial downloads (requires the HTTP server to support random reading) are done
2662 * with a 16 KB granularity by default. If the driver detects sequential reading
2663 * it will progressively increase the chunk size up to 2 MB to improve download
2664 * performance.
2665 *
2666 * The GDAL_HTTP_PROXY, GDAL_HTTP_PROXYUSERPWD and GDAL_PROXY_AUTH configuration options can be
2667 * used to define a proxy server. The syntax to use is the one of Curl CURLOPT_PROXY,
2668 * CURLOPT_PROXYUSERPWD and CURLOPT_PROXYAUTH options.
2669 *
2670 * Starting with GDAL 1.10, the file can be cached in RAM by setting the configuration option
2671 * VSI_CACHE to TRUE. The cache size defaults to 25 MB, but can be modified by setting
2672 * the configuration option VSI_CACHE_SIZE (in bytes).
2673 *
 * VSIStatL() will return the size in st_size member and file
 * nature - file or directory - in st_mode member (the latter only reliable with FTP
 * resources for now).
2677 *
2678 * VSIReadDir() should be able to parse the HTML directory listing returned by the
2679 * most popular web servers, such as Apache or Microsoft IIS.
2680 *
2681 * This special file handler can be combined with other virtual filesystems handlers,
2682 * such as /vsizip. For example, /vsizip//vsicurl/path/to/remote/file.zip/path/inside/zip
2683 *
2684 * @since GDAL 1.8.0
2685 */
VSIInstallCurlFileHandler(void)2686 void VSIInstallCurlFileHandler(void)
2687 {
2688 VSIFileManager::InstallHandler( "/vsicurl/", new VSICurlFilesystemHandler );
2689 }
2690
2691 /************************************************************************/
2692 /* VSICurlInstallReadCbk() */
2693 /************************************************************************/
2694
VSICurlInstallReadCbk(VSILFILE * fp,VSICurlReadCbkFunc pfnReadCbk,void * pfnUserData,int bStopOnInterrruptUntilUninstall)2695 int VSICurlInstallReadCbk (VSILFILE* fp,
2696 VSICurlReadCbkFunc pfnReadCbk,
2697 void* pfnUserData,
2698 int bStopOnInterrruptUntilUninstall)
2699 {
2700 return ((VSICurlHandle*)fp)->InstallReadCbk(pfnReadCbk, pfnUserData,
2701 bStopOnInterrruptUntilUninstall);
2702 }
2703
2704
2705 /************************************************************************/
2706 /* VSICurlUninstallReadCbk() */
2707 /************************************************************************/
2708
VSICurlUninstallReadCbk(VSILFILE * fp)2709 int VSICurlUninstallReadCbk(VSILFILE* fp)
2710 {
2711 return ((VSICurlHandle*)fp)->UninstallReadCbk();
2712 }
2713
2714 #endif /* HAVE_CURL */
2715