1 /******************************************************************************
2  *
3  * Project:  VSI Virtual File System
4  * Purpose:  Implementation of sparse file virtual io driver.
5  * Author:   Frank Warmerdam, warmerdam@pobox.com
6  *
7  ******************************************************************************
8  * Copyright (c) 2010, Frank Warmerdam <warmerdam@pobox.com>
9  * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 #include "cpl_port.h"
31 #include "cpl_vsi.h"
32 
33 #include <cerrno>
34 #include <cstddef>
35 #include <cstdlib>
36 #include <cstring>
37 
38 #if HAVE_FCNTL_H
39 #  include <fcntl.h>
40 #endif
41 
42 #include <algorithm>
43 #include <map>
44 #include <memory>
45 #include <vector>
46 
47 #include "cpl_conv.h"
48 #include "cpl_error.h"
49 #include "cpl_minixml.h"
50 #include "cpl_multiproc.h"
51 #include "cpl_string.h"
52 #include "cpl_vsi_virtual.h"
53 
54 CPL_CVSID("$Id: cpl_vsil_sparsefile.cpp a044c83f8091becdd11e27be6e9c08d0d3478126 2021-02-24 11:38:17 +0100 Even Rouault $")
55 
56 class SFRegion {
57 public:
58     CPLString     osFilename{};
59     VSILFILE     *fp = nullptr;
60     GUIntBig      nDstOffset = 0;
61     GUIntBig      nSrcOffset = 0;
62     GUIntBig      nLength = 0;
63     GByte         byValue = 0;
64     bool          bTriedOpen = false;
65 };
66 
67 /************************************************************************/
68 /* ==================================================================== */
69 /*                         VSISparseFileHandle                          */
70 /* ==================================================================== */
71 /************************************************************************/
72 
73 class VSISparseFileFilesystemHandler;
74 
75 class VSISparseFileHandle : public VSIVirtualHandle
76 {
77     CPL_DISALLOW_COPY_ASSIGN(VSISparseFileHandle)
78 
79     VSISparseFileFilesystemHandler* m_poFS = nullptr;
80     bool               bEOF = false;
81 
82   public:
VSISparseFileHandle(VSISparseFileFilesystemHandler * poFS)83     explicit VSISparseFileHandle(VSISparseFileFilesystemHandler* poFS) :
84                 m_poFS(poFS) {}
85 
86     GUIntBig           nOverallLength = 0;
87     GUIntBig           nCurOffset = 0;
88 
89     std::vector<SFRegion> aoRegions{};
90 
91     int Seek( vsi_l_offset nOffset, int nWhence ) override;
92     vsi_l_offset Tell() override;
93     size_t Read( void *pBuffer, size_t nSize, size_t nMemb ) override;
94     size_t Write( const void *pBuffer, size_t nSize, size_t nMemb ) override;
95     int Eof() override;
96     int Close() override;
97 };
98 
99 /************************************************************************/
100 /* ==================================================================== */
101 /*                   VSISparseFileFilesystemHandler                     */
102 /* ==================================================================== */
103 /************************************************************************/
104 
105 class VSISparseFileFilesystemHandler : public VSIFilesystemHandler
106 {
107     std::map<GIntBig, int> oRecOpenCount{};
108     CPL_DISALLOW_COPY_ASSIGN(VSISparseFileFilesystemHandler)
109 
110 public:
111     VSISparseFileFilesystemHandler() = default;
112     ~VSISparseFileFilesystemHandler() override = default;
113 
114     int              DecomposePath( const char *pszPath,
115                                     CPLString &osFilename,
116                                     vsi_l_offset &nSparseFileOffset,
117                                     vsi_l_offset &nSparseFileSize );
118 
119     // TODO(schwehr): Fix VSISparseFileFilesystemHandler::Stat to not need using.
120     using VSIFilesystemHandler::Open;
121 
122     VSIVirtualHandle *Open( const char *pszFilename,
123                             const char *pszAccess,
124                             bool bSetError,
125                             CSLConstList /* papszOptions */ ) override;
126     int Stat( const char *pszFilename, VSIStatBufL *pStatBuf,
127               int nFlags ) override;
128     int Unlink( const char *pszFilename ) override;
129     int Mkdir( const char *pszDirname, long nMode ) override;
130     int Rmdir( const char *pszDirname ) override;
131     char **ReadDir( const char *pszDirname ) override;
132 
GetRecCounter()133     int              GetRecCounter() { return oRecOpenCount[CPLGetPID()]; }
IncRecCounter()134     void             IncRecCounter() { oRecOpenCount[CPLGetPID()] ++; }
DecRecCounter()135     void             DecRecCounter() { oRecOpenCount[CPLGetPID()] --; }
136 };
137 
138 /************************************************************************/
139 /* ==================================================================== */
140 /*                             VSISparseFileHandle                      */
141 /* ==================================================================== */
142 /************************************************************************/
143 
144 /************************************************************************/
145 /*                               Close()                                */
146 /************************************************************************/
147 
Close()148 int VSISparseFileHandle::Close()
149 
150 {
151     for( unsigned int i = 0; i < aoRegions.size(); i++ )
152     {
153         if( aoRegions[i].fp != nullptr )
154             CPL_IGNORE_RET_VAL(VSIFCloseL( aoRegions[i].fp ));
155     }
156 
157     return 0;
158 }
159 
160 /************************************************************************/
161 /*                                Seek()                                */
162 /************************************************************************/
163 
Seek(vsi_l_offset nOffset,int nWhence)164 int VSISparseFileHandle::Seek( vsi_l_offset nOffset, int nWhence )
165 
166 {
167     bEOF = false;
168     if( nWhence == SEEK_SET )
169         nCurOffset = nOffset;
170     else if( nWhence == SEEK_CUR )
171     {
172         nCurOffset += nOffset;
173     }
174     else if( nWhence == SEEK_END )
175     {
176         nCurOffset = nOverallLength + nOffset;
177     }
178     else
179     {
180         errno = EINVAL;
181         return -1;
182     }
183 
184     return 0;
185 }
186 
187 /************************************************************************/
188 /*                                Tell()                                */
189 /************************************************************************/
190 
Tell()191 vsi_l_offset VSISparseFileHandle::Tell()
192 
193 {
194     return nCurOffset;
195 }
196 
197 /************************************************************************/
198 /*                                Read()                                */
199 /************************************************************************/
200 
Read(void * pBuffer,size_t nSize,size_t nCount)201 size_t VSISparseFileHandle::Read( void * pBuffer, size_t nSize, size_t nCount )
202 
203 {
204     if( nCurOffset >= nOverallLength )
205     {
206         bEOF = true;
207         return 0;
208     }
209 
210 /* -------------------------------------------------------------------- */
211 /*      Find what region we are in, searching linearly from the         */
212 /*      start.                                                          */
213 /* -------------------------------------------------------------------- */
214     unsigned int iRegion = 0;  // Used after for.
215 
216     for( ; iRegion < aoRegions.size(); iRegion++ )
217     {
218         if( nCurOffset >= aoRegions[iRegion].nDstOffset &&
219             nCurOffset <
220                 aoRegions[iRegion].nDstOffset + aoRegions[iRegion].nLength )
221             break;
222     }
223 
224     size_t nBytesRequested = nSize * nCount;
225     if( nBytesRequested == 0 )
226     {
227         return 0;
228     }
229     if( nCurOffset + nBytesRequested > nOverallLength )
230     {
231         nBytesRequested = static_cast<size_t>(nOverallLength - nCurOffset);
232         bEOF = true;
233     }
234 
235 /* -------------------------------------------------------------------- */
236 /*      Default to zeroing the buffer if no corresponding region was    */
237 /*      found.                                                          */
238 /* -------------------------------------------------------------------- */
239     if( iRegion == aoRegions.size() )
240     {
241         memset( pBuffer, 0, nBytesRequested);
242         nCurOffset += nBytesRequested;
243         return nBytesRequested / nSize;
244     }
245 
246 /* -------------------------------------------------------------------- */
247 /*      If this request crosses region boundaries, split it into two    */
248 /*      requests.                                                       */
249 /* -------------------------------------------------------------------- */
250     size_t nBytesReturnCount = 0;
251     const GUIntBig nEndOffsetOfRegion =
252         aoRegions[iRegion].nDstOffset + aoRegions[iRegion].nLength;
253 
254     if( nCurOffset + nBytesRequested > nEndOffsetOfRegion )
255     {
256         const size_t nExtraBytes =
257             static_cast<size_t>(nCurOffset + nBytesRequested - nEndOffsetOfRegion);
258         // Recurse to get the rest of the request.
259 
260         const GUIntBig nCurOffsetSave = nCurOffset;
261         nCurOffset += nBytesRequested - nExtraBytes;
262         bool bEOFSave = bEOF;
263         bEOF = false;
264         const size_t nBytesRead =
265             this->Read( static_cast<char *>(pBuffer) + nBytesRequested - nExtraBytes,
266                         1, nExtraBytes );
267         nCurOffset = nCurOffsetSave;
268         bEOF = bEOFSave;
269 
270         nBytesReturnCount += nBytesRead;
271         nBytesRequested -= nExtraBytes;
272     }
273 
274 /* -------------------------------------------------------------------- */
275 /*      Handle a constant region.                                       */
276 /* -------------------------------------------------------------------- */
277     if( aoRegions[iRegion].osFilename.empty() )
278     {
279         memset( pBuffer, aoRegions[iRegion].byValue,
280                 static_cast<size_t>(nBytesRequested) );
281 
282         nBytesReturnCount += nBytesRequested;
283     }
284 
285 /* -------------------------------------------------------------------- */
286 /*      Otherwise handle as a file.                                     */
287 /* -------------------------------------------------------------------- */
288     else
289     {
290         if( aoRegions[iRegion].fp == nullptr )
291         {
292             if( !aoRegions[iRegion].bTriedOpen )
293             {
294                 aoRegions[iRegion].fp =
295                     VSIFOpenL( aoRegions[iRegion].osFilename, "r" );
296                 if( aoRegions[iRegion].fp == nullptr )
297                 {
298                     CPLDebug( "/vsisparse/", "Failed to open '%s'.",
299                               aoRegions[iRegion].osFilename.c_str() );
300                 }
301                 aoRegions[iRegion].bTriedOpen = true;
302             }
303             if( aoRegions[iRegion].fp == nullptr )
304             {
305                 return 0;
306             }
307         }
308 
309         if( VSIFSeekL( aoRegions[iRegion].fp,
310                        nCurOffset
311                        - aoRegions[iRegion].nDstOffset
312                        + aoRegions[iRegion].nSrcOffset,
313                        SEEK_SET ) != 0 )
314             return 0;
315 
316         m_poFS->IncRecCounter();
317         const size_t nBytesRead =
318             VSIFReadL( pBuffer, 1, static_cast<size_t>(nBytesRequested),
319                        aoRegions[iRegion].fp );
320         m_poFS->DecRecCounter();
321 
322         nBytesReturnCount += nBytesRead;
323     }
324 
325     nCurOffset += nBytesReturnCount;
326 
327     return nBytesReturnCount / nSize;
328 }
329 
330 /************************************************************************/
331 /*                               Write()                                */
332 /************************************************************************/
333 
Write(const void *,size_t,size_t)334 size_t VSISparseFileHandle::Write( const void * /* pBuffer */,
335                                    size_t /* nSize */,
336                                    size_t /* nCount */ )
337 {
338     errno = EBADF;
339     return 0;
340 }
341 
342 /************************************************************************/
343 /*                                Eof()                                 */
344 /************************************************************************/
345 
Eof()346 int VSISparseFileHandle::Eof()
347 
348 {
349     return bEOF ? 1 : 0;
350 }
351 
352 /************************************************************************/
353 /* ==================================================================== */
354 /*                       VSISparseFileFilesystemHandler                 */
355 /* ==================================================================== */
356 /************************************************************************/
357 
358 /************************************************************************/
359 /*                                Open()                                */
360 /************************************************************************/
361 
362 VSIVirtualHandle *
Open(const char * pszFilename,const char * pszAccess,bool,CSLConstList)363 VSISparseFileFilesystemHandler::Open( const char *pszFilename,
364                                       const char *pszAccess,
365                                       bool /* bSetError */,
366                                       CSLConstList /* papszOptions */ )
367 
368 {
369     if( !STARTS_WITH_CI(pszFilename, "/vsisparse/") )
370         return nullptr;
371 
372     if( !EQUAL(pszAccess, "r") && !EQUAL(pszAccess, "rb") )
373     {
374         errno = EACCES;
375         return nullptr;
376     }
377 
378     // Arbitrary number.
379     if( GetRecCounter() == 32 )
380         return nullptr;
381 
382     const CPLString osSparseFilePath = pszFilename + 11;
383 
384 /* -------------------------------------------------------------------- */
385 /*      Does this file even exist?                                      */
386 /* -------------------------------------------------------------------- */
387     VSILFILE *fp = VSIFOpenL( osSparseFilePath, "r" );
388     if( fp == nullptr )
389         return nullptr;
390     CPL_IGNORE_RET_VAL(VSIFCloseL( fp ));
391 
392 /* -------------------------------------------------------------------- */
393 /*      Read the XML file.                                              */
394 /* -------------------------------------------------------------------- */
395     CPLXMLNode *psXMLRoot = CPLParseXMLFile( osSparseFilePath );
396 
397     if( psXMLRoot == nullptr )
398         return nullptr;
399 
400 /* -------------------------------------------------------------------- */
401 /*      Setup the file handle on this file.                             */
402 /* -------------------------------------------------------------------- */
403     VSISparseFileHandle *poHandle = new VSISparseFileHandle(this);
404 
405 /* -------------------------------------------------------------------- */
406 /*      Translate the desired fields out of the XML tree.               */
407 /* -------------------------------------------------------------------- */
408     for( CPLXMLNode *psRegion = psXMLRoot->psChild;
409          psRegion != nullptr;
410          psRegion = psRegion->psNext )
411     {
412         if( psRegion->eType != CXT_Element )
413             continue;
414 
415         if( !EQUAL(psRegion->pszValue, "SubfileRegion")
416             && !EQUAL(psRegion->pszValue, "ConstantRegion") )
417             continue;
418 
419         SFRegion oRegion;
420 
421         oRegion.osFilename = CPLGetXMLValue( psRegion, "Filename", "" );
422         if( atoi(CPLGetXMLValue( psRegion, "Filename.relative", "0" )) != 0 )
423         {
424             const CPLString osSFPath = CPLGetPath(osSparseFilePath);
425             oRegion.osFilename = CPLFormFilename( osSFPath,
426                                                   oRegion.osFilename, nullptr );
427         }
428 
429         // TODO(schwehr): Symbolic constant and an explanation for 32.
430         oRegion.nDstOffset =
431             CPLScanUIntBig( CPLGetXMLValue(psRegion, "DestinationOffset", "0"),
432                             32 );
433 
434         oRegion.nSrcOffset =
435             CPLScanUIntBig( CPLGetXMLValue(psRegion, "SourceOffset", "0"), 32);
436 
437         oRegion.nLength =
438             CPLScanUIntBig( CPLGetXMLValue(psRegion, "RegionLength", "0"), 32);
439 
440         oRegion.byValue = static_cast<GByte>(
441             atoi(CPLGetXMLValue(psRegion, "Value", "0")));
442 
443         poHandle->aoRegions.push_back( oRegion );
444     }
445 
446 /* -------------------------------------------------------------------- */
447 /*      Get sparse file length, use maximum bound of regions if not     */
448 /*      explicit in file.                                               */
449 /* -------------------------------------------------------------------- */
450     poHandle->nOverallLength =
451         CPLScanUIntBig( CPLGetXMLValue(psXMLRoot, "Length", "0" ), 32);
452     if( poHandle->nOverallLength == 0 )
453     {
454         for( unsigned int i = 0; i < poHandle->aoRegions.size(); i++ )
455         {
456             poHandle->nOverallLength =
457                 std::max(poHandle->nOverallLength,
458                          poHandle->aoRegions[i].nDstOffset
459                          + poHandle->aoRegions[i].nLength);
460         }
461     }
462 
463     CPLDestroyXMLNode( psXMLRoot );
464 
465     return poHandle;
466 }
467 
468 /************************************************************************/
469 /*                                Stat()                                */
470 /************************************************************************/
471 
Stat(const char * pszFilename,VSIStatBufL * psStatBuf,int nFlags)472 int VSISparseFileFilesystemHandler::Stat( const char * pszFilename,
473                                           VSIStatBufL * psStatBuf,
474                                           int nFlags )
475 
476 {
477     // TODO(schwehr): Fix this so that the using statement is not needed.
478     // Will just adding the bool for bSetError be okay?
479     VSIVirtualHandle *poFile = Open( pszFilename, "r" );
480 
481     memset( psStatBuf, 0, sizeof(VSIStatBufL) );
482 
483     if( poFile == nullptr )
484         return -1;
485 
486     poFile->Seek( 0, SEEK_END );
487     const size_t nLength = static_cast<size_t>(poFile->Tell());
488     delete poFile;
489 
490     const int nResult =
491         VSIStatExL( pszFilename + strlen("/vsisparse/"), psStatBuf, nFlags );
492 
493     psStatBuf->st_size = nLength;
494 
495     return nResult;
496 }
497 
498 /************************************************************************/
499 /*                               Unlink()                               */
500 /************************************************************************/
501 
Unlink(const char *)502 int VSISparseFileFilesystemHandler::Unlink( const char * /* pszFilename */ )
503 {
504     errno = EACCES;
505     return -1;
506 }
507 
508 /************************************************************************/
509 /*                               Mkdir()                                */
510 /************************************************************************/
511 
Mkdir(const char *,long)512 int VSISparseFileFilesystemHandler::Mkdir( const char * /* pszPathname */,
513                                            long /* nMode */ )
514 {
515     errno = EACCES;
516     return -1;
517 }
518 
519 /************************************************************************/
520 /*                               Rmdir()                                */
521 /************************************************************************/
522 
Rmdir(const char *)523 int VSISparseFileFilesystemHandler::Rmdir( const char * /* pszPathname */ )
524 {
525     errno = EACCES;
526     return -1;
527 }
528 
529 /************************************************************************/
530 /*                              ReadDir()                               */
531 /************************************************************************/
532 
ReadDir(const char *)533 char **VSISparseFileFilesystemHandler::ReadDir( const char * /* pszPath */ )
534 {
535     errno = EACCES;
536     return nullptr;
537 }
538 
539 /************************************************************************/
540 /*                 VSIInstallSparseFileFilesystemHandler()              */
541 /************************************************************************/
542 
543 /**
544  * Install /vsisparse/ virtual file handler.
545  *
546  * @see <a href="gdal_virtual_file_systems.html#gdal_virtual_file_systems_vsisparse">/vsisparse/ documentation</a>
547  */
548 
VSIInstallSparseFileHandler()549 void VSIInstallSparseFileHandler()
550 {
551     VSIFileManager::InstallHandler( "/vsisparse/",
552                                     new VSISparseFileFilesystemHandler );
553 }
554