1 /******************************************************************************
2  *
3  * Project:  VSI Virtual File System
4  * Purpose:  Implementation of caching IO layer.
5  * Author:   Frank Warmerdam, warmerdam@pobox.com
6  *
7  ******************************************************************************
8  * Copyright (c) 2011, Frank Warmerdam <warmerdam@pobox.com>
9  * Copyright (c) 2011-2014, Even Rouault <even dot rouault at spatialys.com>
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included
19  * in all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27  * DEALINGS IN THE SOFTWARE.
28  ****************************************************************************/
29 
30 #include "cpl_port.h"
31 #include "cpl_vsi_virtual.h"
32 
33 #include <cstddef>
34 #include <cstring>
35 #if HAVE_FCNTL_H
36 #include <fcntl.h>
37 #endif
38 
39 #include <algorithm>
40 #include <map>
41 #include <memory>
42 #include <utility>
43 
44 #include "cpl_conv.h"
45 #include "cpl_error.h"
46 #include "cpl_vsi.h"
47 #include "cpl_vsi_virtual.h"
48 
49 //! @cond Doxygen_Suppress
50 
51 CPL_CVSID("$Id: cpl_vsil_cache.cpp d4c2d0da2a653242903804163f22189561a0b7e4 2021-09-28 19:24:15 +0200 Even Rouault $")
52 
53 /************************************************************************/
54 /* ==================================================================== */
55 /*                             VSICacheChunk                            */
56 /* ==================================================================== */
57 /************************************************************************/
58 
59 class VSICacheChunk
60 {
61     CPL_DISALLOW_COPY_ASSIGN(VSICacheChunk)
62 
63 public:
64     VSICacheChunk() = default;
65 
~VSICacheChunk()66     virtual ~VSICacheChunk()
67     {
68         VSIFree( pabyData );
69     }
70 
Allocate(size_t nChunkSize)71     bool Allocate( size_t nChunkSize )
72     {
73         CPLAssert( pabyData == nullptr );
74         pabyData = static_cast<GByte *>(VSIMalloc( nChunkSize ));
75         return (pabyData != nullptr);
76     }
77 
78     vsi_l_offset   iBlock = 0;
79 
80     VSICacheChunk *poLRUPrev = nullptr;
81     VSICacheChunk *poLRUNext = nullptr;
82 
83     size_t          nDataFilled = 0;
84     GByte          *pabyData = nullptr;
85 };
86 
87 /************************************************************************/
88 /* ==================================================================== */
89 /*                             VSICachedFile                            */
90 /* ==================================================================== */
91 /************************************************************************/
92 
93 class VSICachedFile final : public VSIVirtualHandle
94 {
95     CPL_DISALLOW_COPY_ASSIGN(VSICachedFile)
96 
97   public:
98     VSICachedFile( VSIVirtualHandle *poBaseHandle,
99                    size_t nChunkSize,
100                    size_t nCacheSize );
~VSICachedFile()101     ~VSICachedFile() override { VSICachedFile::Close(); }
102 
103     void          FlushLRU();
104     int           LoadBlocks( vsi_l_offset nStartBlock, size_t nBlockCount,
105                               void *pBuffer, size_t nBufferSize );
106     void          Demote( VSICacheChunk * );
107 
108     VSIVirtualHandle *poBase = nullptr;
109 
110     vsi_l_offset  nOffset = 0;
111     vsi_l_offset  nFileSize = 0;
112 
113     GUIntBig      nCacheUsed = 0;
114     GUIntBig      nCacheMax = 0;
115 
116     size_t        m_nChunkSize = 0;
117 
118     VSICacheChunk *poLRUStart = nullptr;
119     VSICacheChunk *poLRUEnd = nullptr;
120 
121     std::map<vsi_l_offset, std::unique_ptr<VSICacheChunk>> oMapOffsetToCache{};
122 
123     bool           bEOF = false;
124 
125     int Seek( vsi_l_offset nOffset, int nWhence ) override;
126     vsi_l_offset Tell() override;
127     size_t Read( void *pBuffer, size_t nSize,
128                  size_t nMemb ) override;
129     int ReadMultiRange( int nRanges, void ** ppData,
130                         const vsi_l_offset* panOffsets,
131                         const size_t* panSizes ) override;
132 
133     size_t Write( const void *pBuffer, size_t nSize,
134                   size_t nMemb ) override;
135     int Eof() override;
136     int Flush() override;
137     int Close() override;
GetNativeFileDescriptor()138     void *GetNativeFileDescriptor() override
139         { return poBase->GetNativeFileDescriptor(); }
140 };
141 
142 /************************************************************************/
143 /*                           VSICachedFile()                            */
144 /************************************************************************/
145 
VSICachedFile(VSIVirtualHandle * poBaseHandle,size_t nChunkSize,size_t nCacheSize)146 VSICachedFile::VSICachedFile( VSIVirtualHandle *poBaseHandle, size_t nChunkSize,
147                               size_t nCacheSize ) :
148     poBase(poBaseHandle),
149     nCacheMax(nCacheSize),
150     m_nChunkSize(nChunkSize)
151 {
152     if( nCacheSize == 0 )
153         nCacheMax = CPLScanUIntBig(
154              CPLGetConfigOption( "VSI_CACHE_SIZE", "25000000" ), 40 );
155 
156     poBase->Seek( 0, SEEK_END );
157     nFileSize = poBase->Tell();
158 }
159 
160 /************************************************************************/
161 /*                               Close()                                */
162 /************************************************************************/
163 
Close()164 int VSICachedFile::Close()
165 
166 {
167     oMapOffsetToCache.clear();
168 
169     poLRUStart = nullptr;
170     poLRUEnd = nullptr;
171 
172     nCacheUsed = 0;
173 
174     if( poBase )
175     {
176         poBase->Close();
177         delete poBase;
178         poBase = nullptr;
179     }
180 
181     return 0;
182 }
183 
184 /************************************************************************/
185 /*                                Seek()                                */
186 /************************************************************************/
187 
Seek(vsi_l_offset nReqOffset,int nWhence)188 int VSICachedFile::Seek( vsi_l_offset nReqOffset, int nWhence )
189 
190 {
191     bEOF = false;
192 
193     if( nWhence == SEEK_SET )
194     {
195         // Use offset directly.
196     }
197     else if( nWhence == SEEK_CUR )
198     {
199         nReqOffset += nOffset;
200     }
201     else if( nWhence == SEEK_END )
202     {
203         nReqOffset += nFileSize;
204     }
205 
206     nOffset = nReqOffset;
207 
208     return 0;
209 }
210 
211 /************************************************************************/
212 /*                                Tell()                                */
213 /************************************************************************/
214 
Tell()215 vsi_l_offset VSICachedFile::Tell()
216 
217 {
218     return nOffset;
219 }
220 
221 /************************************************************************/
222 /*                              FlushLRU()                              */
223 /************************************************************************/
224 
FlushLRU()225 void VSICachedFile::FlushLRU()
226 
227 {
228     CPLAssert( poLRUStart != nullptr );
229 
230     VSICacheChunk *poBlock = poLRUStart;
231 
232     CPLAssert( nCacheUsed >= poBlock->nDataFilled );
233 
234     nCacheUsed -= poBlock->nDataFilled;
235 
236     poLRUStart = poBlock->poLRUNext;
237     if( poLRUEnd == poBlock )
238         poLRUEnd = nullptr;
239 
240     if( poBlock->poLRUNext != nullptr )
241         poBlock->poLRUNext->poLRUPrev = nullptr;
242 
243     oMapOffsetToCache.erase(oMapOffsetToCache.find(poBlock->iBlock));
244 }
245 
246 /************************************************************************/
247 /*                               Demote()                               */
248 /*                                                                      */
249 /*      Demote the indicated block to the end of the LRU list.          */
250 /*      Potentially integrate the link into the list if it is not       */
251 /*      already there.                                                  */
252 /************************************************************************/
253 
Demote(VSICacheChunk * poBlock)254 void VSICachedFile::Demote( VSICacheChunk *poBlock )
255 
256 {
257     // Already at end?
258     if( poLRUEnd == poBlock )
259         return;
260 
261     if( poLRUStart == poBlock )
262         poLRUStart = poBlock->poLRUNext;
263 
264     if( poBlock->poLRUPrev != nullptr )
265         poBlock->poLRUPrev->poLRUNext = poBlock->poLRUNext;
266 
267     if( poBlock->poLRUNext != nullptr )
268         poBlock->poLRUNext->poLRUPrev = poBlock->poLRUPrev;
269 
270     poBlock->poLRUNext = nullptr;
271     poBlock->poLRUPrev = nullptr;
272 
273     if( poLRUEnd != nullptr )
274         poLRUEnd->poLRUNext = poBlock;
275 
276     poLRUEnd = poBlock;
277 
278     if( poLRUStart == nullptr )
279         poLRUStart = poBlock;
280 }
281 
282 /************************************************************************/
283 /*                             LoadBlocks()                             */
284 /*                                                                      */
285 /*      Load the desired set of blocks.  Use pBuffer as a temporary     */
286 /*      buffer if it would be helpful.                                  */
287 /*                                                                      */
288 /*  RETURNS: TRUE on success; FALSE on failure.                         */
289 /************************************************************************/
290 
LoadBlocks(vsi_l_offset nStartBlock,size_t nBlockCount,void * pBuffer,size_t nBufferSize)291 int VSICachedFile::LoadBlocks( vsi_l_offset nStartBlock, size_t nBlockCount,
292                                void *pBuffer, size_t nBufferSize )
293 
294 {
295     if( nBlockCount == 0 )
296         return TRUE;
297 
298 /* -------------------------------------------------------------------- */
299 /*      When we want to load only one block, we can directly load it    */
300 /*      into the target buffer with no concern about intermediaries.    */
301 /* -------------------------------------------------------------------- */
302     if( nBlockCount == 1 )
303     {
304         poBase->Seek( static_cast<vsi_l_offset>(nStartBlock) * m_nChunkSize,
305                       SEEK_SET );
306 
307         auto poBlock = std::unique_ptr<VSICacheChunk>(new VSICacheChunk());
308         if( !poBlock || !poBlock->Allocate( m_nChunkSize ) )
309         {
310             return FALSE;
311         }
312 
313         poBlock->iBlock = nStartBlock;
314         poBlock->nDataFilled =
315             poBase->Read( poBlock->pabyData, 1, m_nChunkSize );
316         if( poBlock->nDataFilled == 0 )
317             return FALSE;
318         nCacheUsed += poBlock->nDataFilled;
319 
320         // Merges into the LRU list.
321         Demote( poBlock.get() );
322 
323         oMapOffsetToCache[nStartBlock] = std::move(poBlock);
324 
325         return TRUE;
326     }
327 
328 /* -------------------------------------------------------------------- */
329 /*      If the buffer is quite large but not quite large enough to      */
330 /*      hold all the blocks we will take the pain of splitting the      */
331 /*      io request in two in order to avoid allocating a large          */
332 /*      temporary buffer.                                               */
333 /* -------------------------------------------------------------------- */
334     if( nBufferSize > m_nChunkSize * 20
335         && nBufferSize < nBlockCount * m_nChunkSize )
336     {
337         if( !LoadBlocks( nStartBlock, 2, pBuffer, nBufferSize ) )
338             return FALSE;
339 
340         return LoadBlocks( nStartBlock+2, nBlockCount-2, pBuffer, nBufferSize );
341     }
342 
343     if( poBase->Seek( static_cast<vsi_l_offset>(nStartBlock) * m_nChunkSize,
344                       SEEK_SET ) != 0 )
345         return FALSE;
346 
347 /* -------------------------------------------------------------------- */
348 /*      Do we need to allocate our own buffer?                          */
349 /* -------------------------------------------------------------------- */
350     GByte *pabyWorkBuffer = static_cast<GByte *>(pBuffer);
351 
352     if( nBufferSize < m_nChunkSize * nBlockCount )
353         pabyWorkBuffer =
354             static_cast<GByte *>( CPLMalloc(m_nChunkSize * nBlockCount) );
355 
356 /* -------------------------------------------------------------------- */
357 /*      Read the whole request into the working buffer.                 */
358 /* -------------------------------------------------------------------- */
359 
360     const size_t nDataRead =
361         poBase->Read( pabyWorkBuffer, 1, nBlockCount*m_nChunkSize);
362 
363     if( nBlockCount * m_nChunkSize > nDataRead + m_nChunkSize - 1 )
364         nBlockCount = (nDataRead + m_nChunkSize - 1) / m_nChunkSize;
365 
366     for( size_t i = 0; i < nBlockCount; i++ )
367     {
368         const vsi_l_offset iBlock = nStartBlock + i;
369 
370         auto poBlock = std::unique_ptr<VSICacheChunk>(new VSICacheChunk());
371         if( !poBlock || !poBlock->Allocate( m_nChunkSize ) )
372         {
373             return FALSE;
374         }
375 
376         poBlock->iBlock = iBlock;
377 
378         if( nDataRead >= (i+1) * m_nChunkSize )
379             poBlock->nDataFilled = m_nChunkSize;
380         else
381             poBlock->nDataFilled = nDataRead - i*m_nChunkSize;
382 
383         memcpy( poBlock->pabyData, pabyWorkBuffer + i*m_nChunkSize,
384                 poBlock->nDataFilled );
385 
386         nCacheUsed += poBlock->nDataFilled;
387 
388         // Merges into the LRU list.
389         Demote( poBlock.get() );
390 
391         oMapOffsetToCache[iBlock] = std::move(poBlock);
392     }
393 
394     if( pabyWorkBuffer != pBuffer )
395         CPLFree( pabyWorkBuffer );
396 
397     return TRUE;
398 }
399 
400 /************************************************************************/
401 /*                                Read()                                */
402 /************************************************************************/
403 
Read(void * pBuffer,size_t nSize,size_t nCount)404 size_t VSICachedFile::Read( void * pBuffer, size_t nSize, size_t nCount )
405 
406 {
407     if( nSize == 0 || nCount == 0 )
408         return 0;
409     const size_t nRequestedBytes = nSize * nCount;
410 
411     // nFileSize might be set wrongly to 0 by underlying layers, such as
412     // /vsicurl_streaming/https://query.data.world/s/jgsghstpphjhicstradhy5kpjwrnfy
413     if( nFileSize > 0 && nOffset >= nFileSize )
414     {
415         bEOF = true;
416         return 0;
417     }
418 
419 /* ==================================================================== */
420 /*      Make sure the cache is loaded for the whole request region.     */
421 /* ==================================================================== */
422     const vsi_l_offset nStartBlock = nOffset / m_nChunkSize;
423     const vsi_l_offset nEndBlock =
424         (nOffset + nRequestedBytes - 1) / m_nChunkSize;
425 
426     for( vsi_l_offset iBlock = nStartBlock; iBlock <= nEndBlock; iBlock++ )
427     {
428         auto oIter = oMapOffsetToCache.find(iBlock);
429         if( oIter == oMapOffsetToCache.end() )
430         {
431             size_t nBlocksToLoad = 1;
432             while( iBlock + nBlocksToLoad <= nEndBlock &&
433                    oMapOffsetToCache.find(iBlock + nBlocksToLoad) == oMapOffsetToCache.end() )
434             {
435                 nBlocksToLoad++;
436             }
437 
438             LoadBlocks( iBlock, nBlocksToLoad, pBuffer, nRequestedBytes );
439         }
440     }
441 
442 /* ==================================================================== */
443 /*      Copy data into the target buffer to the extent possible.        */
444 /* ==================================================================== */
445     size_t nAmountCopied = 0;
446 
447     while( nAmountCopied < nRequestedBytes )
448     {
449         const vsi_l_offset iBlock = (nOffset + nAmountCopied) / m_nChunkSize;
450         auto oIter = oMapOffsetToCache.find(iBlock);
451         if( oIter == oMapOffsetToCache.end() )
452         {
453             // We can reach that point when the amount to read exceeds
454             // the cache size.
455             LoadBlocks(iBlock, 1,
456                        static_cast<GByte *>(pBuffer) + nAmountCopied,
457                        std::min(nRequestedBytes - nAmountCopied, m_nChunkSize));
458             oIter = oMapOffsetToCache.find(iBlock);
459             if( oIter == oMapOffsetToCache.end() )
460             {
461                 break;
462             }
463         }
464 
465         VSICacheChunk* poBlock = oIter->second.get();
466         const vsi_l_offset nStartOffset =
467             static_cast<vsi_l_offset>(iBlock) * m_nChunkSize;
468         if(nStartOffset + poBlock->nDataFilled < nAmountCopied + nOffset)
469             break;
470         const size_t nThisCopy = std::min(
471             nRequestedBytes - nAmountCopied,
472             static_cast<size_t>(
473                 ((nStartOffset + poBlock->nDataFilled)
474                  - nAmountCopied - nOffset)));
475         if( nThisCopy == 0 )
476             break;
477 
478         memcpy( static_cast<GByte *>(pBuffer) + nAmountCopied,
479                 poBlock->pabyData
480                 + (nOffset + nAmountCopied) - nStartOffset,
481                 nThisCopy );
482 
483         nAmountCopied += nThisCopy;
484     }
485 
486     nOffset += nAmountCopied;
487 
488 /* -------------------------------------------------------------------- */
489 /*      Ensure the cache is reduced to our limit.                       */
490 /* -------------------------------------------------------------------- */
491     while( nCacheUsed > nCacheMax )
492         FlushLRU();
493 
494     const size_t nRet = nAmountCopied / nSize;
495     if( nRet != nCount )
496         bEOF = true;
497     return nRet;
498 }
499 
500 /************************************************************************/
501 /*                           ReadMultiRange()                           */
502 /************************************************************************/
503 
ReadMultiRange(int const nRanges,void ** const ppData,const vsi_l_offset * const panOffsets,const size_t * const panSizes)504 int VSICachedFile::ReadMultiRange( int const nRanges, void ** const ppData,
505                                    const vsi_l_offset* const panOffsets,
506                                    const size_t* const panSizes )
507 {
508     // If the base is /vsicurl/
509     return poBase->ReadMultiRange( nRanges, ppData, panOffsets, panSizes );
510 }
511 
512 /************************************************************************/
513 /*                               Write()                                */
514 /************************************************************************/
515 
Write(const void *,size_t,size_t)516 size_t VSICachedFile::Write( const void * /* pBuffer */,
517                              size_t /*nSize */ ,
518                              size_t /* nCount */ )
519 {
520     return 0;
521 }
522 
523 /************************************************************************/
524 /*                                Eof()                                 */
525 /************************************************************************/
526 
Eof()527 int VSICachedFile::Eof()
528 
529 {
530     return bEOF;
531 }
532 
533 /************************************************************************/
534 /*                               Flush()                                */
535 /************************************************************************/
536 
Flush()537 int VSICachedFile::Flush()
538 
539 {
540     return 0;
541 }
542 
543 //! @endcond
544 
545 /************************************************************************/
546 /*                        VSICreateCachedFile()                         */
547 /************************************************************************/
548 
549 VSIVirtualHandle *
VSICreateCachedFile(VSIVirtualHandle * poBaseHandle,size_t nChunkSize,size_t nCacheSize)550 VSICreateCachedFile( VSIVirtualHandle *poBaseHandle,
551                      size_t nChunkSize, size_t nCacheSize )
552 
553 {
554     return new VSICachedFile( poBaseHandle, nChunkSize, nCacheSize );
555 }
556