1 /******************************************************************************
2 *
3 * Project: VSI Virtual File System
4 * Purpose: Implementation of caching IO layer.
5 * Author: Frank Warmerdam, warmerdam@pobox.com
6 *
7 ******************************************************************************
8 * Copyright (c) 2011, Frank Warmerdam <warmerdam@pobox.com>
9 * Copyright (c) 2011-2014, Even Rouault <even dot rouault at spatialys.com>
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included
19 * in all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
22 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27 * DEALINGS IN THE SOFTWARE.
28 ****************************************************************************/
29
30 #include "cpl_port.h"
31 #include "cpl_vsi_virtual.h"
32
33 #include <cstddef>
34 #include <cstring>
35 #if HAVE_FCNTL_H
36 #include <fcntl.h>
37 #endif
38
39 #include <algorithm>
40 #include <map>
41 #include <memory>
42 #include <utility>
43
44 #include "cpl_conv.h"
45 #include "cpl_error.h"
46 #include "cpl_vsi.h"
47 #include "cpl_vsi_virtual.h"
48
49 //! @cond Doxygen_Suppress
50
51 CPL_CVSID("$Id: cpl_vsil_cache.cpp d4c2d0da2a653242903804163f22189561a0b7e4 2021-09-28 19:24:15 +0200 Even Rouault $")
52
53 /************************************************************************/
54 /* ==================================================================== */
55 /* VSICacheChunk */
56 /* ==================================================================== */
57 /************************************************************************/
58
59 class VSICacheChunk
60 {
61 CPL_DISALLOW_COPY_ASSIGN(VSICacheChunk)
62
63 public:
64 VSICacheChunk() = default;
65
~VSICacheChunk()66 virtual ~VSICacheChunk()
67 {
68 VSIFree( pabyData );
69 }
70
Allocate(size_t nChunkSize)71 bool Allocate( size_t nChunkSize )
72 {
73 CPLAssert( pabyData == nullptr );
74 pabyData = static_cast<GByte *>(VSIMalloc( nChunkSize ));
75 return (pabyData != nullptr);
76 }
77
78 vsi_l_offset iBlock = 0;
79
80 VSICacheChunk *poLRUPrev = nullptr;
81 VSICacheChunk *poLRUNext = nullptr;
82
83 size_t nDataFilled = 0;
84 GByte *pabyData = nullptr;
85 };
86
87 /************************************************************************/
88 /* ==================================================================== */
89 /* VSICachedFile */
90 /* ==================================================================== */
91 /************************************************************************/
92
93 class VSICachedFile final : public VSIVirtualHandle
94 {
95 CPL_DISALLOW_COPY_ASSIGN(VSICachedFile)
96
97 public:
98 VSICachedFile( VSIVirtualHandle *poBaseHandle,
99 size_t nChunkSize,
100 size_t nCacheSize );
~VSICachedFile()101 ~VSICachedFile() override { VSICachedFile::Close(); }
102
103 void FlushLRU();
104 int LoadBlocks( vsi_l_offset nStartBlock, size_t nBlockCount,
105 void *pBuffer, size_t nBufferSize );
106 void Demote( VSICacheChunk * );
107
108 VSIVirtualHandle *poBase = nullptr;
109
110 vsi_l_offset nOffset = 0;
111 vsi_l_offset nFileSize = 0;
112
113 GUIntBig nCacheUsed = 0;
114 GUIntBig nCacheMax = 0;
115
116 size_t m_nChunkSize = 0;
117
118 VSICacheChunk *poLRUStart = nullptr;
119 VSICacheChunk *poLRUEnd = nullptr;
120
121 std::map<vsi_l_offset, std::unique_ptr<VSICacheChunk>> oMapOffsetToCache{};
122
123 bool bEOF = false;
124
125 int Seek( vsi_l_offset nOffset, int nWhence ) override;
126 vsi_l_offset Tell() override;
127 size_t Read( void *pBuffer, size_t nSize,
128 size_t nMemb ) override;
129 int ReadMultiRange( int nRanges, void ** ppData,
130 const vsi_l_offset* panOffsets,
131 const size_t* panSizes ) override;
132
133 size_t Write( const void *pBuffer, size_t nSize,
134 size_t nMemb ) override;
135 int Eof() override;
136 int Flush() override;
137 int Close() override;
GetNativeFileDescriptor()138 void *GetNativeFileDescriptor() override
139 { return poBase->GetNativeFileDescriptor(); }
140 };
141
142 /************************************************************************/
143 /* VSICachedFile() */
144 /************************************************************************/
145
VSICachedFile(VSIVirtualHandle * poBaseHandle,size_t nChunkSize,size_t nCacheSize)146 VSICachedFile::VSICachedFile( VSIVirtualHandle *poBaseHandle, size_t nChunkSize,
147 size_t nCacheSize ) :
148 poBase(poBaseHandle),
149 nCacheMax(nCacheSize),
150 m_nChunkSize(nChunkSize)
151 {
152 if( nCacheSize == 0 )
153 nCacheMax = CPLScanUIntBig(
154 CPLGetConfigOption( "VSI_CACHE_SIZE", "25000000" ), 40 );
155
156 poBase->Seek( 0, SEEK_END );
157 nFileSize = poBase->Tell();
158 }
159
160 /************************************************************************/
161 /* Close() */
162 /************************************************************************/
163
Close()164 int VSICachedFile::Close()
165
166 {
167 oMapOffsetToCache.clear();
168
169 poLRUStart = nullptr;
170 poLRUEnd = nullptr;
171
172 nCacheUsed = 0;
173
174 if( poBase )
175 {
176 poBase->Close();
177 delete poBase;
178 poBase = nullptr;
179 }
180
181 return 0;
182 }
183
184 /************************************************************************/
185 /* Seek() */
186 /************************************************************************/
187
Seek(vsi_l_offset nReqOffset,int nWhence)188 int VSICachedFile::Seek( vsi_l_offset nReqOffset, int nWhence )
189
190 {
191 bEOF = false;
192
193 if( nWhence == SEEK_SET )
194 {
195 // Use offset directly.
196 }
197 else if( nWhence == SEEK_CUR )
198 {
199 nReqOffset += nOffset;
200 }
201 else if( nWhence == SEEK_END )
202 {
203 nReqOffset += nFileSize;
204 }
205
206 nOffset = nReqOffset;
207
208 return 0;
209 }
210
211 /************************************************************************/
212 /* Tell() */
213 /************************************************************************/
214
Tell()215 vsi_l_offset VSICachedFile::Tell()
216
217 {
218 return nOffset;
219 }
220
221 /************************************************************************/
222 /* FlushLRU() */
223 /************************************************************************/
224
FlushLRU()225 void VSICachedFile::FlushLRU()
226
227 {
228 CPLAssert( poLRUStart != nullptr );
229
230 VSICacheChunk *poBlock = poLRUStart;
231
232 CPLAssert( nCacheUsed >= poBlock->nDataFilled );
233
234 nCacheUsed -= poBlock->nDataFilled;
235
236 poLRUStart = poBlock->poLRUNext;
237 if( poLRUEnd == poBlock )
238 poLRUEnd = nullptr;
239
240 if( poBlock->poLRUNext != nullptr )
241 poBlock->poLRUNext->poLRUPrev = nullptr;
242
243 oMapOffsetToCache.erase(oMapOffsetToCache.find(poBlock->iBlock));
244 }
245
246 /************************************************************************/
247 /* Demote() */
248 /* */
249 /* Demote the indicated block to the end of the LRU list. */
250 /* Potentially integrate the link into the list if it is not */
251 /* already there. */
252 /************************************************************************/
253
Demote(VSICacheChunk * poBlock)254 void VSICachedFile::Demote( VSICacheChunk *poBlock )
255
256 {
257 // Already at end?
258 if( poLRUEnd == poBlock )
259 return;
260
261 if( poLRUStart == poBlock )
262 poLRUStart = poBlock->poLRUNext;
263
264 if( poBlock->poLRUPrev != nullptr )
265 poBlock->poLRUPrev->poLRUNext = poBlock->poLRUNext;
266
267 if( poBlock->poLRUNext != nullptr )
268 poBlock->poLRUNext->poLRUPrev = poBlock->poLRUPrev;
269
270 poBlock->poLRUNext = nullptr;
271 poBlock->poLRUPrev = nullptr;
272
273 if( poLRUEnd != nullptr )
274 poLRUEnd->poLRUNext = poBlock;
275
276 poLRUEnd = poBlock;
277
278 if( poLRUStart == nullptr )
279 poLRUStart = poBlock;
280 }
281
282 /************************************************************************/
283 /* LoadBlocks() */
284 /* */
285 /* Load the desired set of blocks. Use pBuffer as a temporary */
286 /* buffer if it would be helpful. */
287 /* */
288 /* RETURNS: TRUE on success; FALSE on failure. */
289 /************************************************************************/
290
LoadBlocks(vsi_l_offset nStartBlock,size_t nBlockCount,void * pBuffer,size_t nBufferSize)291 int VSICachedFile::LoadBlocks( vsi_l_offset nStartBlock, size_t nBlockCount,
292 void *pBuffer, size_t nBufferSize )
293
294 {
295 if( nBlockCount == 0 )
296 return TRUE;
297
298 /* -------------------------------------------------------------------- */
299 /* When we want to load only one block, we can directly load it */
300 /* into the target buffer with no concern about intermediaries. */
301 /* -------------------------------------------------------------------- */
302 if( nBlockCount == 1 )
303 {
304 poBase->Seek( static_cast<vsi_l_offset>(nStartBlock) * m_nChunkSize,
305 SEEK_SET );
306
307 auto poBlock = std::unique_ptr<VSICacheChunk>(new VSICacheChunk());
308 if( !poBlock || !poBlock->Allocate( m_nChunkSize ) )
309 {
310 return FALSE;
311 }
312
313 poBlock->iBlock = nStartBlock;
314 poBlock->nDataFilled =
315 poBase->Read( poBlock->pabyData, 1, m_nChunkSize );
316 if( poBlock->nDataFilled == 0 )
317 return FALSE;
318 nCacheUsed += poBlock->nDataFilled;
319
320 // Merges into the LRU list.
321 Demote( poBlock.get() );
322
323 oMapOffsetToCache[nStartBlock] = std::move(poBlock);
324
325 return TRUE;
326 }
327
328 /* -------------------------------------------------------------------- */
329 /* If the buffer is quite large but not quite large enough to */
330 /* hold all the blocks we will take the pain of splitting the */
331 /* io request in two in order to avoid allocating a large */
332 /* temporary buffer. */
333 /* -------------------------------------------------------------------- */
334 if( nBufferSize > m_nChunkSize * 20
335 && nBufferSize < nBlockCount * m_nChunkSize )
336 {
337 if( !LoadBlocks( nStartBlock, 2, pBuffer, nBufferSize ) )
338 return FALSE;
339
340 return LoadBlocks( nStartBlock+2, nBlockCount-2, pBuffer, nBufferSize );
341 }
342
343 if( poBase->Seek( static_cast<vsi_l_offset>(nStartBlock) * m_nChunkSize,
344 SEEK_SET ) != 0 )
345 return FALSE;
346
347 /* -------------------------------------------------------------------- */
348 /* Do we need to allocate our own buffer? */
349 /* -------------------------------------------------------------------- */
350 GByte *pabyWorkBuffer = static_cast<GByte *>(pBuffer);
351
352 if( nBufferSize < m_nChunkSize * nBlockCount )
353 pabyWorkBuffer =
354 static_cast<GByte *>( CPLMalloc(m_nChunkSize * nBlockCount) );
355
356 /* -------------------------------------------------------------------- */
357 /* Read the whole request into the working buffer. */
358 /* -------------------------------------------------------------------- */
359
360 const size_t nDataRead =
361 poBase->Read( pabyWorkBuffer, 1, nBlockCount*m_nChunkSize);
362
363 if( nBlockCount * m_nChunkSize > nDataRead + m_nChunkSize - 1 )
364 nBlockCount = (nDataRead + m_nChunkSize - 1) / m_nChunkSize;
365
366 for( size_t i = 0; i < nBlockCount; i++ )
367 {
368 const vsi_l_offset iBlock = nStartBlock + i;
369
370 auto poBlock = std::unique_ptr<VSICacheChunk>(new VSICacheChunk());
371 if( !poBlock || !poBlock->Allocate( m_nChunkSize ) )
372 {
373 return FALSE;
374 }
375
376 poBlock->iBlock = iBlock;
377
378 if( nDataRead >= (i+1) * m_nChunkSize )
379 poBlock->nDataFilled = m_nChunkSize;
380 else
381 poBlock->nDataFilled = nDataRead - i*m_nChunkSize;
382
383 memcpy( poBlock->pabyData, pabyWorkBuffer + i*m_nChunkSize,
384 poBlock->nDataFilled );
385
386 nCacheUsed += poBlock->nDataFilled;
387
388 // Merges into the LRU list.
389 Demote( poBlock.get() );
390
391 oMapOffsetToCache[iBlock] = std::move(poBlock);
392 }
393
394 if( pabyWorkBuffer != pBuffer )
395 CPLFree( pabyWorkBuffer );
396
397 return TRUE;
398 }
399
400 /************************************************************************/
401 /* Read() */
402 /************************************************************************/
403
Read(void * pBuffer,size_t nSize,size_t nCount)404 size_t VSICachedFile::Read( void * pBuffer, size_t nSize, size_t nCount )
405
406 {
407 if( nSize == 0 || nCount == 0 )
408 return 0;
409 const size_t nRequestedBytes = nSize * nCount;
410
411 // nFileSize might be set wrongly to 0 by underlying layers, such as
412 // /vsicurl_streaming/https://query.data.world/s/jgsghstpphjhicstradhy5kpjwrnfy
413 if( nFileSize > 0 && nOffset >= nFileSize )
414 {
415 bEOF = true;
416 return 0;
417 }
418
419 /* ==================================================================== */
420 /* Make sure the cache is loaded for the whole request region. */
421 /* ==================================================================== */
422 const vsi_l_offset nStartBlock = nOffset / m_nChunkSize;
423 const vsi_l_offset nEndBlock =
424 (nOffset + nRequestedBytes - 1) / m_nChunkSize;
425
426 for( vsi_l_offset iBlock = nStartBlock; iBlock <= nEndBlock; iBlock++ )
427 {
428 auto oIter = oMapOffsetToCache.find(iBlock);
429 if( oIter == oMapOffsetToCache.end() )
430 {
431 size_t nBlocksToLoad = 1;
432 while( iBlock + nBlocksToLoad <= nEndBlock &&
433 oMapOffsetToCache.find(iBlock + nBlocksToLoad) == oMapOffsetToCache.end() )
434 {
435 nBlocksToLoad++;
436 }
437
438 LoadBlocks( iBlock, nBlocksToLoad, pBuffer, nRequestedBytes );
439 }
440 }
441
442 /* ==================================================================== */
443 /* Copy data into the target buffer to the extent possible. */
444 /* ==================================================================== */
445 size_t nAmountCopied = 0;
446
447 while( nAmountCopied < nRequestedBytes )
448 {
449 const vsi_l_offset iBlock = (nOffset + nAmountCopied) / m_nChunkSize;
450 auto oIter = oMapOffsetToCache.find(iBlock);
451 if( oIter == oMapOffsetToCache.end() )
452 {
453 // We can reach that point when the amount to read exceeds
454 // the cache size.
455 LoadBlocks(iBlock, 1,
456 static_cast<GByte *>(pBuffer) + nAmountCopied,
457 std::min(nRequestedBytes - nAmountCopied, m_nChunkSize));
458 oIter = oMapOffsetToCache.find(iBlock);
459 if( oIter == oMapOffsetToCache.end() )
460 {
461 break;
462 }
463 }
464
465 VSICacheChunk* poBlock = oIter->second.get();
466 const vsi_l_offset nStartOffset =
467 static_cast<vsi_l_offset>(iBlock) * m_nChunkSize;
468 if(nStartOffset + poBlock->nDataFilled < nAmountCopied + nOffset)
469 break;
470 const size_t nThisCopy = std::min(
471 nRequestedBytes - nAmountCopied,
472 static_cast<size_t>(
473 ((nStartOffset + poBlock->nDataFilled)
474 - nAmountCopied - nOffset)));
475 if( nThisCopy == 0 )
476 break;
477
478 memcpy( static_cast<GByte *>(pBuffer) + nAmountCopied,
479 poBlock->pabyData
480 + (nOffset + nAmountCopied) - nStartOffset,
481 nThisCopy );
482
483 nAmountCopied += nThisCopy;
484 }
485
486 nOffset += nAmountCopied;
487
488 /* -------------------------------------------------------------------- */
489 /* Ensure the cache is reduced to our limit. */
490 /* -------------------------------------------------------------------- */
491 while( nCacheUsed > nCacheMax )
492 FlushLRU();
493
494 const size_t nRet = nAmountCopied / nSize;
495 if( nRet != nCount )
496 bEOF = true;
497 return nRet;
498 }
499
500 /************************************************************************/
501 /* ReadMultiRange() */
502 /************************************************************************/
503
ReadMultiRange(int const nRanges,void ** const ppData,const vsi_l_offset * const panOffsets,const size_t * const panSizes)504 int VSICachedFile::ReadMultiRange( int const nRanges, void ** const ppData,
505 const vsi_l_offset* const panOffsets,
506 const size_t* const panSizes )
507 {
508 // If the base is /vsicurl/
509 return poBase->ReadMultiRange( nRanges, ppData, panOffsets, panSizes );
510 }
511
512 /************************************************************************/
513 /* Write() */
514 /************************************************************************/
515
Write(const void *,size_t,size_t)516 size_t VSICachedFile::Write( const void * /* pBuffer */,
517 size_t /*nSize */ ,
518 size_t /* nCount */ )
519 {
520 return 0;
521 }
522
523 /************************************************************************/
524 /* Eof() */
525 /************************************************************************/
526
Eof()527 int VSICachedFile::Eof()
528
529 {
530 return bEOF;
531 }
532
533 /************************************************************************/
534 /* Flush() */
535 /************************************************************************/
536
Flush()537 int VSICachedFile::Flush()
538
539 {
540 return 0;
541 }
542
543 //! @endcond
544
545 /************************************************************************/
546 /* VSICreateCachedFile() */
547 /************************************************************************/
548
549 VSIVirtualHandle *
VSICreateCachedFile(VSIVirtualHandle * poBaseHandle,size_t nChunkSize,size_t nCacheSize)550 VSICreateCachedFile( VSIVirtualHandle *poBaseHandle,
551 size_t nChunkSize, size_t nCacheSize )
552
553 {
554 return new VSICachedFile( poBaseHandle, nChunkSize, nCacheSize );
555 }
556