1 /******************************************************************************
2  *
3  * Project:  VSI Virtual File System
4  * Purpose:  Implementation of buffered reader IO functions.
5  * Author:   Even Rouault, even.rouault at spatialys.com
6  *
7  ******************************************************************************
8  * Copyright (c) 2010-2011, Even Rouault <even dot rouault at spatialys.com>
9  *
10  * Permission is hereby granted, free of charge, to any person obtaining a
11  * copy of this software and associated documentation files (the "Software"),
12  * to deal in the Software without restriction, including without limitation
13  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14  * and/or sell copies of the Software, and to permit persons to whom the
15  * Software is furnished to do so, subject to the following conditions:
16  *
17  * The above copyright notice and this permission notice shall be included
18  * in all copies or substantial portions of the Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  ****************************************************************************/
28 
29 //! @cond Doxygen_Suppress
30 
31 // The intent of this class is to be a wrapper around an underlying virtual
32 // handle and add very basic caching of last read bytes, so that a backward
33 // seek of a few bytes doesn't require a seek on the underlying virtual handle.
// This enables us to dramatically improve the performance of CPLReadLine2L()
// on a gzip file.
36 
37 #include "cpl_port.h"
38 #include "cpl_vsi_virtual.h"
39 
40 #include <cstddef>
41 #include <cstring>
42 #if HAVE_FCNTL_H
43 #  include <fcntl.h>
44 #endif
45 
46 #include <algorithm>
47 #include <vector>
48 
49 #include "cpl_conv.h"
50 #include "cpl_error.h"
51 #include "cpl_vsi.h"
52 
// Size in bytes (64 KiB) of the default cache allocation, and upper bound on
// the number of most recently read bytes retained after each Read().
constexpr int MAX_BUFFER_SIZE = 64 * 1024;
54 
// Source revision identifier of this file, handed to the CPL_CVSID macro
// (presumably provided by cpl_port.h -- confirm).
CPL_CVSID("$Id: cpl_vsil_buffered_reader.cpp b1c9c12ad373e40b955162b45d704070d4ebf7b0 2019-06-19 16:50:15 +0200 Even Rouault $")
56 
57 class VSIBufferedReaderHandle final : public VSIVirtualHandle
58 {
59     CPL_DISALLOW_COPY_ASSIGN(VSIBufferedReaderHandle)
60 
61     VSIVirtualHandle* m_poBaseHandle = nullptr;
62     GByte*            pabyBuffer = nullptr;
63     GUIntBig          nBufferOffset = 0;
64     int               nBufferSize = 0;
65     GUIntBig          nCurOffset = 0;
66     bool              bNeedBaseHandleSeek = false;
67     bool              bEOF =  false;
68     vsi_l_offset      nCheatFileSize = 0;
69 
70     int               SeekBaseTo( vsi_l_offset nTargetOffset );
71 
72   public:
73     explicit VSIBufferedReaderHandle( VSIVirtualHandle* poBaseHandle );
74     VSIBufferedReaderHandle( VSIVirtualHandle* poBaseHandle,
75                              const GByte* pabyBeginningContent,
76                              vsi_l_offset nCheatFileSizeIn );
77     // TODO(schwehr): Add override when support dropped for VS2008.
78     ~VSIBufferedReaderHandle() override;
79 
80     int Seek( vsi_l_offset nOffset, int nWhence ) override;
81     vsi_l_offset Tell() override;
82     size_t Read( void *pBuffer, size_t nSize,
83                  size_t nMemb ) override;
84     size_t Write( const void *pBuffer, size_t nSize,
85                   size_t nMemb ) override;
86     int Eof() override;
87     int Flush() override;
88     int Close() override;
89 };
90 
91 //! @endcond
92 
93 /************************************************************************/
94 /*                    VSICreateBufferedReaderHandle()                   */
95 /************************************************************************/
96 
97 VSIVirtualHandle *
VSICreateBufferedReaderHandle(VSIVirtualHandle * poBaseHandle)98 VSICreateBufferedReaderHandle( VSIVirtualHandle* poBaseHandle )
99 {
100     return new VSIBufferedReaderHandle(poBaseHandle);
101 }
102 
VSICreateBufferedReaderHandle(VSIVirtualHandle * poBaseHandle,const GByte * pabyBeginningContent,vsi_l_offset nCheatFileSizeIn)103 VSIVirtualHandle* VSICreateBufferedReaderHandle(
104     VSIVirtualHandle* poBaseHandle,
105     const GByte* pabyBeginningContent,
106     vsi_l_offset nCheatFileSizeIn )
107 {
108     return new VSIBufferedReaderHandle(poBaseHandle,
109                                        pabyBeginningContent,
110                                        nCheatFileSizeIn);
111 }
112 
113 //! @cond Doxygen_Suppress
114 
115 /************************************************************************/
116 /*                        VSIBufferedReaderHandle()                     */
117 /************************************************************************/
118 
VSIBufferedReaderHandle(VSIVirtualHandle * poBaseHandle)119 VSIBufferedReaderHandle::VSIBufferedReaderHandle(
120     VSIVirtualHandle* poBaseHandle) :
121     m_poBaseHandle(poBaseHandle),
122     pabyBuffer(static_cast<GByte*>(CPLMalloc(MAX_BUFFER_SIZE)))
123 {}
124 
VSIBufferedReaderHandle(VSIVirtualHandle * poBaseHandle,const GByte * pabyBeginningContent,vsi_l_offset nCheatFileSizeIn)125 VSIBufferedReaderHandle::VSIBufferedReaderHandle(
126     VSIVirtualHandle* poBaseHandle,
127     const GByte* pabyBeginningContent,
128     vsi_l_offset nCheatFileSizeIn ) :
129     m_poBaseHandle(poBaseHandle),
130     pabyBuffer(static_cast<GByte *>(
131         CPLMalloc(std::max(MAX_BUFFER_SIZE,
132                            static_cast<int>(poBaseHandle->Tell()))))),
133     nBufferOffset(0),
134     nBufferSize(static_cast<int>(poBaseHandle->Tell())),
135     nCurOffset(0),
136     bNeedBaseHandleSeek(true),
137     bEOF(false),
138     nCheatFileSize(nCheatFileSizeIn)
139 {
140     memcpy(pabyBuffer, pabyBeginningContent, nBufferSize);
141 }
142 
143 /************************************************************************/
144 /*                        ~VSIBufferedReaderHandle()                    */
145 /************************************************************************/
146 
~VSIBufferedReaderHandle()147 VSIBufferedReaderHandle::~VSIBufferedReaderHandle()
148 {
149     delete m_poBaseHandle;
150     CPLFree(pabyBuffer);
151 }
152 
153 /************************************************************************/
154 /*                               Seek()                                 */
155 /************************************************************************/
156 
Seek(vsi_l_offset nOffset,int nWhence)157 int VSIBufferedReaderHandle::Seek( vsi_l_offset nOffset, int nWhence )
158 {
159 #ifdef DEBUG_VERBOSE
160     CPLDebug( "BUFFERED", "Seek(%d,%d)",
161               static_cast<int>(nOffset), static_cast<int>(nWhence) );
162 #endif
163     bEOF = false;
164     int ret = 0;
165     if( nWhence == SEEK_CUR )
166     {
167         nCurOffset += nOffset;
168     }
169     else if( nWhence == SEEK_END )
170     {
171         if( nCheatFileSize )
172         {
173             nCurOffset = nCheatFileSize;
174         }
175         else
176         {
177             ret = m_poBaseHandle->Seek(nOffset, nWhence);
178             nCurOffset = m_poBaseHandle->Tell();
179             bNeedBaseHandleSeek = true;
180         }
181     }
182     else
183     {
184         nCurOffset = nOffset;
185     }
186 
187     return ret;
188 }
189 
190 /************************************************************************/
191 /*                               Tell()                                 */
192 /************************************************************************/
193 
Tell()194 vsi_l_offset VSIBufferedReaderHandle::Tell()
195 {
196 #ifdef DEBUG_VERBOSE
197     CPLDebug( "BUFFERED", "Tell() = %d", static_cast<int>(nCurOffset));
198 #endif
199     return nCurOffset;
200 }
201 
202 /************************************************************************/
203 /*                           SeekBaseTo()                               */
204 /************************************************************************/
205 
SeekBaseTo(vsi_l_offset nTargetOffset)206 int VSIBufferedReaderHandle::SeekBaseTo( vsi_l_offset nTargetOffset )
207 {
208     if( m_poBaseHandle->Seek(nTargetOffset, SEEK_SET) == 0 )
209         return TRUE;
210 
211     nCurOffset = m_poBaseHandle->Tell();
212     if( nCurOffset > nTargetOffset )
213         return FALSE;
214 
215     const vsi_l_offset nMaxOffset = 8192;
216 
217     std::vector<char> oTemp(nMaxOffset, 0);
218     char *pabyTemp = &oTemp[0];
219 
220     while( true )
221     {
222         const size_t nToRead = static_cast<size_t>(
223             std::min(nMaxOffset, nTargetOffset - nCurOffset));
224         const size_t nRead = m_poBaseHandle->Read(pabyTemp, 1, nToRead);
225 
226         nCurOffset += nRead;
227 
228         if( nRead < nToRead )
229         {
230             bEOF = true;
231             return FALSE;
232         }
233         if( nToRead < nMaxOffset )
234             break;
235     }
236     return TRUE;
237 }
238 
239 /************************************************************************/
240 /*                               Read()                                 */
241 /************************************************************************/
242 
Read(void * pBuffer,size_t nSize,size_t nMemb)243 size_t VSIBufferedReaderHandle::Read( void *pBuffer, size_t nSize,
244                                       size_t nMemb )
245 {
246     const size_t nTotalToRead = nSize * nMemb;
247 #ifdef DEBUG_VERBOSE
248     CPLDebug( "BUFFERED", "Read(%d)", static_cast<int>(nTotalToRead));
249 #endif
250 
251     if( nSize == 0 )
252         return 0;
253 
254     if( nBufferSize != 0 &&
255         nCurOffset >= nBufferOffset &&
256         nCurOffset <= nBufferOffset + nBufferSize )
257     {
258         // We try to read from an offset located within the buffer.
259         const size_t nReadInBuffer =
260             static_cast<size_t>(
261                 std::min(nTotalToRead,
262                          static_cast<size_t>(nBufferOffset + nBufferSize -
263                                              nCurOffset)));
264         memcpy(pBuffer, pabyBuffer + nCurOffset - nBufferOffset, nReadInBuffer);
265         const size_t nToReadInFile = nTotalToRead - nReadInBuffer;
266         if( nToReadInFile > 0 )
267         {
268             // The beginning of the data to read is located in the buffer
269             // but the end must be read from the file.
270             if( bNeedBaseHandleSeek )
271             {
272                 if( !SeekBaseTo(nBufferOffset + nBufferSize) )
273                 {
274                     nCurOffset += nReadInBuffer;
275                     return nReadInBuffer / nSize;
276                 }
277             }
278             bNeedBaseHandleSeek = false;
279 #ifdef DEBUG_VERBOSE
280             CPLAssert(m_poBaseHandle->Tell() == nBufferOffset + nBufferSize);
281 #endif
282 
283             const size_t nReadInFile =
284                 m_poBaseHandle->Read(
285                     static_cast<GByte *>(pBuffer) + nReadInBuffer,
286                     1, nToReadInFile);
287             const size_t nRead = nReadInBuffer + nReadInFile;
288 
289             nBufferSize = static_cast<int>(
290                 std::min(nRead, static_cast<size_t>(MAX_BUFFER_SIZE)));
291             nBufferOffset = nCurOffset + nRead - nBufferSize;
292             memcpy(pabyBuffer,
293                    static_cast<GByte *>(pBuffer) + nRead - nBufferSize,
294                    nBufferSize);
295 
296             nCurOffset += nRead;
297 #ifdef DEBUG_VERBOSE
298             CPLAssert(m_poBaseHandle->Tell() == nBufferOffset + nBufferSize);
299             CPLAssert(m_poBaseHandle->Tell() == nCurOffset);
300 #endif
301 
302             bEOF = CPL_TO_BOOL(m_poBaseHandle->Eof());
303 
304             return nRead / nSize;
305         }
306         else
307         {
308             // The data to read is completely located within the buffer.
309             nCurOffset += nTotalToRead;
310             return nTotalToRead / nSize;
311         }
312     }
313     else
314     {
315         // We try either to read before or after the buffer, so a seek is
316         // necessary.
317         if( !SeekBaseTo(nCurOffset) )
318             return 0;
319         bNeedBaseHandleSeek = false;
320         const size_t nReadInFile =
321             m_poBaseHandle->Read(pBuffer, 1, nTotalToRead);
322         nBufferSize = static_cast<int>(
323             std::min(nReadInFile, static_cast<size_t>(MAX_BUFFER_SIZE)));
324         nBufferOffset = nCurOffset + nReadInFile - nBufferSize;
325         memcpy(pabyBuffer,
326                static_cast<GByte *>(pBuffer) + nReadInFile - nBufferSize,
327                nBufferSize);
328 
329         nCurOffset += nReadInFile;
330 #ifdef DEBUG_VERBOSE
331         CPLAssert(m_poBaseHandle->Tell() == nBufferOffset + nBufferSize);
332         CPLAssert(m_poBaseHandle->Tell() == nCurOffset);
333 #endif
334 
335         bEOF = CPL_TO_BOOL(m_poBaseHandle->Eof());
336 
337         return nReadInFile / nSize;
338     }
339 }
340 
341 /************************************************************************/
342 /*                              Write()                                 */
343 /************************************************************************/
344 
Write(const void *,size_t,size_t)345 size_t VSIBufferedReaderHandle::Write( const void * /* pBuffer */,
346                                        size_t /* nSize */,
347                                        size_t /* nMemb */)
348 {
349     CPLError(CE_Failure, CPLE_NotSupported,
350              "VSIFWriteL is not supported on buffer reader streams");
351     return 0;
352 }
353 
354 /************************************************************************/
355 /*                               Eof()                                  */
356 /************************************************************************/
357 
Eof()358 int VSIBufferedReaderHandle::Eof()
359 {
360     return bEOF;
361 }
362 
363 /************************************************************************/
364 /*                              Flush()                                 */
365 /************************************************************************/
366 
Flush()367 int VSIBufferedReaderHandle::Flush()
368 {
369     return 0;
370 }
371 
372 /************************************************************************/
373 /*                              Close()                                 */
374 /************************************************************************/
375 
Close()376 int VSIBufferedReaderHandle::Close()
377 {
378     if( m_poBaseHandle )
379     {
380         m_poBaseHandle->Close();
381         delete m_poBaseHandle;
382         m_poBaseHandle = nullptr;
383     }
384     return 0;
385 }
386 
387 //! @endcond
388