1 /******************************************************************************
2 *
3 * Project: VSI Virtual File System
4 * Purpose: Implementation of buffered reader IO functions.
5 * Author: Even Rouault, even.rouault at spatialys.com
6 *
7 ******************************************************************************
8 * Copyright (c) 2010-2011, Even Rouault <even dot rouault at spatialys.com>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a
11 * copy of this software and associated documentation files (the "Software"),
12 * to deal in the Software without restriction, including without limitation
13 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 * and/or sell copies of the Software, and to permit persons to whom the
15 * Software is furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included
18 * in all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 * DEALINGS IN THE SOFTWARE.
27 ****************************************************************************/
28
29 //! @cond Doxygen_Suppress
30
31 // The intent of this class is to be a wrapper around an underlying virtual
32 // handle and add very basic caching of last read bytes, so that a backward
33 // seek of a few bytes doesn't require a seek on the underlying virtual handle.
// This enables us to dramatically improve the performance of CPLReadLine2L()
// on a gzip file.
36
37 #include "cpl_port.h"
38 #include "cpl_vsi_virtual.h"
39
40 #include <cstddef>
41 #include <cstring>
42 #if HAVE_FCNTL_H
43 # include <fcntl.h>
44 #endif
45
46 #include <algorithm>
47 #include <vector>
48
49 #include "cpl_conv.h"
50 #include "cpl_error.h"
51 #include "cpl_vsi.h"
52
// Size in bytes of the default cache allocation, and upper bound on the
// number of trailing bytes kept in the cache after each read.
constexpr int MAX_BUFFER_SIZE = 64 * 1024;
54
55 CPL_CVSID("$Id: cpl_vsil_buffered_reader.cpp b1c9c12ad373e40b955162b45d704070d4ebf7b0 2019-06-19 16:50:15 +0200 Even Rouault $")
56
57 class VSIBufferedReaderHandle final : public VSIVirtualHandle
58 {
59 CPL_DISALLOW_COPY_ASSIGN(VSIBufferedReaderHandle)
60
61 VSIVirtualHandle* m_poBaseHandle = nullptr;
62 GByte* pabyBuffer = nullptr;
63 GUIntBig nBufferOffset = 0;
64 int nBufferSize = 0;
65 GUIntBig nCurOffset = 0;
66 bool bNeedBaseHandleSeek = false;
67 bool bEOF = false;
68 vsi_l_offset nCheatFileSize = 0;
69
70 int SeekBaseTo( vsi_l_offset nTargetOffset );
71
72 public:
73 explicit VSIBufferedReaderHandle( VSIVirtualHandle* poBaseHandle );
74 VSIBufferedReaderHandle( VSIVirtualHandle* poBaseHandle,
75 const GByte* pabyBeginningContent,
76 vsi_l_offset nCheatFileSizeIn );
77 // TODO(schwehr): Add override when support dropped for VS2008.
78 ~VSIBufferedReaderHandle() override;
79
80 int Seek( vsi_l_offset nOffset, int nWhence ) override;
81 vsi_l_offset Tell() override;
82 size_t Read( void *pBuffer, size_t nSize,
83 size_t nMemb ) override;
84 size_t Write( const void *pBuffer, size_t nSize,
85 size_t nMemb ) override;
86 int Eof() override;
87 int Flush() override;
88 int Close() override;
89 };
90
91 //! @endcond
92
93 /************************************************************************/
94 /* VSICreateBufferedReaderHandle() */
95 /************************************************************************/
96
97 VSIVirtualHandle *
VSICreateBufferedReaderHandle(VSIVirtualHandle * poBaseHandle)98 VSICreateBufferedReaderHandle( VSIVirtualHandle* poBaseHandle )
99 {
100 return new VSIBufferedReaderHandle(poBaseHandle);
101 }
102
VSICreateBufferedReaderHandle(VSIVirtualHandle * poBaseHandle,const GByte * pabyBeginningContent,vsi_l_offset nCheatFileSizeIn)103 VSIVirtualHandle* VSICreateBufferedReaderHandle(
104 VSIVirtualHandle* poBaseHandle,
105 const GByte* pabyBeginningContent,
106 vsi_l_offset nCheatFileSizeIn )
107 {
108 return new VSIBufferedReaderHandle(poBaseHandle,
109 pabyBeginningContent,
110 nCheatFileSizeIn);
111 }
112
113 //! @cond Doxygen_Suppress
114
115 /************************************************************************/
116 /* VSIBufferedReaderHandle() */
117 /************************************************************************/
118
VSIBufferedReaderHandle(VSIVirtualHandle * poBaseHandle)119 VSIBufferedReaderHandle::VSIBufferedReaderHandle(
120 VSIVirtualHandle* poBaseHandle) :
121 m_poBaseHandle(poBaseHandle),
122 pabyBuffer(static_cast<GByte*>(CPLMalloc(MAX_BUFFER_SIZE)))
123 {}
124
VSIBufferedReaderHandle(VSIVirtualHandle * poBaseHandle,const GByte * pabyBeginningContent,vsi_l_offset nCheatFileSizeIn)125 VSIBufferedReaderHandle::VSIBufferedReaderHandle(
126 VSIVirtualHandle* poBaseHandle,
127 const GByte* pabyBeginningContent,
128 vsi_l_offset nCheatFileSizeIn ) :
129 m_poBaseHandle(poBaseHandle),
130 pabyBuffer(static_cast<GByte *>(
131 CPLMalloc(std::max(MAX_BUFFER_SIZE,
132 static_cast<int>(poBaseHandle->Tell()))))),
133 nBufferOffset(0),
134 nBufferSize(static_cast<int>(poBaseHandle->Tell())),
135 nCurOffset(0),
136 bNeedBaseHandleSeek(true),
137 bEOF(false),
138 nCheatFileSize(nCheatFileSizeIn)
139 {
140 memcpy(pabyBuffer, pabyBeginningContent, nBufferSize);
141 }
142
143 /************************************************************************/
144 /* ~VSIBufferedReaderHandle() */
145 /************************************************************************/
146
~VSIBufferedReaderHandle()147 VSIBufferedReaderHandle::~VSIBufferedReaderHandle()
148 {
149 delete m_poBaseHandle;
150 CPLFree(pabyBuffer);
151 }
152
153 /************************************************************************/
154 /* Seek() */
155 /************************************************************************/
156
Seek(vsi_l_offset nOffset,int nWhence)157 int VSIBufferedReaderHandle::Seek( vsi_l_offset nOffset, int nWhence )
158 {
159 #ifdef DEBUG_VERBOSE
160 CPLDebug( "BUFFERED", "Seek(%d,%d)",
161 static_cast<int>(nOffset), static_cast<int>(nWhence) );
162 #endif
163 bEOF = false;
164 int ret = 0;
165 if( nWhence == SEEK_CUR )
166 {
167 nCurOffset += nOffset;
168 }
169 else if( nWhence == SEEK_END )
170 {
171 if( nCheatFileSize )
172 {
173 nCurOffset = nCheatFileSize;
174 }
175 else
176 {
177 ret = m_poBaseHandle->Seek(nOffset, nWhence);
178 nCurOffset = m_poBaseHandle->Tell();
179 bNeedBaseHandleSeek = true;
180 }
181 }
182 else
183 {
184 nCurOffset = nOffset;
185 }
186
187 return ret;
188 }
189
190 /************************************************************************/
191 /* Tell() */
192 /************************************************************************/
193
Tell()194 vsi_l_offset VSIBufferedReaderHandle::Tell()
195 {
196 #ifdef DEBUG_VERBOSE
197 CPLDebug( "BUFFERED", "Tell() = %d", static_cast<int>(nCurOffset));
198 #endif
199 return nCurOffset;
200 }
201
202 /************************************************************************/
203 /* SeekBaseTo() */
204 /************************************************************************/
205
SeekBaseTo(vsi_l_offset nTargetOffset)206 int VSIBufferedReaderHandle::SeekBaseTo( vsi_l_offset nTargetOffset )
207 {
208 if( m_poBaseHandle->Seek(nTargetOffset, SEEK_SET) == 0 )
209 return TRUE;
210
211 nCurOffset = m_poBaseHandle->Tell();
212 if( nCurOffset > nTargetOffset )
213 return FALSE;
214
215 const vsi_l_offset nMaxOffset = 8192;
216
217 std::vector<char> oTemp(nMaxOffset, 0);
218 char *pabyTemp = &oTemp[0];
219
220 while( true )
221 {
222 const size_t nToRead = static_cast<size_t>(
223 std::min(nMaxOffset, nTargetOffset - nCurOffset));
224 const size_t nRead = m_poBaseHandle->Read(pabyTemp, 1, nToRead);
225
226 nCurOffset += nRead;
227
228 if( nRead < nToRead )
229 {
230 bEOF = true;
231 return FALSE;
232 }
233 if( nToRead < nMaxOffset )
234 break;
235 }
236 return TRUE;
237 }
238
239 /************************************************************************/
240 /* Read() */
241 /************************************************************************/
242
Read(void * pBuffer,size_t nSize,size_t nMemb)243 size_t VSIBufferedReaderHandle::Read( void *pBuffer, size_t nSize,
244 size_t nMemb )
245 {
246 const size_t nTotalToRead = nSize * nMemb;
247 #ifdef DEBUG_VERBOSE
248 CPLDebug( "BUFFERED", "Read(%d)", static_cast<int>(nTotalToRead));
249 #endif
250
251 if( nSize == 0 )
252 return 0;
253
254 if( nBufferSize != 0 &&
255 nCurOffset >= nBufferOffset &&
256 nCurOffset <= nBufferOffset + nBufferSize )
257 {
258 // We try to read from an offset located within the buffer.
259 const size_t nReadInBuffer =
260 static_cast<size_t>(
261 std::min(nTotalToRead,
262 static_cast<size_t>(nBufferOffset + nBufferSize -
263 nCurOffset)));
264 memcpy(pBuffer, pabyBuffer + nCurOffset - nBufferOffset, nReadInBuffer);
265 const size_t nToReadInFile = nTotalToRead - nReadInBuffer;
266 if( nToReadInFile > 0 )
267 {
268 // The beginning of the data to read is located in the buffer
269 // but the end must be read from the file.
270 if( bNeedBaseHandleSeek )
271 {
272 if( !SeekBaseTo(nBufferOffset + nBufferSize) )
273 {
274 nCurOffset += nReadInBuffer;
275 return nReadInBuffer / nSize;
276 }
277 }
278 bNeedBaseHandleSeek = false;
279 #ifdef DEBUG_VERBOSE
280 CPLAssert(m_poBaseHandle->Tell() == nBufferOffset + nBufferSize);
281 #endif
282
283 const size_t nReadInFile =
284 m_poBaseHandle->Read(
285 static_cast<GByte *>(pBuffer) + nReadInBuffer,
286 1, nToReadInFile);
287 const size_t nRead = nReadInBuffer + nReadInFile;
288
289 nBufferSize = static_cast<int>(
290 std::min(nRead, static_cast<size_t>(MAX_BUFFER_SIZE)));
291 nBufferOffset = nCurOffset + nRead - nBufferSize;
292 memcpy(pabyBuffer,
293 static_cast<GByte *>(pBuffer) + nRead - nBufferSize,
294 nBufferSize);
295
296 nCurOffset += nRead;
297 #ifdef DEBUG_VERBOSE
298 CPLAssert(m_poBaseHandle->Tell() == nBufferOffset + nBufferSize);
299 CPLAssert(m_poBaseHandle->Tell() == nCurOffset);
300 #endif
301
302 bEOF = CPL_TO_BOOL(m_poBaseHandle->Eof());
303
304 return nRead / nSize;
305 }
306 else
307 {
308 // The data to read is completely located within the buffer.
309 nCurOffset += nTotalToRead;
310 return nTotalToRead / nSize;
311 }
312 }
313 else
314 {
315 // We try either to read before or after the buffer, so a seek is
316 // necessary.
317 if( !SeekBaseTo(nCurOffset) )
318 return 0;
319 bNeedBaseHandleSeek = false;
320 const size_t nReadInFile =
321 m_poBaseHandle->Read(pBuffer, 1, nTotalToRead);
322 nBufferSize = static_cast<int>(
323 std::min(nReadInFile, static_cast<size_t>(MAX_BUFFER_SIZE)));
324 nBufferOffset = nCurOffset + nReadInFile - nBufferSize;
325 memcpy(pabyBuffer,
326 static_cast<GByte *>(pBuffer) + nReadInFile - nBufferSize,
327 nBufferSize);
328
329 nCurOffset += nReadInFile;
330 #ifdef DEBUG_VERBOSE
331 CPLAssert(m_poBaseHandle->Tell() == nBufferOffset + nBufferSize);
332 CPLAssert(m_poBaseHandle->Tell() == nCurOffset);
333 #endif
334
335 bEOF = CPL_TO_BOOL(m_poBaseHandle->Eof());
336
337 return nReadInFile / nSize;
338 }
339 }
340
341 /************************************************************************/
342 /* Write() */
343 /************************************************************************/
344
Write(const void *,size_t,size_t)345 size_t VSIBufferedReaderHandle::Write( const void * /* pBuffer */,
346 size_t /* nSize */,
347 size_t /* nMemb */)
348 {
349 CPLError(CE_Failure, CPLE_NotSupported,
350 "VSIFWriteL is not supported on buffer reader streams");
351 return 0;
352 }
353
354 /************************************************************************/
355 /* Eof() */
356 /************************************************************************/
357
Eof()358 int VSIBufferedReaderHandle::Eof()
359 {
360 return bEOF;
361 }
362
363 /************************************************************************/
364 /* Flush() */
365 /************************************************************************/
366
Flush()367 int VSIBufferedReaderHandle::Flush()
368 {
369 return 0;
370 }
371
372 /************************************************************************/
373 /* Close() */
374 /************************************************************************/
375
Close()376 int VSIBufferedReaderHandle::Close()
377 {
378 if( m_poBaseHandle )
379 {
380 m_poBaseHandle->Close();
381 delete m_poBaseHandle;
382 m_poBaseHandle = nullptr;
383 }
384 return 0;
385 }
386
387 //! @endcond
388