1 /*
2  * Copyright (c) 2017-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* *********************************************************
12 *  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
13 ***********************************************************/
14 #if !defined(__64BIT__) || defined(__MINGW32__)       /* No point defining Large file for 64 bit but MinGW-w64 requires it */
15 #  if !defined(_FILE_OFFSET_BITS)
16 #    define _FILE_OFFSET_BITS 64                      /* turn off_t into a 64-bit type for ftello, fseeko */
17 #  endif
18 #  if !defined(_LARGEFILE_SOURCE)                     /* obsolete macro, replaced with _FILE_OFFSET_BITS */
19 #    define _LARGEFILE_SOURCE 1                       /* Large File Support extension (LFS) - fseeko, ftello */
20 #  endif
21 #  if defined(_AIX) || defined(__hpux)
22 #    define _LARGE_FILES                              /* Large file support on 32-bits AIX and HP-UX */
23 #  endif
24 #endif
25 
26 /* ************************************************************
27 * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
28 ***************************************************************/
/* LONG_SEEK(file, offset, origin) : fseek()-like call used by the FILE* seek
 * callback. The first branch whose condition holds selects the implementation;
 * plain fseek() is the last resort.
 * NOTE(review): PLATFORM_POSIX_VERSION is not defined in the visible part of
 * this file before this point; an undefined macro evaluates to 0 in #if, which
 * would skip the fseeko branch - confirm it is provided by the build. */
#if defined(_MSC_VER) && _MSC_VER >= 1400
#   define LONG_SEEK _fseeki64
#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
#  define LONG_SEEK fseeko
#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
#   define LONG_SEEK fseeko64
#elif defined(_WIN32) && !defined(__DJGPP__)
#   include <windows.h>
    /* Fallback for remaining Windows toolchains: moves the file pointer of the
     * OS handle behind `file` with SetFilePointerEx().
     * @return : 0 on success, -1 on failure */
    static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
        LARGE_INTEGER off;
        DWORD method;
        off.QuadPart = offset;
        /* translate the stdio origin; anything other than SEEK_END / SEEK_CUR
         * is treated as a seek from the beginning of the file */
        if (origin == SEEK_END)
            method = FILE_END;
        else if (origin == SEEK_CUR)
            method = FILE_CURRENT;
        else
            method = FILE_BEGIN;

        /* SetFilePointerEx() reports success with a non-zero result */
        if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
            return 0;
        else
            return -1;
    }
#else
#   define LONG_SEEK fseek
#endif
56 
57 #include <stdlib.h>  /* malloc, free */
58 #include <stdio.h>   /* FILE* */
#include <limits.h>  /* UINT_MAX */
60 #include <assert.h>
61 
62 #define XXH_STATIC_LINKING_ONLY
63 #define XXH_NAMESPACE ZSTD_
64 #include "xxhash.h"
65 
66 #define ZSTD_STATIC_LINKING_ONLY
67 #include "zstd.h"
68 #include "zstd_errors.h"
69 #include "mem.h"
70 #include "zstd_seekable.h"
71 
/* ERROR(name) : builds a size_t error code from the ZSTD_error_<name>
 * enumerator by negating it and converting to size_t. */
#undef ERROR
#define ERROR(name) ((size_t)-ZSTD_error_##name)

/* CHECK_IO(f) : evaluates `f` once and, when the result is negative, makes the
 * ENCLOSING function return ERROR(seekableIO). Only usable inside functions
 * that return a size_t error code. */
#define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); }

/* MIN / MAX : plain macros, the selected argument is evaluated twice,
 * so do not pass expressions with side effects. */
#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
81 
82 /* Special-case callbacks for FILE* and in-memory modes, so that we can treat
83  * them the same way as the advanced API */
/* Read callback for FILE* sources.
 * `opaque` is the FILE* to read from; exactly `n` bytes must be delivered
 * into `buffer`.
 * @return : 0 when all `n` bytes were read, -1 on a short read or error */
static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
{
    FILE* const stream = (FILE*)opaque;
    size_t const nbRead = fread(buffer, 1, n, stream);
    return (nbRead == n) ? 0 : -1;
}
92 
/* Seek callback for FILE* sources.
 * Repositions the stream with LONG_SEEK, then flushes it.
 * @return : the non-zero LONG_SEEK result if the seek failed,
 *           otherwise the result of fflush() */
static int ZSTD_seekable_seek_FILE(void* opaque, long long offset, int origin)
{
    FILE* const stream = (FILE*)opaque;
    int const seekStatus = LONG_SEEK(stream, offset, origin);
    return seekStatus ? seekStatus : fflush(stream);
}
99 
/* In-memory source : a read cursor over a caller-owned buffer.
 * Invariant maintained by the callbacks below : pos <= size. */
typedef struct {
    const void *ptr;   /* start of the buffer (not owned) */
    size_t size;       /* total number of bytes available at `ptr` */
    size_t pos;        /* current read position, in bytes from `ptr` */
} buffWrapper_t;

/* Read callback for in-memory sources.
 * Copies `n` bytes from the current position into `buffer` and advances the
 * position.
 * @return : 0 on success, -1 if fewer than `n` bytes remain (nothing is copied
 *           and the position is left unchanged) */
static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
{
    buffWrapper_t* const buff = (buffWrapper_t*) opaque;
    /* compare against the remaining byte count rather than computing pos + n,
     * which can wrap around for very large `n` and defeat the bounds check */
    if (buff->pos > buff->size) return -1;
    if (n > buff->size - buff->pos) return -1;
    memcpy(buffer, (const unsigned char*)buff->ptr + buff->pos, n);
    buff->pos += n;
    return 0;
}

/* Seek callback for in-memory sources.
 * `origin` is SEEK_SET, SEEK_CUR or SEEK_END; `offset` may be negative.
 * @return : 0 on success, -1 if the target lies outside [0, size] or if
 *           `origin` is not recognised (position left unchanged) */
static int ZSTD_seekable_seek_buff(void* opaque, long long offset, int origin)
{
    buffWrapper_t* const buff = (buffWrapper_t*) opaque;
    unsigned long long newOffset;
    switch (origin) {
    case SEEK_SET:
        newOffset = (unsigned long long)offset;
        break;
    case SEEK_CUR:
        newOffset = (unsigned long long)buff->pos + (unsigned long long)offset;
        break;
    case SEEK_END:
        newOffset = (unsigned long long)buff->size + (unsigned long long)offset;
        break;
    default:
        assert(0);  /* not possible */
        return -1;  /* asserts compiled out : never read newOffset uninitialized */
    }
    /* a target before the start wraps to a huge unsigned value and is
     * rejected by the same comparison as a target past the end */
    if (newOffset > buff->size) {
        return -1;
    }
    buff->pos = (size_t)newOffset;
    return 0;
}
138 
/* One seek table entry, as built by ZSTD_seekable_loadSeekTable(). */
typedef struct {
    U64 cOffset;   /* cumulative compressed size of all preceding frames */
    U64 dOffset;   /* cumulative decompressed size of all preceding frames */
    U32 checksum;  /* per-frame checksum read from the table; only filled in
                    * when the table's checksum flag is set */
} seekEntry_t;
144 
/* Parsed seek table. */
typedef struct {
    seekEntry_t* entries;  /* heap array of tableLen + 1 entries; the extra last
                            * entry holds the total offsets so that sizes can be
                            * computed as entries[i + 1] - entries[i] */
    size_t tableLen;       /* number of frames described by the table */

    int checksumFlag;      /* non-zero when entries carry a checksum */
} seekTable_t;
151 
/* Size of the internal input and output staging buffers. */
#define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_MAX

/* State of a seekable decompressor (the opaque ZSTD_seekable type). */
struct ZSTD_seekable_s {
    ZSTD_DStream* dstream;        /* streaming decompressor, owned by this object */
    seekTable_t seekTable;        /* filled by ZSTD_seekable_loadSeekTable() */
    ZSTD_seekable_customFile src; /* read/seek callbacks and their opaque argument */

    U64 decompressedOffset;       /* decompressed position reached so far;
                                   * (U64)-1 right after initialization */
    U32 curFrame;                 /* index of the frame currently being decoded;
                                   * (U32)-1 right after initialization */

    BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
    BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
                                         starts of chunks before we get to the
                                         desired section */
    ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
    buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */

    XXH64_state_t xxhState;       /* running hash of the current frame's output,
                                   * used when the seek table carries checksums */
};
171 
ZSTD_seekable_create(void)172 ZSTD_seekable* ZSTD_seekable_create(void)
173 {
174     ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable));
175 
176     if (zs == NULL) return NULL;
177 
178     /* also initializes stage to zsds_init */
179     memset(zs, 0, sizeof(*zs));
180 
181     zs->dstream = ZSTD_createDStream();
182     if (zs->dstream == NULL) {
183         free(zs);
184         return NULL;
185     }
186 
187     return zs;
188 }
189 
/** ZSTD_seekable_free() :
 *  Releases the decompression stream, the seek table and the object itself.
 *  Passing NULL is allowed and does nothing.
 *  @return : always 0 */
size_t ZSTD_seekable_free(ZSTD_seekable* zs)
{
    if (zs != NULL) {
        ZSTD_freeDStream(zs->dstream);
        free(zs->seekTable.entries);
        free(zs);
    }
    return 0;
}
199 
200 /** ZSTD_seekable_offsetToFrameIndex() :
201  *  Performs a binary search to find the last frame with a decompressed offset
202  *  <= pos
203  *  @return : the frame's index */
ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable * const zs,unsigned long long pos)204 unsigned ZSTD_seekable_offsetToFrameIndex(ZSTD_seekable* const zs, unsigned long long pos)
205 {
206     U32 lo = 0;
207     U32 hi = (U32)zs->seekTable.tableLen;
208     assert(zs->seekTable.tableLen <= UINT_MAX);
209 
210     if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
211         return (U32)zs->seekTable.tableLen;
212     }
213 
214     while (lo + 1 < hi) {
215         U32 const mid = lo + ((hi - lo) >> 1);
216         if (zs->seekTable.entries[mid].dOffset <= pos) {
217             lo = mid;
218         } else {
219             hi = mid;
220         }
221     }
222     return lo;
223 }
224 
/** ZSTD_seekable_getNumFrames() :
 *  @return : the number of frames listed in the loaded seek table */
unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
{
    size_t const nbFrames = zs->seekTable.tableLen;
    assert(nbFrames <= UINT_MAX);
    return (unsigned)nbFrames;
}
230 
/** ZSTD_seekable_getFrameCompressedOffset() :
 *  @return : offset of frame `frameIndex` within the compressed data, or
 *            ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE if there is no such frame */
unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
{
    if (frameIndex < zs->seekTable.tableLen) {
        return zs->seekTable.entries[frameIndex].cOffset;
    }
    return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
}
236 
/** ZSTD_seekable_getFrameDecompressedOffset() :
 *  @return : offset of frame `frameIndex` within the decompressed data, or
 *            ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE if there is no such frame */
unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex)
{
    if (frameIndex < zs->seekTable.tableLen) {
        return zs->seekTable.entries[frameIndex].dOffset;
    }
    return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
}
242 
/** ZSTD_seekable_getFrameCompressedSize() :
 *  @return : compressed size of frame `frameIndex`, computed as the distance
 *            to the next entry's offset, or ERROR(frameIndex_tooLarge) if
 *            there is no such frame */
size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
{
    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);

    {   seekEntry_t const* const table = zs->seekTable.entries;
        U64 const frameStart = table[frameIndex].cOffset;
        U64 const frameEnd = table[frameIndex + 1].cOffset;
        return frameEnd - frameStart;
    }
}
249 
/** ZSTD_seekable_getFrameDecompressedSize() :
 *  @return : decompressed size of frame `frameIndex`, computed as the distance
 *            to the next entry's offset, or ERROR(frameIndex_tooLarge) if
 *            there is no such frame */
size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex)
{
    /* valid indices are [0, tableLen) : the table holds tableLen + 1 entries,
     * so frameIndex == tableLen would read entries[tableLen + 1], one past the
     * end of the allocation */
    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
    return zs->seekTable.entries[frameIndex + 1].dOffset -
           zs->seekTable.entries[frameIndex].dOffset;
}
256 
ZSTD_seekable_loadSeekTable(ZSTD_seekable * zs)257 static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
258 {
259     int checksumFlag;
260     ZSTD_seekable_customFile src = zs->src;
261     /* read the footer, fixed size */
262     CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END));
263     CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize));
264 
265     if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) {
266         return ERROR(prefix_unknown);
267     }
268 
269     {   BYTE const sfd = zs->inBuff[4];
270         checksumFlag = sfd >> 7;
271 
272         /* check reserved bits */
273         if ((checksumFlag >> 2) & 0x1f) {
274             return ERROR(corruption_detected);
275         }
276     }
277 
278     {   U32 const numFrames = MEM_readLE32(zs->inBuff);
279         U32 const sizePerEntry = 8 + (checksumFlag?4:0);
280         U32 const tableSize = sizePerEntry * numFrames;
281         U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_SKIPPABLEHEADERSIZE;
282 
283         U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
284         {
285             U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
286 
287             CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
288             CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
289 
290             remaining -= toRead;
291         }
292 
293         if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
294             return ERROR(prefix_unknown);
295         }
296         if (MEM_readLE32(zs->inBuff+4) + ZSTD_SKIPPABLEHEADERSIZE != frameSize) {
297             return ERROR(prefix_unknown);
298         }
299 
300         {   /* Allocate an extra entry at the end so that we can do size
301              * computations on the last element without special case */
302             seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
303 
304             U32 idx = 0;
305             U32 pos = 8;
306 
307 
308             U64 cOffset = 0;
309             U64 dOffset = 0;
310 
311             if (!entries) {
312                 free(entries);
313                 return ERROR(memory_allocation);
314             }
315 
316             /* compute cumulative positions */
317             for (; idx < numFrames; idx++) {
318                 if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) {
319                     U32 const offset = SEEKABLE_BUFF_SIZE - pos;
320                     U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE - offset);
321                     memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */
322                     CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead));
323                     remaining -= toRead;
324                     pos = 0;
325                 }
326                 entries[idx].cOffset = cOffset;
327                 entries[idx].dOffset = dOffset;
328 
329                 cOffset += MEM_readLE32(zs->inBuff + pos);
330                 pos += 4;
331                 dOffset += MEM_readLE32(zs->inBuff + pos);
332                 pos += 4;
333                 if (checksumFlag) {
334                     entries[idx].checksum = MEM_readLE32(zs->inBuff + pos);
335                     pos += 4;
336                 }
337             }
338             entries[numFrames].cOffset = cOffset;
339             entries[numFrames].dOffset = dOffset;
340 
341             zs->seekTable.entries = entries;
342             zs->seekTable.tableLen = numFrames;
343             zs->seekTable.checksumFlag = checksumFlag;
344             return 0;
345         }
346     }
347 }
348 
/** ZSTD_seekable_initBuff() :
 *  Initializes `zs` to decompress from the memory buffer `src` of `srcSize`
 *  bytes, by wrapping the buffer in the in-memory read/seek callbacks and
 *  forwarding to ZSTD_seekable_initAdvanced().
 *  The buffer is referenced, not copied.
 *  @return : the result of ZSTD_seekable_initAdvanced() */
size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize)
{
    ZSTD_seekable_customFile memFile = { .opaque = &zs->buffWrapper,
                                         .read = &ZSTD_seekable_read_buff,
                                         .seek = &ZSTD_seekable_seek_buff };

    /* cursor starts at the beginning of the caller's buffer */
    zs->buffWrapper.ptr = src;
    zs->buffWrapper.size = srcSize;
    zs->buffWrapper.pos = 0;

    return ZSTD_seekable_initAdvanced(zs, memFile);
}
357 
/** ZSTD_seekable_initFile() :
 *  Initializes `zs` to decompress from the stdio stream `src`, using the
 *  FILE* read/seek callbacks, and forwards to ZSTD_seekable_initAdvanced().
 *  @return : the result of ZSTD_seekable_initAdvanced() */
size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src)
{
    ZSTD_seekable_customFile stdioFile = { .opaque = src,
                                           .read = &ZSTD_seekable_read_FILE,
                                           .seek = &ZSTD_seekable_seek_FILE };
    return ZSTD_seekable_initAdvanced(zs, stdioFile);
}
364 
/** ZSTD_seekable_initAdvanced() :
 *  Binds `zs` to the custom source `src`, loads its seek table, resets the
 *  position tracking and initializes the decompression stream.
 *  @return : 0 on success, or the error code reported by seek table loading
 *            or by ZSTD_initDStream() */
size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src)
{
    size_t status;

    zs->src = src;

    status = ZSTD_seekable_loadSeekTable(zs);
    if (ZSTD_isError(status)) return status;

    /* all-ones values : the first decompress call never matches them and
     * therefore always starts with a seek */
    zs->decompressedOffset = (U64)-1;
    zs->curFrame = (U32)-1;

    status = ZSTD_initDStream(zs->dstream);
    if (ZSTD_isError(status)) return status;

    return 0;
}
379 
/** ZSTD_seekable_decompress() :
 *  Decompresses `len` bytes starting at decompressed position `offset` into
 *  `dst`, crossing frame boundaries as needed.
 *  If the call continues exactly where the previous one stopped (same frame,
 *  same decompressed position), the existing stream state is reused;
 *  otherwise the source is repositioned to the start of the target frame and
 *  the data preceding `offset` is decompressed into a scratch buffer and
 *  discarded.
 *  @return : `len` on success, or an error code : the error returned by
 *            ZSTD_decompressStream(), seekableIO on a failed seek/read, or
 *            corruption_detected on a frame checksum mismatch.
 *  NOTE(review): for `offset` at or beyond the total decompressed size,
 *  ZSTD_seekable_offsetToFrameIndex() returns tableLen and the sentinel entry
 *  is then used as if it were a frame - confirm callers never request a range
 *  past the end. */
size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset)
{
    U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
    do {
        /* check if we can continue from a previous decompress job */
        if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
            /* restart from the beginning of the target frame */
            zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
            zs->curFrame = targetFrame;

            CHECK_IO(zs->src.seek(zs->src.opaque,
                                  zs->seekTable.entries[targetFrame].cOffset,
                                  SEEK_SET));
            /* empty input buffer : forces a read from the source below */
            zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
            XXH64_reset(&zs->xxhState, 0);
            ZSTD_resetDStream(zs->dstream);
        }

        /* decode until the requested range is covered or the frame ends */
        while (zs->decompressedOffset < offset + len) {
            size_t toRead;
            ZSTD_outBuffer outTmp;
            size_t prevOutPos;
            if (zs->decompressedOffset < offset) {
                /* dummy decompressions until we get to the target offset */
                outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0};
            } else {
                /* write directly into the caller's buffer, resuming at the
                 * position already filled */
                outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset};
            }

            prevOutPos = outTmp.pos;
            toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
            if (ZSTD_isError(toRead)) {
                return toRead;
            }

            /* hash everything produced, including discarded bytes, since the
             * checksum is compared once the whole frame has been decoded */
            if (zs->seekTable.checksumFlag) {
                XXH64_update(&zs->xxhState, (BYTE*)outTmp.dst + prevOutPos,
                             outTmp.pos - prevOutPos);
            }
            zs->decompressedOffset += outTmp.pos - prevOutPos;

            if (toRead == 0) {
                /* frame complete */

                /* verify checksum (low 32 bits of the XXH64 digest) */
                if (zs->seekTable.checksumFlag &&
                    (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) !=
                            zs->seekTable.entries[targetFrame].checksum) {
                    return ERROR(corruption_detected);
                }

                if (zs->decompressedOffset < offset + len) {
                    /* go back to the start and force a reset of the stream */
                    targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
                }
                break;
            }

            /* read in more data if we're done with this buffer;
             * the decompressor's return value is used as the read size,
             * capped to the input buffer capacity */
            if (zs->in.pos == zs->in.size) {
                toRead = MIN(toRead, SEEKABLE_BUFF_SIZE);
                CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead));
                zs->in.size = toRead;
                zs->in.pos = 0;
            }
        }
    } while (zs->decompressedOffset != offset + len);

    return len;
}
449 
/** ZSTD_seekable_decompressFrame() :
 *  Decompresses the whole frame `frameIndex` into `dst`.
 *  @return : the result of ZSTD_seekable_decompress() for the frame's range,
 *            ERROR(frameIndex_tooLarge) if there is no such frame, or
 *            ERROR(dstSize_tooSmall) if `dstSize` is smaller than the frame's
 *            decompressed size */
size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex)
{
    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);

    {   seekEntry_t const* const table = zs->seekTable.entries;
        U64 const frameStart = table[frameIndex].dOffset;
        size_t const decompressedSize = table[frameIndex + 1].dOffset - frameStart;

        if (decompressedSize > dstSize) return ERROR(dstSize_tooSmall);

        return ZSTD_seekable_decompress(zs, dst, decompressedSize, frameStart);
    }
}
468