xref: /dragonfly/contrib/zstd/lib/common/bitstream.h (revision a28cd43d)
1*a28cd43dSSascha Wildner /* ******************************************************************
2*a28cd43dSSascha Wildner  * bitstream
3*a28cd43dSSascha Wildner  * Part of FSE library
4*a28cd43dSSascha Wildner  * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5*a28cd43dSSascha Wildner  *
6*a28cd43dSSascha Wildner  * You can contact the author at :
7*a28cd43dSSascha Wildner  * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8*a28cd43dSSascha Wildner  *
9*a28cd43dSSascha Wildner  * This source code is licensed under both the BSD-style license (found in the
10*a28cd43dSSascha Wildner  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11*a28cd43dSSascha Wildner  * in the COPYING file in the root directory of this source tree).
12*a28cd43dSSascha Wildner  * You may select, at your option, one of the above-listed licenses.
13*a28cd43dSSascha Wildner ****************************************************************** */
14*a28cd43dSSascha Wildner #ifndef BITSTREAM_H_MODULE
15*a28cd43dSSascha Wildner #define BITSTREAM_H_MODULE
16*a28cd43dSSascha Wildner 
17*a28cd43dSSascha Wildner #if defined (__cplusplus)
18*a28cd43dSSascha Wildner extern "C" {
19*a28cd43dSSascha Wildner #endif
20*a28cd43dSSascha Wildner /*
21*a28cd43dSSascha Wildner *  This API consists of small unitary functions, which must be inlined for best performance.
22*a28cd43dSSascha Wildner *  Since link-time-optimization is not available for all compilers,
23*a28cd43dSSascha Wildner *  these functions are defined into a .h to be included.
24*a28cd43dSSascha Wildner */
25*a28cd43dSSascha Wildner 
26*a28cd43dSSascha Wildner /*-****************************************
27*a28cd43dSSascha Wildner *  Dependencies
28*a28cd43dSSascha Wildner ******************************************/
29*a28cd43dSSascha Wildner #include "mem.h"            /* unaligned access routines */
30*a28cd43dSSascha Wildner #include "compiler.h"       /* UNLIKELY() */
31*a28cd43dSSascha Wildner #include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
32*a28cd43dSSascha Wildner #include "error_private.h"  /* error codes and messages */
33*a28cd43dSSascha Wildner 
34*a28cd43dSSascha Wildner 
35*a28cd43dSSascha Wildner /*=========================================
36*a28cd43dSSascha Wildner *  Target specific
37*a28cd43dSSascha Wildner =========================================*/
38*a28cd43dSSascha Wildner #ifndef ZSTD_NO_INTRINSICS
39*a28cd43dSSascha Wildner #  if defined(__BMI__) && defined(__GNUC__)
40*a28cd43dSSascha Wildner #    include <immintrin.h>   /* support for bextr (experimental) */
41*a28cd43dSSascha Wildner #  elif defined(__ICCARM__)
42*a28cd43dSSascha Wildner #    include <intrinsics.h>
43*a28cd43dSSascha Wildner #  endif
44*a28cd43dSSascha Wildner #endif
45*a28cd43dSSascha Wildner 
/* Minimum number of bits available in the decoder register after a reload
 * that reports BIT_DStream_unfinished: register width minus 7,
 * i.e. 25 bits for a 32-bit size_t and 57 bits for a 64-bit size_t. */
#define STREAM_ACCUMULATOR_MIN_32  25
#define STREAM_ACCUMULATOR_MIN_64  57
#define STREAM_ACCUMULATOR_MIN \
    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
49*a28cd43dSSascha Wildner 
50*a28cd43dSSascha Wildner 
51*a28cd43dSSascha Wildner /*-******************************************
52*a28cd43dSSascha Wildner *  bitStream encoding API (write forward)
53*a28cd43dSSascha Wildner ********************************************/
54*a28cd43dSSascha Wildner /* bitStream can mix input from multiple sources.
55*a28cd43dSSascha Wildner  * A critical property of these streams is that they encode and decode in **reverse** direction.
56*a28cd43dSSascha Wildner  * So the first bit sequence you add will be the last to be read, like a LIFO stack.
57*a28cd43dSSascha Wildner  */
/* Encoder-side bitstream state (see BIT_initCStream()). */
typedef struct {
    size_t   bitContainer;   /* local register accumulating bits not yet flushed */
    unsigned bitPos;         /* number of bits currently stored in bitContainer */
    char*    startPtr;       /* first byte of the destination buffer */
    char*    ptr;            /* next write position inside the destination buffer */
    char*    endPtr;         /* startPtr + dstCapacity - sizeof(bitContainer) :
                              * last position where a full register can be written */
} BIT_CStream_t;
65*a28cd43dSSascha Wildner 
66*a28cd43dSSascha Wildner MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
67*a28cd43dSSascha Wildner MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
68*a28cd43dSSascha Wildner MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
69*a28cd43dSSascha Wildner MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
70*a28cd43dSSascha Wildner 
/* Start with initCStream, providing the size of buffer to write into.
*  bitStream will never write outside of this buffer.
*  `dstCapacity` must be > sizeof(bitC->bitContainer), otherwise @return will be an error code.
*
*  bits are first added to a local register.
*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
*  Writing data into memory is an explicit operation, performed by the flushBits function.
*  Hence keep track of how many bits are potentially stored into local register to avoid register overflow.
*  After a flushBits, a maximum of 7 bits might still be stored into local register.
*
*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
*
*  Last operation is to close the bitStream.
*  The function returns the final size of CStream in bytes.
*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
*/
87*a28cd43dSSascha Wildner 
88*a28cd43dSSascha Wildner 
89*a28cd43dSSascha Wildner /*-********************************************
90*a28cd43dSSascha Wildner *  bitStream decoding API (read backward)
91*a28cd43dSSascha Wildner **********************************************/
/* Decoder-side bitstream state (see BIT_initDStream()). */
typedef struct {
    size_t      bitContainer;   /* local register holding the bits currently loaded */
    unsigned    bitsConsumed;   /* number of bits of bitContainer already read */
    const char* ptr;            /* address bitContainer was last loaded from */
    const char* start;          /* first byte of the source buffer */
    const char* limitPtr;       /* start + sizeof(bitContainer) : while ptr is at or above it,
                                 * reloads can use BIT_reloadDStreamFast() */
} BIT_DStream_t;
99*a28cd43dSSascha Wildner 
/* Result of BIT_reloadDStream().
 * Note : 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit. */
typedef enum {
    BIT_DStream_unfinished  = 0,
    BIT_DStream_endOfBuffer = 1,
    BIT_DStream_completed   = 2,
    BIT_DStream_overflow    = 3
} BIT_DStream_status;
105*a28cd43dSSascha Wildner 
106*a28cd43dSSascha Wildner MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
107*a28cd43dSSascha Wildner MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
108*a28cd43dSSascha Wildner MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
109*a28cd43dSSascha Wildner MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
110*a28cd43dSSascha Wildner 
111*a28cd43dSSascha Wildner 
/* Start by invoking BIT_initDStream().
*  A chunk of the bitStream is then stored into a local register.
*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
*  You can then retrieve bitFields stored into the local register, **in reverse order**.
*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
*  A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
*  Otherwise, it can be less than that, so proceed accordingly.
*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
*/
121*a28cd43dSSascha Wildner 
122*a28cd43dSSascha Wildner 
123*a28cd43dSSascha Wildner /*-****************************************
124*a28cd43dSSascha Wildner *  unsafe API
125*a28cd43dSSascha Wildner ******************************************/
126*a28cd43dSSascha Wildner MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
127*a28cd43dSSascha Wildner /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
128*a28cd43dSSascha Wildner 
129*a28cd43dSSascha Wildner MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
130*a28cd43dSSascha Wildner /* unsafe version; does not check buffer overflow */
131*a28cd43dSSascha Wildner 
132*a28cd43dSSascha Wildner MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
133*a28cd43dSSascha Wildner /* faster, but works only if nbBits >= 1 */
134*a28cd43dSSascha Wildner 
135*a28cd43dSSascha Wildner 
136*a28cd43dSSascha Wildner 
137*a28cd43dSSascha Wildner /*-**************************************************************
138*a28cd43dSSascha Wildner *  Internal functions
139*a28cd43dSSascha Wildner ****************************************************************/
BIT_highbit32(U32 val)140*a28cd43dSSascha Wildner MEM_STATIC unsigned BIT_highbit32 (U32 val)
141*a28cd43dSSascha Wildner {
142*a28cd43dSSascha Wildner     assert(val != 0);
143*a28cd43dSSascha Wildner     {
144*a28cd43dSSascha Wildner #   if defined(_MSC_VER)   /* Visual */
145*a28cd43dSSascha Wildner #       if STATIC_BMI2 == 1
146*a28cd43dSSascha Wildner 		return _lzcnt_u32(val) ^ 31;
147*a28cd43dSSascha Wildner #       else
148*a28cd43dSSascha Wildner 		unsigned long r = 0;
149*a28cd43dSSascha Wildner 		return _BitScanReverse(&r, val) ? (unsigned)r : 0;
150*a28cd43dSSascha Wildner #       endif
151*a28cd43dSSascha Wildner #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
152*a28cd43dSSascha Wildner         return __builtin_clz (val) ^ 31;
153*a28cd43dSSascha Wildner #   elif defined(__ICCARM__)    /* IAR Intrinsic */
154*a28cd43dSSascha Wildner         return 31 - __CLZ(val);
155*a28cd43dSSascha Wildner #   else   /* Software version */
156*a28cd43dSSascha Wildner         static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
157*a28cd43dSSascha Wildner                                                  11, 14, 16, 18, 22, 25,  3, 30,
158*a28cd43dSSascha Wildner                                                   8, 12, 20, 28, 15, 17, 24,  7,
159*a28cd43dSSascha Wildner                                                  19, 27, 23,  6, 26,  5,  4, 31 };
160*a28cd43dSSascha Wildner         U32 v = val;
161*a28cd43dSSascha Wildner         v |= v >> 1;
162*a28cd43dSSascha Wildner         v |= v >> 2;
163*a28cd43dSSascha Wildner         v |= v >> 4;
164*a28cd43dSSascha Wildner         v |= v >> 8;
165*a28cd43dSSascha Wildner         v |= v >> 16;
166*a28cd43dSSascha Wildner         return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
167*a28cd43dSSascha Wildner #   endif
168*a28cd43dSSascha Wildner     }
169*a28cd43dSSascha Wildner }
170*a28cd43dSSascha Wildner 
171*a28cd43dSSascha Wildner /*=====    Local Constants   =====*/
/* BIT_mask[n] has its n lowest bits set (n = 0..31). */
static const unsigned BIT_mask[] = {
    0,          1,          3,          7,
    0xF,        0x1F,       0x3F,       0x7F,
    0xFF,       0x1FF,      0x3FF,      0x7FF,
    0xFFF,      0x1FFF,     0x3FFF,     0x7FFF,
    0xFFFF,     0x1FFFF,    0x3FFFF,    0x7FFFF,
    0xFFFFF,    0x1FFFFF,   0x3FFFFF,   0x7FFFFF,
    0xFFFFFF,   0x1FFFFFF,  0x3FFFFFF,  0x7FFFFFF,
    0xFFFFFFF,  0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF };   /* up to 31 bits */
#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
180*a28cd43dSSascha Wildner 
181*a28cd43dSSascha Wildner /*-**************************************************************
182*a28cd43dSSascha Wildner *  bitStream encoding
183*a28cd43dSSascha Wildner ****************************************************************/
184*a28cd43dSSascha Wildner /*! BIT_initCStream() :
185*a28cd43dSSascha Wildner  *  `dstCapacity` must be > sizeof(size_t)
186*a28cd43dSSascha Wildner  *  @return : 0 if success,
187*a28cd43dSSascha Wildner  *            otherwise an error code (can be tested using ERR_isError()) */
BIT_initCStream(BIT_CStream_t * bitC,void * startPtr,size_t dstCapacity)188*a28cd43dSSascha Wildner MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
189*a28cd43dSSascha Wildner                                   void* startPtr, size_t dstCapacity)
190*a28cd43dSSascha Wildner {
191*a28cd43dSSascha Wildner     bitC->bitContainer = 0;
192*a28cd43dSSascha Wildner     bitC->bitPos = 0;
193*a28cd43dSSascha Wildner     bitC->startPtr = (char*)startPtr;
194*a28cd43dSSascha Wildner     bitC->ptr = bitC->startPtr;
195*a28cd43dSSascha Wildner     bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
196*a28cd43dSSascha Wildner     if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
197*a28cd43dSSascha Wildner     return 0;
198*a28cd43dSSascha Wildner }
199*a28cd43dSSascha Wildner 
200*a28cd43dSSascha Wildner /*! BIT_addBits() :
201*a28cd43dSSascha Wildner  *  can add up to 31 bits into `bitC`.
202*a28cd43dSSascha Wildner  *  Note : does not check for register overflow ! */
BIT_addBits(BIT_CStream_t * bitC,size_t value,unsigned nbBits)203*a28cd43dSSascha Wildner MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
204*a28cd43dSSascha Wildner                             size_t value, unsigned nbBits)
205*a28cd43dSSascha Wildner {
206*a28cd43dSSascha Wildner     DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
207*a28cd43dSSascha Wildner     assert(nbBits < BIT_MASK_SIZE);
208*a28cd43dSSascha Wildner     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
209*a28cd43dSSascha Wildner     bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
210*a28cd43dSSascha Wildner     bitC->bitPos += nbBits;
211*a28cd43dSSascha Wildner }
212*a28cd43dSSascha Wildner 
213*a28cd43dSSascha Wildner /*! BIT_addBitsFast() :
214*a28cd43dSSascha Wildner  *  works only if `value` is _clean_,
215*a28cd43dSSascha Wildner  *  meaning all high bits above nbBits are 0 */
BIT_addBitsFast(BIT_CStream_t * bitC,size_t value,unsigned nbBits)216*a28cd43dSSascha Wildner MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
217*a28cd43dSSascha Wildner                                 size_t value, unsigned nbBits)
218*a28cd43dSSascha Wildner {
219*a28cd43dSSascha Wildner     assert((value>>nbBits) == 0);
220*a28cd43dSSascha Wildner     assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
221*a28cd43dSSascha Wildner     bitC->bitContainer |= value << bitC->bitPos;
222*a28cd43dSSascha Wildner     bitC->bitPos += nbBits;
223*a28cd43dSSascha Wildner }
224*a28cd43dSSascha Wildner 
225*a28cd43dSSascha Wildner /*! BIT_flushBitsFast() :
226*a28cd43dSSascha Wildner  *  assumption : bitContainer has not overflowed
227*a28cd43dSSascha Wildner  *  unsafe version; does not check buffer overflow */
BIT_flushBitsFast(BIT_CStream_t * bitC)228*a28cd43dSSascha Wildner MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
229*a28cd43dSSascha Wildner {
230*a28cd43dSSascha Wildner     size_t const nbBytes = bitC->bitPos >> 3;
231*a28cd43dSSascha Wildner     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
232*a28cd43dSSascha Wildner     assert(bitC->ptr <= bitC->endPtr);
233*a28cd43dSSascha Wildner     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
234*a28cd43dSSascha Wildner     bitC->ptr += nbBytes;
235*a28cd43dSSascha Wildner     bitC->bitPos &= 7;
236*a28cd43dSSascha Wildner     bitC->bitContainer >>= nbBytes*8;
237*a28cd43dSSascha Wildner }
238*a28cd43dSSascha Wildner 
239*a28cd43dSSascha Wildner /*! BIT_flushBits() :
240*a28cd43dSSascha Wildner  *  assumption : bitContainer has not overflowed
241*a28cd43dSSascha Wildner  *  safe version; check for buffer overflow, and prevents it.
242*a28cd43dSSascha Wildner  *  note : does not signal buffer overflow.
243*a28cd43dSSascha Wildner  *  overflow will be revealed later on using BIT_closeCStream() */
BIT_flushBits(BIT_CStream_t * bitC)244*a28cd43dSSascha Wildner MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
245*a28cd43dSSascha Wildner {
246*a28cd43dSSascha Wildner     size_t const nbBytes = bitC->bitPos >> 3;
247*a28cd43dSSascha Wildner     assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
248*a28cd43dSSascha Wildner     assert(bitC->ptr <= bitC->endPtr);
249*a28cd43dSSascha Wildner     MEM_writeLEST(bitC->ptr, bitC->bitContainer);
250*a28cd43dSSascha Wildner     bitC->ptr += nbBytes;
251*a28cd43dSSascha Wildner     if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
252*a28cd43dSSascha Wildner     bitC->bitPos &= 7;
253*a28cd43dSSascha Wildner     bitC->bitContainer >>= nbBytes*8;
254*a28cd43dSSascha Wildner }
255*a28cd43dSSascha Wildner 
256*a28cd43dSSascha Wildner /*! BIT_closeCStream() :
257*a28cd43dSSascha Wildner  *  @return : size of CStream, in bytes,
258*a28cd43dSSascha Wildner  *            or 0 if it could not fit into dstBuffer */
BIT_closeCStream(BIT_CStream_t * bitC)259*a28cd43dSSascha Wildner MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
260*a28cd43dSSascha Wildner {
261*a28cd43dSSascha Wildner     BIT_addBitsFast(bitC, 1, 1);   /* endMark */
262*a28cd43dSSascha Wildner     BIT_flushBits(bitC);
263*a28cd43dSSascha Wildner     if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
264*a28cd43dSSascha Wildner     return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
265*a28cd43dSSascha Wildner }
266*a28cd43dSSascha Wildner 
267*a28cd43dSSascha Wildner 
268*a28cd43dSSascha Wildner /*-********************************************************
269*a28cd43dSSascha Wildner *  bitStream decoding
270*a28cd43dSSascha Wildner **********************************************************/
271*a28cd43dSSascha Wildner /*! BIT_initDStream() :
272*a28cd43dSSascha Wildner  *  Initialize a BIT_DStream_t.
273*a28cd43dSSascha Wildner  * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
274*a28cd43dSSascha Wildner  * `srcSize` must be the *exact* size of the bitStream, in bytes.
275*a28cd43dSSascha Wildner  * @return : size of stream (== srcSize), or an errorCode if a problem is detected
276*a28cd43dSSascha Wildner  */
BIT_initDStream(BIT_DStream_t * bitD,const void * srcBuffer,size_t srcSize)277*a28cd43dSSascha Wildner MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
278*a28cd43dSSascha Wildner {
279*a28cd43dSSascha Wildner     if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
280*a28cd43dSSascha Wildner 
281*a28cd43dSSascha Wildner     bitD->start = (const char*)srcBuffer;
282*a28cd43dSSascha Wildner     bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
283*a28cd43dSSascha Wildner 
284*a28cd43dSSascha Wildner     if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
285*a28cd43dSSascha Wildner         bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
286*a28cd43dSSascha Wildner         bitD->bitContainer = MEM_readLEST(bitD->ptr);
287*a28cd43dSSascha Wildner         { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
288*a28cd43dSSascha Wildner           bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
289*a28cd43dSSascha Wildner           if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
290*a28cd43dSSascha Wildner     } else {
291*a28cd43dSSascha Wildner         bitD->ptr   = bitD->start;
292*a28cd43dSSascha Wildner         bitD->bitContainer = *(const BYTE*)(bitD->start);
293*a28cd43dSSascha Wildner         switch(srcSize)
294*a28cd43dSSascha Wildner         {
295*a28cd43dSSascha Wildner         case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
296*a28cd43dSSascha Wildner                 /* fall-through */
297*a28cd43dSSascha Wildner 
298*a28cd43dSSascha Wildner         case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
299*a28cd43dSSascha Wildner                 /* fall-through */
300*a28cd43dSSascha Wildner 
301*a28cd43dSSascha Wildner         case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
302*a28cd43dSSascha Wildner                 /* fall-through */
303*a28cd43dSSascha Wildner 
304*a28cd43dSSascha Wildner         case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
305*a28cd43dSSascha Wildner                 /* fall-through */
306*a28cd43dSSascha Wildner 
307*a28cd43dSSascha Wildner         case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
308*a28cd43dSSascha Wildner                 /* fall-through */
309*a28cd43dSSascha Wildner 
310*a28cd43dSSascha Wildner         case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
311*a28cd43dSSascha Wildner                 /* fall-through */
312*a28cd43dSSascha Wildner 
313*a28cd43dSSascha Wildner         default: break;
314*a28cd43dSSascha Wildner         }
315*a28cd43dSSascha Wildner         {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
316*a28cd43dSSascha Wildner             bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
317*a28cd43dSSascha Wildner             if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
318*a28cd43dSSascha Wildner         }
319*a28cd43dSSascha Wildner         bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
320*a28cd43dSSascha Wildner     }
321*a28cd43dSSascha Wildner 
322*a28cd43dSSascha Wildner     return srcSize;
323*a28cd43dSSascha Wildner }
324*a28cd43dSSascha Wildner 
BIT_getUpperBits(size_t bitContainer,U32 const start)325*a28cd43dSSascha Wildner MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
326*a28cd43dSSascha Wildner {
327*a28cd43dSSascha Wildner     return bitContainer >> start;
328*a28cd43dSSascha Wildner }
329*a28cd43dSSascha Wildner 
BIT_getMiddleBits(size_t bitContainer,U32 const start,U32 const nbBits)330*a28cd43dSSascha Wildner MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
331*a28cd43dSSascha Wildner {
332*a28cd43dSSascha Wildner     U32 const regMask = sizeof(bitContainer)*8 - 1;
333*a28cd43dSSascha Wildner     /* if start > regMask, bitstream is corrupted, and result is undefined */
334*a28cd43dSSascha Wildner     assert(nbBits < BIT_MASK_SIZE);
335*a28cd43dSSascha Wildner     return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
336*a28cd43dSSascha Wildner }
337*a28cd43dSSascha Wildner 
BIT_getLowerBits(size_t bitContainer,U32 const nbBits)338*a28cd43dSSascha Wildner MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
339*a28cd43dSSascha Wildner {
340*a28cd43dSSascha Wildner #if defined(STATIC_BMI2) && STATIC_BMI2 == 1
341*a28cd43dSSascha Wildner 	return  _bzhi_u64(bitContainer, nbBits);
342*a28cd43dSSascha Wildner #else
343*a28cd43dSSascha Wildner     assert(nbBits < BIT_MASK_SIZE);
344*a28cd43dSSascha Wildner     return bitContainer & BIT_mask[nbBits];
345*a28cd43dSSascha Wildner #endif
346*a28cd43dSSascha Wildner }
347*a28cd43dSSascha Wildner 
348*a28cd43dSSascha Wildner /*! BIT_lookBits() :
349*a28cd43dSSascha Wildner  *  Provides next n bits from local register.
350*a28cd43dSSascha Wildner  *  local register is not modified.
351*a28cd43dSSascha Wildner  *  On 32-bits, maxNbBits==24.
352*a28cd43dSSascha Wildner  *  On 64-bits, maxNbBits==56.
353*a28cd43dSSascha Wildner  * @return : value extracted */
BIT_lookBits(const BIT_DStream_t * bitD,U32 nbBits)354*a28cd43dSSascha Wildner MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
355*a28cd43dSSascha Wildner {
356*a28cd43dSSascha Wildner     /* arbitrate between double-shift and shift+mask */
357*a28cd43dSSascha Wildner #if 1
358*a28cd43dSSascha Wildner     /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
359*a28cd43dSSascha Wildner      * bitstream is likely corrupted, and result is undefined */
360*a28cd43dSSascha Wildner     return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
361*a28cd43dSSascha Wildner #else
362*a28cd43dSSascha Wildner     /* this code path is slower on my os-x laptop */
363*a28cd43dSSascha Wildner     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
364*a28cd43dSSascha Wildner     return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
365*a28cd43dSSascha Wildner #endif
366*a28cd43dSSascha Wildner }
367*a28cd43dSSascha Wildner 
368*a28cd43dSSascha Wildner /*! BIT_lookBitsFast() :
369*a28cd43dSSascha Wildner  *  unsafe version; only works if nbBits >= 1 */
BIT_lookBitsFast(const BIT_DStream_t * bitD,U32 nbBits)370*a28cd43dSSascha Wildner MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
371*a28cd43dSSascha Wildner {
372*a28cd43dSSascha Wildner     U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
373*a28cd43dSSascha Wildner     assert(nbBits >= 1);
374*a28cd43dSSascha Wildner     return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
375*a28cd43dSSascha Wildner }
376*a28cd43dSSascha Wildner 
BIT_skipBits(BIT_DStream_t * bitD,U32 nbBits)377*a28cd43dSSascha Wildner MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
378*a28cd43dSSascha Wildner {
379*a28cd43dSSascha Wildner     bitD->bitsConsumed += nbBits;
380*a28cd43dSSascha Wildner }
381*a28cd43dSSascha Wildner 
382*a28cd43dSSascha Wildner /*! BIT_readBits() :
383*a28cd43dSSascha Wildner  *  Read (consume) next n bits from local register and update.
384*a28cd43dSSascha Wildner  *  Pay attention to not read more than nbBits contained into local register.
385*a28cd43dSSascha Wildner  * @return : extracted value. */
BIT_readBits(BIT_DStream_t * bitD,unsigned nbBits)386*a28cd43dSSascha Wildner MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
387*a28cd43dSSascha Wildner {
388*a28cd43dSSascha Wildner     size_t const value = BIT_lookBits(bitD, nbBits);
389*a28cd43dSSascha Wildner     BIT_skipBits(bitD, nbBits);
390*a28cd43dSSascha Wildner     return value;
391*a28cd43dSSascha Wildner }
392*a28cd43dSSascha Wildner 
393*a28cd43dSSascha Wildner /*! BIT_readBitsFast() :
394*a28cd43dSSascha Wildner  *  unsafe version; only works only if nbBits >= 1 */
BIT_readBitsFast(BIT_DStream_t * bitD,unsigned nbBits)395*a28cd43dSSascha Wildner MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
396*a28cd43dSSascha Wildner {
397*a28cd43dSSascha Wildner     size_t const value = BIT_lookBitsFast(bitD, nbBits);
398*a28cd43dSSascha Wildner     assert(nbBits >= 1);
399*a28cd43dSSascha Wildner     BIT_skipBits(bitD, nbBits);
400*a28cd43dSSascha Wildner     return value;
401*a28cd43dSSascha Wildner }
402*a28cd43dSSascha Wildner 
403*a28cd43dSSascha Wildner /*! BIT_reloadDStreamFast() :
404*a28cd43dSSascha Wildner  *  Similar to BIT_reloadDStream(), but with two differences:
405*a28cd43dSSascha Wildner  *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
406*a28cd43dSSascha Wildner  *  2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
407*a28cd43dSSascha Wildner  *     point you must use BIT_reloadDStream() to reload.
408*a28cd43dSSascha Wildner  */
BIT_reloadDStreamFast(BIT_DStream_t * bitD)409*a28cd43dSSascha Wildner MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
410*a28cd43dSSascha Wildner {
411*a28cd43dSSascha Wildner     if (UNLIKELY(bitD->ptr < bitD->limitPtr))
412*a28cd43dSSascha Wildner         return BIT_DStream_overflow;
413*a28cd43dSSascha Wildner     assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
414*a28cd43dSSascha Wildner     bitD->ptr -= bitD->bitsConsumed >> 3;
415*a28cd43dSSascha Wildner     bitD->bitsConsumed &= 7;
416*a28cd43dSSascha Wildner     bitD->bitContainer = MEM_readLEST(bitD->ptr);
417*a28cd43dSSascha Wildner     return BIT_DStream_unfinished;
418*a28cd43dSSascha Wildner }
419*a28cd43dSSascha Wildner 
420*a28cd43dSSascha Wildner /*! BIT_reloadDStream() :
421*a28cd43dSSascha Wildner  *  Refill `bitD` from buffer previously set in BIT_initDStream() .
422*a28cd43dSSascha Wildner  *  This function is safe, it guarantees it will not read beyond src buffer.
423*a28cd43dSSascha Wildner  * @return : status of `BIT_DStream_t` internal register.
424*a28cd43dSSascha Wildner  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
BIT_reloadDStream(BIT_DStream_t * bitD)425*a28cd43dSSascha Wildner MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
426*a28cd43dSSascha Wildner {
427*a28cd43dSSascha Wildner     if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
428*a28cd43dSSascha Wildner         return BIT_DStream_overflow;
429*a28cd43dSSascha Wildner 
430*a28cd43dSSascha Wildner     if (bitD->ptr >= bitD->limitPtr) {
431*a28cd43dSSascha Wildner         return BIT_reloadDStreamFast(bitD);
432*a28cd43dSSascha Wildner     }
433*a28cd43dSSascha Wildner     if (bitD->ptr == bitD->start) {
434*a28cd43dSSascha Wildner         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
435*a28cd43dSSascha Wildner         return BIT_DStream_completed;
436*a28cd43dSSascha Wildner     }
437*a28cd43dSSascha Wildner     /* start < ptr < limitPtr */
438*a28cd43dSSascha Wildner     {   U32 nbBytes = bitD->bitsConsumed >> 3;
439*a28cd43dSSascha Wildner         BIT_DStream_status result = BIT_DStream_unfinished;
440*a28cd43dSSascha Wildner         if (bitD->ptr - nbBytes < bitD->start) {
441*a28cd43dSSascha Wildner             nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
442*a28cd43dSSascha Wildner             result = BIT_DStream_endOfBuffer;
443*a28cd43dSSascha Wildner         }
444*a28cd43dSSascha Wildner         bitD->ptr -= nbBytes;
445*a28cd43dSSascha Wildner         bitD->bitsConsumed -= nbBytes*8;
446*a28cd43dSSascha Wildner         bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
447*a28cd43dSSascha Wildner         return result;
448*a28cd43dSSascha Wildner     }
449*a28cd43dSSascha Wildner }
450*a28cd43dSSascha Wildner 
451*a28cd43dSSascha Wildner /*! BIT_endOfDStream() :
452*a28cd43dSSascha Wildner  * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
453*a28cd43dSSascha Wildner  */
BIT_endOfDStream(const BIT_DStream_t * DStream)454*a28cd43dSSascha Wildner MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
455*a28cd43dSSascha Wildner {
456*a28cd43dSSascha Wildner     return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
457*a28cd43dSSascha Wildner }
458*a28cd43dSSascha Wildner 
459*a28cd43dSSascha Wildner #if defined (__cplusplus)
460*a28cd43dSSascha Wildner }
461*a28cd43dSSascha Wildner #endif
462*a28cd43dSSascha Wildner 
463*a28cd43dSSascha Wildner #endif /* BITSTREAM_H_MODULE */
464