1 /************************************************************************************
2 Includes
3 ************************************************************************************/
4 #include "All.h"
5 #include "BitArray.h"
6 #include "MD5.h"
7 
8 namespace APE
9 {
10 
11 /************************************************************************************
12 Declares
13 ************************************************************************************/
14 #define BIT_ARRAY_ELEMENTS            (4096)                        // the number of elements in the bit array (4 MB)
15 #define BIT_ARRAY_BYTES                (BIT_ARRAY_ELEMENTS * 4)    // the number of bytes in the bit array
16 #define BIT_ARRAY_BITS                (BIT_ARRAY_BYTES    * 8)    // the number of bits in the bit array
17 
18 #define MAX_ELEMENT_BITS            128
19 #define REFILL_BIT_THRESHOLD        (BIT_ARRAY_BITS - MAX_ELEMENT_BITS)
20 
21 #define CODE_BITS 32
22 #define TOP_VALUE ((unsigned int) 1 << (CODE_BITS - 1))
23 #define SHIFT_BITS (CODE_BITS - 9)
24 #define EXTRA_BITS ((CODE_BITS - 2) % 8 + 1)
25 #define BOTTOM_VALUE (TOP_VALUE >> 8)
26 
27 /************************************************************************************
28 Lookup tables
29 ************************************************************************************/
30 const uint32 K_SUM_MIN_BOUNDARY[32] = {0,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,16777216,33554432,67108864,134217728,268435456,536870912,1073741824,2147483648,0,0,0,0};
31 
32 #define MODEL_ELEMENTS                    64
33 #define RANGE_OVERFLOW_TOTAL_WIDTH        65536
34 #define RANGE_OVERFLOW_SHIFT            16
35 
36 const uint32 RANGE_TOTAL[64] = {0,19578,36160,48417,56323,60899,63265,64435,64971,65232,65351,65416,65447,65466,65476,65482,65485,65488,65490,65491,65492,65493,65494,65495,65496,65497,65498,65499,65500,65501,65502,65503,65504,65505,65506,65507,65508,65509,65510,65511,65512,65513,65514,65515,65516,65517,65518,65519,65520,65521,65522,65523,65524,65525,65526,65527,65528,65529,65530,65531,65532,65533,65534,65535,};
37 const uint32 RANGE_WIDTH[64] = {19578,16582,12257,7906,4576,2366,1170,536,261,119,65,31,19,10,6,3,3,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,};
38 
39 #ifdef BUILD_RANGE_TABLE
40     int g_aryOverflows[256] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
41     int g_nTotalOverflow = 0;
42 #endif
43 
44 /************************************************************************************
45 Constructor
46 ************************************************************************************/
CBitArray(CIO * pIO)47 CBitArray::CBitArray(CIO *pIO)
48 {
49     // allocate memory for the bit array
50     m_pBitArray = new uint32 [BIT_ARRAY_ELEMENTS];
51     memset(m_pBitArray, 0, BIT_ARRAY_BYTES);
52 
53     // initialize other variables
54     m_nCurrentBitIndex = 0;
55     m_pIO = pIO;
56 }
57 
58 /************************************************************************************
59 Destructor
60 ************************************************************************************/
~CBitArray()61 CBitArray::~CBitArray()
62 {
63     // free the bit array
64     SAFE_ARRAY_DELETE(m_pBitArray)
65 #ifdef BUILD_RANGE_TABLE
66     OutputRangeTable();
67 #endif
68 }
69 
70 /************************************************************************************
71 Output the bit array via the CIO (typically saves to disk)
72 ************************************************************************************/
OutputBitArray(bool bFinalize)73 int CBitArray::OutputBitArray(bool bFinalize)
74 {
75     // write the entire file to disk
76     unsigned int nBytesWritten = 0;
77     unsigned int nBytesToWrite = 0;
78     unsigned int nResult = 0;
79 
80     if (bFinalize)
81     {
82         nBytesToWrite = ((m_nCurrentBitIndex >> 5) * 4) + 4;
83 
84         m_MD5.AddData(m_pBitArray, nBytesToWrite);
85 
86         RETURN_ON_ERROR(m_pIO->Write(m_pBitArray, nBytesToWrite, &nBytesWritten))
87 
88         // reset the bit pointer
89         m_nCurrentBitIndex = 0;
90     }
91     else
92     {
93         nBytesToWrite = (m_nCurrentBitIndex >> 5) * 4;
94 
95         m_MD5.AddData(m_pBitArray, nBytesToWrite);
96 
97         RETURN_ON_ERROR(m_pIO->Write(m_pBitArray, nBytesToWrite, &nBytesWritten))
98 
99         // move the last value to the front of the bit array
100         m_pBitArray[0] = m_pBitArray[m_nCurrentBitIndex >> 5];
101         m_nCurrentBitIndex = (m_nCurrentBitIndex & 31);
102 
103         // zero the rest of the memory (may not need the +1 because of frame byte alignment)
104         memset(&m_pBitArray[1], 0, ape_min((int)nBytesToWrite + 1, BIT_ARRAY_BYTES - 1));
105     }
106 
107     // return a success
108     return ERROR_SUCCESS;
109 }
110 
111 /************************************************************************************
112 Range coding macros -- ugly, but outperform inline's (every cycle counts here)
113 ************************************************************************************/
114 #define PUTC(VALUE) m_pBitArray[m_nCurrentBitIndex >> 5] |= ((VALUE) & 0xFF) << (24 - (m_nCurrentBitIndex & 31)); m_nCurrentBitIndex += 8;
115 #define PUTC_NOCAP(VALUE) m_pBitArray[m_nCurrentBitIndex >> 5] |= (VALUE) << (24 - (m_nCurrentBitIndex & 31)); m_nCurrentBitIndex += 8;
116 
117 #define NORMALIZE_RANGE_CODER                                                                    \
118     while (m_RangeCoderInfo.range <= BOTTOM_VALUE)                                                \
119     {                                                                                            \
120         if (m_RangeCoderInfo.low < (0xFF << SHIFT_BITS))                                        \
121         {                                                                                        \
122             PUTC(m_RangeCoderInfo.buffer);                                                        \
123             for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--) { PUTC_NOCAP(0xFF); }        \
124             m_RangeCoderInfo.buffer = (m_RangeCoderInfo.low >> SHIFT_BITS);                        \
125         }                                                                                        \
126         else if (m_RangeCoderInfo.low & TOP_VALUE)                                                \
127         {                                                                                        \
128             PUTC(m_RangeCoderInfo.buffer + 1);                                                    \
129             m_nCurrentBitIndex += (m_RangeCoderInfo.help * 8);                                    \
130             m_RangeCoderInfo.help = 0;                                                            \
131             m_RangeCoderInfo.buffer = (m_RangeCoderInfo.low >> SHIFT_BITS);                        \
132         }                                                                                        \
133         else                                                                                    \
134         {                                                                                        \
135             m_RangeCoderInfo.help++;                                                            \
136         }                                                                                        \
137                                                                                                 \
138         m_RangeCoderInfo.low = (m_RangeCoderInfo.low << 8) & (TOP_VALUE - 1);                    \
139         m_RangeCoderInfo.range <<= 8;                                                            \
140     }
141 
142 #define ENCODE_FAST(RANGE_WIDTH, RANGE_TOTAL, SHIFT)                                            \
143     NORMALIZE_RANGE_CODER                                                                        \
144     const int nTemp = m_RangeCoderInfo.range >> (SHIFT);                                        \
145     m_RangeCoderInfo.range = nTemp * (RANGE_WIDTH);                                                \
146     m_RangeCoderInfo.low += nTemp * (RANGE_TOTAL);
147 
148 #define ENCODE_DIRECT(VALUE, SHIFT)                                                                \
149     NORMALIZE_RANGE_CODER                                                                        \
150     m_RangeCoderInfo.range = m_RangeCoderInfo.range >> (SHIFT);                                    \
151     m_RangeCoderInfo.low += m_RangeCoderInfo.range * (VALUE);
152 
153 /************************************************************************************
154 Directly encode bits to the bitstream
155 ************************************************************************************/
EncodeBits(unsigned int nValue,int nBits)156 int CBitArray::EncodeBits(unsigned int nValue, int nBits)
157 {
158     // make sure there is room for the data
159     // this is a little slower than ensuring a huge block to start with, but it's safer
160     if (m_nCurrentBitIndex > REFILL_BIT_THRESHOLD)
161     {
162         RETURN_ON_ERROR(OutputBitArray())
163     }
164 
165     ENCODE_DIRECT(nValue, nBits);
166     return 0;
167 }
168 
169 /************************************************************************************
170 Encodes an unsigned int to the bit array (no rice coding)
171 ************************************************************************************/
EncodeUnsignedLong(unsigned int n)172 int CBitArray::EncodeUnsignedLong(unsigned int n)
173 {
174     // make sure there are at least 8 bytes in the buffer
175     if (m_nCurrentBitIndex > (BIT_ARRAY_BYTES - 8))
176     {
177         RETURN_ON_ERROR(OutputBitArray())
178     }
179 
180     // encode the value
181     uint32 nBitArrayIndex = m_nCurrentBitIndex >> 5;
182     int nBitIndex = m_nCurrentBitIndex & 31;
183 
184     if (nBitIndex == 0)
185     {
186         m_pBitArray[nBitArrayIndex] = n;
187     }
188     else
189     {
190         m_pBitArray[nBitArrayIndex] |= n >> nBitIndex;
191         m_pBitArray[nBitArrayIndex + 1] = n << (32 - nBitIndex);
192     }
193 
194     m_nCurrentBitIndex += 32;
195 
196     return 0;
197 }
198 
199 /************************************************************************************
200 Advance to a byte boundary (for frame alignment)
201 ************************************************************************************/
AdvanceToByteBoundary()202 void CBitArray::AdvanceToByteBoundary()
203 {
204     while (m_nCurrentBitIndex % 8)
205         m_nCurrentBitIndex++;
206 }
207 
208 /************************************************************************************
209 Encode a value
210 ************************************************************************************/
EncodeValue(int nEncode,BIT_ARRAY_STATE & BitArrayState)211 int CBitArray::EncodeValue(int nEncode, BIT_ARRAY_STATE & BitArrayState)
212 {
213     // make sure there is room for the data
214     // this is a little slower than ensuring a huge block to start with, but it's safer
215     if (m_nCurrentBitIndex > REFILL_BIT_THRESHOLD)
216     {
217         RETURN_ON_ERROR(OutputBitArray())
218     }
219 
220     // convert to unsigned
221     nEncode = (nEncode > 0) ? nEncode * 2 - 1 : -nEncode * 2;
222 
223     // figure the pivot value
224     int nPivotValue = ape_max(BitArrayState.nKSum / 32, (uint32)1);
225     int nOverflow = nEncode / nPivotValue;
226     int nBase = nEncode - (nOverflow * nPivotValue);
227 
228     // update nKSum
229     BitArrayState.nKSum += ((nEncode + 1) / 2) - ((BitArrayState.nKSum + 16) >> 5);
230 
231     // store the overflow
232     if (nOverflow < (MODEL_ELEMENTS - 1))
233     {
234         ENCODE_FAST(RANGE_WIDTH[nOverflow], RANGE_TOTAL[nOverflow], RANGE_OVERFLOW_SHIFT);
235 
236         #ifdef BUILD_RANGE_TABLE
237             g_aryOverflows[nOverflow]++;
238             g_nTotalOverflow++;
239         #endif
240     }
241     else
242     {
243         // store the "special" overflow (tells that perfect k is encoded next)
244         ENCODE_FAST(RANGE_WIDTH[MODEL_ELEMENTS - 1], RANGE_TOTAL[MODEL_ELEMENTS - 1], RANGE_OVERFLOW_SHIFT);
245 
246         #ifdef BUILD_RANGE_TABLE
247             g_aryOverflows[MODEL_ELEMENTS - 1]++;
248             g_nTotalOverflow++;
249         #endif
250 
251         // code the overflow using straight bits
252         ENCODE_DIRECT((nOverflow >> 16) & 0xFFFF, 16);
253         ENCODE_DIRECT(nOverflow & 0xFFFF, 16);
254     }
255 
256     // code the base
257     {
258         if (nPivotValue >= (1 << 16))
259         {
260             int nPivotValueBits = 0;
261             while ((nPivotValue >> nPivotValueBits) > 0) { nPivotValueBits++; }
262             int nSplitFactor = 1 << (nPivotValueBits - 16);
263 
264             // we know that base is smaller than pivot coming into this
265             // however, after we divide both by an integer, they could be the same
266             // we account by adding one to the pivot, but this hurts compression
267             // by (1 / nSplitFactor) -- therefore we maximize the split factor
268             // that gets one added to it
269 
270             // encode the pivot as two pieces
271             int nPivotValueA = (nPivotValue / nSplitFactor) + 1;
272             int nPivotValueB = nSplitFactor;
273 
274             int nBaseA = nBase / nSplitFactor;
275             int nBaseB = nBase % nSplitFactor;
276 
277             {
278                 NORMALIZE_RANGE_CODER
279                 const int nTemp = m_RangeCoderInfo.range / nPivotValueA;
280                 m_RangeCoderInfo.range = nTemp;
281                 m_RangeCoderInfo.low += nTemp * nBaseA;
282             }
283 
284             {
285                 NORMALIZE_RANGE_CODER
286                 const int nTemp = m_RangeCoderInfo.range / nPivotValueB;
287                 m_RangeCoderInfo.range = nTemp;
288                 m_RangeCoderInfo.low += nTemp * nBaseB;
289             }
290         }
291         else
292         {
293             NORMALIZE_RANGE_CODER
294             const int nTemp = m_RangeCoderInfo.range / nPivotValue;
295             m_RangeCoderInfo.range = nTemp;
296             m_RangeCoderInfo.low += nTemp * nBase;
297         }
298     }
299 
300     return 0;
301 }
302 
303 /************************************************************************************
304 Flush
305 ************************************************************************************/
FlushBitArray()306 void CBitArray::FlushBitArray()
307 {
308     // advance to a byte boundary (for alignment)
309     AdvanceToByteBoundary();
310 
311     // the range coder
312     m_RangeCoderInfo.low = 0;  // full code range
313     m_RangeCoderInfo.range = TOP_VALUE;
314     m_RangeCoderInfo.buffer = 0;
315     m_RangeCoderInfo.help = 0;  // no bytes to follow
316 }
317 
FlushState(BIT_ARRAY_STATE & BitArrayState)318 void CBitArray::FlushState(BIT_ARRAY_STATE & BitArrayState)
319 {
320     // ksum
321     BitArrayState.nKSum = (1 << 10) * 16;
322 }
323 
324 /************************************************************************************
325 Finalize
326 ************************************************************************************/
Finalize()327 void CBitArray::Finalize()
328 {
329     NORMALIZE_RANGE_CODER
330 
331     unsigned int nTemp = (m_RangeCoderInfo.low >> SHIFT_BITS) + 1;
332 
333     if (nTemp > 0xFF) // we have a carry
334     {
335         PUTC(m_RangeCoderInfo.buffer + 1);
336         for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--)
337         {
338             PUTC(0);
339         }
340     }
341     else  // no carry
342     {
343         PUTC(m_RangeCoderInfo.buffer);
344         for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--)
345         {
346             PUTC(((unsigned char) 0xFF));
347         }
348     }
349 
350     // we must output these bytes so the decoder can properly work at the end of the stream
351     PUTC(nTemp & 0xFF);
352     PUTC(0);
353     PUTC(0);
354     PUTC(0);
355 }
356 
357 /************************************************************************************
358 Build a range table (for development / debugging)
359 ************************************************************************************/
360 #ifdef BUILD_RANGE_TABLE
OutputRangeTable()361 void CBitArray::OutputRangeTable()
362 {
363     int z;
364 
365     if (g_nTotalOverflow == 0) return;
366 
367     int nTotal = 0;
368     int aryWidth[256]; ZeroMemory(aryWidth, 256 * 4);
369     for (z = 0; z < MODEL_ELEMENTS; z++)
370     {
371         aryWidth[z] = int(((float(g_aryOverflows[z]) * float(65536)) + (g_nTotalOverflow / 2)) / float(g_nTotalOverflow));
372         if (aryWidth[z] == 0) aryWidth[z] = 1;
373         nTotal += aryWidth[z];
374     }
375 
376     z = 0;
377     while (nTotal > 65536)
378     {
379         if (aryWidth[z] != 1)
380         {
381             aryWidth[z]--;
382             nTotal--;
383         }
384         z++;
385         if (z == MODEL_ELEMENTS) z = 0;
386     }
387 
388     z = 0;
389     while (nTotal < 65536)
390     {
391         aryWidth[z++]++;
392         nTotal++;
393         if (z == MODEL_ELEMENTS) z = 0;
394     }
395 
396     int aryTotal[256]; ZeroMemory(aryTotal, 256 * 4);
397     for (z = 0; z < MODEL_ELEMENTS; z++)
398     {
399         for (int q = 0; q < z; q++)
400         {
401             aryTotal[z] += aryWidth[q];
402         }
403     }
404 
405     TCHAR buf[1024];
406     _stprintf(buf, _T("const uint32 RANGE_TOTAL[%d] = {"), MODEL_ELEMENTS);
407     ODS(buf);
408     for (z = 0; z < MODEL_ELEMENTS; z++)
409     {
410         _stprintf(buf, _T("%d,"), aryTotal[z]);
411         OutputDebugString(buf);
412     }
413     ODS(_T("};\n"));
414 
415     _stprintf(buf, _T("const uint32 RANGE_WIDTH[%d] = {"), MODEL_ELEMENTS);
416     ODS(buf);
417     for (z = 0; z < MODEL_ELEMENTS; z++)
418     {
419         _stprintf(buf, _T("%d,"), aryWidth[z]);
420         OutputDebugString(buf);
421     }
422     ODS(_T("};\n\n"));
423 }
424 #endif // #ifdef BUILD_RANGE_TABLE
425 
426 }
427