1 /************************************************************************************
2 Includes
3 ************************************************************************************/
4 #include "All.h"
5 #include "BitArray.h"
6 #include "MD5.h"
7
8 /************************************************************************************
9 Declares
10 ************************************************************************************/
11 #define BIT_ARRAY_ELEMENTS (4096) // the number of elements in the bit array (4 MB)
12 #define BIT_ARRAY_BYTES (BIT_ARRAY_ELEMENTS * 4) // the number of bytes in the bit array
13 #define BIT_ARRAY_BITS (BIT_ARRAY_BYTES * 8) // the number of bits in the bit array
14
15 #define MAX_ELEMENT_BITS 128
16 #define REFILL_BIT_THRESHOLD (BIT_ARRAY_BITS - MAX_ELEMENT_BITS)
17
18 #define CODE_BITS 32
19 #define TOP_VALUE ((unsigned int) 1 << (CODE_BITS - 1))
20 #define SHIFT_BITS (CODE_BITS - 9)
21 #define EXTRA_BITS ((CODE_BITS - 2) % 8 + 1)
22 #define BOTTOM_VALUE (TOP_VALUE >> 8)
23
24 /************************************************************************************
25 Lookup tables
26 ************************************************************************************/
27 const uint32 K_SUM_MIN_BOUNDARY[32] = {0,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,16777216,33554432,67108864,134217728,268435456,536870912,1073741824,2147483648,0,0,0,0};
28
29 #define MODEL_ELEMENTS 64
30 #define RANGE_OVERFLOW_TOTAL_WIDTH 65536
31 #define RANGE_OVERFLOW_SHIFT 16
32
33 const uint32 RANGE_TOTAL[64] = {0,19578,36160,48417,56323,60899,63265,64435,64971,65232,65351,65416,65447,65466,65476,65482,65485,65488,65490,65491,65492,65493,65494,65495,65496,65497,65498,65499,65500,65501,65502,65503,65504,65505,65506,65507,65508,65509,65510,65511,65512,65513,65514,65515,65516,65517,65518,65519,65520,65521,65522,65523,65524,65525,65526,65527,65528,65529,65530,65531,65532,65533,65534,65535,};
34 const uint32 RANGE_WIDTH[64] = {19578,16582,12257,7906,4576,2366,1170,536,261,119,65,31,19,10,6,3,3,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,};
35
36 #ifdef BUILD_RANGE_TABLE
37 int g_aryOverflows[256] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
38 int g_nTotalOverflow = 0;
39 #endif
40
41 /************************************************************************************
42 Constructor
43 ************************************************************************************/
CBitArray(CIO * pIO)44 CBitArray::CBitArray(CIO *pIO)
45 {
46 // allocate memory for the bit array
47 m_pBitArray = new uint32 [BIT_ARRAY_ELEMENTS];
48 memset(m_pBitArray, 0, BIT_ARRAY_BYTES);
49
50 // initialize other variables
51 m_nCurrentBitIndex = 0;
52 m_pIO = pIO;
53 }
54
55 /************************************************************************************
56 Destructor
57 ************************************************************************************/
~CBitArray()58 CBitArray::~CBitArray()
59 {
60 // free the bit array
61 SAFE_ARRAY_DELETE(m_pBitArray)
62 #ifdef BUILD_RANGE_TABLE
63 OutputRangeTable();
64 #endif
65 }
66
67 /************************************************************************************
68 Output the bit array via the CIO (typically saves to disk)
69 ************************************************************************************/
OutputBitArray(BOOL bFinalize)70 int CBitArray::OutputBitArray(BOOL bFinalize)
71 {
72 // write the entire file to disk
73 unsigned int nBytesWritten = 0;
74 unsigned int nBytesToWrite = 0;
75 // unsigned int nRetVal = 0;
76
77 if (bFinalize)
78 {
79 nBytesToWrite = ((m_nCurrentBitIndex >> 5) * 4) + 4;
80
81 m_MD5.AddData(m_pBitArray, nBytesToWrite);
82
83 #ifdef WORDS_BIGENDIAN
84 uint32 i ;
85 for (i = 0; i < nBytesToWrite / 4; i ++)
86 {
87 m_pBitArray[i] = swap_int32(m_pBitArray[i]);
88 }
89 #endif
90 RETURN_ON_ERROR(m_pIO->Write(m_pBitArray, nBytesToWrite, &nBytesWritten))
91
92 // reset the bit pointer
93 m_nCurrentBitIndex = 0;
94 }
95 else
96 {
97 nBytesToWrite = (m_nCurrentBitIndex >> 5) * 4;
98
99 m_MD5.AddData(m_pBitArray, nBytesToWrite);
100
101 #ifdef WORDS_BIGENDIAN
102 uint32 i ;
103 for (i = 0; i < nBytesToWrite / 4; i ++)
104 {
105 m_pBitArray[i] = swap_int32(m_pBitArray[i]);
106 }
107 #endif
108
109 RETURN_ON_ERROR(m_pIO->Write(m_pBitArray, nBytesToWrite, &nBytesWritten))
110
111 // move the last value to the front of the bit array
112 m_pBitArray[0] = m_pBitArray[m_nCurrentBitIndex >> 5];
113 m_nCurrentBitIndex = (m_nCurrentBitIndex & 31);
114
115 // zero the rest of the memory (may not need the +1 because of frame byte alignment)
116 memset(&m_pBitArray[1], 0, min(nBytesToWrite + 1, BIT_ARRAY_BYTES - 1));
117 }
118
119 // return a success
120 return ERROR_SUCCESS;
121 }
122
123 /************************************************************************************
124 Range coding macros -- ugly, but outperform inline's (every cycle counts here)
125 ************************************************************************************/
126 #define PUTC(VALUE) m_pBitArray[m_nCurrentBitIndex >> 5] |= ((VALUE) & 0xFF) << (24 - (m_nCurrentBitIndex & 31)); m_nCurrentBitIndex += 8;
127 #define PUTC_NOCAP(VALUE) m_pBitArray[m_nCurrentBitIndex >> 5] |= (VALUE) << (24 - (m_nCurrentBitIndex & 31)); m_nCurrentBitIndex += 8;
128
129 #define NORMALIZE_RANGE_CODER \
130 while (m_RangeCoderInfo.range <= BOTTOM_VALUE) \
131 { \
132 if (m_RangeCoderInfo.low < (0xFF << SHIFT_BITS)) \
133 { \
134 PUTC(m_RangeCoderInfo.buffer); \
135 for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--) { PUTC_NOCAP(0xFF); } \
136 m_RangeCoderInfo.buffer = (m_RangeCoderInfo.low >> SHIFT_BITS); \
137 } \
138 else if (m_RangeCoderInfo.low & TOP_VALUE) \
139 { \
140 PUTC(m_RangeCoderInfo.buffer + 1); \
141 m_nCurrentBitIndex += (m_RangeCoderInfo.help * 8); \
142 m_RangeCoderInfo.help = 0; \
143 m_RangeCoderInfo.buffer = (m_RangeCoderInfo.low >> SHIFT_BITS); \
144 } \
145 else \
146 { \
147 m_RangeCoderInfo.help++; \
148 } \
149 \
150 m_RangeCoderInfo.low = (m_RangeCoderInfo.low << 8) & (TOP_VALUE - 1); \
151 m_RangeCoderInfo.range <<= 8; \
152 }
153
154 #define ENCODE_FAST(RANGE_WIDTH, RANGE_TOTAL, SHIFT) \
155 NORMALIZE_RANGE_CODER \
156 const int nTemp = m_RangeCoderInfo.range >> (SHIFT); \
157 m_RangeCoderInfo.range = nTemp * (RANGE_WIDTH); \
158 m_RangeCoderInfo.low += nTemp * (RANGE_TOTAL);
159
160 #define ENCODE_DIRECT(VALUE, SHIFT) \
161 NORMALIZE_RANGE_CODER \
162 m_RangeCoderInfo.range = m_RangeCoderInfo.range >> (SHIFT); \
163 m_RangeCoderInfo.low += m_RangeCoderInfo.range * (VALUE);
164
165 /************************************************************************************
166 Directly encode bits to the bitstream
167 ************************************************************************************/
EncodeBits(unsigned int nValue,int nBits)168 int CBitArray::EncodeBits(unsigned int nValue, int nBits)
169 {
170 // make sure there is room for the data
171 // this is a little slower than ensuring a huge block to start with, but it's safer
172 if (m_nCurrentBitIndex > REFILL_BIT_THRESHOLD)
173 {
174 RETURN_ON_ERROR(OutputBitArray())
175 }
176
177 ENCODE_DIRECT(nValue, nBits);
178 return 0;
179 }
180
181 /************************************************************************************
182 Encodes an unsigned int to the bit array (no rice coding)
183 ************************************************************************************/
EncodeUnsignedLong(unsigned int n)184 int CBitArray::EncodeUnsignedLong(unsigned int n)
185 {
186 // make sure there are at least 8 bytes in the buffer
187 if (m_nCurrentBitIndex > (BIT_ARRAY_BYTES - 8))
188 {
189 RETURN_ON_ERROR(OutputBitArray())
190 }
191
192 // encode the value
193 uint32 nBitArrayIndex = m_nCurrentBitIndex >> 5;
194 int nBitIndex = m_nCurrentBitIndex & 31;
195
196 if (nBitIndex == 0)
197 {
198 m_pBitArray[nBitArrayIndex] = n;
199 }
200 else
201 {
202 m_pBitArray[nBitArrayIndex] |= n >> nBitIndex;
203 m_pBitArray[nBitArrayIndex + 1] = n << (32 - nBitIndex);
204 }
205
206 m_nCurrentBitIndex += 32;
207
208 return 0;
209 }
210
211 /************************************************************************************
212 Advance to a byte boundary (for frame alignment)
213 ************************************************************************************/
AdvanceToByteBoundary()214 void CBitArray::AdvanceToByteBoundary()
215 {
216 while (m_nCurrentBitIndex % 8)
217 m_nCurrentBitIndex++;
218 }
219
220 /************************************************************************************
221 Encode a value
222 ************************************************************************************/
EncodeValue(int nEncode,BIT_ARRAY_STATE & BitArrayState)223 int CBitArray::EncodeValue(int nEncode, BIT_ARRAY_STATE & BitArrayState)
224 {
225 // make sure there is room for the data
226 // this is a little slower than ensuring a huge block to start with, but it's safer
227 if (m_nCurrentBitIndex > REFILL_BIT_THRESHOLD)
228 {
229 RETURN_ON_ERROR(OutputBitArray())
230 }
231
232 // convert to unsigned
233 nEncode = (nEncode > 0) ? nEncode * 2 - 1 : -nEncode * 2;
234
235 int nOriginalKSum = BitArrayState.nKSum;
236
237 // get the working k
238 // int nTempK = (BitArrayState.k) ? BitArrayState.k - 1 : 0;
239
240 // update nKSum
241 BitArrayState.nKSum += ((nEncode + 1) / 2) - ((BitArrayState.nKSum + 16) >> 5);
242
243 // update k
244 if (BitArrayState.nKSum < K_SUM_MIN_BOUNDARY[BitArrayState.k])
245 BitArrayState.k--;
246 else if (BitArrayState.nKSum >= K_SUM_MIN_BOUNDARY[BitArrayState.k + 1])
247 BitArrayState.k++;
248
249 // figure the pivot value
250 int nPivotValue = max(nOriginalKSum / 32, 1);
251 int nOverflow = nEncode / nPivotValue;
252 int nBase = nEncode - (nOverflow * nPivotValue);
253
254 // store the overflow
255 if (nOverflow < (MODEL_ELEMENTS - 1))
256 {
257 ENCODE_FAST(RANGE_WIDTH[nOverflow], RANGE_TOTAL[nOverflow], RANGE_OVERFLOW_SHIFT);
258
259 #ifdef BUILD_RANGE_TABLE
260 g_aryOverflows[nOverflow]++;
261 g_nTotalOverflow++;
262 #endif
263 }
264 else
265 {
266 // store the "special" overflow (tells that perfect k is encoded next)
267 ENCODE_FAST(RANGE_WIDTH[MODEL_ELEMENTS - 1], RANGE_TOTAL[MODEL_ELEMENTS - 1], RANGE_OVERFLOW_SHIFT);
268
269 #ifdef BUILD_RANGE_TABLE
270 g_aryOverflows[MODEL_ELEMENTS - 1]++;
271 g_nTotalOverflow++;
272 #endif
273
274 // code the overflow using straight bits
275 ENCODE_DIRECT((nOverflow >> 16) & 0xFFFF, 16);
276 ENCODE_DIRECT(nOverflow & 0xFFFF, 16);
277 }
278
279 // code the base
280 {
281 if (nPivotValue >= (1 << 16))
282 {
283 int nPivotValueBits = 0;
284 while ((nPivotValue >> nPivotValueBits) > 0) { nPivotValueBits++; }
285 int nSplitFactor = 1 << (nPivotValueBits - 16);
286
287 // we know that base is smaller than pivot coming into this
288 // however, after we divide both by an integer, they could be the same
289 // we account by adding one to the pivot, but this hurts compression
290 // by (1 / nSplitFactor) -- therefore we maximize the split factor
291 // that gets one added to it
292
293 // encode the pivot as two pieces
294 int nPivotValueA = (nPivotValue / nSplitFactor) + 1;
295 int nPivotValueB = nSplitFactor;
296
297 int nBaseA = nBase / nSplitFactor;
298 int nBaseB = nBase % nSplitFactor;
299
300 {
301 NORMALIZE_RANGE_CODER
302 const int nTemp = m_RangeCoderInfo.range / nPivotValueA;
303 m_RangeCoderInfo.range = nTemp;
304 m_RangeCoderInfo.low += nTemp * nBaseA;
305 }
306
307 {
308 NORMALIZE_RANGE_CODER
309 const int nTemp = m_RangeCoderInfo.range / nPivotValueB;
310 m_RangeCoderInfo.range = nTemp;
311 m_RangeCoderInfo.low += nTemp * nBaseB;
312 }
313 }
314 else
315 {
316
317 NORMALIZE_RANGE_CODER
318 const int nTemp = m_RangeCoderInfo.range / nPivotValue;
319 m_RangeCoderInfo.range = nTemp;
320 m_RangeCoderInfo.low += nTemp * nBase;
321 }
322 }
323
324 return 0;
325 }
326
327 /************************************************************************************
328 Flush
329 ************************************************************************************/
FlushBitArray()330 void CBitArray::FlushBitArray()
331 {
332 // advance to a byte boundary (for alignment)
333 AdvanceToByteBoundary();
334
335 // the range coder
336 m_RangeCoderInfo.low = 0; // full code range
337 m_RangeCoderInfo.range = TOP_VALUE;
338 m_RangeCoderInfo.buffer = 0;
339 m_RangeCoderInfo.help = 0; // no bytes to follow
340 }
341
FlushState(BIT_ARRAY_STATE & BitArrayState)342 void CBitArray::FlushState(BIT_ARRAY_STATE & BitArrayState)
343 {
344 // k and ksum
345 BitArrayState.k = 10;
346 BitArrayState.nKSum = (1 << BitArrayState.k) * 16;
347 }
348
349 /************************************************************************************
350 Finalize
351 ************************************************************************************/
Finalize()352 void CBitArray::Finalize()
353 {
354 NORMALIZE_RANGE_CODER
355
356 unsigned int nTemp = (m_RangeCoderInfo.low >> SHIFT_BITS) + 1;
357
358 if (nTemp > 0xFF) // we have a carry
359 {
360 PUTC(m_RangeCoderInfo.buffer + 1);
361 for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--)
362 {
363 PUTC(0);
364 }
365 }
366 else // no carry
367 {
368 PUTC(m_RangeCoderInfo.buffer);
369 for ( ; m_RangeCoderInfo.help; m_RangeCoderInfo.help--)
370 {
371 PUTC(((unsigned char) 0xFF));
372 }
373 }
374
375 // we must output these bytes so the decoder can properly work at the end of the stream
376 PUTC(nTemp & 0xFF);
377 PUTC(0);
378 PUTC(0);
379 PUTC(0);
380 }
381
382 /************************************************************************************
383 Build a range table (for development / debugging)
384 ************************************************************************************/
385 #ifdef BUILD_RANGE_TABLE
OutputRangeTable()386 void CBitArray::OutputRangeTable()
387 {
388 int z;
389
390 if (g_nTotalOverflow == 0) return;
391
392 int nTotal = 0;
393 int aryWidth[256]; ZeroMemory(aryWidth, 256 * 4);
394 for (z = 0; z < MODEL_ELEMENTS; z++)
395 {
396 aryWidth[z] = int(((float(g_aryOverflows[z]) * float(65536)) + (g_nTotalOverflow / 2)) / float(g_nTotalOverflow));
397 if (aryWidth[z] == 0) aryWidth[z] = 1;
398 nTotal += aryWidth[z];
399 }
400
401 z = 0;
402 while (nTotal > 65536)
403 {
404 if (aryWidth[z] != 1)
405 {
406 aryWidth[z]--;
407 nTotal--;
408 }
409 z++;
410 if (z == MODEL_ELEMENTS) z = 0;
411 }
412
413 z = 0;
414 while (nTotal < 65536)
415 {
416 aryWidth[z++]++;
417 nTotal++;
418 if (z == MODEL_ELEMENTS) z = 0;
419 }
420
421 int aryTotal[256]; ZeroMemory(aryTotal, 256 * 4);
422 for (z = 0; z < MODEL_ELEMENTS; z++)
423 {
424 for (int q = 0; q < z; q++)
425 {
426 aryTotal[z] += aryWidth[q];
427 }
428 }
429
430 TCHAR buf[1024];
431 _stprintf(buf, _T("const uint32 RANGE_TOTAL[%d] = {"), MODEL_ELEMENTS);
432 ODS(buf);
433 for (z = 0; z < MODEL_ELEMENTS; z++)
434 {
435 _stprintf(buf, _T("%d,"), aryTotal[z]);
436 OutputDebugString(buf);
437 }
438 ODS(_T("};\n"));
439
440 _stprintf(buf, _T("const uint32 RANGE_WIDTH[%d] = {"), MODEL_ELEMENTS);
441 ODS(buf);
442 for (z = 0; z < MODEL_ELEMENTS; z++)
443 {
444 _stprintf(buf, _T("%d,"), aryWidth[z]);
445 OutputDebugString(buf);
446 }
447 ODS(_T("};\n\n"));
448 }
449 #endif // #ifdef BUILD_RANGE_TABLE
450