1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 /*-------------------------------------------------------------*/
18 /*--- Huffman coding low-level stuff                        ---*/
19 /*---                                           huffman.cpp ---*/
20 /*-------------------------------------------------------------*/
21 
22 /* ------------------------------------------------------------------
23    The original source for this example:
24    This file is part of bzip2/libbzip2, a program and library for
25    lossless, block-sorting data compression.
26 
27    bzip2/libbzip2 version 1.0.6 of 6 September 2010
28    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
29 
30    This program, "bzip2", the associated library "libbzip2", and all
31    documentation, are copyright (C) 1996-2010 Julian R Seward.  All
32    rights reserved.
33 
34    Redistribution and use in source and binary forms, with or without
35    modification, are permitted provided that the following conditions
36    are met:
37 
38    1. Redistributions of source code must retain the above copyright
39    notice, this list of conditions and the following disclaimer.
40 
41    2. The origin of this software must not be misrepresented; you must
42    not claim that you wrote the original software.  If you use this
43    software in a product, an acknowledgment in the product
44    documentation would be appreciated but is not required.
45 
46    3. Altered source versions must be plainly marked as such, and must
47    not be misrepresented as being the original software.
48 
49    4. The name of the author may not be used to endorse or promote
50    products derived from this software without specific prior written
51    permission.
52 
53    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
54    OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56    ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
57    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
59    GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
61    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 
65    Julian Seward, jseward@bzip.org
66    bzip2/libbzip2 version 1.0.6 of 6 September 2010
67    ------------------------------------------------------------------ */
68 
69 #include "bzlib_private.hpp"
70 
71 /*---------------------------------------------------*/
72 #define WEIGHTOF(zz0)   ((zz0)&0xffffff00)
73 #define DEPTHOF(zz1)    ((zz1)&0x000000ff)
74 #define MYMAX(zz2, zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
75 
76 #define ADDWEIGHTS(zw1, zw2) \
77     (WEIGHTOF(zw1) + WEIGHTOF(zw2)) | (1 + MYMAX(DEPTHOF(zw1), DEPTHOF(zw2)))
78 
79 #define UPHEAP(z)                                     \
80     {                                                 \
81         Int32 zz, tmp;                                \
82         zz = z;                                       \
83         tmp = heap[zz];                               \
84         while (weight[tmp] < weight[heap[zz >> 1]]) { \
85             heap[zz] = heap[zz >> 1];                 \
86             zz >>= 1;                                 \
87         }                                             \
88         heap[zz] = tmp;                               \
89     }
90 
91 #define DOWNHEAP(z)                                                    \
92     {                                                                  \
93         Int32 zz, yy, tmp;                                             \
94         zz = z;                                                        \
95         tmp = heap[zz];                                                \
96         while (True) {                                                 \
97             yy = zz << 1;                                              \
98             if (yy > nHeap)                                            \
99                 break;                                                 \
100             if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) \
101                 yy++;                                                  \
102             if (weight[tmp] < weight[heap[yy]])                        \
103                 break;                                                 \
104             heap[zz] = heap[yy];                                       \
105             zz = yy;                                                   \
106         }                                                              \
107         heap[zz] = tmp;                                                \
108     }
109 
110 /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)111 void BZ2_hbMakeCodeLengths(UChar *len, Int32 *freq, Int32 alphaSize, Int32 maxLen) {
112     /*--
113       Nodes and heap entries run from 1.  Entry 0
114       for both the heap and nodes is a sentinel.
115    --*/
116     Int32 nNodes, nHeap, n1, n2, i, j, k;
117     Bool tooLong;
118 
119     Int32 heap[BZ_MAX_ALPHA_SIZE + 2];
120     Int32 weight[BZ_MAX_ALPHA_SIZE * 2];
121     Int32 parent[BZ_MAX_ALPHA_SIZE * 2];
122 
123     for (i = 0; i < alphaSize; i++)
124         weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
125 
126     while (True) {
127         nNodes = alphaSize;
128         nHeap = 0;
129 
130         heap[0] = 0;
131         weight[0] = 0;
132         parent[0] = -2;
133 
134         for (i = 1; i <= alphaSize; i++) {
135             parent[i] = -1;
136             nHeap++;
137             heap[nHeap] = i;
138             UPHEAP(nHeap);
139         }
140 
141         AssertH(nHeap < (BZ_MAX_ALPHA_SIZE + 2), 2001);
142 
143         while (nHeap > 1) {
144             n1 = heap[1];
145             heap[1] = heap[nHeap];
146             nHeap--;
147             DOWNHEAP(1);
148             n2 = heap[1];
149             heap[1] = heap[nHeap];
150             nHeap--;
151             DOWNHEAP(1);
152             nNodes++;
153             parent[n1] = parent[n2] = nNodes;
154             weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
155             parent[nNodes] = -1;
156             nHeap++;
157             heap[nHeap] = nNodes;
158             UPHEAP(nHeap);
159         }
160 
161         AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
162 
163         tooLong = False;
164         for (i = 1; i <= alphaSize; i++) {
165             j = 0;
166             k = i;
167             while (parent[k] >= 0) {
168                 k = parent[k];
169                 j++;
170             }
171             len[i - 1] = j;
172             if (j > maxLen)
173                 tooLong = True;
174         }
175 
176         if (!tooLong)
177             break;
178 
179         /* 17 Oct 04: keep-going condition for the following loop used
180          to be 'i < alphaSize', which missed the last element,
181          theoretically leading to the possibility of the compressor
182          looping.  However, this count-scaling step is only needed if
183          one of the generated Huffman code words is longer than
184          maxLen, which up to and including version 1.0.2 was 20 bits,
185          which is extremely unlikely.  In version 1.0.3 maxLen was
186          changed to 17 bits, which has minimal effect on compression
187          ratio, but does mean this scaling step is used from time to
188          time, enough to verify that it works.
189 
190          This means that bzip2-1.0.3 and later will only produce
191          Huffman codes with a maximum length of 17 bits.  However, in
192          order to preserve backwards compatibility with bitstreams
193          produced by versions pre-1.0.3, the decompressor must still
194          handle lengths of up to 20. */
195 
196         for (i = 1; i <= alphaSize; i++) {
197             j = weight[i] >> 8;
198             j = 1 + (j / 2);
199             weight[i] = j << 8;
200         }
201     }
202 }
203 
204 /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)205 void BZ2_hbAssignCodes(Int32 *code, UChar *length, Int32 minLen, Int32 maxLen, Int32 alphaSize) {
206     Int32 j, vec, i;
207 
208     vec = 0;
209     for (j = minLen; j <= maxLen; j++) {
210         for (i = 0; i < alphaSize; i++)
211             if (length[i] == j) {
212                 code[i] = vec;
213                 vec++;
214             };
215         vec <<= 1;
216     }
217 }
218 
219 /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)220 void BZ2_hbCreateDecodeTables(Int32 *limit,
221                               Int32 *base,
222                               Int32 *perm,
223                               UChar *length,
224                               Int32 minLen,
225                               Int32 maxLen,
226                               Int32 alphaSize) {
227     Int32 pp, i, j, vec;
228 
229     pp = 0;
230     for (i = minLen; i <= maxLen; i++)
231         for (j = 0; j < alphaSize; j++)
232             if (length[j] == i) {
233                 perm[pp] = j;
234                 pp++;
235             };
236 
237     for (i = 0; i < BZ_MAX_CODE_LEN; i++)
238         base[i] = 0;
239     for (i = 0; i < alphaSize; i++)
240         base[length[i] + 1]++;
241 
242     for (i = 1; i < BZ_MAX_CODE_LEN; i++)
243         base[i] += base[i - 1];
244 
245     for (i = 0; i < BZ_MAX_CODE_LEN; i++)
246         limit[i] = 0;
247     vec = 0;
248 
249     for (i = minLen; i <= maxLen; i++) {
250         vec += (base[i + 1] - base[i]);
251         limit[i] = vec - 1;
252         vec <<= 1;
253     }
254     for (i = minLen + 1; i <= maxLen; i++)
255         base[i] = ((limit[i - 1] + 1) << 1) - base[i];
256 }
257 
258 /*-------------------------------------------------------------*/
259 /*--- end                                         huffman.c ---*/
260 /*-------------------------------------------------------------*/
261