1 /*
2 Copyright (c) 2005-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 /*-------------------------------------------------------------*/
18 /*--- Huffman coding low-level stuff ---*/
19 /*--- huffman.cpp ---*/
20 /*-------------------------------------------------------------*/
21
22 /* ------------------------------------------------------------------
23 The original source for this example:
24 This file is part of bzip2/libbzip2, a program and library for
25 lossless, block-sorting data compression.
26
27 bzip2/libbzip2 version 1.0.6 of 6 September 2010
28 Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
29
30 This program, "bzip2", the associated library "libbzip2", and all
31 documentation, are copyright (C) 1996-2010 Julian R Seward. All
32 rights reserved.
33
34 Redistribution and use in source and binary forms, with or without
35 modification, are permitted provided that the following conditions
36 are met:
37
38 1. Redistributions of source code must retain the above copyright
39 notice, this list of conditions and the following disclaimer.
40
41 2. The origin of this software must not be misrepresented; you must
42 not claim that you wrote the original software. If you use this
43 software in a product, an acknowledgment in the product
44 documentation would be appreciated but is not required.
45
46 3. Altered source versions must be plainly marked as such, and must
47 not be misrepresented as being the original software.
48
49 4. The name of the author may not be used to endorse or promote
50 products derived from this software without specific prior written
51 permission.
52
53 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
54 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
55 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
57 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
59 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
60 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
61 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64
65 Julian Seward, jseward@bzip.org
66 bzip2/libbzip2 version 1.0.6 of 6 September 2010
67 ------------------------------------------------------------------ */
68
69 #include "bzlib_private.hpp"
70
71 /*---------------------------------------------------*/
72 #define WEIGHTOF(zz0) ((zz0)&0xffffff00)
73 #define DEPTHOF(zz1) ((zz1)&0x000000ff)
74 #define MYMAX(zz2, zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
75
76 #define ADDWEIGHTS(zw1, zw2) \
77 (WEIGHTOF(zw1) + WEIGHTOF(zw2)) | (1 + MYMAX(DEPTHOF(zw1), DEPTHOF(zw2)))
78
79 #define UPHEAP(z) \
80 { \
81 Int32 zz, tmp; \
82 zz = z; \
83 tmp = heap[zz]; \
84 while (weight[tmp] < weight[heap[zz >> 1]]) { \
85 heap[zz] = heap[zz >> 1]; \
86 zz >>= 1; \
87 } \
88 heap[zz] = tmp; \
89 }
90
91 #define DOWNHEAP(z) \
92 { \
93 Int32 zz, yy, tmp; \
94 zz = z; \
95 tmp = heap[zz]; \
96 while (True) { \
97 yy = zz << 1; \
98 if (yy > nHeap) \
99 break; \
100 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) \
101 yy++; \
102 if (weight[tmp] < weight[heap[yy]]) \
103 break; \
104 heap[zz] = heap[yy]; \
105 zz = yy; \
106 } \
107 heap[zz] = tmp; \
108 }
109
110 /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)111 void BZ2_hbMakeCodeLengths(UChar *len, Int32 *freq, Int32 alphaSize, Int32 maxLen) {
112 /*--
113 Nodes and heap entries run from 1. Entry 0
114 for both the heap and nodes is a sentinel.
115 --*/
116 Int32 nNodes, nHeap, n1, n2, i, j, k;
117 Bool tooLong;
118
119 Int32 heap[BZ_MAX_ALPHA_SIZE + 2];
120 Int32 weight[BZ_MAX_ALPHA_SIZE * 2];
121 Int32 parent[BZ_MAX_ALPHA_SIZE * 2];
122
123 for (i = 0; i < alphaSize; i++)
124 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
125
126 while (True) {
127 nNodes = alphaSize;
128 nHeap = 0;
129
130 heap[0] = 0;
131 weight[0] = 0;
132 parent[0] = -2;
133
134 for (i = 1; i <= alphaSize; i++) {
135 parent[i] = -1;
136 nHeap++;
137 heap[nHeap] = i;
138 UPHEAP(nHeap);
139 }
140
141 AssertH(nHeap < (BZ_MAX_ALPHA_SIZE + 2), 2001);
142
143 while (nHeap > 1) {
144 n1 = heap[1];
145 heap[1] = heap[nHeap];
146 nHeap--;
147 DOWNHEAP(1);
148 n2 = heap[1];
149 heap[1] = heap[nHeap];
150 nHeap--;
151 DOWNHEAP(1);
152 nNodes++;
153 parent[n1] = parent[n2] = nNodes;
154 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
155 parent[nNodes] = -1;
156 nHeap++;
157 heap[nHeap] = nNodes;
158 UPHEAP(nHeap);
159 }
160
161 AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002);
162
163 tooLong = False;
164 for (i = 1; i <= alphaSize; i++) {
165 j = 0;
166 k = i;
167 while (parent[k] >= 0) {
168 k = parent[k];
169 j++;
170 }
171 len[i - 1] = j;
172 if (j > maxLen)
173 tooLong = True;
174 }
175
176 if (!tooLong)
177 break;
178
179 /* 17 Oct 04: keep-going condition for the following loop used
180 to be 'i < alphaSize', which missed the last element,
181 theoretically leading to the possibility of the compressor
182 looping. However, this count-scaling step is only needed if
183 one of the generated Huffman code words is longer than
184 maxLen, which up to and including version 1.0.2 was 20 bits,
185 which is extremely unlikely. In version 1.0.3 maxLen was
186 changed to 17 bits, which has minimal effect on compression
187 ratio, but does mean this scaling step is used from time to
188 time, enough to verify that it works.
189
190 This means that bzip2-1.0.3 and later will only produce
191 Huffman codes with a maximum length of 17 bits. However, in
192 order to preserve backwards compatibility with bitstreams
193 produced by versions pre-1.0.3, the decompressor must still
194 handle lengths of up to 20. */
195
196 for (i = 1; i <= alphaSize; i++) {
197 j = weight[i] >> 8;
198 j = 1 + (j / 2);
199 weight[i] = j << 8;
200 }
201 }
202 }
203
204 /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)205 void BZ2_hbAssignCodes(Int32 *code, UChar *length, Int32 minLen, Int32 maxLen, Int32 alphaSize) {
206 Int32 j, vec, i;
207
208 vec = 0;
209 for (j = minLen; j <= maxLen; j++) {
210 for (i = 0; i < alphaSize; i++)
211 if (length[i] == j) {
212 code[i] = vec;
213 vec++;
214 };
215 vec <<= 1;
216 }
217 }
218
219 /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)220 void BZ2_hbCreateDecodeTables(Int32 *limit,
221 Int32 *base,
222 Int32 *perm,
223 UChar *length,
224 Int32 minLen,
225 Int32 maxLen,
226 Int32 alphaSize) {
227 Int32 pp, i, j, vec;
228
229 pp = 0;
230 for (i = minLen; i <= maxLen; i++)
231 for (j = 0; j < alphaSize; j++)
232 if (length[j] == i) {
233 perm[pp] = j;
234 pp++;
235 };
236
237 for (i = 0; i < BZ_MAX_CODE_LEN; i++)
238 base[i] = 0;
239 for (i = 0; i < alphaSize; i++)
240 base[length[i] + 1]++;
241
242 for (i = 1; i < BZ_MAX_CODE_LEN; i++)
243 base[i] += base[i - 1];
244
245 for (i = 0; i < BZ_MAX_CODE_LEN; i++)
246 limit[i] = 0;
247 vec = 0;
248
249 for (i = minLen; i <= maxLen; i++) {
250 vec += (base[i + 1] - base[i]);
251 limit[i] = vec - 1;
252 vec <<= 1;
253 }
254 for (i = minLen + 1; i <= maxLen; i++)
255 base[i] = ((limit[i - 1] + 1) << 1) - base[i];
256 }
257
258 /*-------------------------------------------------------------*/
259 /*--- end huffman.c ---*/
260 /*-------------------------------------------------------------*/
261