1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19
20 #include "deflate.h"
21
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25
26 #include "blocksplitter.h"
27 #include "squeeze.h"
28 #include "symbols.h"
29 #include "tree.h"
30
31 /*
32 bp = bitpointer, always in range [0, 7].
33 The outsize is number of necessary bytes to encode the bits.
34 Given the value of bp and the amount of bytes, the amount of bits represented
35 is not simply bytesize * 8 + bp because even representing one bit requires a
36 whole byte. It is: (bp == 0) ? (bytesize * 8) : ((bytesize - 1) * 8 + bp)
37 */
AddBit(int bit,unsigned char * bp,unsigned char ** out,size_t * outsize)38 static void AddBit(int bit,
39 unsigned char* bp, unsigned char** out, size_t* outsize) {
40 if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
41 (*out)[*outsize - 1] |= bit << *bp;
42 *bp = (*bp + 1) & 7;
43 }
44
AddBits(unsigned symbol,unsigned length,unsigned char * bp,unsigned char ** out,size_t * outsize)45 static void AddBits(unsigned symbol, unsigned length,
46 unsigned char* bp, unsigned char** out, size_t* outsize) {
47 /* TODO(lode): make more efficient (add more bits at once). */
48 unsigned i;
49 for (i = 0; i < length; i++) {
50 unsigned bit = (symbol >> i) & 1;
51 if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
52 (*out)[*outsize - 1] |= bit << *bp;
53 *bp = (*bp + 1) & 7;
54 }
55 }
56
57 /*
58 Adds bits, like AddBits, but the order is inverted. The deflate specification
59 uses both orders in one standard.
60 */
AddHuffmanBits(unsigned symbol,unsigned length,unsigned char * bp,unsigned char ** out,size_t * outsize)61 static void AddHuffmanBits(unsigned symbol, unsigned length,
62 unsigned char* bp, unsigned char** out,
63 size_t* outsize) {
64 /* TODO(lode): make more efficient (add more bits at once). */
65 unsigned i;
66 for (i = 0; i < length; i++) {
67 unsigned bit = (symbol >> (length - i - 1)) & 1;
68 if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
69 (*out)[*outsize - 1] |= bit << *bp;
70 *bp = (*bp + 1) & 7;
71 }
72 }
73
74 /*
75 Ensures there are at least 2 distance codes to support buggy decoders.
76 Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1
77 distance code (with length > 0), even though it's valid according to the
78 deflate spec to have 0 distance codes. On top of that, some mobile phones
79 require at least two distance codes. To support these decoders too (but
80 potentially at the cost of a few bytes), add dummy code lengths of 1.
81 References to this bug can be found in the changelog of
82 Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0.
83
84 d_lengths: the 32 lengths of the distance codes.
85 */
PatchDistanceCodesForBuggyDecoders(unsigned * d_lengths)86 static void PatchDistanceCodesForBuggyDecoders(unsigned* d_lengths) {
87 int num_dist_codes = 0; /* Amount of non-zero distance codes */
88 int i;
89 for (i = 0; i < 30 /* Ignore the two unused codes from the spec */; i++) {
90 if (d_lengths[i]) num_dist_codes++;
91 if (num_dist_codes >= 2) return; /* Two or more codes is fine. */
92 }
93
94 if (num_dist_codes == 0) {
95 d_lengths[0] = d_lengths[1] = 1;
96 } else if (num_dist_codes == 1) {
97 d_lengths[d_lengths[0] ? 1 : 0] = 1;
98 }
99 }
100
101 /*
102 Encodes the Huffman tree and returns how many bits its encoding takes. If out
103 is a null pointer, only returns the size and runs faster.
104 */
EncodeTree(const unsigned * ll_lengths,const unsigned * d_lengths,int use_16,int use_17,int use_18,unsigned char * bp,unsigned char ** out,size_t * outsize)105 static size_t EncodeTree(const unsigned* ll_lengths,
106 const unsigned* d_lengths,
107 int use_16, int use_17, int use_18,
108 unsigned char* bp,
109 unsigned char** out, size_t* outsize) {
110 unsigned lld_total; /* Total amount of literal, length, distance codes. */
111 /* Runlength encoded version of lengths of litlen and dist trees. */
112 unsigned* rle = 0;
113 unsigned* rle_bits = 0; /* Extra bits for rle values 16, 17 and 18. */
114 size_t rle_size = 0; /* Size of rle array. */
115 size_t rle_bits_size = 0; /* Should have same value as rle_size. */
116 unsigned hlit = 29; /* 286 - 257 */
117 unsigned hdist = 29; /* 32 - 1, but gzip does not like hdist > 29.*/
118 unsigned hclen;
119 unsigned hlit2;
120 size_t i, j;
121 size_t clcounts[19];
122 unsigned clcl[19]; /* Code length code lengths. */
123 unsigned clsymbols[19];
124 /* The order in which code length code lengths are encoded as per deflate. */
125 static const unsigned order[19] = {
126 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
127 };
128 int size_only = !out;
129 size_t result_size = 0;
130
131 for(i = 0; i < 19; i++) clcounts[i] = 0;
132
133 /* Trim zeros. */
134 while (hlit > 0 && ll_lengths[257 + hlit - 1] == 0) hlit--;
135 while (hdist > 0 && d_lengths[1 + hdist - 1] == 0) hdist--;
136 hlit2 = hlit + 257;
137
138 lld_total = hlit2 + hdist + 1;
139
140 for (i = 0; i < lld_total; i++) {
141 /* This is an encoding of a huffman tree, so now the length is a symbol */
142 unsigned char symbol = i < hlit2 ? ll_lengths[i] : d_lengths[i - hlit2];
143 unsigned count = 1;
144 if(use_16 || (symbol == 0 && (use_17 || use_18))) {
145 for (j = i + 1; j < lld_total && symbol ==
146 (j < hlit2 ? ll_lengths[j] : d_lengths[j - hlit2]); j++) {
147 count++;
148 }
149 }
150 i += count - 1;
151
152 /* Repetitions of zeroes */
153 if (symbol == 0 && count >= 3) {
154 if (use_18) {
155 while (count >= 11) {
156 unsigned count2 = count > 138 ? 138 : count;
157 if (!size_only) {
158 ZOPFLI_APPEND_DATA(18, &rle, &rle_size);
159 ZOPFLI_APPEND_DATA(count2 - 11, &rle_bits, &rle_bits_size);
160 }
161 clcounts[18]++;
162 count -= count2;
163 }
164 }
165 if (use_17) {
166 while (count >= 3) {
167 unsigned count2 = count > 10 ? 10 : count;
168 if (!size_only) {
169 ZOPFLI_APPEND_DATA(17, &rle, &rle_size);
170 ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
171 }
172 clcounts[17]++;
173 count -= count2;
174 }
175 }
176 }
177
178 /* Repetitions of any symbol */
179 if (use_16 && count >= 4) {
180 count--; /* Since the first one is hardcoded. */
181 clcounts[symbol]++;
182 if (!size_only) {
183 ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
184 ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
185 }
186 while (count >= 3) {
187 unsigned count2 = count > 6 ? 6 : count;
188 if (!size_only) {
189 ZOPFLI_APPEND_DATA(16, &rle, &rle_size);
190 ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
191 }
192 clcounts[16]++;
193 count -= count2;
194 }
195 }
196
197 /* No or insufficient repetition */
198 clcounts[symbol] += count;
199 while (count > 0) {
200 if (!size_only) {
201 ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
202 ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
203 }
204 count--;
205 }
206 }
207
208 ZopfliCalculateBitLengths(clcounts, 19, 7, clcl);
209 if (!size_only) ZopfliLengthsToSymbols(clcl, 19, 7, clsymbols);
210
211 hclen = 15;
212 /* Trim zeros. */
213 while (hclen > 0 && clcounts[order[hclen + 4 - 1]] == 0) hclen--;
214
215 if (!size_only) {
216 AddBits(hlit, 5, bp, out, outsize);
217 AddBits(hdist, 5, bp, out, outsize);
218 AddBits(hclen, 4, bp, out, outsize);
219
220 for (i = 0; i < hclen + 4; i++) {
221 AddBits(clcl[order[i]], 3, bp, out, outsize);
222 }
223
224 for (i = 0; i < rle_size; i++) {
225 unsigned symbol = clsymbols[rle[i]];
226 AddHuffmanBits(symbol, clcl[rle[i]], bp, out, outsize);
227 /* Extra bits. */
228 if (rle[i] == 16) AddBits(rle_bits[i], 2, bp, out, outsize);
229 else if (rle[i] == 17) AddBits(rle_bits[i], 3, bp, out, outsize);
230 else if (rle[i] == 18) AddBits(rle_bits[i], 7, bp, out, outsize);
231 }
232 }
233
234 result_size += 14; /* hlit, hdist, hclen bits */
235 result_size += (hclen + 4) * 3; /* clcl bits */
236 for(i = 0; i < 19; i++) {
237 result_size += clcl[i] * clcounts[i];
238 }
239 /* Extra bits. */
240 result_size += clcounts[16] * 2;
241 result_size += clcounts[17] * 3;
242 result_size += clcounts[18] * 7;
243
244 /* Note: in case of "size_only" these are null pointers so no effect. */
245 free(rle);
246 free(rle_bits);
247
248 return result_size;
249 }
250
AddDynamicTree(const unsigned * ll_lengths,const unsigned * d_lengths,unsigned char * bp,unsigned char ** out,size_t * outsize)251 static void AddDynamicTree(const unsigned* ll_lengths,
252 const unsigned* d_lengths,
253 unsigned char* bp,
254 unsigned char** out, size_t* outsize) {
255 int i;
256 int best = 0;
257 size_t bestsize = 0;
258
259 for(i = 0; i < 8; i++) {
260 size_t size = EncodeTree(ll_lengths, d_lengths,
261 i & 1, i & 2, i & 4,
262 0, 0, 0);
263 if (bestsize == 0 || size < bestsize) {
264 bestsize = size;
265 best = i;
266 }
267 }
268
269 EncodeTree(ll_lengths, d_lengths,
270 best & 1, best & 2, best & 4,
271 bp, out, outsize);
272 }
273
274 /*
275 Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE.
276 */
CalculateTreeSize(const unsigned * ll_lengths,const unsigned * d_lengths)277 static size_t CalculateTreeSize(const unsigned* ll_lengths,
278 const unsigned* d_lengths) {
279 size_t result = 0;
280 int i;
281
282 for(i = 0; i < 8; i++) {
283 size_t size = EncodeTree(ll_lengths, d_lengths,
284 i & 1, i & 2, i & 4,
285 0, 0, 0);
286 if (result == 0 || size < result) result = size;
287 }
288
289 return result;
290 }
291
292 /*
293 Adds all lit/len and dist codes from the lists as huffman symbols. Does not add
294 end code 256. expected_data_size is the uncompressed block size, used for
295 assert, but you can set it to 0 to not do the assertion.
296 */
AddLZ77Data(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,const unsigned * ll_symbols,const unsigned * ll_lengths,const unsigned * d_symbols,const unsigned * d_lengths,unsigned char * bp,unsigned char ** out,size_t * outsize)297 static void AddLZ77Data(const ZopfliLZ77Store* lz77,
298 size_t lstart, size_t lend,
299 size_t expected_data_size,
300 const unsigned* ll_symbols, const unsigned* ll_lengths,
301 const unsigned* d_symbols, const unsigned* d_lengths,
302 unsigned char* bp,
303 unsigned char** out, size_t* outsize) {
304 size_t testlength = 0;
305 size_t i;
306
307 for (i = lstart; i < lend; i++) {
308 unsigned dist = lz77->dists[i];
309 unsigned litlen = lz77->litlens[i];
310 if (dist == 0) {
311 assert(litlen < 256);
312 assert(ll_lengths[litlen] > 0);
313 AddHuffmanBits(ll_symbols[litlen], ll_lengths[litlen], bp, out, outsize);
314 testlength++;
315 } else {
316 unsigned lls = ZopfliGetLengthSymbol(litlen);
317 unsigned ds = ZopfliGetDistSymbol(dist);
318 assert(litlen >= 3 && litlen <= 288);
319 assert(ll_lengths[lls] > 0);
320 assert(d_lengths[ds] > 0);
321 AddHuffmanBits(ll_symbols[lls], ll_lengths[lls], bp, out, outsize);
322 AddBits(ZopfliGetLengthExtraBitsValue(litlen),
323 ZopfliGetLengthExtraBits(litlen),
324 bp, out, outsize);
325 AddHuffmanBits(d_symbols[ds], d_lengths[ds], bp, out, outsize);
326 AddBits(ZopfliGetDistExtraBitsValue(dist),
327 ZopfliGetDistExtraBits(dist),
328 bp, out, outsize);
329 testlength += litlen;
330 }
331 }
332 assert(expected_data_size == 0 || testlength == expected_data_size);
333 }
334
GetFixedTree(unsigned * ll_lengths,unsigned * d_lengths)335 static void GetFixedTree(unsigned* ll_lengths, unsigned* d_lengths) {
336 size_t i;
337 for (i = 0; i < 144; i++) ll_lengths[i] = 8;
338 for (i = 144; i < 256; i++) ll_lengths[i] = 9;
339 for (i = 256; i < 280; i++) ll_lengths[i] = 7;
340 for (i = 280; i < 288; i++) ll_lengths[i] = 8;
341 for (i = 0; i < 32; i++) d_lengths[i] = 5;
342 }
343
344 /*
345 Same as CalculateBlockSymbolSize, but for block size smaller than histogram
346 size.
347 */
CalculateBlockSymbolSizeSmall(const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)348 static size_t CalculateBlockSymbolSizeSmall(const unsigned* ll_lengths,
349 const unsigned* d_lengths,
350 const ZopfliLZ77Store* lz77,
351 size_t lstart, size_t lend) {
352 size_t result = 0;
353 size_t i;
354 for (i = lstart; i < lend; i++) {
355 assert(i < lz77->size);
356 assert(lz77->litlens[i] < 259);
357 if (lz77->dists[i] == 0) {
358 result += ll_lengths[lz77->litlens[i]];
359 } else {
360 int ll_symbol = ZopfliGetLengthSymbol(lz77->litlens[i]);
361 int d_symbol = ZopfliGetDistSymbol(lz77->dists[i]);
362 result += ll_lengths[ll_symbol];
363 result += d_lengths[d_symbol];
364 result += ZopfliGetLengthSymbolExtraBits(ll_symbol);
365 result += ZopfliGetDistSymbolExtraBits(d_symbol);
366 }
367 }
368 result += ll_lengths[256]; /*end symbol*/
369 return result;
370 }
371
372 /*
373 Same as CalculateBlockSymbolSize, but with the histogram provided by the caller.
374 */
CalculateBlockSymbolSizeGivenCounts(const size_t * ll_counts,const size_t * d_counts,const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)375 static size_t CalculateBlockSymbolSizeGivenCounts(const size_t* ll_counts,
376 const size_t* d_counts,
377 const unsigned* ll_lengths,
378 const unsigned* d_lengths,
379 const ZopfliLZ77Store* lz77,
380 size_t lstart, size_t lend) {
381 size_t result = 0;
382 size_t i;
383 if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
384 return CalculateBlockSymbolSizeSmall(
385 ll_lengths, d_lengths, lz77, lstart, lend);
386 } else {
387 for (i = 0; i < 256; i++) {
388 result += ll_lengths[i] * ll_counts[i];
389 }
390 for (i = 257; i < 286; i++) {
391 result += ll_lengths[i] * ll_counts[i];
392 result += ZopfliGetLengthSymbolExtraBits(i) * ll_counts[i];
393 }
394 for (i = 0; i < 30; i++) {
395 result += d_lengths[i] * d_counts[i];
396 result += ZopfliGetDistSymbolExtraBits(i) * d_counts[i];
397 }
398 result += ll_lengths[256]; /*end symbol*/
399 return result;
400 }
401 }
402
403 /*
404 Calculates size of the part after the header and tree of an LZ77 block, in bits.
405 */
CalculateBlockSymbolSize(const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)406 static size_t CalculateBlockSymbolSize(const unsigned* ll_lengths,
407 const unsigned* d_lengths,
408 const ZopfliLZ77Store* lz77,
409 size_t lstart, size_t lend) {
410 if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
411 return CalculateBlockSymbolSizeSmall(
412 ll_lengths, d_lengths, lz77, lstart, lend);
413 } else {
414 size_t ll_counts[ZOPFLI_NUM_LL];
415 size_t d_counts[ZOPFLI_NUM_D];
416 ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
417 return CalculateBlockSymbolSizeGivenCounts(
418 ll_counts, d_counts, ll_lengths, d_lengths, lz77, lstart, lend);
419 }
420 }
421
AbsDiff(size_t x,size_t y)422 static size_t AbsDiff(size_t x, size_t y) {
423 if (x > y)
424 return x - y;
425 else
426 return y - x;
427 }
428
429 /*
430 Changes the population counts in a way that the consequent Huffman tree
431 compression, especially its rle-part, will be more likely to compress this data
432 more efficiently. length contains the size of the histogram.
433 */
OptimizeHuffmanForRle(int length,size_t * counts)434 void OptimizeHuffmanForRle(int length, size_t* counts) {
435 int i, k, stride;
436 size_t symbol, sum, limit;
437 int* good_for_rle;
438
439 /* 1) We don't want to touch the trailing zeros. We may break the
440 rules of the format by adding more data in the distance codes. */
441 for (; length >= 0; --length) {
442 if (length == 0) {
443 return;
444 }
445 if (counts[length - 1] != 0) {
446 /* Now counts[0..length - 1] does not have trailing zeros. */
447 break;
448 }
449 }
450 /* 2) Let's mark all population counts that already can be encoded
451 with an rle code.*/
452 good_for_rle = (int*)malloc(length * sizeof(int));
453 for (i = 0; i < length; ++i) good_for_rle[i] = 0;
454
455 /* Let's not spoil any of the existing good rle codes.
456 Mark any seq of 0's that is longer than 5 as a good_for_rle.
457 Mark any seq of non-0's that is longer than 7 as a good_for_rle.*/
458 symbol = counts[0];
459 stride = 0;
460 for (i = 0; i < length + 1; ++i) {
461 if (i == length || counts[i] != symbol) {
462 if ((symbol == 0 && stride >= 5) || (symbol != 0 && stride >= 7)) {
463 for (k = 0; k < stride; ++k) {
464 good_for_rle[i - k - 1] = 1;
465 }
466 }
467 stride = 1;
468 if (i != length) {
469 symbol = counts[i];
470 }
471 } else {
472 ++stride;
473 }
474 }
475
476 /* 3) Let's replace those population counts that lead to more rle codes. */
477 stride = 0;
478 limit = counts[0];
479 sum = 0;
480 for (i = 0; i < length + 1; ++i) {
481 if (i == length || good_for_rle[i]
482 /* Heuristic for selecting the stride ranges to collapse. */
483 || AbsDiff(counts[i], limit) >= 4) {
484 if (stride >= 4 || (stride >= 3 && sum == 0)) {
485 /* The stride must end, collapse what we have, if we have enough (4). */
486 int count = (sum + stride / 2) / stride;
487 if (count < 1) count = 1;
488 if (sum == 0) {
489 /* Don't make an all zeros stride to be upgraded to ones. */
490 count = 0;
491 }
492 for (k = 0; k < stride; ++k) {
493 /* We don't want to change value at counts[i],
494 that is already belonging to the next stride. Thus - 1. */
495 counts[i - k - 1] = count;
496 }
497 }
498 stride = 0;
499 sum = 0;
500 if (i < length - 3) {
501 /* All interesting strides have a count of at least 4,
502 at least when non-zeros. */
503 limit = (counts[i] + counts[i + 1] +
504 counts[i + 2] + counts[i + 3] + 2) / 4;
505 } else if (i < length) {
506 limit = counts[i];
507 } else {
508 limit = 0;
509 }
510 }
511 ++stride;
512 if (i != length) {
513 sum += counts[i];
514 }
515 }
516
517 free(good_for_rle);
518 }
519
520 /*
521 Tries out OptimizeHuffmanForRle for this block, if the result is smaller,
522 uses it, otherwise keeps the original. Returns size of encoded tree and data in
523 bits, not including the 3-bit block header.
524 */
TryOptimizeHuffmanForRle(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,const size_t * ll_counts,const size_t * d_counts,unsigned * ll_lengths,unsigned * d_lengths)525 static double TryOptimizeHuffmanForRle(
526 const ZopfliLZ77Store* lz77, size_t lstart, size_t lend,
527 const size_t* ll_counts, const size_t* d_counts,
528 unsigned* ll_lengths, unsigned* d_lengths) {
529 size_t ll_counts2[ZOPFLI_NUM_LL];
530 size_t d_counts2[ZOPFLI_NUM_D];
531 unsigned ll_lengths2[ZOPFLI_NUM_LL];
532 unsigned d_lengths2[ZOPFLI_NUM_D];
533 double treesize;
534 double datasize;
535 double treesize2;
536 double datasize2;
537
538 treesize = CalculateTreeSize(ll_lengths, d_lengths);
539 datasize = CalculateBlockSymbolSizeGivenCounts(ll_counts, d_counts,
540 ll_lengths, d_lengths, lz77, lstart, lend);
541
542 memcpy(ll_counts2, ll_counts, sizeof(ll_counts2));
543 memcpy(d_counts2, d_counts, sizeof(d_counts2));
544 OptimizeHuffmanForRle(ZOPFLI_NUM_LL, ll_counts2);
545 OptimizeHuffmanForRle(ZOPFLI_NUM_D, d_counts2);
546 ZopfliCalculateBitLengths(ll_counts2, ZOPFLI_NUM_LL, 15, ll_lengths2);
547 ZopfliCalculateBitLengths(d_counts2, ZOPFLI_NUM_D, 15, d_lengths2);
548 PatchDistanceCodesForBuggyDecoders(d_lengths2);
549
550 treesize2 = CalculateTreeSize(ll_lengths2, d_lengths2);
551 datasize2 = CalculateBlockSymbolSizeGivenCounts(ll_counts, d_counts,
552 ll_lengths2, d_lengths2, lz77, lstart, lend);
553
554 if (treesize2 + datasize2 < treesize + datasize) {
555 memcpy(ll_lengths, ll_lengths2, sizeof(ll_lengths2));
556 memcpy(d_lengths, d_lengths2, sizeof(d_lengths2));
557 return treesize2 + datasize2;
558 }
559 return treesize + datasize;
560 }
561
562 /*
563 Calculates the bit lengths for the symbols for dynamic blocks. Chooses bit
564 lengths that give the smallest size of tree encoding + encoding of all the
565 symbols to have smallest output size. This are not necessarily the ideal Huffman
566 bit lengths. Returns size of encoded tree and data in bits, not including the
567 3-bit block header.
568 */
GetDynamicLengths(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,unsigned * ll_lengths,unsigned * d_lengths)569 static double GetDynamicLengths(const ZopfliLZ77Store* lz77,
570 size_t lstart, size_t lend,
571 unsigned* ll_lengths, unsigned* d_lengths) {
572 size_t ll_counts[ZOPFLI_NUM_LL];
573 size_t d_counts[ZOPFLI_NUM_D];
574
575 ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
576 ll_counts[256] = 1; /* End symbol. */
577 ZopfliCalculateBitLengths(ll_counts, ZOPFLI_NUM_LL, 15, ll_lengths);
578 ZopfliCalculateBitLengths(d_counts, ZOPFLI_NUM_D, 15, d_lengths);
579 PatchDistanceCodesForBuggyDecoders(d_lengths);
580 return TryOptimizeHuffmanForRle(
581 lz77, lstart, lend, ll_counts, d_counts, ll_lengths, d_lengths);
582 }
583
ZopfliCalculateBlockSize(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,int btype)584 double ZopfliCalculateBlockSize(const ZopfliLZ77Store* lz77,
585 size_t lstart, size_t lend, int btype) {
586 unsigned ll_lengths[ZOPFLI_NUM_LL];
587 unsigned d_lengths[ZOPFLI_NUM_D];
588
589 double result = 3; /* bfinal and btype bits */
590
591 if (btype == 0) {
592 size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
593 size_t rem = length % 65535;
594 size_t blocks = length / 65535 + (rem ? 1 : 0);
595 /* An uncompressed block must actually be split into multiple blocks if it's
596 larger than 65535 bytes long. Eeach block header is 5 bytes: 3 bits,
597 padding, LEN and NLEN (potential less padding for first one ignored). */
598 return blocks * 5 * 8 + length * 8;
599 } if (btype == 1) {
600 GetFixedTree(ll_lengths, d_lengths);
601 result += CalculateBlockSymbolSize(
602 ll_lengths, d_lengths, lz77, lstart, lend);
603 } else {
604 result += GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
605 }
606
607 return result;
608 }
609
ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)610 double ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store* lz77,
611 size_t lstart, size_t lend) {
612 double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
613 /* Don't do the expensive fixed cost calculation for larger blocks that are
614 unlikely to use it. */
615 double fixedcost = (lz77->size > 1000) ?
616 uncompressedcost : ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
617 double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
618 return (uncompressedcost < fixedcost && uncompressedcost < dyncost)
619 ? uncompressedcost
620 : (fixedcost < dyncost ? fixedcost : dyncost);
621 }
622
623 /* Since an uncompressed block can be max 65535 in size, it actually adds
624 multible blocks if needed. */
AddNonCompressedBlock(const ZopfliOptions * options,int final,const unsigned char * in,size_t instart,size_t inend,unsigned char * bp,unsigned char ** out,size_t * outsize)625 static void AddNonCompressedBlock(const ZopfliOptions* options, int final,
626 const unsigned char* in, size_t instart,
627 size_t inend,
628 unsigned char* bp,
629 unsigned char** out, size_t* outsize) {
630 size_t pos = instart;
631 (void)options;
632 for (;;) {
633 size_t i;
634 unsigned short blocksize = 65535;
635 unsigned short nlen;
636 int currentfinal;
637
638 if (pos + blocksize > inend) blocksize = inend - pos;
639 currentfinal = pos + blocksize >= inend;
640
641 nlen = ~blocksize;
642
643 AddBit(final && currentfinal, bp, out, outsize);
644 /* BTYPE 00 */
645 AddBit(0, bp, out, outsize);
646 AddBit(0, bp, out, outsize);
647
648 /* Any bits of input up to the next byte boundary are ignored. */
649 *bp = 0;
650
651 ZOPFLI_APPEND_DATA(blocksize % 256, out, outsize);
652 ZOPFLI_APPEND_DATA((blocksize / 256) % 256, out, outsize);
653 ZOPFLI_APPEND_DATA(nlen % 256, out, outsize);
654 ZOPFLI_APPEND_DATA((nlen / 256) % 256, out, outsize);
655
656 for (i = 0; i < blocksize; i++) {
657 ZOPFLI_APPEND_DATA(in[pos + i], out, outsize);
658 }
659
660 if (currentfinal) break;
661 pos += blocksize;
662 }
663 }
664
665 /*
666 Adds a deflate block with the given LZ77 data to the output.
667 options: global program options
668 btype: the block type, must be 1 or 2
669 final: whether to set the "final" bit on this block, must be the last block
670 litlens: literal/length array of the LZ77 data, in the same format as in
671 ZopfliLZ77Store.
672 dists: distance array of the LZ77 data, in the same format as in
673 ZopfliLZ77Store.
674 lstart: where to start in the LZ77 data
675 lend: where to end in the LZ77 data (not inclusive)
676 expected_data_size: the uncompressed block size, used for assert, but you can
677 set it to 0 to not do the assertion.
678 bp: output bit pointer
679 out: dynamic output array to append to
680 outsize: dynamic output array size
681 */
AddLZ77Block(const ZopfliOptions * options,int btype,int final,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,unsigned char * bp,unsigned char ** out,size_t * outsize)682 static void AddLZ77Block(const ZopfliOptions* options, int btype, int final,
683 const ZopfliLZ77Store* lz77,
684 size_t lstart, size_t lend,
685 size_t expected_data_size,
686 unsigned char* bp,
687 unsigned char** out, size_t* outsize) {
688 unsigned ll_lengths[ZOPFLI_NUM_LL];
689 unsigned d_lengths[ZOPFLI_NUM_D];
690 unsigned ll_symbols[ZOPFLI_NUM_LL];
691 unsigned d_symbols[ZOPFLI_NUM_D];
692 size_t detect_block_size = *outsize;
693 size_t compressed_size;
694 size_t uncompressed_size = 0;
695 size_t i;
696 if (btype == 0) {
697 size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
698 size_t pos = lstart == lend ? 0 : lz77->pos[lstart];
699 size_t end = pos + length;
700 AddNonCompressedBlock(options, final,
701 lz77->data, pos, end, bp, out, outsize);
702 return;
703 }
704
705 AddBit(final, bp, out, outsize);
706 AddBit(btype & 1, bp, out, outsize);
707 AddBit((btype & 2) >> 1, bp, out, outsize);
708
709 if (btype == 1) {
710 /* Fixed block. */
711 GetFixedTree(ll_lengths, d_lengths);
712 } else {
713 /* Dynamic block. */
714 unsigned detect_tree_size;
715 assert(btype == 2);
716
717 GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
718
719 detect_tree_size = *outsize;
720 AddDynamicTree(ll_lengths, d_lengths, bp, out, outsize);
721 if (options->verbose) {
722 fprintf(stderr, "treesize: %d\n", (int)(*outsize - detect_tree_size));
723 }
724 }
725
726 ZopfliLengthsToSymbols(ll_lengths, ZOPFLI_NUM_LL, 15, ll_symbols);
727 ZopfliLengthsToSymbols(d_lengths, ZOPFLI_NUM_D, 15, d_symbols);
728
729 detect_block_size = *outsize;
730 AddLZ77Data(lz77, lstart, lend, expected_data_size,
731 ll_symbols, ll_lengths, d_symbols, d_lengths,
732 bp, out, outsize);
733 /* End symbol. */
734 AddHuffmanBits(ll_symbols[256], ll_lengths[256], bp, out, outsize);
735
736 for (i = lstart; i < lend; i++) {
737 uncompressed_size += lz77->dists[i] == 0 ? 1 : lz77->litlens[i];
738 }
739 compressed_size = *outsize - detect_block_size;
740 if (options->verbose) {
741 fprintf(stderr, "compressed block size: %d (%dk) (unc: %d)\n",
742 (int)compressed_size, (int)(compressed_size / 1024),
743 (int)(uncompressed_size));
744 }
745 }
746
AddLZ77BlockAutoType(const ZopfliOptions * options,int final,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,unsigned char * bp,unsigned char ** out,size_t * outsize)747 static void AddLZ77BlockAutoType(const ZopfliOptions* options, int final,
748 const ZopfliLZ77Store* lz77,
749 size_t lstart, size_t lend,
750 size_t expected_data_size,
751 unsigned char* bp,
752 unsigned char** out, size_t* outsize) {
753 double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
754 double fixedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
755 double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
756
757 /* Whether to perform the expensive calculation of creating an optimal block
758 with fixed huffman tree to check if smaller. Only do this for small blocks or
759 blocks which already are pretty good with fixed huffman tree. */
760 int expensivefixed = (lz77->size < 1000) || fixedcost <= dyncost * 1.1;
761
762 ZopfliLZ77Store fixedstore;
763 if (lstart == lend) {
764 /* Smallest empty block is represented by fixed block */
765 AddBits(final, 1, bp, out, outsize);
766 AddBits(1, 2, bp, out, outsize); /* btype 01 */
767 AddBits(0, 7, bp, out, outsize); /* end symbol has code 0000000 */
768 return;
769 }
770 ZopfliInitLZ77Store(lz77->data, &fixedstore);
771 if (expensivefixed) {
772 /* Recalculate the LZ77 with ZopfliLZ77OptimalFixed */
773 size_t instart = lz77->pos[lstart];
774 size_t inend = instart + ZopfliLZ77GetByteRange(lz77, lstart, lend);
775
776 ZopfliBlockState s;
777 ZopfliInitBlockState(options, instart, inend, 1, &s);
778 ZopfliLZ77OptimalFixed(&s, lz77->data, instart, inend, &fixedstore);
779 fixedcost = ZopfliCalculateBlockSize(&fixedstore, 0, fixedstore.size, 1);
780 ZopfliCleanBlockState(&s);
781 }
782
783 if (uncompressedcost < fixedcost && uncompressedcost < dyncost) {
784 AddLZ77Block(options, 0, final, lz77, lstart, lend,
785 expected_data_size, bp, out, outsize);
786 } else if (fixedcost < dyncost) {
787 if (expensivefixed) {
788 AddLZ77Block(options, 1, final, &fixedstore, 0, fixedstore.size,
789 expected_data_size, bp, out, outsize);
790 } else {
791 AddLZ77Block(options, 1, final, lz77, lstart, lend,
792 expected_data_size, bp, out, outsize);
793 }
794 } else {
795 AddLZ77Block(options, 2, final, lz77, lstart, lend,
796 expected_data_size, bp, out, outsize);
797 }
798
799 ZopfliCleanLZ77Store(&fixedstore);
800 }
801
802 /*
803 Deflate a part, to allow ZopfliDeflate() to use multiple master blocks if
804 needed.
805 It is possible to call this function multiple times in a row, shifting
806 instart and inend to next bytes of the data. If instart is larger than 0, then
807 previous bytes are used as the initial dictionary for LZ77.
808 This function will usually output multiple deflate blocks. If final is 1, then
809 the final bit will be set on the last block.
810 */
ZopfliDeflatePart(const ZopfliOptions * options,int btype,int final,const unsigned char * in,size_t instart,size_t inend,unsigned char * bp,unsigned char ** out,size_t * outsize)811 void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final,
812 const unsigned char* in, size_t instart, size_t inend,
813 unsigned char* bp, unsigned char** out,
814 size_t* outsize) {
815 size_t i;
816 /* byte coordinates rather than lz77 index */
817 size_t* splitpoints_uncompressed = 0;
818 size_t npoints = 0;
819 size_t* splitpoints = 0;
820 double totalcost = 0;
821 ZopfliLZ77Store lz77;
822
823 /* If btype=2 is specified, it tries all block types. If a lesser btype is
824 given, then however it forces that one. Neither of the lesser types needs
825 block splitting as they have no dynamic huffman trees. */
826 if (btype == 0) {
827 AddNonCompressedBlock(options, final, in, instart, inend, bp, out, outsize);
828 return;
829 } else if (btype == 1) {
830 ZopfliLZ77Store store;
831 ZopfliBlockState s;
832 ZopfliInitLZ77Store(in, &store);
833 ZopfliInitBlockState(options, instart, inend, 1, &s);
834
835 ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store);
836 AddLZ77Block(options, btype, final, &store, 0, store.size, 0,
837 bp, out, outsize);
838
839 ZopfliCleanBlockState(&s);
840 ZopfliCleanLZ77Store(&store);
841 return;
842 }
843
844
845 if (options->blocksplitting) {
846 ZopfliBlockSplit(options, in, instart, inend,
847 options->blocksplittingmax,
848 &splitpoints_uncompressed, &npoints);
849 splitpoints = (size_t*)malloc(sizeof(*splitpoints) * npoints);
850 }
851
852 ZopfliInitLZ77Store(in, &lz77);
853
854 for (i = 0; i <= npoints; i++) {
855 size_t start = i == 0 ? instart : splitpoints_uncompressed[i - 1];
856 size_t end = i == npoints ? inend : splitpoints_uncompressed[i];
857 ZopfliBlockState s;
858 ZopfliLZ77Store store;
859 ZopfliInitLZ77Store(in, &store);
860 ZopfliInitBlockState(options, start, end, 1, &s);
861 ZopfliLZ77Optimal(&s, in, start, end, options->numiterations, &store);
862 totalcost += ZopfliCalculateBlockSizeAutoType(&store, 0, store.size);
863
864 ZopfliAppendLZ77Store(&store, &lz77);
865 if (i < npoints) splitpoints[i] = lz77.size;
866
867 ZopfliCleanBlockState(&s);
868 ZopfliCleanLZ77Store(&store);
869 }
870
871 /* Second block splitting attempt */
872 if (options->blocksplitting && npoints > 1) {
873 size_t* splitpoints2 = 0;
874 size_t npoints2 = 0;
875 double totalcost2 = 0;
876
877 ZopfliBlockSplitLZ77(options, &lz77,
878 options->blocksplittingmax, &splitpoints2, &npoints2);
879
880 for (i = 0; i <= npoints2; i++) {
881 size_t start = i == 0 ? 0 : splitpoints2[i - 1];
882 size_t end = i == npoints2 ? lz77.size : splitpoints2[i];
883 totalcost2 += ZopfliCalculateBlockSizeAutoType(&lz77, start, end);
884 }
885
886 if (totalcost2 < totalcost) {
887 free(splitpoints);
888 splitpoints = splitpoints2;
889 npoints = npoints2;
890 } else {
891 free(splitpoints2);
892 }
893 }
894
895 for (i = 0; i <= npoints; i++) {
896 size_t start = i == 0 ? 0 : splitpoints[i - 1];
897 size_t end = i == npoints ? lz77.size : splitpoints[i];
898 AddLZ77BlockAutoType(options, i == npoints && final,
899 &lz77, start, end, 0,
900 bp, out, outsize);
901 }
902
903 ZopfliCleanLZ77Store(&lz77);
904 free(splitpoints);
905 free(splitpoints_uncompressed);
906 }
907
ZopfliDeflate(const ZopfliOptions * options,int btype,int final,const unsigned char * in,size_t insize,unsigned char * bp,unsigned char ** out,size_t * outsize)908 void ZopfliDeflate(const ZopfliOptions* options, int btype, int final,
909 const unsigned char* in, size_t insize,
910 unsigned char* bp, unsigned char** out, size_t* outsize) {
911 size_t offset = *outsize;
912 #if ZOPFLI_MASTER_BLOCK_SIZE == 0
913 ZopfliDeflatePart(options, btype, final, in, 0, insize, bp, out, outsize);
914 #else
915 size_t i = 0;
916 do {
917 int masterfinal = (i + ZOPFLI_MASTER_BLOCK_SIZE >= insize);
918 int final2 = final && masterfinal;
919 size_t size = masterfinal ? insize - i : ZOPFLI_MASTER_BLOCK_SIZE;
920 ZopfliDeflatePart(options, btype, final2,
921 in, i, i + size, bp, out, outsize);
922 i += size;
923 } while (i < insize);
924 #endif
925 if (options->verbose) {
926 fprintf(stderr,
927 "Original Size: %lu, Deflate: %lu, Compression: %f%% Removed\n",
928 (unsigned long)insize, (unsigned long)(*outsize - offset),
929 100.0 * (double)(insize - (*outsize - offset)) / (double)insize);
930 }
931 }
932