1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8     http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19 
20 #include "deflate.h"
21 
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 
26 #include "blocksplitter.h"
27 #include "squeeze.h"
28 #include "symbols.h"
29 #include "tree.h"
30 
31 /*
32 bp = bitpointer, always in range [0, 7].
33 The outsize is number of necessary bytes to encode the bits.
34 Given the value of bp and the amount of bytes, the amount of bits represented
35 is not simply bytesize * 8 + bp because even representing one bit requires a
36 whole byte. It is: (bp == 0) ? (bytesize * 8) : ((bytesize - 1) * 8 + bp)
37 */
AddBit(int bit,unsigned char * bp,unsigned char ** out,size_t * outsize)38 static void AddBit(int bit,
39                    unsigned char* bp, unsigned char** out, size_t* outsize) {
40   if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
41   (*out)[*outsize - 1] |= bit << *bp;
42   *bp = (*bp + 1) & 7;
43 }
44 
AddBits(unsigned symbol,unsigned length,unsigned char * bp,unsigned char ** out,size_t * outsize)45 static void AddBits(unsigned symbol, unsigned length,
46                     unsigned char* bp, unsigned char** out, size_t* outsize) {
47   /* TODO(lode): make more efficient (add more bits at once). */
48   unsigned i;
49   for (i = 0; i < length; i++) {
50     unsigned bit = (symbol >> i) & 1;
51     if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
52     (*out)[*outsize - 1] |= bit << *bp;
53     *bp = (*bp + 1) & 7;
54   }
55 }
56 
57 /*
58 Adds bits, like AddBits, but the order is inverted. The deflate specification
59 uses both orders in one standard.
60 */
AddHuffmanBits(unsigned symbol,unsigned length,unsigned char * bp,unsigned char ** out,size_t * outsize)61 static void AddHuffmanBits(unsigned symbol, unsigned length,
62                            unsigned char* bp, unsigned char** out,
63                            size_t* outsize) {
64   /* TODO(lode): make more efficient (add more bits at once). */
65   unsigned i;
66   for (i = 0; i < length; i++) {
67     unsigned bit = (symbol >> (length - i - 1)) & 1;
68     if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
69     (*out)[*outsize - 1] |= bit << *bp;
70     *bp = (*bp + 1) & 7;
71   }
72 }
73 
74 /*
75 Ensures there are at least 2 distance codes to support buggy decoders.
76 Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1
77 distance code (with length > 0), even though it's valid according to the
78 deflate spec to have 0 distance codes. On top of that, some mobile phones
79 require at least two distance codes. To support these decoders too (but
80 potentially at the cost of a few bytes), add dummy code lengths of 1.
81 References to this bug can be found in the changelog of
82 Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0.
83 
84 d_lengths: the 32 lengths of the distance codes.
85 */
PatchDistanceCodesForBuggyDecoders(unsigned * d_lengths)86 static void PatchDistanceCodesForBuggyDecoders(unsigned* d_lengths) {
87   int num_dist_codes = 0; /* Amount of non-zero distance codes */
88   int i;
89   for (i = 0; i < 30 /* Ignore the two unused codes from the spec */; i++) {
90     if (d_lengths[i]) num_dist_codes++;
91     if (num_dist_codes >= 2) return; /* Two or more codes is fine. */
92   }
93 
94   if (num_dist_codes == 0) {
95     d_lengths[0] = d_lengths[1] = 1;
96   } else if (num_dist_codes == 1) {
97     d_lengths[d_lengths[0] ? 1 : 0] = 1;
98   }
99 }
100 
101 /*
102 Encodes the Huffman tree and returns how many bits its encoding takes. If out
103 is a null pointer, only returns the size and runs faster.
104 */
EncodeTree(const unsigned * ll_lengths,const unsigned * d_lengths,int use_16,int use_17,int use_18,unsigned char * bp,unsigned char ** out,size_t * outsize)105 static size_t EncodeTree(const unsigned* ll_lengths,
106                          const unsigned* d_lengths,
107                          int use_16, int use_17, int use_18,
108                          unsigned char* bp,
109                          unsigned char** out, size_t* outsize) {
110   unsigned lld_total;  /* Total amount of literal, length, distance codes. */
111   /* Runlength encoded version of lengths of litlen and dist trees. */
112   unsigned* rle = 0;
113   unsigned* rle_bits = 0;  /* Extra bits for rle values 16, 17 and 18. */
114   size_t rle_size = 0;  /* Size of rle array. */
115   size_t rle_bits_size = 0;  /* Should have same value as rle_size. */
116   unsigned hlit = 29;  /* 286 - 257 */
117   unsigned hdist = 29;  /* 32 - 1, but gzip does not like hdist > 29.*/
118   unsigned hclen;
119   unsigned hlit2;
120   size_t i, j;
121   size_t clcounts[19];
122   unsigned clcl[19];  /* Code length code lengths. */
123   unsigned clsymbols[19];
124   /* The order in which code length code lengths are encoded as per deflate. */
125   static const unsigned order[19] = {
126     16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
127   };
128   int size_only = !out;
129   size_t result_size = 0;
130 
131   for(i = 0; i < 19; i++) clcounts[i] = 0;
132 
133   /* Trim zeros. */
134   while (hlit > 0 && ll_lengths[257 + hlit - 1] == 0) hlit--;
135   while (hdist > 0 && d_lengths[1 + hdist - 1] == 0) hdist--;
136   hlit2 = hlit + 257;
137 
138   lld_total = hlit2 + hdist + 1;
139 
140   for (i = 0; i < lld_total; i++) {
141     /* This is an encoding of a huffman tree, so now the length is a symbol */
142     unsigned char symbol = i < hlit2 ? ll_lengths[i] : d_lengths[i - hlit2];
143     unsigned count = 1;
144     if(use_16 || (symbol == 0 && (use_17 || use_18))) {
145       for (j = i + 1; j < lld_total && symbol ==
146           (j < hlit2 ? ll_lengths[j] : d_lengths[j - hlit2]); j++) {
147         count++;
148       }
149     }
150     i += count - 1;
151 
152     /* Repetitions of zeroes */
153     if (symbol == 0 && count >= 3) {
154       if (use_18) {
155         while (count >= 11) {
156           unsigned count2 = count > 138 ? 138 : count;
157           if (!size_only) {
158             ZOPFLI_APPEND_DATA(18, &rle, &rle_size);
159             ZOPFLI_APPEND_DATA(count2 - 11, &rle_bits, &rle_bits_size);
160           }
161           clcounts[18]++;
162           count -= count2;
163         }
164       }
165       if (use_17) {
166         while (count >= 3) {
167           unsigned count2 = count > 10 ? 10 : count;
168           if (!size_only) {
169             ZOPFLI_APPEND_DATA(17, &rle, &rle_size);
170             ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
171           }
172           clcounts[17]++;
173           count -= count2;
174         }
175       }
176     }
177 
178     /* Repetitions of any symbol */
179     if (use_16 && count >= 4) {
180       count--;  /* Since the first one is hardcoded. */
181       clcounts[symbol]++;
182       if (!size_only) {
183         ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
184         ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
185       }
186       while (count >= 3) {
187         unsigned count2 = count > 6 ? 6 : count;
188         if (!size_only) {
189           ZOPFLI_APPEND_DATA(16, &rle, &rle_size);
190           ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
191         }
192         clcounts[16]++;
193         count -= count2;
194       }
195     }
196 
197     /* No or insufficient repetition */
198     clcounts[symbol] += count;
199     while (count > 0) {
200       if (!size_only) {
201         ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
202         ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
203       }
204       count--;
205     }
206   }
207 
208   ZopfliCalculateBitLengths(clcounts, 19, 7, clcl);
209   if (!size_only) ZopfliLengthsToSymbols(clcl, 19, 7, clsymbols);
210 
211   hclen = 15;
212   /* Trim zeros. */
213   while (hclen > 0 && clcounts[order[hclen + 4 - 1]] == 0) hclen--;
214 
215   if (!size_only) {
216     AddBits(hlit, 5, bp, out, outsize);
217     AddBits(hdist, 5, bp, out, outsize);
218     AddBits(hclen, 4, bp, out, outsize);
219 
220     for (i = 0; i < hclen + 4; i++) {
221       AddBits(clcl[order[i]], 3, bp, out, outsize);
222     }
223 
224     for (i = 0; i < rle_size; i++) {
225       unsigned symbol = clsymbols[rle[i]];
226       AddHuffmanBits(symbol, clcl[rle[i]], bp, out, outsize);
227       /* Extra bits. */
228       if (rle[i] == 16) AddBits(rle_bits[i], 2, bp, out, outsize);
229       else if (rle[i] == 17) AddBits(rle_bits[i], 3, bp, out, outsize);
230       else if (rle[i] == 18) AddBits(rle_bits[i], 7, bp, out, outsize);
231     }
232   }
233 
234   result_size += 14;  /* hlit, hdist, hclen bits */
235   result_size += (hclen + 4) * 3;  /* clcl bits */
236   for(i = 0; i < 19; i++) {
237     result_size += clcl[i] * clcounts[i];
238   }
239   /* Extra bits. */
240   result_size += clcounts[16] * 2;
241   result_size += clcounts[17] * 3;
242   result_size += clcounts[18] * 7;
243 
244   /* Note: in case of "size_only" these are null pointers so no effect. */
245   free(rle);
246   free(rle_bits);
247 
248   return result_size;
249 }
250 
AddDynamicTree(const unsigned * ll_lengths,const unsigned * d_lengths,unsigned char * bp,unsigned char ** out,size_t * outsize)251 static void AddDynamicTree(const unsigned* ll_lengths,
252                            const unsigned* d_lengths,
253                            unsigned char* bp,
254                            unsigned char** out, size_t* outsize) {
255   int i;
256   int best = 0;
257   size_t bestsize = 0;
258 
259   for(i = 0; i < 8; i++) {
260     size_t size = EncodeTree(ll_lengths, d_lengths,
261                              i & 1, i & 2, i & 4,
262                              0, 0, 0);
263     if (bestsize == 0 || size < bestsize) {
264       bestsize = size;
265       best = i;
266     }
267   }
268 
269   EncodeTree(ll_lengths, d_lengths,
270              best & 1, best & 2, best & 4,
271              bp, out, outsize);
272 }
273 
274 /*
275 Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE.
276 */
CalculateTreeSize(const unsigned * ll_lengths,const unsigned * d_lengths)277 static size_t CalculateTreeSize(const unsigned* ll_lengths,
278                                 const unsigned* d_lengths) {
279   size_t result = 0;
280   int i;
281 
282   for(i = 0; i < 8; i++) {
283     size_t size = EncodeTree(ll_lengths, d_lengths,
284                              i & 1, i & 2, i & 4,
285                              0, 0, 0);
286     if (result == 0 || size < result) result = size;
287   }
288 
289   return result;
290 }
291 
292 /*
293 Adds all lit/len and dist codes from the lists as huffman symbols. Does not add
294 end code 256. expected_data_size is the uncompressed block size, used for
295 assert, but you can set it to 0 to not do the assertion.
296 */
AddLZ77Data(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,const unsigned * ll_symbols,const unsigned * ll_lengths,const unsigned * d_symbols,const unsigned * d_lengths,unsigned char * bp,unsigned char ** out,size_t * outsize)297 static void AddLZ77Data(const ZopfliLZ77Store* lz77,
298                         size_t lstart, size_t lend,
299                         size_t expected_data_size,
300                         const unsigned* ll_symbols, const unsigned* ll_lengths,
301                         const unsigned* d_symbols, const unsigned* d_lengths,
302                         unsigned char* bp,
303                         unsigned char** out, size_t* outsize) {
304   size_t testlength = 0;
305   size_t i;
306 
307   for (i = lstart; i < lend; i++) {
308     unsigned dist = lz77->dists[i];
309     unsigned litlen = lz77->litlens[i];
310     if (dist == 0) {
311       assert(litlen < 256);
312       assert(ll_lengths[litlen] > 0);
313       AddHuffmanBits(ll_symbols[litlen], ll_lengths[litlen], bp, out, outsize);
314       testlength++;
315     } else {
316       unsigned lls = ZopfliGetLengthSymbol(litlen);
317       unsigned ds = ZopfliGetDistSymbol(dist);
318       assert(litlen >= 3 && litlen <= 288);
319       assert(ll_lengths[lls] > 0);
320       assert(d_lengths[ds] > 0);
321       AddHuffmanBits(ll_symbols[lls], ll_lengths[lls], bp, out, outsize);
322       AddBits(ZopfliGetLengthExtraBitsValue(litlen),
323               ZopfliGetLengthExtraBits(litlen),
324               bp, out, outsize);
325       AddHuffmanBits(d_symbols[ds], d_lengths[ds], bp, out, outsize);
326       AddBits(ZopfliGetDistExtraBitsValue(dist),
327               ZopfliGetDistExtraBits(dist),
328               bp, out, outsize);
329       testlength += litlen;
330     }
331   }
332   assert(expected_data_size == 0 || testlength == expected_data_size);
333 }
334 
GetFixedTree(unsigned * ll_lengths,unsigned * d_lengths)335 static void GetFixedTree(unsigned* ll_lengths, unsigned* d_lengths) {
336   size_t i;
337   for (i = 0; i < 144; i++) ll_lengths[i] = 8;
338   for (i = 144; i < 256; i++) ll_lengths[i] = 9;
339   for (i = 256; i < 280; i++) ll_lengths[i] = 7;
340   for (i = 280; i < 288; i++) ll_lengths[i] = 8;
341   for (i = 0; i < 32; i++) d_lengths[i] = 5;
342 }
343 
344 /*
345 Same as CalculateBlockSymbolSize, but for block size smaller than histogram
346 size.
347 */
CalculateBlockSymbolSizeSmall(const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)348 static size_t CalculateBlockSymbolSizeSmall(const unsigned* ll_lengths,
349                                             const unsigned* d_lengths,
350                                             const ZopfliLZ77Store* lz77,
351                                             size_t lstart, size_t lend) {
352   size_t result = 0;
353   size_t i;
354   for (i = lstart; i < lend; i++) {
355     assert(i < lz77->size);
356     assert(lz77->litlens[i] < 259);
357     if (lz77->dists[i] == 0) {
358       result += ll_lengths[lz77->litlens[i]];
359     } else {
360       int ll_symbol = ZopfliGetLengthSymbol(lz77->litlens[i]);
361       int d_symbol = ZopfliGetDistSymbol(lz77->dists[i]);
362       result += ll_lengths[ll_symbol];
363       result += d_lengths[d_symbol];
364       result += ZopfliGetLengthSymbolExtraBits(ll_symbol);
365       result += ZopfliGetDistSymbolExtraBits(d_symbol);
366     }
367   }
368   result += ll_lengths[256]; /*end symbol*/
369   return result;
370 }
371 
372 /*
373 Same as CalculateBlockSymbolSize, but with the histogram provided by the caller.
374 */
CalculateBlockSymbolSizeGivenCounts(const size_t * ll_counts,const size_t * d_counts,const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)375 static size_t CalculateBlockSymbolSizeGivenCounts(const size_t* ll_counts,
376                                                   const size_t* d_counts,
377                                                   const unsigned* ll_lengths,
378                                                   const unsigned* d_lengths,
379                                                   const ZopfliLZ77Store* lz77,
380                                                   size_t lstart, size_t lend) {
381   size_t result = 0;
382   size_t i;
383   if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
384     return CalculateBlockSymbolSizeSmall(
385         ll_lengths, d_lengths, lz77, lstart, lend);
386   } else {
387     for (i = 0; i < 256; i++) {
388       result += ll_lengths[i] * ll_counts[i];
389     }
390     for (i = 257; i < 286; i++) {
391       result += ll_lengths[i] * ll_counts[i];
392       result += ZopfliGetLengthSymbolExtraBits(i) * ll_counts[i];
393     }
394     for (i = 0; i < 30; i++) {
395       result += d_lengths[i] * d_counts[i];
396       result += ZopfliGetDistSymbolExtraBits(i) * d_counts[i];
397     }
398     result += ll_lengths[256]; /*end symbol*/
399     return result;
400   }
401 }
402 
403 /*
404 Calculates size of the part after the header and tree of an LZ77 block, in bits.
405 */
CalculateBlockSymbolSize(const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)406 static size_t CalculateBlockSymbolSize(const unsigned* ll_lengths,
407                                        const unsigned* d_lengths,
408                                        const ZopfliLZ77Store* lz77,
409                                        size_t lstart, size_t lend) {
410   if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
411     return CalculateBlockSymbolSizeSmall(
412         ll_lengths, d_lengths, lz77, lstart, lend);
413   } else {
414     size_t ll_counts[ZOPFLI_NUM_LL];
415     size_t d_counts[ZOPFLI_NUM_D];
416     ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
417     return CalculateBlockSymbolSizeGivenCounts(
418         ll_counts, d_counts, ll_lengths, d_lengths, lz77, lstart, lend);
419   }
420 }
421 
AbsDiff(size_t x,size_t y)422 static size_t AbsDiff(size_t x, size_t y) {
423   if (x > y)
424     return x - y;
425   else
426     return y - x;
427 }
428 
429 /*
430 Changes the population counts in a way that the consequent Huffman tree
431 compression, especially its rle-part, will be more likely to compress this data
432 more efficiently. length contains the size of the histogram.
433 */
OptimizeHuffmanForRle(int length,size_t * counts)434 void OptimizeHuffmanForRle(int length, size_t* counts) {
435   int i, k, stride;
436   size_t symbol, sum, limit;
437   int* good_for_rle;
438 
439   /* 1) We don't want to touch the trailing zeros. We may break the
440   rules of the format by adding more data in the distance codes. */
441   for (; length >= 0; --length) {
442     if (length == 0) {
443       return;
444     }
445     if (counts[length - 1] != 0) {
446       /* Now counts[0..length - 1] does not have trailing zeros. */
447       break;
448     }
449   }
450   /* 2) Let's mark all population counts that already can be encoded
451   with an rle code.*/
452   good_for_rle = (int*)malloc((unsigned)length * sizeof(int));
453   for (i = 0; i < length; ++i) good_for_rle[i] = 0;
454 
455   /* Let's not spoil any of the existing good rle codes.
456   Mark any seq of 0's that is longer than 5 as a good_for_rle.
457   Mark any seq of non-0's that is longer than 7 as a good_for_rle.*/
458   symbol = counts[0];
459   stride = 0;
460   for (i = 0; i < length + 1; ++i) {
461     if (i == length || counts[i] != symbol) {
462       if ((symbol == 0 && stride >= 5) || (symbol != 0 && stride >= 7)) {
463         for (k = 0; k < stride; ++k) {
464           good_for_rle[i - k - 1] = 1;
465         }
466       }
467       stride = 1;
468       if (i != length) {
469         symbol = counts[i];
470       }
471     } else {
472       ++stride;
473     }
474   }
475 
476   /* 3) Let's replace those population counts that lead to more rle codes. */
477   stride = 0;
478   limit = counts[0];
479   sum = 0;
480   for (i = 0; i < length + 1; ++i) {
481     if (i == length || good_for_rle[i]
482         /* Heuristic for selecting the stride ranges to collapse. */
483         || AbsDiff(counts[i], limit) >= 4) {
484       if (stride >= 4 || (stride >= 3 && sum == 0)) {
485         /* The stride must end, collapse what we have, if we have enough (4). */
486         int count = (sum + stride / 2) / stride;
487         if (count < 1) count = 1;
488         if (sum == 0) {
489           /* Don't make an all zeros stride to be upgraded to ones. */
490           count = 0;
491         }
492         for (k = 0; k < stride; ++k) {
493           /* We don't want to change value at counts[i],
494           that is already belonging to the next stride. Thus - 1. */
495           counts[i - k - 1] = count;
496         }
497       }
498       stride = 0;
499       sum = 0;
500       if (i < length - 3) {
501         /* All interesting strides have a count of at least 4,
502         at least when non-zeros. */
503         limit = (counts[i] + counts[i + 1] +
504                  counts[i + 2] + counts[i + 3] + 2) / 4;
505       } else if (i < length) {
506         limit = counts[i];
507       } else {
508         limit = 0;
509       }
510     }
511     ++stride;
512     if (i != length) {
513       sum += counts[i];
514     }
515   }
516 
517   free(good_for_rle);
518 }
519 
520 /*
521 Tries out OptimizeHuffmanForRle for this block, if the result is smaller,
522 uses it, otherwise keeps the original. Returns size of encoded tree and data in
523 bits, not including the 3-bit block header.
524 */
TryOptimizeHuffmanForRle(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,const size_t * ll_counts,const size_t * d_counts,unsigned * ll_lengths,unsigned * d_lengths)525 static double TryOptimizeHuffmanForRle(
526     const ZopfliLZ77Store* lz77, size_t lstart, size_t lend,
527     const size_t* ll_counts, const size_t* d_counts,
528     unsigned* ll_lengths, unsigned* d_lengths) {
529   size_t ll_counts2[ZOPFLI_NUM_LL];
530   size_t d_counts2[ZOPFLI_NUM_D];
531   unsigned ll_lengths2[ZOPFLI_NUM_LL];
532   unsigned d_lengths2[ZOPFLI_NUM_D];
533   double treesize;
534   double datasize;
535   double treesize2;
536   double datasize2;
537 
538   treesize = CalculateTreeSize(ll_lengths, d_lengths);
539   datasize = CalculateBlockSymbolSizeGivenCounts(ll_counts, d_counts,
540       ll_lengths, d_lengths, lz77, lstart, lend);
541 
542   memcpy(ll_counts2, ll_counts, sizeof(ll_counts2));
543   memcpy(d_counts2, d_counts, sizeof(d_counts2));
544   OptimizeHuffmanForRle(ZOPFLI_NUM_LL, ll_counts2);
545   OptimizeHuffmanForRle(ZOPFLI_NUM_D, d_counts2);
546   ZopfliCalculateBitLengths(ll_counts2, ZOPFLI_NUM_LL, 15, ll_lengths2);
547   ZopfliCalculateBitLengths(d_counts2, ZOPFLI_NUM_D, 15, d_lengths2);
548   PatchDistanceCodesForBuggyDecoders(d_lengths2);
549 
550   treesize2 = CalculateTreeSize(ll_lengths2, d_lengths2);
551   datasize2 = CalculateBlockSymbolSizeGivenCounts(ll_counts, d_counts,
552       ll_lengths2, d_lengths2, lz77, lstart, lend);
553 
554   if (treesize2 + datasize2 < treesize + datasize) {
555     memcpy(ll_lengths, ll_lengths2, sizeof(ll_lengths2));
556     memcpy(d_lengths, d_lengths2, sizeof(d_lengths2));
557     return treesize2 + datasize2;
558   }
559   return treesize + datasize;
560 }
561 
562 /*
563 Calculates the bit lengths for the symbols for dynamic blocks. Chooses bit
564 lengths that give the smallest size of tree encoding + encoding of all the
565 symbols to have smallest output size. This are not necessarily the ideal Huffman
566 bit lengths. Returns size of encoded tree and data in bits, not including the
567 3-bit block header.
568 */
GetDynamicLengths(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,unsigned * ll_lengths,unsigned * d_lengths)569 static double GetDynamicLengths(const ZopfliLZ77Store* lz77,
570                                 size_t lstart, size_t lend,
571                                 unsigned* ll_lengths, unsigned* d_lengths) {
572   size_t ll_counts[ZOPFLI_NUM_LL];
573   size_t d_counts[ZOPFLI_NUM_D];
574 
575   ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
576   ll_counts[256] = 1;  /* End symbol. */
577   ZopfliCalculateBitLengths(ll_counts, ZOPFLI_NUM_LL, 15, ll_lengths);
578   ZopfliCalculateBitLengths(d_counts, ZOPFLI_NUM_D, 15, d_lengths);
579   PatchDistanceCodesForBuggyDecoders(d_lengths);
580   return TryOptimizeHuffmanForRle(
581       lz77, lstart, lend, ll_counts, d_counts, ll_lengths, d_lengths);
582 }
583 
ZopfliCalculateBlockSize(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,int btype)584 double ZopfliCalculateBlockSize(const ZopfliLZ77Store* lz77,
585                                 size_t lstart, size_t lend, int btype) {
586   unsigned ll_lengths[ZOPFLI_NUM_LL];
587   unsigned d_lengths[ZOPFLI_NUM_D];
588 
589   double result = 3; /* bfinal and btype bits */
590 
591   if (btype == 0) {
592     size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
593     size_t rem = length % 65535;
594     size_t blocks = length / 65535 + (rem ? 1 : 0);
595     /* An uncompressed block must actually be split into multiple blocks if it's
596        larger than 65535 bytes long. Eeach block header is 5 bytes: 3 bits,
597        padding, LEN and NLEN (potential less padding for first one ignored). */
598     return blocks * 5 * 8 + length * 8;
599   } if (btype == 1) {
600     GetFixedTree(ll_lengths, d_lengths);
601     result += CalculateBlockSymbolSize(
602         ll_lengths, d_lengths, lz77, lstart, lend);
603   } else {
604     result += GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
605   }
606 
607   return result;
608 }
609 
ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)610 double ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store* lz77,
611                                         size_t lstart, size_t lend) {
612   double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
613   /* Don't do the expensive fixed cost calculation for larger blocks that are
614      unlikely to use it. */
615   double fixedcost = (lz77->size > 1000) ?
616       uncompressedcost : ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
617   double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
618   return (uncompressedcost < fixedcost && uncompressedcost < dyncost)
619       ? uncompressedcost
620       : (fixedcost < dyncost ? fixedcost : dyncost);
621 }
622 
623 /* Since an uncompressed block can be max 65535 in size, it actually adds
624 multible blocks if needed. */
AddNonCompressedBlock(const ZopfliOptions * options,int final,const unsigned char * in,size_t instart,size_t inend,unsigned char * bp,unsigned char ** out,size_t * outsize)625 static void AddNonCompressedBlock(const ZopfliOptions* options, int final,
626                                   const unsigned char* in, size_t instart,
627                                   size_t inend,
628                                   unsigned char* bp,
629                                   unsigned char** out, size_t* outsize) {
630   size_t pos = instart;
631   (void)options;
632   for (;;) {
633     size_t i;
634     unsigned short blocksize = 65535;
635     unsigned short nlen;
636     int currentfinal;
637 
638     if (pos + blocksize > inend) blocksize = inend - pos;
639     currentfinal = pos + blocksize >= inend;
640 
641     nlen = ~blocksize;
642 
643     AddBit(final && currentfinal, bp, out, outsize);
644     /* BTYPE 00 */
645     AddBit(0, bp, out, outsize);
646     AddBit(0, bp, out, outsize);
647 
648     /* Any bits of input up to the next byte boundary are ignored. */
649     *bp = 0;
650 
651     ZOPFLI_APPEND_DATA(blocksize % 256, out, outsize);
652     ZOPFLI_APPEND_DATA((blocksize / 256) % 256, out, outsize);
653     ZOPFLI_APPEND_DATA(nlen % 256, out, outsize);
654     ZOPFLI_APPEND_DATA((nlen / 256) % 256, out, outsize);
655 
656     for (i = 0; i < blocksize; i++) {
657       ZOPFLI_APPEND_DATA(in[pos + i], out, outsize);
658     }
659 
660     if (currentfinal) break;
661     pos += blocksize;
662   }
663 }
664 
665 /*
666 Adds a deflate block with the given LZ77 data to the output.
667 options: global program options
668 btype: the block type, must be 1 or 2
669 final: whether to set the "final" bit on this block, must be the last block
670 litlens: literal/length array of the LZ77 data, in the same format as in
671     ZopfliLZ77Store.
672 dists: distance array of the LZ77 data, in the same format as in
673     ZopfliLZ77Store.
674 lstart: where to start in the LZ77 data
675 lend: where to end in the LZ77 data (not inclusive)
676 expected_data_size: the uncompressed block size, used for assert, but you can
677   set it to 0 to not do the assertion.
678 bp: output bit pointer
679 out: dynamic output array to append to
680 outsize: dynamic output array size
681 */
AddLZ77Block(const ZopfliOptions * options,int btype,int final,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,unsigned char * bp,unsigned char ** out,size_t * outsize)682 static void AddLZ77Block(const ZopfliOptions* options, int btype, int final,
683                          const ZopfliLZ77Store* lz77,
684                          size_t lstart, size_t lend,
685                          size_t expected_data_size,
686                          unsigned char* bp,
687                          unsigned char** out, size_t* outsize) {
688   unsigned ll_lengths[ZOPFLI_NUM_LL];
689   unsigned d_lengths[ZOPFLI_NUM_D];
690   unsigned ll_symbols[ZOPFLI_NUM_LL];
691   unsigned d_symbols[ZOPFLI_NUM_D];
692   size_t detect_block_size = *outsize;
693   size_t compressed_size;
694   size_t uncompressed_size = 0;
695   size_t i;
696   if (btype == 0) {
697     size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
698     size_t pos = lstart == lend ? 0 : lz77->pos[lstart];
699     size_t end = pos + length;
700     AddNonCompressedBlock(options, final,
701                           lz77->data, pos, end, bp, out, outsize);
702     return;
703   }
704 
705   AddBit(final, bp, out, outsize);
706   AddBit(btype & 1, bp, out, outsize);
707   AddBit((btype & 2) >> 1, bp, out, outsize);
708 
709   if (btype == 1) {
710     /* Fixed block. */
711     GetFixedTree(ll_lengths, d_lengths);
712   } else {
713     /* Dynamic block. */
714     unsigned detect_tree_size;
715     assert(btype == 2);
716 
717     GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
718 
719     detect_tree_size = *outsize;
720     AddDynamicTree(ll_lengths, d_lengths, bp, out, outsize);
721     if (options->verbose) {
722       fprintf(stderr, "treesize: %d\n", (int)(*outsize - detect_tree_size));
723     }
724   }
725 
726   ZopfliLengthsToSymbols(ll_lengths, ZOPFLI_NUM_LL, 15, ll_symbols);
727   ZopfliLengthsToSymbols(d_lengths, ZOPFLI_NUM_D, 15, d_symbols);
728 
729   detect_block_size = *outsize;
730   AddLZ77Data(lz77, lstart, lend, expected_data_size,
731               ll_symbols, ll_lengths, d_symbols, d_lengths,
732               bp, out, outsize);
733   /* End symbol. */
734   AddHuffmanBits(ll_symbols[256], ll_lengths[256], bp, out, outsize);
735 
736   for (i = lstart; i < lend; i++) {
737     uncompressed_size += lz77->dists[i] == 0 ? 1 : lz77->litlens[i];
738   }
739   compressed_size = *outsize - detect_block_size;
740   if (options->verbose) {
741     fprintf(stderr, "compressed block size: %d (%dk) (unc: %d)\n",
742            (int)compressed_size, (int)(compressed_size / 1024),
743            (int)(uncompressed_size));
744   }
745 }
746 
AddLZ77BlockAutoType(const ZopfliOptions * options,int final,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,unsigned char * bp,unsigned char ** out,size_t * outsize)747 static void AddLZ77BlockAutoType(const ZopfliOptions* options, int final,
748                                  const ZopfliLZ77Store* lz77,
749                                  size_t lstart, size_t lend,
750                                  size_t expected_data_size,
751                                  unsigned char* bp,
752                                  unsigned char** out, size_t* outsize) {
753   double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
754   double fixedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
755   double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
756 
757   /* Whether to perform the expensive calculation of creating an optimal block
758   with fixed huffman tree to check if smaller. Only do this for small blocks or
759   blocks which already are pretty good with fixed huffman tree. */
760   int expensivefixed = (lz77->size < 1000) || fixedcost <= dyncost * 1.1;
761 
762   ZopfliLZ77Store fixedstore;
763   if (lstart == lend) {
764     /* Smallest empty block is represented by fixed block */
765     AddBits(final, 1, bp, out, outsize);
766     AddBits(1, 2, bp, out, outsize);  /* btype 01 */
767     AddBits(0, 7, bp, out, outsize);  /* end symbol has code 0000000 */
768     return;
769   }
770   ZopfliInitLZ77Store(lz77->data, &fixedstore);
771   if (expensivefixed) {
772     /* Recalculate the LZ77 with ZopfliLZ77OptimalFixed */
773     size_t instart = lz77->pos[lstart];
774     size_t inend = instart + ZopfliLZ77GetByteRange(lz77, lstart, lend);
775 
776     ZopfliBlockState s;
777     ZopfliInitBlockState(options, instart, inend, 1, &s);
778     ZopfliLZ77OptimalFixed(&s, lz77->data, instart, inend, &fixedstore);
779     fixedcost = ZopfliCalculateBlockSize(&fixedstore, 0, fixedstore.size, 1);
780     ZopfliCleanBlockState(&s);
781   }
782 
783   if (uncompressedcost < fixedcost && uncompressedcost < dyncost) {
784     AddLZ77Block(options, 0, final, lz77, lstart, lend,
785                  expected_data_size, bp, out, outsize);
786   } else if (fixedcost < dyncost) {
787     if (expensivefixed) {
788       AddLZ77Block(options, 1, final, &fixedstore, 0, fixedstore.size,
789                    expected_data_size, bp, out, outsize);
790     } else {
791       AddLZ77Block(options, 1, final, lz77, lstart, lend,
792                    expected_data_size, bp, out, outsize);
793     }
794   } else {
795     AddLZ77Block(options, 2, final, lz77, lstart, lend,
796                  expected_data_size, bp, out, outsize);
797   }
798 
799   ZopfliCleanLZ77Store(&fixedstore);
800 }
801 
802 /*
803 Deflate a part, to allow ZopfliDeflate() to use multiple master blocks if
804 needed.
805 It is possible to call this function multiple times in a row, shifting
806 instart and inend to next bytes of the data. If instart is larger than 0, then
807 previous bytes are used as the initial dictionary for LZ77.
808 This function will usually output multiple deflate blocks. If final is 1, then
809 the final bit will be set on the last block.
810 */
ZopfliDeflatePart(const ZopfliOptions * options,int btype,int final,const unsigned char * in,size_t instart,size_t inend,unsigned char * bp,unsigned char ** out,size_t * outsize)811 void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final,
812                        const unsigned char* in, size_t instart, size_t inend,
813                        unsigned char* bp, unsigned char** out,
814                        size_t* outsize) {
815   size_t i;
816   /* byte coordinates rather than lz77 index */
817   size_t* splitpoints_uncompressed = 0;
818   size_t npoints = 0;
819   size_t* splitpoints = 0;
820   double totalcost = 0;
821   ZopfliLZ77Store lz77;
822 
823   /* If btype=2 is specified, it tries all block types. If a lesser btype is
824   given, then however it forces that one. Neither of the lesser types needs
825   block splitting as they have no dynamic huffman trees. */
826   if (btype == 0) {
827     AddNonCompressedBlock(options, final, in, instart, inend, bp, out, outsize);
828     return;
829   } else if (btype == 1) {
830     ZopfliLZ77Store store;
831     ZopfliBlockState s;
832     ZopfliInitLZ77Store(in, &store);
833     ZopfliInitBlockState(options, instart, inend, 1, &s);
834 
835     ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store);
836     AddLZ77Block(options, btype, final, &store, 0, store.size, 0,
837                  bp, out, outsize);
838 
839     ZopfliCleanBlockState(&s);
840     ZopfliCleanLZ77Store(&store);
841     return;
842   }
843 
844 
845   if (options->blocksplitting) {
846     ZopfliBlockSplit(options, in, instart, inend,
847                      options->blocksplittingmax,
848                      &splitpoints_uncompressed, &npoints);
849     splitpoints = (size_t*)malloc(sizeof(*splitpoints) * npoints);
850   }
851 
852   ZopfliInitLZ77Store(in, &lz77);
853 
854   for (i = 0; i <= npoints; i++) {
855     size_t start = i == 0 ? instart : splitpoints_uncompressed[i - 1];
856     size_t end = i == npoints ? inend : splitpoints_uncompressed[i];
857     ZopfliBlockState s;
858     ZopfliLZ77Store store;
859     ZopfliInitLZ77Store(in, &store);
860     ZopfliInitBlockState(options, start, end, 1, &s);
861     ZopfliLZ77Optimal(&s, in, start, end, options->numiterations, &store);
862     totalcost += ZopfliCalculateBlockSizeAutoType(&store, 0, store.size);
863 
864     ZopfliAppendLZ77Store(&store, &lz77);
865     if (i < npoints) splitpoints[i] = lz77.size;
866 
867     ZopfliCleanBlockState(&s);
868     ZopfliCleanLZ77Store(&store);
869   }
870 
871   /* Second block splitting attempt */
872   if (options->blocksplitting && npoints > 1) {
873     size_t* splitpoints2 = 0;
874     size_t npoints2 = 0;
875     double totalcost2 = 0;
876 
877     ZopfliBlockSplitLZ77(options, &lz77,
878                          options->blocksplittingmax, &splitpoints2, &npoints2);
879 
880     for (i = 0; i <= npoints2; i++) {
881       size_t start = i == 0 ? 0 : splitpoints2[i - 1];
882       size_t end = i == npoints2 ? lz77.size : splitpoints2[i];
883       totalcost2 += ZopfliCalculateBlockSizeAutoType(&lz77, start, end);
884     }
885 
886     if (totalcost2 < totalcost) {
887       free(splitpoints);
888       splitpoints = splitpoints2;
889       npoints = npoints2;
890     } else {
891       free(splitpoints2);
892     }
893   }
894 
895   for (i = 0; i <= npoints; i++) {
896     size_t start = i == 0 ? 0 : splitpoints[i - 1];
897     size_t end = i == npoints ? lz77.size : splitpoints[i];
898     AddLZ77BlockAutoType(options, i == npoints && final,
899                          &lz77, start, end, 0,
900                          bp, out, outsize);
901   }
902 
903   ZopfliCleanLZ77Store(&lz77);
904   free(splitpoints);
905   free(splitpoints_uncompressed);
906 }
907 
ZopfliDeflate(const ZopfliOptions * options,int btype,int final,const unsigned char * in,size_t insize,unsigned char * bp,unsigned char ** out,size_t * outsize)908 void ZopfliDeflate(const ZopfliOptions* options, int btype, int final,
909                    const unsigned char* in, size_t insize,
910                    unsigned char* bp, unsigned char** out, size_t* outsize) {
911  size_t offset = *outsize;
912 #if ZOPFLI_MASTER_BLOCK_SIZE == 0
913   ZopfliDeflatePart(options, btype, final, in, 0, insize, bp, out, outsize);
914 #else
915   size_t i = 0;
916   do {
917     int masterfinal = (i + ZOPFLI_MASTER_BLOCK_SIZE >= insize);
918     int final2 = final && masterfinal;
919     size_t size = masterfinal ? insize - i : ZOPFLI_MASTER_BLOCK_SIZE;
920     ZopfliDeflatePart(options, btype, final2,
921                       in, i, i + size, bp, out, outsize);
922     i += size;
923   } while (i < insize);
924 #endif
925   if (options->verbose) {
926     fprintf(stderr,
927             "Original Size: %lu, Deflate: %lu, Compression: %f%% Removed\n",
928             (unsigned long)insize, (unsigned long)(*outsize - offset),
929             100.0 * (double)(insize - (*outsize - offset)) / (double)insize);
930   }
931 }
932