1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8     http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19 
20 #include "deflate.h"
21 
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 
26 #include "blocksplitter.h"
27 #include "squeeze.h"
28 #include "symbols.h"
29 #include "tree.h"
30 
31 /*
32 bp = bitpointer, always in range [0, 7].
33 The outsize is number of necessary bytes to encode the bits.
34 Given the value of bp and the amount of bytes, the amount of bits represented
35 is not simply bytesize * 8 + bp because even representing one bit requires a
36 whole byte. It is: (bp == 0) ? (bytesize * 8) : ((bytesize - 1) * 8 + bp)
37 */
AddBit(int bit,unsigned char * bp,unsigned char ** out,size_t * outsize)38 static void AddBit(int bit,
39                    unsigned char* bp, unsigned char** out, size_t* outsize) {
40   if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
41   (*out)[*outsize - 1] |= bit << *bp;
42   *bp = (*bp + 1) & 7;
43 }
44 
AddBits(unsigned symbol,unsigned length,unsigned char * bp,unsigned char ** out,size_t * outsize)45 static void AddBits(unsigned symbol, unsigned length,
46                     unsigned char* bp, unsigned char** out, size_t* outsize) {
47   /* TODO(lode): make more efficient (add more bits at once). */
48   unsigned i;
49   for (i = 0; i < length; i++) {
50     unsigned bit = (symbol >> i) & 1;
51     if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
52     (*out)[*outsize - 1] |= bit << *bp;
53     *bp = (*bp + 1) & 7;
54   }
55 }
56 
57 /*
58 Adds bits, like AddBits, but the order is inverted. The deflate specification
59 uses both orders in one standard.
60 */
AddHuffmanBits(unsigned symbol,unsigned length,unsigned char * bp,unsigned char ** out,size_t * outsize)61 static void AddHuffmanBits(unsigned symbol, unsigned length,
62                            unsigned char* bp, unsigned char** out,
63                            size_t* outsize) {
64   /* TODO(lode): make more efficient (add more bits at once). */
65   unsigned i;
66   for (i = 0; i < length; i++) {
67     unsigned bit = (symbol >> (length - i - 1)) & 1;
68     if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
69     (*out)[*outsize - 1] |= bit << *bp;
70     *bp = (*bp + 1) & 7;
71   }
72 }
73 
74 /*
75 Ensures there are at least 2 distance codes to support buggy decoders.
76 Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1
77 distance code (with length > 0), even though it's valid according to the
78 deflate spec to have 0 distance codes. On top of that, some mobile phones
79 require at least two distance codes. To support these decoders too (but
80 potentially at the cost of a few bytes), add dummy code lengths of 1.
81 References to this bug can be found in the changelog of
82 Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0.
83 
84 d_lengths: the 32 lengths of the distance codes.
85 */
PatchDistanceCodesForBuggyDecoders(unsigned * d_lengths)86 static void PatchDistanceCodesForBuggyDecoders(unsigned* d_lengths) {
87   int num_dist_codes = 0; /* Amount of non-zero distance codes */
88   int i;
89   for (i = 0; i < 30 /* Ignore the two unused codes from the spec */; i++) {
90     if (d_lengths[i]) num_dist_codes++;
91     if (num_dist_codes >= 2) return; /* Two or more codes is fine. */
92   }
93 
94   if (num_dist_codes == 0) {
95     d_lengths[0] = d_lengths[1] = 1;
96   } else if (num_dist_codes == 1) {
97     d_lengths[d_lengths[0] ? 1 : 0] = 1;
98   }
99 }
100 
101 /*
102 Encodes the Huffman tree and returns how many bits its encoding takes. If out
103 is a null pointer, only returns the size and runs faster.
104 */
EncodeTree(const unsigned * ll_lengths,const unsigned * d_lengths,int use_16,int use_17,int use_18,unsigned char * bp,unsigned char ** out,size_t * outsize)105 static size_t EncodeTree(const unsigned* ll_lengths,
106                          const unsigned* d_lengths,
107                          int use_16, int use_17, int use_18,
108                          unsigned char* bp,
109                          unsigned char** out, size_t* outsize) {
110   unsigned lld_total;  /* Total amount of literal, length, distance codes. */
111   /* Runlength encoded version of lengths of litlen and dist trees. */
112   unsigned* rle = 0;
113   unsigned* rle_bits = 0;  /* Extra bits for rle values 16, 17 and 18. */
114   size_t rle_size = 0;  /* Size of rle array. */
115   size_t rle_bits_size = 0;  /* Should have same value as rle_size. */
116   unsigned hlit = 29;  /* 286 - 257 */
117   unsigned hdist = 29;  /* 32 - 1, but gzip does not like hdist > 29.*/
118   unsigned hclen;
119   unsigned hlit2;
120   size_t i, j;
121   size_t clcounts[19];
122   unsigned clcl[19];  /* Code length code lengths. */
123   unsigned clsymbols[19];
124   /* The order in which code length code lengths are encoded as per deflate. */
125   static const unsigned order[19] = {
126     16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
127   };
128   int size_only = !out;
129   size_t result_size = 0;
130 
131   for(i = 0; i < 19; i++) clcounts[i] = 0;
132 
133   /* Trim zeros. */
134   while (hlit > 0 && ll_lengths[257 + hlit - 1] == 0) hlit--;
135   while (hdist > 0 && d_lengths[1 + hdist - 1] == 0) hdist--;
136   hlit2 = hlit + 257;
137 
138   lld_total = hlit2 + hdist + 1;
139 
140   for (i = 0; i < lld_total; i++) {
141     /* This is an encoding of a huffman tree, so now the length is a symbol */
142     unsigned char symbol = i < hlit2 ? ll_lengths[i] : d_lengths[i - hlit2];
143     unsigned count = 1;
144     if(use_16 || (symbol == 0 && (use_17 || use_18))) {
145       for (j = i + 1; j < lld_total && symbol ==
146           (j < hlit2 ? ll_lengths[j] : d_lengths[j - hlit2]); j++) {
147         count++;
148       }
149     }
150     i += count - 1;
151 
152     /* Repetitions of zeroes */
153     if (symbol == 0 && count >= 3) {
154       if (use_18) {
155         while (count >= 11) {
156           unsigned count2 = count > 138 ? 138 : count;
157           if (!size_only) {
158             ZOPFLI_APPEND_DATA(18, &rle, &rle_size);
159             ZOPFLI_APPEND_DATA(count2 - 11, &rle_bits, &rle_bits_size);
160           }
161           clcounts[18]++;
162           count -= count2;
163         }
164       }
165       if (use_17) {
166         while (count >= 3) {
167           unsigned count2 = count > 10 ? 10 : count;
168           if (!size_only) {
169             ZOPFLI_APPEND_DATA(17, &rle, &rle_size);
170             ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
171           }
172           clcounts[17]++;
173           count -= count2;
174         }
175       }
176     }
177 
178     /* Repetitions of any symbol */
179     if (use_16 && count >= 4) {
180       count--;  /* Since the first one is hardcoded. */
181       clcounts[symbol]++;
182       if (!size_only) {
183         ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
184         ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
185       }
186       while (count >= 3) {
187         unsigned count2 = count > 6 ? 6 : count;
188         if (!size_only) {
189           ZOPFLI_APPEND_DATA(16, &rle, &rle_size);
190           ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
191         }
192         clcounts[16]++;
193         count -= count2;
194       }
195     }
196 
197     /* No or insufficient repetition */
198     clcounts[symbol] += count;
199     while (count > 0) {
200       if (!size_only) {
201         ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
202         ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
203       }
204       count--;
205     }
206   }
207 
208   ZopfliCalculateBitLengths(clcounts, 19, 7, clcl);
209   if (!size_only) ZopfliLengthsToSymbols(clcl, 19, 7, clsymbols);
210 
211   hclen = 15;
212   /* Trim zeros. */
213   while (hclen > 0 && clcounts[order[hclen + 4 - 1]] == 0) hclen--;
214 
215   if (!size_only) {
216     AddBits(hlit, 5, bp, out, outsize);
217     AddBits(hdist, 5, bp, out, outsize);
218     AddBits(hclen, 4, bp, out, outsize);
219 
220     for (i = 0; i < hclen + 4; i++) {
221       AddBits(clcl[order[i]], 3, bp, out, outsize);
222     }
223 
224     for (i = 0; i < rle_size; i++) {
225       unsigned symbol = clsymbols[rle[i]];
226       AddHuffmanBits(symbol, clcl[rle[i]], bp, out, outsize);
227       /* Extra bits. */
228       if (rle[i] == 16) AddBits(rle_bits[i], 2, bp, out, outsize);
229       else if (rle[i] == 17) AddBits(rle_bits[i], 3, bp, out, outsize);
230       else if (rle[i] == 18) AddBits(rle_bits[i], 7, bp, out, outsize);
231     }
232   }
233 
234   result_size += 14;  /* hlit, hdist, hclen bits */
235   result_size += (hclen + 4) * 3;  /* clcl bits */
236   for(i = 0; i < 19; i++) {
237     result_size += clcl[i] * clcounts[i];
238   }
239   /* Extra bits. */
240   result_size += clcounts[16] * 2;
241   result_size += clcounts[17] * 3;
242   result_size += clcounts[18] * 7;
243 
244   /* Note: in case of "size_only" these are null pointers so no effect. */
245   free(rle);
246   free(rle_bits);
247 
248   return result_size;
249 }
250 
AddDynamicTree(const unsigned * ll_lengths,const unsigned * d_lengths,unsigned char * bp,unsigned char ** out,size_t * outsize)251 static void AddDynamicTree(const unsigned* ll_lengths,
252                            const unsigned* d_lengths,
253                            unsigned char* bp,
254                            unsigned char** out, size_t* outsize) {
255   int i;
256   int best = 0;
257   size_t bestsize = 0;
258 
259   for(i = 0; i < 8; i++) {
260     size_t size = EncodeTree(ll_lengths, d_lengths,
261                              i & 1, i & 2, i & 4,
262                              0, 0, 0);
263     if (bestsize == 0 || size < bestsize) {
264       bestsize = size;
265       best = i;
266     }
267   }
268 
269   EncodeTree(ll_lengths, d_lengths,
270              best & 1, best & 2, best & 4,
271              bp, out, outsize);
272 }
273 
274 /*
275 Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE.
276 */
CalculateTreeSize(const unsigned * ll_lengths,const unsigned * d_lengths)277 static size_t CalculateTreeSize(const unsigned* ll_lengths,
278                                 const unsigned* d_lengths) {
279   size_t result = 0;
280   int i;
281 
282   for(i = 0; i < 8; i++) {
283     size_t size = EncodeTree(ll_lengths, d_lengths,
284                              i & 1, i & 2, i & 4,
285                              0, 0, 0);
286     if (result == 0 || size < result) result = size;
287   }
288 
289   return result;
290 }
291 
292 /*
293 Adds all lit/len and dist codes from the lists as huffman symbols. Does not add
294 end code 256. expected_data_size is the uncompressed block size, used for
295 assert, but you can set it to 0 to not do the assertion.
296 */
AddLZ77Data(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,const unsigned * ll_symbols,const unsigned * ll_lengths,const unsigned * d_symbols,const unsigned * d_lengths,unsigned char * bp,unsigned char ** out,size_t * outsize)297 static void AddLZ77Data(const ZopfliLZ77Store* lz77,
298                         size_t lstart, size_t lend,
299                         size_t expected_data_size,
300                         const unsigned* ll_symbols, const unsigned* ll_lengths,
301                         const unsigned* d_symbols, const unsigned* d_lengths,
302                         unsigned char* bp,
303                         unsigned char** out, size_t* outsize) {
304   size_t testlength = 0;
305   size_t i;
306 
307   for (i = lstart; i < lend; i++) {
308     unsigned dist = lz77->dists[i];
309     unsigned litlen = lz77->litlens[i];
310     if (dist == 0) {
311       assert(litlen < 256);
312       assert(ll_lengths[litlen] > 0);
313       AddHuffmanBits(ll_symbols[litlen], ll_lengths[litlen], bp, out, outsize);
314       testlength++;
315     } else {
316       unsigned lls = ZopfliGetLengthSymbol(litlen);
317       unsigned ds = ZopfliGetDistSymbol(dist);
318       assert(litlen >= 3 && litlen <= 288);
319       assert(ll_lengths[lls] > 0);
320       assert(d_lengths[ds] > 0);
321       AddHuffmanBits(ll_symbols[lls], ll_lengths[lls], bp, out, outsize);
322       AddBits(ZopfliGetLengthExtraBitsValue(litlen),
323               ZopfliGetLengthExtraBits(litlen),
324               bp, out, outsize);
325       AddHuffmanBits(d_symbols[ds], d_lengths[ds], bp, out, outsize);
326       AddBits(ZopfliGetDistExtraBitsValue(dist),
327               ZopfliGetDistExtraBits(dist),
328               bp, out, outsize);
329       testlength += litlen;
330     }
331   }
332   assert(expected_data_size == 0 || testlength == expected_data_size);
333 }
334 
GetFixedTree(unsigned * ll_lengths,unsigned * d_lengths)335 static void GetFixedTree(unsigned* ll_lengths, unsigned* d_lengths) {
336   size_t i;
337   for (i = 0; i < 144; i++) ll_lengths[i] = 8;
338   for (i = 144; i < 256; i++) ll_lengths[i] = 9;
339   for (i = 256; i < 280; i++) ll_lengths[i] = 7;
340   for (i = 280; i < 288; i++) ll_lengths[i] = 8;
341   for (i = 0; i < 32; i++) d_lengths[i] = 5;
342 }
343 
344 /*
345 Same as CalculateBlockSymbolSize, but for block size smaller than histogram
346 size.
347 */
CalculateBlockSymbolSizeSmall(const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)348 static size_t CalculateBlockSymbolSizeSmall(const unsigned* ll_lengths,
349                                             const unsigned* d_lengths,
350                                             const ZopfliLZ77Store* lz77,
351                                             size_t lstart, size_t lend) {
352   size_t result = 0;
353   size_t i;
354   for (i = lstart; i < lend; i++) {
355     assert(i < lz77->size);
356     assert(lz77->litlens[i] < 259);
357     if (lz77->dists[i] == 0) {
358       result += ll_lengths[lz77->litlens[i]];
359     } else {
360       int ll_symbol = ZopfliGetLengthSymbol(lz77->litlens[i]);
361       int d_symbol = ZopfliGetDistSymbol(lz77->dists[i]);
362       result += ll_lengths[ll_symbol];
363       result += d_lengths[d_symbol];
364       result += ZopfliGetLengthSymbolExtraBits(ll_symbol);
365       result += ZopfliGetDistSymbolExtraBits(d_symbol);
366     }
367   }
368   result += ll_lengths[256]; /*end symbol*/
369   return result;
370 }
371 
372 /*
373 Same as CalculateBlockSymbolSize, but with the histogram provided by the caller.
374 */
CalculateBlockSymbolSizeGivenCounts(const size_t * ll_counts,const size_t * d_counts,const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)375 static size_t CalculateBlockSymbolSizeGivenCounts(const size_t* ll_counts,
376                                                   const size_t* d_counts,
377                                                   const unsigned* ll_lengths,
378                                                   const unsigned* d_lengths,
379                                                   const ZopfliLZ77Store* lz77,
380                                                   size_t lstart, size_t lend) {
381   size_t result = 0;
382   size_t i;
383   if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
384     return CalculateBlockSymbolSizeSmall(
385         ll_lengths, d_lengths, lz77, lstart, lend);
386   } else {
387     for (i = 0; i < 256; i++) {
388       result += ll_lengths[i] * ll_counts[i];
389     }
390     for (i = 257; i < 286; i++) {
391       result += ll_lengths[i] * ll_counts[i];
392       result += ZopfliGetLengthSymbolExtraBits(i) * ll_counts[i];
393     }
394     for (i = 0; i < 30; i++) {
395       result += d_lengths[i] * d_counts[i];
396       result += ZopfliGetDistSymbolExtraBits(i) * d_counts[i];
397     }
398     result += ll_lengths[256]; /*end symbol*/
399     return result;
400   }
401 }
402 
403 /*
404 Calculates size of the part after the header and tree of an LZ77 block, in bits.
405 */
CalculateBlockSymbolSize(const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)406 static size_t CalculateBlockSymbolSize(const unsigned* ll_lengths,
407                                        const unsigned* d_lengths,
408                                        const ZopfliLZ77Store* lz77,
409                                        size_t lstart, size_t lend) {
410   if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
411     return CalculateBlockSymbolSizeSmall(
412         ll_lengths, d_lengths, lz77, lstart, lend);
413   } else {
414     size_t ll_counts[ZOPFLI_NUM_LL];
415     size_t d_counts[ZOPFLI_NUM_D];
416     ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
417     return CalculateBlockSymbolSizeGivenCounts(
418         ll_counts, d_counts, ll_lengths, d_lengths, lz77, lstart, lend);
419   }
420 }
421 
AbsDiff(size_t x,size_t y)422 static size_t AbsDiff(size_t x, size_t y) {
423   if (x > y)
424     return x - y;
425   else
426     return y - x;
427 }
428 
429 /*
430 Changes the population counts in a way that the consequent Huffman tree
431 compression, especially its rle-part, will be more likely to compress this data
432 more efficiently. length contains the size of the histogram.
433 */
OptimizeHuffmanForRle(unsigned length,size_t * counts)434 void OptimizeHuffmanForRle(unsigned length, size_t* counts) {
435   unsigned i;
436   int k, stride;
437   size_t symbol, sum, limit;
438   int* good_for_rle;
439 
440   /* 1) We don't want to touch the trailing zeros. We may break the
441   rules of the format by adding more data in the distance codes. */
442   for (; length > 0; --length) {
443     if (counts[length - 1] != 0) {
444       /* Now counts[0..length - 1] does not have trailing zeros. */
445       break;
446     }
447   }
448   if (length == 0) {
449     return;
450   }
451   /* 2) Let's mark all population counts that already can be encoded
452   with an rle code.*/
453   good_for_rle = (int*)malloc(length * sizeof(int));
454   for (i = 0; i < length; ++i) good_for_rle[i] = 0;
455 
456   /* Let's not spoil any of the existing good rle codes.
457   Mark any seq of 0's that is longer than 5 as a good_for_rle.
458   Mark any seq of non-0's that is longer than 7 as a good_for_rle.*/
459   symbol = counts[0];
460   stride = 0;
461   for (i = 0; i < length + 1; ++i) {
462     if (i == length || counts[i] != symbol) {
463       if ((symbol == 0 && stride >= 5) || (symbol != 0 && stride >= 7)) {
464         for (k = 0; k < stride; ++k) {
465           good_for_rle[i - k - 1] = 1;
466         }
467       }
468       stride = 1;
469       if (i != length) {
470         symbol = counts[i];
471       }
472     } else {
473       ++stride;
474     }
475   }
476 
477   /* 3) Let's replace those population counts that lead to more rle codes. */
478   stride = 0;
479   limit = counts[0];
480   sum = 0;
481   for (i = 0; i < length + 1; ++i) {
482     if (i == length || good_for_rle[i]
483         /* Heuristic for selecting the stride ranges to collapse. */
484         || AbsDiff(counts[i], limit) >= 4) {
485       if (stride >= 4 || (stride >= 3 && sum == 0)) {
486         /* The stride must end, collapse what we have, if we have enough (4). */
487         int count = (sum + stride / 2) / stride;
488         if (count < 1) count = 1;
489         if (sum == 0) {
490           /* Don't make an all zeros stride to be upgraded to ones. */
491           count = 0;
492         }
493         for (k = 0; k < stride; ++k) {
494           /* We don't want to change value at counts[i],
495           that is already belonging to the next stride. Thus - 1. */
496           counts[i - k - 1] = count;
497         }
498       }
499       stride = 0;
500       sum = 0;
501       if (i < length - 3) {
502         /* All interesting strides have a count of at least 4,
503         at least when non-zeros. */
504         limit = (counts[i] + counts[i + 1] +
505                  counts[i + 2] + counts[i + 3] + 2) / 4;
506       } else if (i < length) {
507         limit = counts[i];
508       } else {
509         limit = 0;
510       }
511     }
512     ++stride;
513     if (i != length) {
514       sum += counts[i];
515     }
516   }
517 
518   free(good_for_rle);
519 }
520 
521 /*
522 Tries out OptimizeHuffmanForRle for this block, if the result is smaller,
523 uses it, otherwise keeps the original. Returns size of encoded tree and data in
524 bits, not including the 3-bit block header.
525 */
TryOptimizeHuffmanForRle(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,const size_t * ll_counts,const size_t * d_counts,unsigned * ll_lengths,unsigned * d_lengths)526 static double TryOptimizeHuffmanForRle(
527     const ZopfliLZ77Store* lz77, size_t lstart, size_t lend,
528     const size_t* ll_counts, const size_t* d_counts,
529     unsigned* ll_lengths, unsigned* d_lengths) {
530   size_t ll_counts2[ZOPFLI_NUM_LL];
531   size_t d_counts2[ZOPFLI_NUM_D];
532   unsigned ll_lengths2[ZOPFLI_NUM_LL];
533   unsigned d_lengths2[ZOPFLI_NUM_D];
534   double treesize;
535   double datasize;
536   double treesize2;
537   double datasize2;
538 
539   treesize = CalculateTreeSize(ll_lengths, d_lengths);
540   datasize = CalculateBlockSymbolSizeGivenCounts(ll_counts, d_counts,
541       ll_lengths, d_lengths, lz77, lstart, lend);
542 
543   memcpy(ll_counts2, ll_counts, sizeof(ll_counts2));
544   memcpy(d_counts2, d_counts, sizeof(d_counts2));
545   OptimizeHuffmanForRle(ZOPFLI_NUM_LL, ll_counts2);
546   OptimizeHuffmanForRle(ZOPFLI_NUM_D, d_counts2);
547   ZopfliCalculateBitLengths(ll_counts2, ZOPFLI_NUM_LL, 15, ll_lengths2);
548   ZopfliCalculateBitLengths(d_counts2, ZOPFLI_NUM_D, 15, d_lengths2);
549   PatchDistanceCodesForBuggyDecoders(d_lengths2);
550 
551   treesize2 = CalculateTreeSize(ll_lengths2, d_lengths2);
552   datasize2 = CalculateBlockSymbolSizeGivenCounts(ll_counts, d_counts,
553       ll_lengths2, d_lengths2, lz77, lstart, lend);
554 
555   if (treesize2 + datasize2 < treesize + datasize) {
556     memcpy(ll_lengths, ll_lengths2, sizeof(ll_lengths2));
557     memcpy(d_lengths, d_lengths2, sizeof(d_lengths2));
558     return treesize2 + datasize2;
559   }
560   return treesize + datasize;
561 }
562 
563 /*
564 Calculates the bit lengths for the symbols for dynamic blocks. Chooses bit
565 lengths that give the smallest size of tree encoding + encoding of all the
566 symbols to have smallest output size. This are not necessarily the ideal Huffman
567 bit lengths. Returns size of encoded tree and data in bits, not including the
568 3-bit block header.
569 */
GetDynamicLengths(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,unsigned * ll_lengths,unsigned * d_lengths)570 static double GetDynamicLengths(const ZopfliLZ77Store* lz77,
571                                 size_t lstart, size_t lend,
572                                 unsigned* ll_lengths, unsigned* d_lengths) {
573   size_t ll_counts[ZOPFLI_NUM_LL];
574   size_t d_counts[ZOPFLI_NUM_D];
575 
576   ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
577   ll_counts[256] = 1;  /* End symbol. */
578   ZopfliCalculateBitLengths(ll_counts, ZOPFLI_NUM_LL, 15, ll_lengths);
579   ZopfliCalculateBitLengths(d_counts, ZOPFLI_NUM_D, 15, d_lengths);
580   PatchDistanceCodesForBuggyDecoders(d_lengths);
581   return TryOptimizeHuffmanForRle(
582       lz77, lstart, lend, ll_counts, d_counts, ll_lengths, d_lengths);
583 }
584 
ZopfliCalculateBlockSize(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,int btype)585 double ZopfliCalculateBlockSize(const ZopfliLZ77Store* lz77,
586                                 size_t lstart, size_t lend, int btype) {
587   unsigned ll_lengths[ZOPFLI_NUM_LL];
588   unsigned d_lengths[ZOPFLI_NUM_D];
589 
590   double result = 3; /* bfinal and btype bits */
591 
592   if (btype == 0) {
593     size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
594     size_t rem = length % 65535;
595     size_t blocks = length / 65535 + (rem ? 1 : 0);
596     /* An uncompressed block must actually be split into multiple blocks if it's
597        larger than 65535 bytes long. Eeach block header is 5 bytes: 3 bits,
598        padding, LEN and NLEN (potential less padding for first one ignored). */
599     return blocks * 5 * 8 + length * 8;
600   } if (btype == 1) {
601     GetFixedTree(ll_lengths, d_lengths);
602     result += CalculateBlockSymbolSize(
603         ll_lengths, d_lengths, lz77, lstart, lend);
604   } else {
605     result += GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
606   }
607 
608   return result;
609 }
610 
ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)611 double ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store* lz77,
612                                         size_t lstart, size_t lend) {
613   double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
614   /* Don't do the expensive fixed cost calculation for larger blocks that are
615      unlikely to use it. */
616   double fixedcost = (lz77->size > 1000) ?
617       uncompressedcost : ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
618   double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
619   return (uncompressedcost < fixedcost && uncompressedcost < dyncost)
620       ? uncompressedcost
621       : (fixedcost < dyncost ? fixedcost : dyncost);
622 }
623 
624 /* Since an uncompressed block can be max 65535 in size, it actually adds
625 multible blocks if needed. */
AddNonCompressedBlock(const ZopfliOptions * options,int final,const unsigned char * in,size_t instart,size_t inend,unsigned char * bp,unsigned char ** out,size_t * outsize)626 static void AddNonCompressedBlock(const ZopfliOptions* options, int final,
627                                   const unsigned char* in, size_t instart,
628                                   size_t inend,
629                                   unsigned char* bp,
630                                   unsigned char** out, size_t* outsize) {
631   size_t pos = instart;
632   (void)options;
633   for (;;) {
634     size_t i;
635     unsigned short blocksize = 65535;
636     unsigned short nlen;
637     int currentfinal;
638 
639     if (pos + blocksize > inend) blocksize = inend - pos;
640     currentfinal = pos + blocksize >= inend;
641 
642     nlen = ~blocksize;
643 
644     AddBit(final && currentfinal, bp, out, outsize);
645     /* BTYPE 00 */
646     AddBit(0, bp, out, outsize);
647     AddBit(0, bp, out, outsize);
648 
649     /* Any bits of input up to the next byte boundary are ignored. */
650     *bp = 0;
651 
652     ZOPFLI_APPEND_DATA(blocksize % 256, out, outsize);
653     ZOPFLI_APPEND_DATA((blocksize / 256) % 256, out, outsize);
654     ZOPFLI_APPEND_DATA(nlen % 256, out, outsize);
655     ZOPFLI_APPEND_DATA((nlen / 256) % 256, out, outsize);
656 
657     for (i = 0; i < blocksize; i++) {
658       ZOPFLI_APPEND_DATA(in[pos + i], out, outsize);
659     }
660 
661     if (currentfinal) break;
662     pos += blocksize;
663   }
664 }
665 
666 /*
667 Adds a deflate block with the given LZ77 data to the output.
668 options: global program options
669 btype: the block type, must be 1 or 2
670 final: whether to set the "final" bit on this block, must be the last block
671 litlens: literal/length array of the LZ77 data, in the same format as in
672     ZopfliLZ77Store.
673 dists: distance array of the LZ77 data, in the same format as in
674     ZopfliLZ77Store.
675 lstart: where to start in the LZ77 data
676 lend: where to end in the LZ77 data (not inclusive)
677 expected_data_size: the uncompressed block size, used for assert, but you can
678   set it to 0 to not do the assertion.
679 bp: output bit pointer
680 out: dynamic output array to append to
681 outsize: dynamic output array size
682 */
AddLZ77Block(const ZopfliOptions * options,int btype,int final,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,unsigned char * bp,unsigned char ** out,size_t * outsize)683 static void AddLZ77Block(const ZopfliOptions* options, int btype, int final,
684                          const ZopfliLZ77Store* lz77,
685                          size_t lstart, size_t lend,
686                          size_t expected_data_size,
687                          unsigned char* bp,
688                          unsigned char** out, size_t* outsize) {
689   unsigned ll_lengths[ZOPFLI_NUM_LL];
690   unsigned d_lengths[ZOPFLI_NUM_D];
691   unsigned ll_symbols[ZOPFLI_NUM_LL];
692   unsigned d_symbols[ZOPFLI_NUM_D];
693   size_t detect_block_size = *outsize;
694   size_t compressed_size;
695   size_t uncompressed_size = 0;
696   size_t i;
697   if (btype == 0) {
698     size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
699     size_t pos = lstart == lend ? 0 : lz77->pos[lstart];
700     size_t end = pos + length;
701     AddNonCompressedBlock(options, final,
702                           lz77->data, pos, end, bp, out, outsize);
703     return;
704   }
705 
706   AddBit(final, bp, out, outsize);
707   AddBit(btype & 1, bp, out, outsize);
708   AddBit((btype & 2) >> 1, bp, out, outsize);
709 
710   if (btype == 1) {
711     /* Fixed block. */
712     GetFixedTree(ll_lengths, d_lengths);
713   } else {
714     /* Dynamic block. */
715     unsigned detect_tree_size;
716     assert(btype == 2);
717 
718     GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
719 
720     detect_tree_size = *outsize;
721     AddDynamicTree(ll_lengths, d_lengths, bp, out, outsize);
722     if (options->verbose) {
723       fprintf(stderr, "treesize: %d\n", (int)(*outsize - detect_tree_size));
724     }
725   }
726 
727   ZopfliLengthsToSymbols(ll_lengths, ZOPFLI_NUM_LL, 15, ll_symbols);
728   ZopfliLengthsToSymbols(d_lengths, ZOPFLI_NUM_D, 15, d_symbols);
729 
730   detect_block_size = *outsize;
731   AddLZ77Data(lz77, lstart, lend, expected_data_size,
732               ll_symbols, ll_lengths, d_symbols, d_lengths,
733               bp, out, outsize);
734   /* End symbol. */
735   AddHuffmanBits(ll_symbols[256], ll_lengths[256], bp, out, outsize);
736 
737   for (i = lstart; i < lend; i++) {
738     uncompressed_size += lz77->dists[i] == 0 ? 1 : lz77->litlens[i];
739   }
740   compressed_size = *outsize - detect_block_size;
741   if (options->verbose) {
742     fprintf(stderr, "compressed block size: %d (%dk) (unc: %d)\n",
743            (int)compressed_size, (int)(compressed_size / 1024),
744            (int)(uncompressed_size));
745   }
746 }
747 
AddLZ77BlockAutoType(const ZopfliOptions * options,int final,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,unsigned char * bp,unsigned char ** out,size_t * outsize)748 static void AddLZ77BlockAutoType(const ZopfliOptions* options, int final,
749                                  const ZopfliLZ77Store* lz77,
750                                  size_t lstart, size_t lend,
751                                  size_t expected_data_size,
752                                  unsigned char* bp,
753                                  unsigned char** out, size_t* outsize) {
754   double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
755   double fixedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
756   double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
757 
758   /* Whether to perform the expensive calculation of creating an optimal block
759   with fixed huffman tree to check if smaller. Only do this for small blocks or
760   blocks which already are pretty good with fixed huffman tree. */
761   int expensivefixed = (lz77->size < 1000) || fixedcost <= dyncost * 1.1;
762 
763   ZopfliLZ77Store fixedstore;
764   if (lstart == lend) {
765     /* Smallest empty block is represented by fixed block */
766     AddBits(final, 1, bp, out, outsize);
767     AddBits(1, 2, bp, out, outsize);  /* btype 01 */
768     AddBits(0, 7, bp, out, outsize);  /* end symbol has code 0000000 */
769     return;
770   }
771   ZopfliInitLZ77Store(lz77->data, &fixedstore);
772   if (expensivefixed) {
773     /* Recalculate the LZ77 with ZopfliLZ77OptimalFixed */
774     size_t instart = lz77->pos[lstart];
775     size_t inend = instart + ZopfliLZ77GetByteRange(lz77, lstart, lend);
776 
777     ZopfliBlockState s;
778     ZopfliInitBlockState(options, instart, inend, 1, &s);
779     ZopfliLZ77OptimalFixed(&s, lz77->data, instart, inend, &fixedstore);
780     fixedcost = ZopfliCalculateBlockSize(&fixedstore, 0, fixedstore.size, 1);
781     ZopfliCleanBlockState(&s);
782   }
783 
784   if (uncompressedcost < fixedcost && uncompressedcost < dyncost) {
785     AddLZ77Block(options, 0, final, lz77, lstart, lend,
786                  expected_data_size, bp, out, outsize);
787   } else if (fixedcost < dyncost) {
788     if (expensivefixed) {
789       AddLZ77Block(options, 1, final, &fixedstore, 0, fixedstore.size,
790                    expected_data_size, bp, out, outsize);
791     } else {
792       AddLZ77Block(options, 1, final, lz77, lstart, lend,
793                    expected_data_size, bp, out, outsize);
794     }
795   } else {
796     AddLZ77Block(options, 2, final, lz77, lstart, lend,
797                  expected_data_size, bp, out, outsize);
798   }
799 
800   ZopfliCleanLZ77Store(&fixedstore);
801 }
802 
803 /*
804 Deflate a part, to allow ZopfliDeflate() to use multiple master blocks if
805 needed.
806 It is possible to call this function multiple times in a row, shifting
807 instart and inend to next bytes of the data. If instart is larger than 0, then
808 previous bytes are used as the initial dictionary for LZ77.
809 This function will usually output multiple deflate blocks. If final is 1, then
810 the final bit will be set on the last block.
811 */
ZopfliDeflatePart(const ZopfliOptions * options,int btype,int final,const unsigned char * in,size_t instart,size_t inend,unsigned char * bp,unsigned char ** out,size_t * outsize)812 void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final,
813                        const unsigned char* in, size_t instart, size_t inend,
814                        unsigned char* bp, unsigned char** out,
815                        size_t* outsize) {
816   size_t i;
817   /* byte coordinates rather than lz77 index */
818   size_t* splitpoints_uncompressed = 0;
819   size_t npoints = 0;
820   size_t* splitpoints = 0;
821   double totalcost = 0;
822   ZopfliLZ77Store lz77;
823 
824   /* If btype=2 is specified, it tries all block types. If a lesser btype is
825   given, then however it forces that one. Neither of the lesser types needs
826   block splitting as they have no dynamic huffman trees. */
827   if (btype == 0) {
828     AddNonCompressedBlock(options, final, in, instart, inend, bp, out, outsize);
829     return;
830   } else if (btype == 1) {
831     ZopfliLZ77Store store;
832     ZopfliBlockState s;
833     ZopfliInitLZ77Store(in, &store);
834     ZopfliInitBlockState(options, instart, inend, 1, &s);
835 
836     ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store);
837     AddLZ77Block(options, btype, final, &store, 0, store.size, 0,
838                  bp, out, outsize);
839 
840     ZopfliCleanBlockState(&s);
841     ZopfliCleanLZ77Store(&store);
842     return;
843   }
844 
845 
846   if (options->blocksplitting) {
847     ZopfliBlockSplit(options, in, instart, inend,
848                      options->blocksplittingmax,
849                      &splitpoints_uncompressed, &npoints);
850     splitpoints = (size_t*)malloc(sizeof(*splitpoints) * npoints);
851   }
852 
853   ZopfliInitLZ77Store(in, &lz77);
854 
855   for (i = 0; i <= npoints; i++) {
856     size_t start = i == 0 ? instart : splitpoints_uncompressed[i - 1];
857     size_t end = i == npoints ? inend : splitpoints_uncompressed[i];
858     ZopfliBlockState s;
859     ZopfliLZ77Store store;
860     ZopfliInitLZ77Store(in, &store);
861     ZopfliInitBlockState(options, start, end, 1, &s);
862     ZopfliLZ77Optimal(&s, in, start, end, options->numiterations, &store);
863     totalcost += ZopfliCalculateBlockSizeAutoType(&store, 0, store.size);
864 
865     ZopfliAppendLZ77Store(&store, &lz77);
866     if (i < npoints) splitpoints[i] = lz77.size;
867 
868     ZopfliCleanBlockState(&s);
869     ZopfliCleanLZ77Store(&store);
870   }
871 
872   /* Second block splitting attempt */
873   if (options->blocksplitting && npoints > 1) {
874     size_t* splitpoints2 = 0;
875     size_t npoints2 = 0;
876     double totalcost2 = 0;
877 
878     ZopfliBlockSplitLZ77(options, &lz77,
879                          options->blocksplittingmax, &splitpoints2, &npoints2);
880 
881     for (i = 0; i <= npoints2; i++) {
882       size_t start = i == 0 ? 0 : splitpoints2[i - 1];
883       size_t end = i == npoints2 ? lz77.size : splitpoints2[i];
884       totalcost2 += ZopfliCalculateBlockSizeAutoType(&lz77, start, end);
885     }
886 
887     if (totalcost2 < totalcost) {
888       free(splitpoints);
889       splitpoints = splitpoints2;
890       npoints = npoints2;
891     } else {
892       free(splitpoints2);
893     }
894   }
895 
896   for (i = 0; i <= npoints; i++) {
897     size_t start = i == 0 ? 0 : splitpoints[i - 1];
898     size_t end = i == npoints ? lz77.size : splitpoints[i];
899     AddLZ77BlockAutoType(options, i == npoints && final,
900                          &lz77, start, end, 0,
901                          bp, out, outsize);
902   }
903 
904   ZopfliCleanLZ77Store(&lz77);
905   free(splitpoints);
906   free(splitpoints_uncompressed);
907 }
908 
ZopfliDeflate(const ZopfliOptions * options,int btype,int final,const unsigned char * in,size_t insize,unsigned char * bp,unsigned char ** out,size_t * outsize)909 void ZopfliDeflate(const ZopfliOptions* options, int btype, int final,
910                    const unsigned char* in, size_t insize,
911                    unsigned char* bp, unsigned char** out, size_t* outsize) {
912  size_t offset = *outsize;
913 #if ZOPFLI_MASTER_BLOCK_SIZE == 0
914   ZopfliDeflatePart(options, btype, final, in, 0, insize, bp, out, outsize);
915 #else
916   size_t i = 0;
917   do {
918     int masterfinal = (i + ZOPFLI_MASTER_BLOCK_SIZE >= insize);
919     int final2 = final && masterfinal;
920     size_t size = masterfinal ? insize - i : ZOPFLI_MASTER_BLOCK_SIZE;
921     ZopfliDeflatePart(options, btype, final2,
922                       in, i, i + size, bp, out, outsize);
923     i += size;
924   } while (i < insize);
925 #endif
926   if (options->verbose) {
927     fprintf(stderr,
928             "Original Size: %lu, Deflate: %lu, Compression: %f%% Removed\n",
929             (unsigned long)insize, (unsigned long)(*outsize - offset),
930             100.0 * (double)(insize - (*outsize - offset)) / (double)insize);
931   }
932 }
933