1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8     http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19 
20 #include "deflate.h"
21 
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 
26 #include "blocksplitter.h"
27 #include "squeeze.h"
28 #include "symbols.h"
29 #include "tree.h"
30 
31 /*
32 bp = bitpointer, always in range [0, 7].
33 The outsize is number of necessary bytes to encode the bits.
34 Given the value of bp and the amount of bytes, the amount of bits represented
35 is not simply bytesize * 8 + bp because even representing one bit requires a
36 whole byte. It is: (bp == 0) ? (bytesize * 8) : ((bytesize - 1) * 8 + bp)
37 */
AddBit(int bit,unsigned char * bp,unsigned char ** out,size_t * outsize)38 static void AddBit(int bit,
39                    unsigned char* bp, unsigned char** out, size_t* outsize) {
40   if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
41   (*out)[*outsize - 1] |= bit << *bp;
42   *bp = (*bp + 1) & 7;
43 }
44 
AddBits(unsigned symbol,unsigned length,unsigned char * bp,unsigned char ** out,size_t * outsize)45 static void AddBits(unsigned symbol, unsigned length,
46                     unsigned char* bp, unsigned char** out, size_t* outsize) {
47   /* TODO(lode): make more efficient (add more bits at once). */
48   unsigned i;
49   for (i = 0; i < length; i++) {
50     unsigned bit = (symbol >> i) & 1;
51     if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
52     (*out)[*outsize - 1] |= bit << *bp;
53     *bp = (*bp + 1) & 7;
54   }
55 }
56 
57 /*
58 Adds bits, like AddBits, but the order is inverted. The deflate specification
59 uses both orders in one standard.
60 */
AddHuffmanBits(unsigned symbol,unsigned length,unsigned char * bp,unsigned char ** out,size_t * outsize)61 static void AddHuffmanBits(unsigned symbol, unsigned length,
62                            unsigned char* bp, unsigned char** out,
63                            size_t* outsize) {
64   /* TODO(lode): make more efficient (add more bits at once). */
65   unsigned i;
66   for (i = 0; i < length; i++) {
67     unsigned bit = (symbol >> (length - i - 1)) & 1;
68     if (*bp == 0) ZOPFLI_APPEND_DATA(0, out, outsize);
69     (*out)[*outsize - 1] |= bit << *bp;
70     *bp = (*bp + 1) & 7;
71   }
72 }
73 
74 /*
75 Ensures there are at least 2 distance codes to support buggy decoders.
76 Zlib 1.2.1 and below have a bug where it fails if there isn't at least 1
77 distance code (with length > 0), even though it's valid according to the
78 deflate spec to have 0 distance codes. On top of that, some mobile phones
79 require at least two distance codes. To support these decoders too (but
80 potentially at the cost of a few bytes), add dummy code lengths of 1.
81 References to this bug can be found in the changelog of
82 Zlib 1.2.2 and here: http://www.jonof.id.au/forum/index.php?topic=515.0.
83 
84 d_lengths: the 32 lengths of the distance codes.
85 */
PatchDistanceCodesForBuggyDecoders(unsigned * d_lengths)86 static void PatchDistanceCodesForBuggyDecoders(unsigned* d_lengths) {
87   int num_dist_codes = 0; /* Amount of non-zero distance codes */
88   int i;
89   for (i = 0; i < 30 /* Ignore the two unused codes from the spec */; i++) {
90     if (d_lengths[i]) num_dist_codes++;
91     if (num_dist_codes >= 2) return; /* Two or more codes is fine. */
92   }
93 
94   if (num_dist_codes == 0) {
95     d_lengths[0] = d_lengths[1] = 1;
96   } else if (num_dist_codes == 1) {
97     d_lengths[d_lengths[0] ? 1 : 0] = 1;
98   }
99 }
100 
101 /*
102 Encodes the Huffman tree and returns how many bits its encoding takes. If out
103 is a null pointer, only returns the size and runs faster.
104 */
EncodeTree(const unsigned * ll_lengths,const unsigned * d_lengths,int use_16,int use_17,int use_18,unsigned char * bp,unsigned char ** out,size_t * outsize)105 static size_t EncodeTree(const unsigned* ll_lengths,
106                          const unsigned* d_lengths,
107                          int use_16, int use_17, int use_18,
108                          unsigned char* bp,
109                          unsigned char** out, size_t* outsize) {
110   unsigned lld_total;  /* Total amount of literal, length, distance codes. */
111   /* Runlength encoded version of lengths of litlen and dist trees. */
112   unsigned* rle = 0;
113   unsigned* rle_bits = 0;  /* Extra bits for rle values 16, 17 and 18. */
114   size_t rle_size = 0;  /* Size of rle array. */
115   size_t rle_bits_size = 0;  /* Should have same value as rle_size. */
116   unsigned hlit = 29;  /* 286 - 257 */
117   unsigned hdist = 29;  /* 32 - 1, but gzip does not like hdist > 29.*/
118   unsigned hclen;
119   unsigned hlit2;
120   size_t i, j;
121   size_t clcounts[19];
122   unsigned clcl[19];  /* Code length code lengths. */
123   unsigned clsymbols[19];
124   /* The order in which code length code lengths are encoded as per deflate. */
125   static const unsigned order[19] = {
126     16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
127   };
128   int size_only = !out;
129   size_t result_size = 0;
130 
131   for(i = 0; i < 19; i++) clcounts[i] = 0;
132 
133   /* Trim zeros. */
134   while (hlit > 0 && ll_lengths[257 + hlit - 1] == 0) hlit--;
135   while (hdist > 0 && d_lengths[1 + hdist - 1] == 0) hdist--;
136   hlit2 = hlit + 257;
137 
138   lld_total = hlit2 + hdist + 1;
139 
140   for (i = 0; i < lld_total; i++) {
141     /* This is an encoding of a huffman tree, so now the length is a symbol */
142     unsigned char symbol = i < hlit2 ? ll_lengths[i] : d_lengths[i - hlit2];
143     unsigned count = 1;
144     if(use_16 || (symbol == 0 && (use_17 || use_18))) {
145       for (j = i + 1; j < lld_total && symbol ==
146           (j < hlit2 ? ll_lengths[j] : d_lengths[j - hlit2]); j++) {
147         count++;
148       }
149     }
150     i += count - 1;
151 
152     /* Repetitions of zeroes */
153     if (symbol == 0 && count >= 3) {
154       if (use_18) {
155         while (count >= 11) {
156           unsigned count2 = count > 138 ? 138 : count;
157           if (!size_only) {
158             ZOPFLI_APPEND_DATA(18, &rle, &rle_size);
159             ZOPFLI_APPEND_DATA(count2 - 11, &rle_bits, &rle_bits_size);
160           }
161           clcounts[18]++;
162           count -= count2;
163         }
164       }
165       if (use_17) {
166         while (count >= 3) {
167           unsigned count2 = count > 10 ? 10 : count;
168           if (!size_only) {
169             ZOPFLI_APPEND_DATA(17, &rle, &rle_size);
170             ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
171           }
172           clcounts[17]++;
173           count -= count2;
174         }
175       }
176     }
177 
178     /* Repetitions of any symbol */
179     if (use_16 && count >= 4) {
180       count--;  /* Since the first one is hardcoded. */
181       clcounts[symbol]++;
182       if (!size_only) {
183         ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
184         ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
185       }
186       while (count >= 3) {
187         unsigned count2 = count > 6 ? 6 : count;
188         if (!size_only) {
189           ZOPFLI_APPEND_DATA(16, &rle, &rle_size);
190           ZOPFLI_APPEND_DATA(count2 - 3, &rle_bits, &rle_bits_size);
191         }
192         clcounts[16]++;
193         count -= count2;
194       }
195     }
196 
197     /* No or insufficient repetition */
198     clcounts[symbol] += count;
199     while (count > 0) {
200       if (!size_only) {
201         ZOPFLI_APPEND_DATA(symbol, &rle, &rle_size);
202         ZOPFLI_APPEND_DATA(0, &rle_bits, &rle_bits_size);
203       }
204       count--;
205     }
206   }
207 
208   ZopfliCalculateBitLengths(clcounts, 19, 7, clcl);
209   if (!size_only) ZopfliLengthsToSymbols(clcl, 19, 7, clsymbols);
210 
211   hclen = 15;
212   /* Trim zeros. */
213   while (hclen > 0 && clcounts[order[hclen + 4 - 1]] == 0) hclen--;
214 
215   if (!size_only) {
216     AddBits(hlit, 5, bp, out, outsize);
217     AddBits(hdist, 5, bp, out, outsize);
218     AddBits(hclen, 4, bp, out, outsize);
219 
220     for (i = 0; i < hclen + 4; i++) {
221       AddBits(clcl[order[i]], 3, bp, out, outsize);
222     }
223 
224     for (i = 0; i < rle_size; i++) {
225       unsigned symbol = clsymbols[rle[i]];
226       AddHuffmanBits(symbol, clcl[rle[i]], bp, out, outsize);
227       /* Extra bits. */
228       if (rle[i] == 16) AddBits(rle_bits[i], 2, bp, out, outsize);
229       else if (rle[i] == 17) AddBits(rle_bits[i], 3, bp, out, outsize);
230       else if (rle[i] == 18) AddBits(rle_bits[i], 7, bp, out, outsize);
231     }
232   }
233 
234   result_size += 14;  /* hlit, hdist, hclen bits */
235   result_size += (hclen + 4) * 3;  /* clcl bits */
236   for(i = 0; i < 19; i++) {
237     result_size += clcl[i] * clcounts[i];
238   }
239   /* Extra bits. */
240   result_size += clcounts[16] * 2;
241   result_size += clcounts[17] * 3;
242   result_size += clcounts[18] * 7;
243 
244   /* Note: in case of "size_only" these are null pointers so no effect. */
245   free(rle);
246   free(rle_bits);
247 
248   return result_size;
249 }
250 
AddDynamicTree(const unsigned * ll_lengths,const unsigned * d_lengths,unsigned char * bp,unsigned char ** out,size_t * outsize)251 static void AddDynamicTree(const unsigned* ll_lengths,
252                            const unsigned* d_lengths,
253                            unsigned char* bp,
254                            unsigned char** out, size_t* outsize) {
255   int i;
256   int best = 0;
257   size_t bestsize = 0;
258 
259   for(i = 0; i < 8; i++) {
260     size_t size = EncodeTree(ll_lengths, d_lengths,
261                              i & 1, i & 2, i & 4,
262                              0, 0, 0);
263     if (bestsize == 0 || size < bestsize) {
264       bestsize = size;
265       best = i;
266     }
267   }
268 
269   EncodeTree(ll_lengths, d_lengths,
270              best & 1, best & 2, best & 4,
271              bp, out, outsize);
272 }
273 
274 /*
275 Gives the exact size of the tree, in bits, as it will be encoded in DEFLATE.
276 */
CalculateTreeSize(const unsigned * ll_lengths,const unsigned * d_lengths)277 static size_t CalculateTreeSize(const unsigned* ll_lengths,
278                                 const unsigned* d_lengths) {
279   size_t result = 0;
280   int i;
281 
282   for(i = 0; i < 8; i++) {
283     size_t size = EncodeTree(ll_lengths, d_lengths,
284                              i & 1, i & 2, i & 4,
285                              0, 0, 0);
286     if (result == 0 || size < result) result = size;
287   }
288 
289   return result;
290 }
291 
292 /*
293 Adds all lit/len and dist codes from the lists as huffman symbols. Does not add
294 end code 256. expected_data_size is the uncompressed block size, used for
295 assert, but you can set it to 0 to not do the assertion.
296 */
AddLZ77Data(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,const unsigned * ll_symbols,const unsigned * ll_lengths,const unsigned * d_symbols,const unsigned * d_lengths,unsigned char * bp,unsigned char ** out,size_t * outsize)297 static void AddLZ77Data(const ZopfliLZ77Store* lz77,
298                         size_t lstart, size_t lend,
299                         size_t expected_data_size,
300                         const unsigned* ll_symbols, const unsigned* ll_lengths,
301                         const unsigned* d_symbols, const unsigned* d_lengths,
302                         unsigned char* bp,
303                         unsigned char** out, size_t* outsize) {
304   size_t testlength = 0;
305   size_t i;
306 
307   for (i = lstart; i < lend; i++) {
308     unsigned dist = lz77->dists[i];
309     unsigned litlen = lz77->litlens[i];
310     if (dist == 0) {
311       assert(litlen < 256);
312       assert(ll_lengths[litlen] > 0);
313       AddHuffmanBits(ll_symbols[litlen], ll_lengths[litlen], bp, out, outsize);
314       testlength++;
315     } else {
316       unsigned lls = ZopfliGetLengthSymbol(litlen);
317       unsigned ds = ZopfliGetDistSymbol(dist);
318       assert(litlen >= 3 && litlen <= 288);
319       assert(ll_lengths[lls] > 0);
320       assert(d_lengths[ds] > 0);
321       AddHuffmanBits(ll_symbols[lls], ll_lengths[lls], bp, out, outsize);
322       AddBits(ZopfliGetLengthExtraBitsValue(litlen),
323               ZopfliGetLengthExtraBits(litlen),
324               bp, out, outsize);
325       AddHuffmanBits(d_symbols[ds], d_lengths[ds], bp, out, outsize);
326       AddBits(ZopfliGetDistExtraBitsValue(dist),
327               ZopfliGetDistExtraBits(dist),
328               bp, out, outsize);
329       testlength += litlen;
330     }
331   }
332   assert(expected_data_size == 0 || testlength == expected_data_size);
333 }
334 
GetFixedTree(unsigned * ll_lengths,unsigned * d_lengths)335 static void GetFixedTree(unsigned* ll_lengths, unsigned* d_lengths) {
336   size_t i;
337   for (i = 0; i < 144; i++) ll_lengths[i] = 8;
338   for (i = 144; i < 256; i++) ll_lengths[i] = 9;
339   for (i = 256; i < 280; i++) ll_lengths[i] = 7;
340   for (i = 280; i < 288; i++) ll_lengths[i] = 8;
341   for (i = 0; i < 32; i++) d_lengths[i] = 5;
342 }
343 
344 /*
345 Calculates size of the part after the header and tree of an LZ77 block, in bits.
346 */
CalculateBlockSymbolSize(const unsigned * ll_lengths,const unsigned * d_lengths,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)347 static size_t CalculateBlockSymbolSize(const unsigned* ll_lengths,
348                                        const unsigned* d_lengths,
349                                        const ZopfliLZ77Store* lz77,
350                                        size_t lstart, size_t lend) {
351   size_t result = 0;
352   size_t i;
353   if (lstart + ZOPFLI_NUM_LL * 3 > lend) {
354     for (i = lstart; i < lend; i++) {
355       assert(i < lz77->size);
356       assert(lz77->litlens[i] < 259);
357       if (lz77->dists[i] == 0) {
358         result += ll_lengths[lz77->litlens[i]];
359       } else {
360         int ll_symbol = ZopfliGetLengthSymbol(lz77->litlens[i]);
361         int d_symbol = ZopfliGetDistSymbol(lz77->dists[i]);
362         result += ll_lengths[ll_symbol];
363         result += d_lengths[d_symbol];
364         result += ZopfliGetLengthSymbolExtraBits(ll_symbol);
365         result += ZopfliGetDistSymbolExtraBits(d_symbol);
366       }
367     }
368   } else {
369     size_t ll_counts[ZOPFLI_NUM_LL];
370     size_t d_counts[ZOPFLI_NUM_D];
371     ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
372     for (i = 0; i < 256; i++) {
373       result += ll_lengths[i] * ll_counts[i];
374     }
375     for (i = 257; i < 286; i++) {
376       result += ll_lengths[i] * ll_counts[i];
377       result += ZopfliGetLengthSymbolExtraBits(i) * ll_counts[i];
378     }
379     for (i = 0; i < 30; i++) {
380       result += d_lengths[i] * d_counts[i];
381       result += ZopfliGetDistSymbolExtraBits(i) * d_counts[i];
382     }
383   }
384   result += ll_lengths[256]; /*end symbol*/
385   return result;
386 }
387 
AbsDiff(size_t x,size_t y)388 static size_t AbsDiff(size_t x, size_t y) {
389   if (x > y)
390     return x - y;
391   else
392     return y - x;
393 }
394 
395 /*
396 Change the population counts in a way that the consequent Huffman tree
397 compression, especially its rle-part will be more likely to compress this data
398 more efficiently. length containts the size of the histogram.
399 */
OptimizeHuffmanForRle(int length,size_t * counts)400 void OptimizeHuffmanForRle(int length, size_t* counts) {
401   int i, k, stride;
402   size_t symbol, sum, limit;
403   int* good_for_rle;
404 
405   /* 1) We don't want to touch the trailing zeros. We may break the
406   rules of the format by adding more data in the distance codes. */
407   for (; length >= 0; --length) {
408     if (length == 0) {
409       return;
410     }
411     if (counts[length - 1] != 0) {
412       /* Now counts[0..length - 1] does not have trailing zeros. */
413       break;
414     }
415   }
416   /* 2) Let's mark all population counts that already can be encoded
417   with an rle code.*/
418   good_for_rle = (int*)malloc(length * sizeof(int));
419   for (i = 0; i < length; ++i) good_for_rle[i] = 0;
420 
421   /* Let's not spoil any of the existing good rle codes.
422   Mark any seq of 0's that is longer than 5 as a good_for_rle.
423   Mark any seq of non-0's that is longer than 7 as a good_for_rle.*/
424   symbol = counts[0];
425   stride = 0;
426   for (i = 0; i < length + 1; ++i) {
427     if (i == length || counts[i] != symbol) {
428       if ((symbol == 0 && stride >= 5) || (symbol != 0 && stride >= 7)) {
429         for (k = 0; k < stride; ++k) {
430           good_for_rle[i - k - 1] = 1;
431         }
432       }
433       stride = 1;
434       if (i != length) {
435         symbol = counts[i];
436       }
437     } else {
438       ++stride;
439     }
440   }
441 
442   /* 3) Let's replace those population counts that lead to more rle codes. */
443   stride = 0;
444   limit = counts[0];
445   sum = 0;
446   for (i = 0; i < length + 1; ++i) {
447     if (i == length || good_for_rle[i]
448         /* Heuristic for selecting the stride ranges to collapse. */
449         || AbsDiff(counts[i], limit) >= 4) {
450       if (stride >= 4 || (stride >= 3 && sum == 0)) {
451         /* The stride must end, collapse what we have, if we have enough (4). */
452         int count = (sum + stride / 2) / stride;
453         if (count < 1) count = 1;
454         if (sum == 0) {
455           /* Don't make an all zeros stride to be upgraded to ones. */
456           count = 0;
457         }
458         for (k = 0; k < stride; ++k) {
459           /* We don't want to change value at counts[i],
460           that is already belonging to the next stride. Thus - 1. */
461           counts[i - k - 1] = count;
462         }
463       }
464       stride = 0;
465       sum = 0;
466       if (i < length - 3) {
467         /* All interesting strides have a count of at least 4,
468         at least when non-zeros. */
469         limit = (counts[i] + counts[i + 1] +
470                  counts[i + 2] + counts[i + 3] + 2) / 4;
471       } else if (i < length) {
472         limit = counts[i];
473       } else {
474         limit = 0;
475       }
476     }
477     ++stride;
478     if (i != length) {
479       sum += counts[i];
480     }
481   }
482 
483   free(good_for_rle);
484 }
485 
486 /*
487 Calculates the bit lengths for the symbols for dynamic blocks. Chooses bit
488 lengths that give the smallest size of tree encoding + encoding of all the
489 symbols to have smallest output size. This are not necessarily the ideal Huffman
490 bit lengths.
491 */
GetDynamicLengths(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,unsigned * ll_lengths,unsigned * d_lengths)492 static void GetDynamicLengths(const ZopfliLZ77Store* lz77,
493                               size_t lstart, size_t lend,
494                               unsigned* ll_lengths, unsigned* d_lengths) {
495   size_t ll_counts[ZOPFLI_NUM_LL];
496   size_t d_counts[ZOPFLI_NUM_D];
497 
498   ZopfliLZ77GetHistogram(lz77, lstart, lend, ll_counts, d_counts);
499   ll_counts[256] = 1;  /* End symbol. */
500   OptimizeHuffmanForRle(ZOPFLI_NUM_LL, ll_counts);
501   OptimizeHuffmanForRle(ZOPFLI_NUM_D, d_counts);
502   ZopfliCalculateBitLengths(ll_counts, ZOPFLI_NUM_LL, 15, ll_lengths);
503   ZopfliCalculateBitLengths(d_counts, ZOPFLI_NUM_D, 15, d_lengths);
504   PatchDistanceCodesForBuggyDecoders(d_lengths);
505 }
506 
ZopfliCalculateBlockSize(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,int btype)507 double ZopfliCalculateBlockSize(const ZopfliLZ77Store* lz77,
508                                 size_t lstart, size_t lend, int btype) {
509   unsigned ll_lengths[ZOPFLI_NUM_LL];
510   unsigned d_lengths[ZOPFLI_NUM_D];
511 
512   double result = 3; /* bfinal and btype bits */
513 
514   if (btype == 0) {
515     size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
516     size_t rem = length % 65535;
517     size_t blocks = length / 65535 + (rem ? 1 : 0);
518     /* An uncompressed block must actually be split into multiple blocks if it's
519        larger than 65535 bytes long. Eeach block header is 5 bytes: 3 bits,
520        padding, LEN and NLEN (potential less padding for first one ignored). */
521     return blocks * 5 * 8 + length * 8;
522   } if (btype == 1) {
523     GetFixedTree(ll_lengths, d_lengths);
524   } else {
525     GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
526     result += CalculateTreeSize(ll_lengths, d_lengths);
527   }
528 
529   result += CalculateBlockSymbolSize(
530       ll_lengths, d_lengths, lz77, lstart, lend);
531 
532   return result;
533 }
534 
ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)535 double ZopfliCalculateBlockSizeAutoType(const ZopfliLZ77Store* lz77,
536                                         size_t lstart, size_t lend) {
537   double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
538   /* Don't do the expensive fixed cost calculation for larger blocks that are
539      unlikely to use it. */
540   double fixedcost = (lz77->size > 1000) ?
541       uncompressedcost : ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
542   double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
543   return (uncompressedcost < fixedcost && uncompressedcost < dyncost)
544       ? uncompressedcost
545       : (fixedcost < dyncost ? fixedcost : dyncost);
546 }
547 
548 /* Since an uncompressed block can be max 65535 in size, it actually adds
549 multible blocks if needed. */
AddNonCompressedBlock(const ZopfliOptions * options,int final,const unsigned char * in,size_t instart,size_t inend,unsigned char * bp,unsigned char ** out,size_t * outsize)550 static void AddNonCompressedBlock(const ZopfliOptions* options, int final,
551                                   const unsigned char* in, size_t instart,
552                                   size_t inend,
553                                   unsigned char* bp,
554                                   unsigned char** out, size_t* outsize) {
555   size_t pos = instart;
556   (void)options;
557   for (;;) {
558     size_t i;
559     unsigned short blocksize = 65535;
560     unsigned short nlen;
561     int currentfinal;
562 
563     if (pos + blocksize > inend) blocksize = inend - pos;
564     currentfinal = pos + blocksize >= inend;
565 
566     nlen = ~blocksize;
567 
568     AddBit(final && currentfinal, bp, out, outsize);
569     /* BTYPE 00 */
570     AddBit(0, bp, out, outsize);
571     AddBit(0, bp, out, outsize);
572 
573     /* Any bits of input up to the next byte boundary are ignored. */
574     *bp = 0;
575 
576     ZOPFLI_APPEND_DATA(blocksize % 256, out, outsize);
577     ZOPFLI_APPEND_DATA((blocksize / 256) % 256, out, outsize);
578     ZOPFLI_APPEND_DATA(nlen % 256, out, outsize);
579     ZOPFLI_APPEND_DATA((nlen / 256) % 256, out, outsize);
580 
581     for (i = 0; i < blocksize; i++) {
582       ZOPFLI_APPEND_DATA(in[pos + i], out, outsize);
583     }
584 
585     if (currentfinal) break;
586     pos += blocksize;
587   }
588 }
589 
590 /*
591 Adds a deflate block with the given LZ77 data to the output.
592 options: global program options
593 btype: the block type, must be 1 or 2
594 final: whether to set the "final" bit on this block, must be the last block
595 litlens: literal/length array of the LZ77 data, in the same format as in
596     ZopfliLZ77Store.
597 dists: distance array of the LZ77 data, in the same format as in
598     ZopfliLZ77Store.
599 lstart: where to start in the LZ77 data
600 lend: where to end in the LZ77 data (not inclusive)
601 expected_data_size: the uncompressed block size, used for assert, but you can
602   set it to 0 to not do the assertion.
603 bp: output bit pointer
604 out: dynamic output array to append to
605 outsize: dynamic output array size
606 */
AddLZ77Block(const ZopfliOptions * options,int btype,int final,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,unsigned char * bp,unsigned char ** out,size_t * outsize)607 static void AddLZ77Block(const ZopfliOptions* options, int btype, int final,
608                          const ZopfliLZ77Store* lz77,
609                          size_t lstart, size_t lend,
610                          size_t expected_data_size,
611                          unsigned char* bp,
612                          unsigned char** out, size_t* outsize) {
613   unsigned ll_lengths[ZOPFLI_NUM_LL];
614   unsigned d_lengths[ZOPFLI_NUM_D];
615   unsigned ll_symbols[ZOPFLI_NUM_LL];
616   unsigned d_symbols[ZOPFLI_NUM_D];
617   size_t detect_block_size = *outsize;
618   size_t compressed_size;
619   size_t uncompressed_size = 0;
620   size_t i;
621   if (btype == 0) {
622     size_t length = ZopfliLZ77GetByteRange(lz77, lstart, lend);
623     size_t pos = lstart == lend ? 0 : lz77->pos[lstart];
624     size_t end = pos + length;
625     AddNonCompressedBlock(options, final,
626                           lz77->data, pos, end, bp, out, outsize);
627     return;
628   }
629 
630   AddBit(final, bp, out, outsize);
631   AddBit(btype & 1, bp, out, outsize);
632   AddBit((btype & 2) >> 1, bp, out, outsize);
633 
634   if (btype == 1) {
635     /* Fixed block. */
636     GetFixedTree(ll_lengths, d_lengths);
637   } else {
638     /* Dynamic block. */
639     unsigned detect_tree_size;
640     assert(btype == 2);
641 
642     GetDynamicLengths(lz77, lstart, lend, ll_lengths, d_lengths);
643 
644     detect_tree_size = *outsize;
645     AddDynamicTree(ll_lengths, d_lengths, bp, out, outsize);
646     if (options->verbose) {
647       fprintf(stderr, "treesize: %d\n", (int)(*outsize - detect_tree_size));
648     }
649   }
650 
651   ZopfliLengthsToSymbols(ll_lengths, ZOPFLI_NUM_LL, 15, ll_symbols);
652   ZopfliLengthsToSymbols(d_lengths, ZOPFLI_NUM_D, 15, d_symbols);
653 
654   detect_block_size = *outsize;
655   AddLZ77Data(lz77, lstart, lend, expected_data_size,
656               ll_symbols, ll_lengths, d_symbols, d_lengths,
657               bp, out, outsize);
658   /* End symbol. */
659   AddHuffmanBits(ll_symbols[256], ll_lengths[256], bp, out, outsize);
660 
661   for (i = lstart; i < lend; i++) {
662     uncompressed_size += lz77->dists[i] == 0 ? 1 : lz77->litlens[i];
663   }
664   compressed_size = *outsize - detect_block_size;
665   if (options->verbose) {
666     fprintf(stderr, "compressed block size: %d (%dk) (unc: %d)\n",
667            (int)compressed_size, (int)(compressed_size / 1024),
668            (int)(uncompressed_size));
669   }
670 }
671 
AddLZ77BlockAutoType(const ZopfliOptions * options,int final,const ZopfliLZ77Store * lz77,size_t lstart,size_t lend,size_t expected_data_size,unsigned char * bp,unsigned char ** out,size_t * outsize)672 static void AddLZ77BlockAutoType(const ZopfliOptions* options, int final,
673                                  const ZopfliLZ77Store* lz77,
674                                  size_t lstart, size_t lend,
675                                  size_t expected_data_size,
676                                  unsigned char* bp,
677                                  unsigned char** out, size_t* outsize) {
678   double uncompressedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 0);
679   double fixedcost = ZopfliCalculateBlockSize(lz77, lstart, lend, 1);
680   double dyncost = ZopfliCalculateBlockSize(lz77, lstart, lend, 2);
681 
682   /* Whether to perform the expensive calculation of creating an optimal block
683   with fixed huffman tree to check if smaller. Only do this for small blocks or
684   blocks which already are pretty good with fixed huffman tree. */
685   int expensivefixed = (lz77->size < 1000) || fixedcost <= dyncost * 1.1;
686 
687   ZopfliLZ77Store fixedstore;
688   if (lstart == lend) {
689     /* Smallest empty block is represented by fixed block */
690     AddBits(final, 1, bp, out, outsize);
691     AddBits(1, 2, bp, out, outsize);  /* btype 01 */
692     AddBits(0, 7, bp, out, outsize);  /* end symbol has code 0000000 */
693     return;
694   }
695   ZopfliInitLZ77Store(lz77->data, &fixedstore);
696   if (expensivefixed) {
697     /* Recalculate the LZ77 with ZopfliLZ77OptimalFixed */
698     size_t instart = lz77->pos[lstart];
699     size_t inend = instart + ZopfliLZ77GetByteRange(lz77, lstart, lend);
700 
701     ZopfliBlockState s;
702     ZopfliInitBlockState(options, instart, inend, 1, &s);
703     ZopfliLZ77OptimalFixed(&s, lz77->data, instart, inend, &fixedstore);
704     fixedcost = ZopfliCalculateBlockSize(&fixedstore, 0, fixedstore.size, 1);
705     ZopfliCleanBlockState(&s);
706   }
707 
708   if (uncompressedcost < fixedcost && uncompressedcost < dyncost) {
709     AddLZ77Block(options, 0, final, lz77, lstart, lend,
710                  expected_data_size, bp, out, outsize);
711   } else if (fixedcost < dyncost) {
712     if (expensivefixed) {
713       AddLZ77Block(options, 1, final, &fixedstore, 0, fixedstore.size,
714                    expected_data_size, bp, out, outsize);
715     } else {
716       AddLZ77Block(options, 1, final, lz77, lstart, lend,
717                    expected_data_size, bp, out, outsize);
718     }
719   } else {
720     AddLZ77Block(options, 2, final, lz77, lstart, lend,
721                  expected_data_size, bp, out, outsize);
722   }
723 
724   ZopfliCleanLZ77Store(&fixedstore);
725 }
726 
727 /*
728 Deflate a part, to allow ZopfliDeflate() to use multiple master blocks if
729 needed.
730 It is possible to call this function multiple times in a row, shifting
731 instart and inend to next bytes of the data. If instart is larger than 0, then
732 previous bytes are used as the initial dictionary for LZ77.
733 This function will usually output multiple deflate blocks. If final is 1, then
734 the final bit will be set on the last block.
735 */
ZopfliDeflatePart(const ZopfliOptions * options,int btype,int final,const unsigned char * in,size_t instart,size_t inend,unsigned char * bp,unsigned char ** out,size_t * outsize)736 void ZopfliDeflatePart(const ZopfliOptions* options, int btype, int final,
737                        const unsigned char* in, size_t instart, size_t inend,
738                        unsigned char* bp, unsigned char** out,
739                        size_t* outsize) {
740   size_t i;
741   /* byte coordinates rather than lz77 index */
742   size_t* splitpoints_uncompressed = 0;
743   size_t npoints = 0;
744   size_t* splitpoints = 0;
745   double totalcost = 0;
746   ZopfliLZ77Store lz77;
747 
748   /* If btype=2 is specified, it tries all block types. If a lesser btype is
749   given, then however it forces that one. Neither of the lesser types needs
750   block splitting as they have no dynamic huffman trees. */
751   if (btype == 0) {
752     AddNonCompressedBlock(options, final, in, instart, inend, bp, out, outsize);
753     return;
754   } else if (btype == 1) {
755     ZopfliLZ77Store store;
756     ZopfliBlockState s;
757     ZopfliInitLZ77Store(in, &store);
758     ZopfliInitBlockState(options, instart, inend, 1, &s);
759 
760     ZopfliLZ77OptimalFixed(&s, in, instart, inend, &store);
761     AddLZ77Block(options, btype, final, &store, 0, store.size, 0,
762                  bp, out, outsize);
763 
764     ZopfliCleanBlockState(&s);
765     ZopfliCleanLZ77Store(&store);
766     return;
767   }
768 
769 
770   if (options->blocksplitting) {
771     ZopfliBlockSplit(options, in, instart, inend,
772                      options->blocksplittingmax,
773                      &splitpoints_uncompressed, &npoints);
774     splitpoints = (size_t*)malloc(sizeof(*splitpoints) * npoints);
775   }
776 
777   ZopfliInitLZ77Store(in, &lz77);
778 
779   for (i = 0; i <= npoints; i++) {
780     size_t start = i == 0 ? instart : splitpoints_uncompressed[i - 1];
781     size_t end = i == npoints ? inend : splitpoints_uncompressed[i];
782     ZopfliBlockState s;
783     ZopfliLZ77Store store;
784     ZopfliInitLZ77Store(in, &store);
785     ZopfliInitBlockState(options, start, end, 1, &s);
786     ZopfliLZ77Optimal(&s, in, start, end, options->numiterations, &store);
787     totalcost += ZopfliCalculateBlockSizeAutoType(&store, 0, store.size);
788 
789     ZopfliAppendLZ77Store(&store, &lz77);
790     if (i < npoints) splitpoints[i] = lz77.size;
791 
792     ZopfliCleanBlockState(&s);
793     ZopfliCleanLZ77Store(&store);
794   }
795 
796   /* Second block splitting attempt */
797   if (options->blocksplitting && npoints > 1) {
798     size_t* splitpoints2 = 0;
799     size_t npoints2 = 0;
800     double totalcost2 = 0;
801 
802     ZopfliBlockSplitLZ77(options, &lz77,
803                          options->blocksplittingmax, &splitpoints2, &npoints2);
804 
805     for (i = 0; i <= npoints2; i++) {
806       size_t start = i == 0 ? 0 : splitpoints2[i - 1];
807       size_t end = i == npoints2 ? lz77.size : splitpoints2[i];
808       totalcost2 += ZopfliCalculateBlockSizeAutoType(&lz77, start, end);
809     }
810 
811     if (totalcost2 < totalcost) {
812       free(splitpoints);
813       splitpoints = splitpoints2;
814       npoints = npoints2;
815     } else {
816       free(splitpoints2);
817     }
818   }
819 
820   for (i = 0; i <= npoints; i++) {
821     size_t start = i == 0 ? 0 : splitpoints[i - 1];
822     size_t end = i == npoints ? lz77.size : splitpoints[i];
823     AddLZ77BlockAutoType(options, i == npoints && final,
824                          &lz77, start, end, 0,
825                          bp, out, outsize);
826   }
827 
828   ZopfliCleanLZ77Store(&lz77);
829   free(splitpoints);
830   free(splitpoints_uncompressed);
831 }
832 
ZopfliDeflate(const ZopfliOptions * options,int btype,int final,const unsigned char * in,size_t insize,unsigned char * bp,unsigned char ** out,size_t * outsize)833 void ZopfliDeflate(const ZopfliOptions* options, int btype, int final,
834                    const unsigned char* in, size_t insize,
835                    unsigned char* bp, unsigned char** out, size_t* outsize) {
836  size_t offset = *outsize;
837 #if ZOPFLI_MASTER_BLOCK_SIZE == 0
838   ZopfliDeflatePart(options, btype, final, in, 0, insize, bp, out, outsize);
839 #else
840   size_t i = 0;
841   do {
842     int masterfinal = (i + ZOPFLI_MASTER_BLOCK_SIZE >= insize);
843     int final2 = final && masterfinal;
844     size_t size = masterfinal ? insize - i : ZOPFLI_MASTER_BLOCK_SIZE;
845     ZopfliDeflatePart(options, btype, final2,
846                       in, i, i + size, bp, out, outsize);
847     i += size;
848   } while (i < insize);
849 #endif
850   if (options->verbose) {
851     fprintf(stderr,
852             "Original Size: %lu, Deflate: %lu, Compression: %f%% Removed\n",
853             (unsigned long)insize, (unsigned long)(*outsize - offset),
854             100.0 * (double)(insize - (*outsize - offset)) / (double)insize);
855   }
856 }
857