1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8     http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19 
20 #include "blocksplitter.h"
21 
22 #include <assert.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 
26 #include "deflate.h"
27 #include "squeeze.h"
28 #include "tree.h"
29 #include "util.h"
30 
31 /*
32 The "f" for the FindMinimum function below.
33 i: the current parameter of f(i)
34 context: for your implementation
35 */
36 typedef double FindMinimumFun(size_t i, void* context);
37 
38 /*
39 Finds minimum of function f(i) where is is of type size_t, f(i) is of type
40 double, i is in range start-end (excluding end).
41 Outputs the minimum value in *smallest and returns the index of this value.
42 */
FindMinimum(FindMinimumFun f,void * context,size_t start,size_t end,double * smallest)43 static size_t FindMinimum(FindMinimumFun f, void* context,
44                           size_t start, size_t end, double* smallest) {
45   if (end - start < 1024) {
46     double best = ZOPFLI_LARGE_FLOAT;
47     size_t result = start;
48     size_t i;
49     for (i = start; i < end; i++) {
50       double v = f(i, context);
51       if (v < best) {
52         best = v;
53         result = i;
54       }
55     }
56     *smallest = best;
57     return result;
58   } else {
59     /* Try to find minimum faster by recursively checking multiple points. */
60 #define NUM 9  /* Good value: 9. */
61     size_t i;
62     size_t p[NUM];
63     double vp[NUM];
64     size_t besti;
65     double best;
66     double lastbest = ZOPFLI_LARGE_FLOAT;
67     size_t pos = start;
68 
69     for (;;) {
70       if (end - start <= NUM) break;
71 
72       for (i = 0; i < NUM; i++) {
73         p[i] = start + (i + 1) * ((end - start) / (NUM + 1));
74         vp[i] = f(p[i], context);
75       }
76       besti = 0;
77       best = vp[0];
78       for (i = 1; i < NUM; i++) {
79         if (vp[i] < best) {
80           best = vp[i];
81           besti = i;
82         }
83       }
84       if (best > lastbest) break;
85 
86       start = besti == 0 ? start : p[besti - 1];
87       end = besti == NUM - 1 ? end : p[besti + 1];
88 
89       pos = p[besti];
90       lastbest = best;
91     }
92     *smallest = lastbest;
93     return pos;
94 #undef NUM
95   }
96 }
97 
98 /*
99 Returns estimated cost of a block in bits.  It includes the size to encode the
100 tree and the size to encode all literal, length and distance symbols and their
101 extra bits.
102 
103 litlens: lz77 lit/lengths
104 dists: ll77 distances
105 lstart: start of block
106 lend: end of block (not inclusive)
107 */
EstimateCost(const ZopfliLZ77Store * lz77,size_t lstart,size_t lend)108 static double EstimateCost(const ZopfliLZ77Store* lz77,
109                            size_t lstart, size_t lend) {
110   return ZopfliCalculateBlockSizeAutoType(lz77, lstart, lend);
111 }
112 
113 typedef struct SplitCostContext {
114   const ZopfliLZ77Store* lz77;
115   size_t start;
116   size_t end;
117 } SplitCostContext;
118 
119 
120 /*
121 Gets the cost which is the sum of the cost of the left and the right section
122 of the data.
123 type: FindMinimumFun
124 */
SplitCost(size_t i,void * context)125 static double SplitCost(size_t i, void* context) {
126   SplitCostContext* c = (SplitCostContext*)context;
127   return EstimateCost(c->lz77, c->start, i) + EstimateCost(c->lz77, i, c->end);
128 }
129 
AddSorted(size_t value,size_t ** out,size_t * outsize)130 static void AddSorted(size_t value, size_t** out, size_t* outsize) {
131   size_t i;
132   ZOPFLI_APPEND_DATA(value, out, outsize);
133   for (i = 0; i + 1 < *outsize; i++) {
134     if ((*out)[i] > value) {
135       size_t j;
136       for (j = *outsize - 1; j > i; j--) {
137         (*out)[j] = (*out)[j - 1];
138       }
139       (*out)[i] = value;
140       break;
141     }
142   }
143 }
144 
145 /*
146 Prints the block split points as decimal and hex values in the terminal.
147 */
PrintBlockSplitPoints(const ZopfliLZ77Store * lz77,const size_t * lz77splitpoints,size_t nlz77points)148 static void PrintBlockSplitPoints(const ZopfliLZ77Store* lz77,
149                                   const size_t* lz77splitpoints,
150                                   size_t nlz77points) {
151   size_t* splitpoints = 0;
152   size_t npoints = 0;
153   size_t i;
154   /* The input is given as lz77 indices, but we want to see the uncompressed
155   index values. */
156   size_t pos = 0;
157   if (nlz77points > 0) {
158     for (i = 0; i < lz77->size; i++) {
159       size_t length = lz77->dists[i] == 0 ? 1 : lz77->litlens[i];
160       if (lz77splitpoints[npoints] == i) {
161         ZOPFLI_APPEND_DATA(pos, &splitpoints, &npoints);
162         if (npoints == nlz77points) break;
163       }
164       pos += length;
165     }
166   }
167   assert(npoints == nlz77points);
168 
169   fprintf(stderr, "block split points: ");
170   for (i = 0; i < npoints; i++) {
171     fprintf(stderr, "%d ", (int)splitpoints[i]);
172   }
173   fprintf(stderr, "(hex:");
174   for (i = 0; i < npoints; i++) {
175     fprintf(stderr, " %x", (int)splitpoints[i]);
176   }
177   fprintf(stderr, ")\n");
178 
179   free(splitpoints);
180 }
181 
182 /*
183 Finds next block to try to split, the largest of the available ones.
184 The largest is chosen to make sure that if only a limited amount of blocks is
185 requested, their sizes are spread evenly.
186 lz77size: the size of the LL77 data, which is the size of the done array here.
187 done: array indicating which blocks starting at that position are no longer
188     splittable (splitting them increases rather than decreases cost).
189 splitpoints: the splitpoints found so far.
190 npoints: the amount of splitpoints found so far.
191 lstart: output variable, giving start of block.
192 lend: output variable, giving end of block.
193 returns 1 if a block was found, 0 if no block found (all are done).
194 */
FindLargestSplittableBlock(size_t lz77size,const unsigned char * done,const size_t * splitpoints,size_t npoints,size_t * lstart,size_t * lend)195 static int FindLargestSplittableBlock(
196     size_t lz77size, const unsigned char* done,
197     const size_t* splitpoints, size_t npoints,
198     size_t* lstart, size_t* lend) {
199   size_t longest = 0;
200   int found = 0;
201   size_t i;
202   for (i = 0; i <= npoints; i++) {
203     size_t start = i == 0 ? 0 : splitpoints[i - 1];
204     size_t end = i == npoints ? lz77size - 1 : splitpoints[i];
205     if (!done[start] && end - start > longest) {
206       *lstart = start;
207       *lend = end;
208       found = 1;
209       longest = end - start;
210     }
211   }
212   return found;
213 }
214 
ZopfliBlockSplitLZ77(const ZopfliOptions * options,const ZopfliLZ77Store * lz77,size_t maxblocks,size_t ** splitpoints,size_t * npoints)215 void ZopfliBlockSplitLZ77(const ZopfliOptions* options,
216                           const ZopfliLZ77Store* lz77, size_t maxblocks,
217                           size_t** splitpoints, size_t* npoints) {
218   size_t lstart, lend;
219   size_t i;
220   size_t llpos = 0;
221   size_t numblocks = 1;
222   unsigned char* done;
223   double splitcost, origcost;
224 
225   if (lz77->size < 10) return;  /* This code fails on tiny files. */
226 
227   done = (unsigned char*)malloc(lz77->size);
228   if (!done) exit(-1); /* Allocation failed. */
229   for (i = 0; i < lz77->size; i++) done[i] = 0;
230 
231   lstart = 0;
232   lend = lz77->size;
233   for (;;) {
234     SplitCostContext c;
235 
236     if (maxblocks > 0 && numblocks >= maxblocks) {
237       break;
238     }
239 
240     c.lz77 = lz77;
241     c.start = lstart;
242     c.end = lend;
243     assert(lstart < lend);
244     llpos = FindMinimum(SplitCost, &c, lstart + 1, lend, &splitcost);
245 
246     assert(llpos > lstart);
247     assert(llpos < lend);
248 
249     origcost = EstimateCost(lz77, lstart, lend);
250 
251     if (splitcost > origcost || llpos == lstart + 1 || llpos == lend) {
252       done[lstart] = 1;
253     } else {
254       AddSorted(llpos, splitpoints, npoints);
255       numblocks++;
256     }
257 
258     if (!FindLargestSplittableBlock(
259         lz77->size, done, *splitpoints, *npoints, &lstart, &lend)) {
260       break;  /* No further split will probably reduce compression. */
261     }
262 
263     if (lend - lstart < 10) {
264       break;
265     }
266   }
267 
268   if (options->verbose) {
269     PrintBlockSplitPoints(lz77, *splitpoints, *npoints);
270   }
271 
272   free(done);
273 }
274 
ZopfliBlockSplit(const ZopfliOptions * options,const unsigned char * in,size_t instart,size_t inend,size_t maxblocks,size_t ** splitpoints,size_t * npoints)275 void ZopfliBlockSplit(const ZopfliOptions* options,
276                       const unsigned char* in, size_t instart, size_t inend,
277                       size_t maxblocks, size_t** splitpoints, size_t* npoints) {
278   size_t pos = 0;
279   size_t i;
280   ZopfliBlockState s;
281   size_t* lz77splitpoints = 0;
282   size_t nlz77points = 0;
283   ZopfliLZ77Store store;
284   ZopfliHash hash;
285   ZopfliHash* h = &hash;
286 
287   ZopfliInitLZ77Store(in, &store);
288   ZopfliInitBlockState(options, instart, inend, 0, &s);
289   ZopfliAllocHash(ZOPFLI_WINDOW_SIZE, h);
290 
291   *npoints = 0;
292   *splitpoints = 0;
293 
294   /* Unintuitively, Using a simple LZ77 method here instead of ZopfliLZ77Optimal
295   results in better blocks. */
296   ZopfliLZ77Greedy(&s, in, instart, inend, &store, h);
297 
298   ZopfliBlockSplitLZ77(options,
299                        &store, maxblocks,
300                        &lz77splitpoints, &nlz77points);
301 
302   /* Convert LZ77 positions to positions in the uncompressed input. */
303   pos = instart;
304   if (nlz77points > 0) {
305     for (i = 0; i < store.size; i++) {
306       size_t length = store.dists[i] == 0 ? 1 : store.litlens[i];
307       if (lz77splitpoints[*npoints] == i) {
308         ZOPFLI_APPEND_DATA(pos, splitpoints, npoints);
309         if (*npoints == nlz77points) break;
310       }
311       pos += length;
312     }
313   }
314   assert(*npoints == nlz77points);
315 
316   free(lz77splitpoints);
317   ZopfliCleanBlockState(&s);
318   ZopfliCleanLZ77Store(&store);
319   ZopfliCleanHash(h);
320 }
321 
ZopfliBlockSplitSimple(const unsigned char * in,size_t instart,size_t inend,size_t blocksize,size_t ** splitpoints,size_t * npoints)322 void ZopfliBlockSplitSimple(const unsigned char* in,
323                             size_t instart, size_t inend,
324                             size_t blocksize,
325                             size_t** splitpoints, size_t* npoints) {
326   size_t i = instart;
327   while (i < inend) {
328     ZOPFLI_APPEND_DATA(i, splitpoints, npoints);
329     i += blocksize;
330   }
331   (void)in;
332 }
333