1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Author: Jyrki Alakuijala (jyrki@google.com)
11 //
12 
13 #include <assert.h>
14 #include <float.h>
15 #include <math.h>
16 
17 #include "src/dsp/dsp.h"
18 #include "src/dsp/lossless.h"
19 #include "src/dsp/lossless_common.h"
20 #include "src/enc/backward_references_enc.h"
21 #include "src/enc/histogram_enc.h"
22 #include "src/utils/color_cache_utils.h"
23 #include "src/utils/utils.h"
24 
25 #define MIN_BLOCK_SIZE 256  // minimum block size for backward references
26 
27 #define MAX_ENTROPY    (1e30f)
28 
29 // 1M window (4M bytes) minus 120 special codes for short distances.
30 #define WINDOW_SIZE ((1 << WINDOW_SIZE_BITS) - 120)
31 
32 // Minimum number of pixels for which it is cheaper to encode a
33 // distance + length instead of each pixel as a literal.
34 #define MIN_LENGTH 4
35 
36 // -----------------------------------------------------------------------------
37 
38 static const uint8_t plane_to_code_lut[128] = {
39  96,   73,  55,  39,  23,  13,   5,  1,  255, 255, 255, 255, 255, 255, 255, 255,
40  101,  78,  58,  42,  26,  16,   8,  2,    0,   3,  9,   17,  27,  43,  59,  79,
41  102,  86,  62,  46,  32,  20,  10,  6,    4,   7,  11,  21,  33,  47,  63,  87,
42  105,  90,  70,  52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53,  71,  91,
43  110,  99,  82,  66,  48,  35,  30,  24,  22,  25,  31,  36,  49,  67,  83, 100,
44  115, 108,  94,  76,  64,  50,  44,  40,  34,  41,  45,  51,  65,  77,  95, 109,
45  118, 113, 103,  92,  80,  68,  60,  56,  54,  57,  61,  69,  81,  93, 104, 114,
46  119, 116, 111, 106,  97,  88,  84,  74,  72,  75,  85,  89,  98, 107, 112, 117
47 };
48 
49 extern int VP8LDistanceToPlaneCode(int xsize, int dist);
VP8LDistanceToPlaneCode(int xsize,int dist)50 int VP8LDistanceToPlaneCode(int xsize, int dist) {
51   const int yoffset = dist / xsize;
52   const int xoffset = dist - yoffset * xsize;
53   if (xoffset <= 8 && yoffset < 8) {
54     return plane_to_code_lut[yoffset * 16 + 8 - xoffset] + 1;
55   } else if (xoffset > xsize - 8 && yoffset < 7) {
56     return plane_to_code_lut[(yoffset + 1) * 16 + 8 + (xsize - xoffset)] + 1;
57   }
58   return dist + 120;
59 }
60 
61 // Returns the exact index where array1 and array2 are different. For an index
62 // inferior or equal to best_len_match, the return value just has to be strictly
63 // inferior to best_len_match. The current behavior is to return 0 if this index
64 // is best_len_match, and the index itself otherwise.
65 // If no two elements are the same, it returns max_limit.
FindMatchLength(const uint32_t * const array1,const uint32_t * const array2,int best_len_match,int max_limit)66 static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
67                                        const uint32_t* const array2,
68                                        int best_len_match, int max_limit) {
69   // Before 'expensive' linear match, check if the two arrays match at the
70   // current best length index.
71   if (array1[best_len_match] != array2[best_len_match]) return 0;
72 
73   return VP8LVectorMismatch(array1, array2, max_limit);
74 }
75 
76 // -----------------------------------------------------------------------------
77 //  VP8LBackwardRefs
78 
79 struct PixOrCopyBlock {
80   PixOrCopyBlock* next_;   // next block (or NULL)
81   PixOrCopy* start_;       // data start
82   int size_;               // currently used size
83 };
84 
85 extern void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
VP8LClearBackwardRefs(VP8LBackwardRefs * const refs)86 void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) {
87   assert(refs != NULL);
88   if (refs->tail_ != NULL) {
89     *refs->tail_ = refs->free_blocks_;  // recycle all blocks at once
90   }
91   refs->free_blocks_ = refs->refs_;
92   refs->tail_ = &refs->refs_;
93   refs->last_block_ = NULL;
94   refs->refs_ = NULL;
95 }
96 
VP8LBackwardRefsClear(VP8LBackwardRefs * const refs)97 void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
98   assert(refs != NULL);
99   VP8LClearBackwardRefs(refs);
100   while (refs->free_blocks_ != NULL) {
101     PixOrCopyBlock* const next = refs->free_blocks_->next_;
102     WebPSafeFree(refs->free_blocks_);
103     refs->free_blocks_ = next;
104   }
105 }
106 
107 // Swaps the content of two VP8LBackwardRefs.
BackwardRefsSwap(VP8LBackwardRefs * const refs1,VP8LBackwardRefs * const refs2)108 static void BackwardRefsSwap(VP8LBackwardRefs* const refs1,
109                              VP8LBackwardRefs* const refs2) {
110   const int point_to_refs1 =
111       (refs1->tail_ != NULL && refs1->tail_ == &refs1->refs_);
112   const int point_to_refs2 =
113       (refs2->tail_ != NULL && refs2->tail_ == &refs2->refs_);
114   const VP8LBackwardRefs tmp = *refs1;
115   *refs1 = *refs2;
116   *refs2 = tmp;
117   if (point_to_refs2) refs1->tail_ = &refs1->refs_;
118   if (point_to_refs1) refs2->tail_ = &refs2->refs_;
119 }
120 
VP8LBackwardRefsInit(VP8LBackwardRefs * const refs,int block_size)121 void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
122   assert(refs != NULL);
123   memset(refs, 0, sizeof(*refs));
124   refs->tail_ = &refs->refs_;
125   refs->block_size_ =
126       (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
127 }
128 
VP8LRefsCursorInit(const VP8LBackwardRefs * const refs)129 VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
130   VP8LRefsCursor c;
131   c.cur_block_ = refs->refs_;
132   if (refs->refs_ != NULL) {
133     c.cur_pos = c.cur_block_->start_;
134     c.last_pos_ = c.cur_pos + c.cur_block_->size_;
135   } else {
136     c.cur_pos = NULL;
137     c.last_pos_ = NULL;
138   }
139   return c;
140 }
141 
VP8LRefsCursorNextBlock(VP8LRefsCursor * const c)142 void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
143   PixOrCopyBlock* const b = c->cur_block_->next_;
144   c->cur_pos = (b == NULL) ? NULL : b->start_;
145   c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
146   c->cur_block_ = b;
147 }
148 
149 // Create a new block, either from the free list or allocated
BackwardRefsNewBlock(VP8LBackwardRefs * const refs)150 static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
151   PixOrCopyBlock* b = refs->free_blocks_;
152   if (b == NULL) {   // allocate new memory chunk
153     const size_t total_size =
154         sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
155     b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
156     if (b == NULL) {
157       refs->error_ |= 1;
158       return NULL;
159     }
160     b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
161   } else {  // recycle from free-list
162     refs->free_blocks_ = b->next_;
163   }
164   *refs->tail_ = b;
165   refs->tail_ = &b->next_;
166   refs->last_block_ = b;
167   b->next_ = NULL;
168   b->size_ = 0;
169   return b;
170 }
171 
172 // Return 1 on success, 0 on error.
BackwardRefsClone(const VP8LBackwardRefs * const from,VP8LBackwardRefs * const to)173 static int BackwardRefsClone(const VP8LBackwardRefs* const from,
174                              VP8LBackwardRefs* const to) {
175   const PixOrCopyBlock* block_from = from->refs_;
176   VP8LClearBackwardRefs(to);
177   while (block_from != NULL) {
178     PixOrCopyBlock* const block_to = BackwardRefsNewBlock(to);
179     if (block_to == NULL) return 0;
180     memcpy(block_to->start_, block_from->start_,
181            block_from->size_ * sizeof(PixOrCopy));
182     block_to->size_ = block_from->size_;
183     block_from = block_from->next_;
184   }
185   return 1;
186 }
187 
188 extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
189                                       const PixOrCopy v);
VP8LBackwardRefsCursorAdd(VP8LBackwardRefs * const refs,const PixOrCopy v)190 void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
191                                const PixOrCopy v) {
192   PixOrCopyBlock* b = refs->last_block_;
193   if (b == NULL || b->size_ == refs->block_size_) {
194     b = BackwardRefsNewBlock(refs);
195     if (b == NULL) return;   // refs->error_ is set
196   }
197   b->start_[b->size_++] = v;
198 }
199 
200 // -----------------------------------------------------------------------------
201 // Hash chains
202 
VP8LHashChainInit(VP8LHashChain * const p,int size)203 int VP8LHashChainInit(VP8LHashChain* const p, int size) {
204   assert(p->size_ == 0);
205   assert(p->offset_length_ == NULL);
206   assert(size > 0);
207   p->offset_length_ =
208       (uint32_t*)WebPSafeMalloc(size, sizeof(*p->offset_length_));
209   if (p->offset_length_ == NULL) return 0;
210   p->size_ = size;
211 
212   return 1;
213 }
214 
VP8LHashChainClear(VP8LHashChain * const p)215 void VP8LHashChainClear(VP8LHashChain* const p) {
216   assert(p != NULL);
217   WebPSafeFree(p->offset_length_);
218 
219   p->size_ = 0;
220   p->offset_length_ = NULL;
221 }
222 
223 // -----------------------------------------------------------------------------
224 
225 static const uint32_t kHashMultiplierHi = 0xc6a4a793u;
226 static const uint32_t kHashMultiplierLo = 0x5bd1e996u;
227 
228 static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
GetPixPairHash64(const uint32_t * const argb)229 uint32_t GetPixPairHash64(const uint32_t* const argb) {
230   uint32_t key;
231   key  = argb[1] * kHashMultiplierHi;
232   key += argb[0] * kHashMultiplierLo;
233   key = key >> (32 - HASH_BITS);
234   return key;
235 }
236 
237 // Returns the maximum number of hash chain lookups to do for a
238 // given compression quality. Return value in range [8, 86].
GetMaxItersForQuality(int quality)239 static int GetMaxItersForQuality(int quality) {
240   return 8 + (quality * quality) / 128;
241 }
242 
GetWindowSizeForHashChain(int quality,int xsize)243 static int GetWindowSizeForHashChain(int quality, int xsize) {
244   const int max_window_size = (quality > 75) ? WINDOW_SIZE
245                             : (quality > 50) ? (xsize << 8)
246                             : (quality > 25) ? (xsize << 6)
247                             : (xsize << 4);
248   assert(xsize > 0);
249   return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size;
250 }
251 
MaxFindCopyLength(int len)252 static WEBP_INLINE int MaxFindCopyLength(int len) {
253   return (len < MAX_LENGTH) ? len : MAX_LENGTH;
254 }
255 
VP8LHashChainFill(VP8LHashChain * const p,int quality,const uint32_t * const argb,int xsize,int ysize,int low_effort)256 int VP8LHashChainFill(VP8LHashChain* const p, int quality,
257                       const uint32_t* const argb, int xsize, int ysize,
258                       int low_effort) {
259   const int size = xsize * ysize;
260   const int iter_max = GetMaxItersForQuality(quality);
261   const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize);
262   int pos;
263   int argb_comp;
264   uint32_t base_position;
265   int32_t* hash_to_first_index;
266   // Temporarily use the p->offset_length_ as a hash chain.
267   int32_t* chain = (int32_t*)p->offset_length_;
268   assert(size > 0);
269   assert(p->size_ != 0);
270   assert(p->offset_length_ != NULL);
271 
272   if (size <= 2) {
273     p->offset_length_[0] = p->offset_length_[size - 1] = 0;
274     return 1;
275   }
276 
277   hash_to_first_index =
278       (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index));
279   if (hash_to_first_index == NULL) return 0;
280 
281   // Set the int32_t array to -1.
282   memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index));
283   // Fill the chain linking pixels with the same hash.
284   argb_comp = (argb[0] == argb[1]);
285   for (pos = 0; pos < size - 2;) {
286     uint32_t hash_code;
287     const int argb_comp_next = (argb[pos + 1] == argb[pos + 2]);
288     if (argb_comp && argb_comp_next) {
289       // Consecutive pixels with the same color will share the same hash.
290       // We therefore use a different hash: the color and its repetition
291       // length.
292       uint32_t tmp[2];
293       uint32_t len = 1;
294       tmp[0] = argb[pos];
295       // Figure out how far the pixels are the same.
296       // The last pixel has a different 64 bit hash, as its next pixel does
297       // not have the same color, so we just need to get to the last pixel equal
298       // to its follower.
299       while (pos + (int)len + 2 < size && argb[pos + len + 2] == argb[pos]) {
300         ++len;
301       }
302       if (len > MAX_LENGTH) {
303         // Skip the pixels that match for distance=1 and length>MAX_LENGTH
304         // because they are linked to their predecessor and we automatically
305         // check that in the main for loop below. Skipping means setting no
306         // predecessor in the chain, hence -1.
307         memset(chain + pos, 0xff, (len - MAX_LENGTH) * sizeof(*chain));
308         pos += len - MAX_LENGTH;
309         len = MAX_LENGTH;
310       }
311       // Process the rest of the hash chain.
312       while (len) {
313         tmp[1] = len--;
314         hash_code = GetPixPairHash64(tmp);
315         chain[pos] = hash_to_first_index[hash_code];
316         hash_to_first_index[hash_code] = pos++;
317       }
318       argb_comp = 0;
319     } else {
320       // Just move one pixel forward.
321       hash_code = GetPixPairHash64(argb + pos);
322       chain[pos] = hash_to_first_index[hash_code];
323       hash_to_first_index[hash_code] = pos++;
324       argb_comp = argb_comp_next;
325     }
326   }
327   // Process the penultimate pixel.
328   chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)];
329 
330   WebPSafeFree(hash_to_first_index);
331 
332   // Find the best match interval at each pixel, defined by an offset to the
333   // pixel and a length. The right-most pixel cannot match anything to the right
334   // (hence a best length of 0) and the left-most pixel nothing to the left
335   // (hence an offset of 0).
336   assert(size > 2);
337   p->offset_length_[0] = p->offset_length_[size - 1] = 0;
338   for (base_position = size - 2; base_position > 0;) {
339     const int max_len = MaxFindCopyLength(size - 1 - base_position);
340     const uint32_t* const argb_start = argb + base_position;
341     int iter = iter_max;
342     int best_length = 0;
343     uint32_t best_distance = 0;
344     uint32_t best_argb;
345     const int min_pos =
346         (base_position > window_size) ? base_position - window_size : 0;
347     const int length_max = (max_len < 256) ? max_len : 256;
348     uint32_t max_base_position;
349 
350     pos = chain[base_position];
351     if (!low_effort) {
352       int curr_length;
353       // Heuristic: use the comparison with the above line as an initialization.
354       if (base_position >= (uint32_t)xsize) {
355         curr_length = FindMatchLength(argb_start - xsize, argb_start,
356                                       best_length, max_len);
357         if (curr_length > best_length) {
358           best_length = curr_length;
359           best_distance = xsize;
360         }
361         --iter;
362       }
363       // Heuristic: compare to the previous pixel.
364       curr_length =
365           FindMatchLength(argb_start - 1, argb_start, best_length, max_len);
366       if (curr_length > best_length) {
367         best_length = curr_length;
368         best_distance = 1;
369       }
370       --iter;
371       // Skip the for loop if we already have the maximum.
372       if (best_length == MAX_LENGTH) pos = min_pos - 1;
373     }
374     best_argb = argb_start[best_length];
375 
376     for (; pos >= min_pos && --iter; pos = chain[pos]) {
377       int curr_length;
378       assert(base_position > (uint32_t)pos);
379 
380       if (argb[pos + best_length] != best_argb) continue;
381 
382       curr_length = VP8LVectorMismatch(argb + pos, argb_start, max_len);
383       if (best_length < curr_length) {
384         best_length = curr_length;
385         best_distance = base_position - pos;
386         best_argb = argb_start[best_length];
387         // Stop if we have reached a good enough length.
388         if (best_length >= length_max) break;
389       }
390     }
391     // We have the best match but in case the two intervals continue matching
392     // to the left, we have the best matches for the left-extended pixels.
393     max_base_position = base_position;
394     while (1) {
395       assert(best_length <= MAX_LENGTH);
396       assert(best_distance <= WINDOW_SIZE);
397       p->offset_length_[base_position] =
398           (best_distance << MAX_LENGTH_BITS) | (uint32_t)best_length;
399       --base_position;
400       // Stop if we don't have a match or if we are out of bounds.
401       if (best_distance == 0 || base_position == 0) break;
402       // Stop if we cannot extend the matching intervals to the left.
403       if (base_position < best_distance ||
404           argb[base_position - best_distance] != argb[base_position]) {
405         break;
406       }
407       // Stop if we are matching at its limit because there could be a closer
408       // matching interval with the same maximum length. Then again, if the
409       // matching interval is as close as possible (best_distance == 1), we will
410       // never find anything better so let's continue.
411       if (best_length == MAX_LENGTH && best_distance != 1 &&
412           base_position + MAX_LENGTH < max_base_position) {
413         break;
414       }
415       if (best_length < MAX_LENGTH) {
416         ++best_length;
417         max_base_position = base_position;
418       }
419     }
420   }
421   return 1;
422 }
423 
AddSingleLiteral(uint32_t pixel,int use_color_cache,VP8LColorCache * const hashers,VP8LBackwardRefs * const refs)424 static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
425                                          VP8LColorCache* const hashers,
426                                          VP8LBackwardRefs* const refs) {
427   PixOrCopy v;
428   if (use_color_cache) {
429     const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel);
430     if (VP8LColorCacheLookup(hashers, key) == pixel) {
431       v = PixOrCopyCreateCacheIdx(key);
432     } else {
433       v = PixOrCopyCreateLiteral(pixel);
434       VP8LColorCacheSet(hashers, key, pixel);
435     }
436   } else {
437     v = PixOrCopyCreateLiteral(pixel);
438   }
439   VP8LBackwardRefsCursorAdd(refs, v);
440 }
441 
BackwardReferencesRle(int xsize,int ysize,const uint32_t * const argb,int cache_bits,VP8LBackwardRefs * const refs)442 static int BackwardReferencesRle(int xsize, int ysize,
443                                  const uint32_t* const argb,
444                                  int cache_bits, VP8LBackwardRefs* const refs) {
445   const int pix_count = xsize * ysize;
446   int i, k;
447   const int use_color_cache = (cache_bits > 0);
448   VP8LColorCache hashers;
449 
450   if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) {
451     return 0;
452   }
453   VP8LClearBackwardRefs(refs);
454   // Add first pixel as literal.
455   AddSingleLiteral(argb[0], use_color_cache, &hashers, refs);
456   i = 1;
457   while (i < pix_count) {
458     const int max_len = MaxFindCopyLength(pix_count - i);
459     const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len);
460     const int prev_row_len = (i < xsize) ? 0 :
461         FindMatchLength(argb + i, argb + i - xsize, 0, max_len);
462     if (rle_len >= prev_row_len && rle_len >= MIN_LENGTH) {
463       VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len));
464       // We don't need to update the color cache here since it is always the
465       // same pixel being copied, and that does not change the color cache
466       // state.
467       i += rle_len;
468     } else if (prev_row_len >= MIN_LENGTH) {
469       VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len));
470       if (use_color_cache) {
471         for (k = 0; k < prev_row_len; ++k) {
472           VP8LColorCacheInsert(&hashers, argb[i + k]);
473         }
474       }
475       i += prev_row_len;
476     } else {
477       AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
478       i++;
479     }
480   }
481   if (use_color_cache) VP8LColorCacheClear(&hashers);
482   return !refs->error_;
483 }
484 
BackwardReferencesLz77(int xsize,int ysize,const uint32_t * const argb,int cache_bits,const VP8LHashChain * const hash_chain,VP8LBackwardRefs * const refs)485 static int BackwardReferencesLz77(int xsize, int ysize,
486                                   const uint32_t* const argb, int cache_bits,
487                                   const VP8LHashChain* const hash_chain,
488                                   VP8LBackwardRefs* const refs) {
489   int i;
490   int i_last_check = -1;
491   int ok = 0;
492   int cc_init = 0;
493   const int use_color_cache = (cache_bits > 0);
494   const int pix_count = xsize * ysize;
495   VP8LColorCache hashers;
496 
497   if (use_color_cache) {
498     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
499     if (!cc_init) goto Error;
500   }
501   VP8LClearBackwardRefs(refs);
502   for (i = 0; i < pix_count;) {
503     // Alternative#1: Code the pixels starting at 'i' using backward reference.
504     int offset = 0;
505     int len = 0;
506     int j;
507     VP8LHashChainFindCopy(hash_chain, i, &offset, &len);
508     if (len >= MIN_LENGTH) {
509       const int len_ini = len;
510       int max_reach = 0;
511       const int j_max =
512           (i + len_ini >= pix_count) ? pix_count - 1 : i + len_ini;
513       // Only start from what we have not checked already.
514       i_last_check = (i > i_last_check) ? i : i_last_check;
515       // We know the best match for the current pixel but we try to find the
516       // best matches for the current pixel AND the next one combined.
517       // The naive method would use the intervals:
518       // [i,i+len) + [i+len, length of best match at i+len)
519       // while we check if we can use:
520       // [i,j) (where j<=i+len) + [j, length of best match at j)
521       for (j = i_last_check + 1; j <= j_max; ++j) {
522         const int len_j = VP8LHashChainFindLength(hash_chain, j);
523         const int reach =
524             j + (len_j >= MIN_LENGTH ? len_j : 1);  // 1 for single literal.
525         if (reach > max_reach) {
526           len = j - i;
527           max_reach = reach;
528           if (max_reach >= pix_count) break;
529         }
530       }
531     } else {
532       len = 1;
533     }
534     // Go with literal or backward reference.
535     assert(len > 0);
536     if (len == 1) {
537       AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
538     } else {
539       VP8LBackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
540       if (use_color_cache) {
541         for (j = i; j < i + len; ++j) VP8LColorCacheInsert(&hashers, argb[j]);
542       }
543     }
544     i += len;
545   }
546 
547   ok = !refs->error_;
548  Error:
549   if (cc_init) VP8LColorCacheClear(&hashers);
550   return ok;
551 }
552 
553 // Compute an LZ77 by forcing matches to happen within a given distance cost.
554 // We therefore limit the algorithm to the lowest 32 values in the PlaneCode
555 // definition.
556 #define WINDOW_OFFSETS_SIZE_MAX 32
BackwardReferencesLz77Box(int xsize,int ysize,const uint32_t * const argb,int cache_bits,const VP8LHashChain * const hash_chain_best,VP8LHashChain * hash_chain,VP8LBackwardRefs * const refs)557 static int BackwardReferencesLz77Box(int xsize, int ysize,
558                                      const uint32_t* const argb, int cache_bits,
559                                      const VP8LHashChain* const hash_chain_best,
560                                      VP8LHashChain* hash_chain,
561                                      VP8LBackwardRefs* const refs) {
562   int i;
563   const int pix_count = xsize * ysize;
564   uint16_t* counts;
565   int window_offsets[WINDOW_OFFSETS_SIZE_MAX] = {0};
566   int window_offsets_new[WINDOW_OFFSETS_SIZE_MAX] = {0};
567   int window_offsets_size = 0;
568   int window_offsets_new_size = 0;
569   uint16_t* const counts_ini =
570       (uint16_t*)WebPSafeMalloc(xsize * ysize, sizeof(*counts_ini));
571   int best_offset_prev = -1, best_length_prev = -1;
572   if (counts_ini == NULL) return 0;
573 
574   // counts[i] counts how many times a pixel is repeated starting at position i.
575   i = pix_count - 2;
576   counts = counts_ini + i;
577   counts[1] = 1;
578   for (; i >= 0; --i, --counts) {
579     if (argb[i] == argb[i + 1]) {
580       // Max out the counts to MAX_LENGTH.
581       counts[0] = counts[1] + (counts[1] != MAX_LENGTH);
582     } else {
583       counts[0] = 1;
584     }
585   }
586 
587   // Figure out the window offsets around a pixel. They are stored in a
588   // spiraling order around the pixel as defined by VP8LDistanceToPlaneCode.
589   {
590     int x, y;
591     for (y = 0; y <= 6; ++y) {
592       for (x = -6; x <= 6; ++x) {
593         const int offset = y * xsize + x;
594         int plane_code;
595         // Ignore offsets that bring us after the pixel.
596         if (offset <= 0) continue;
597         plane_code = VP8LDistanceToPlaneCode(xsize, offset) - 1;
598         if (plane_code >= WINDOW_OFFSETS_SIZE_MAX) continue;
599         window_offsets[plane_code] = offset;
600       }
601     }
602     // For narrow images, not all plane codes are reached, so remove those.
603     for (i = 0; i < WINDOW_OFFSETS_SIZE_MAX; ++i) {
604       if (window_offsets[i] == 0) continue;
605       window_offsets[window_offsets_size++] = window_offsets[i];
606     }
607     // Given a pixel P, find the offsets that reach pixels unreachable from P-1
608     // with any of the offsets in window_offsets[].
609     for (i = 0; i < window_offsets_size; ++i) {
610       int j;
611       int is_reachable = 0;
612       for (j = 0; j < window_offsets_size && !is_reachable; ++j) {
613         is_reachable |= (window_offsets[i] == window_offsets[j] + 1);
614       }
615       if (!is_reachable) {
616         window_offsets_new[window_offsets_new_size] = window_offsets[i];
617         ++window_offsets_new_size;
618       }
619     }
620   }
621 
622   hash_chain->offset_length_[0] = 0;
623   for (i = 1; i < pix_count; ++i) {
624     int ind;
625     int best_length = VP8LHashChainFindLength(hash_chain_best, i);
626     int best_offset;
627     int do_compute = 1;
628 
629     if (best_length >= MAX_LENGTH) {
630       // Do not recompute the best match if we already have a maximal one in the
631       // window.
632       best_offset = VP8LHashChainFindOffset(hash_chain_best, i);
633       for (ind = 0; ind < window_offsets_size; ++ind) {
634         if (best_offset == window_offsets[ind]) {
635           do_compute = 0;
636           break;
637         }
638       }
639     }
640     if (do_compute) {
641       // Figure out if we should use the offset/length from the previous pixel
642       // as an initial guess and therefore only inspect the offsets in
643       // window_offsets_new[].
644       const int use_prev =
645           (best_length_prev > 1) && (best_length_prev < MAX_LENGTH);
646       const int num_ind =
647           use_prev ? window_offsets_new_size : window_offsets_size;
648       best_length = use_prev ? best_length_prev - 1 : 0;
649       best_offset = use_prev ? best_offset_prev : 0;
650       // Find the longest match in a window around the pixel.
651       for (ind = 0; ind < num_ind; ++ind) {
652         int curr_length = 0;
653         int j = i;
654         int j_offset =
655             use_prev ? i - window_offsets_new[ind] : i - window_offsets[ind];
656         if (j_offset < 0 || argb[j_offset] != argb[i]) continue;
657         // The longest match is the sum of how many times each pixel is
658         // repeated.
659         do {
660           const int counts_j_offset = counts_ini[j_offset];
661           const int counts_j = counts_ini[j];
662           if (counts_j_offset != counts_j) {
663             curr_length +=
664                 (counts_j_offset < counts_j) ? counts_j_offset : counts_j;
665             break;
666           }
667           // The same color is repeated counts_pos times at j_offset and j.
668           curr_length += counts_j_offset;
669           j_offset += counts_j_offset;
670           j += counts_j_offset;
671         } while (curr_length <= MAX_LENGTH && j < pix_count &&
672                  argb[j_offset] == argb[j]);
673         if (best_length < curr_length) {
674           best_offset =
675               use_prev ? window_offsets_new[ind] : window_offsets[ind];
676           if (curr_length >= MAX_LENGTH) {
677             best_length = MAX_LENGTH;
678             break;
679           } else {
680             best_length = curr_length;
681           }
682         }
683       }
684     }
685 
686     assert(i + best_length <= pix_count);
687     assert(best_length <= MAX_LENGTH);
688     if (best_length <= MIN_LENGTH) {
689       hash_chain->offset_length_[i] = 0;
690       best_offset_prev = 0;
691       best_length_prev = 0;
692     } else {
693       hash_chain->offset_length_[i] =
694           (best_offset << MAX_LENGTH_BITS) | (uint32_t)best_length;
695       best_offset_prev = best_offset;
696       best_length_prev = best_length;
697     }
698   }
699   hash_chain->offset_length_[0] = 0;
700   WebPSafeFree(counts_ini);
701 
702   return BackwardReferencesLz77(xsize, ysize, argb, cache_bits, hash_chain,
703                                 refs);
704 }
705 
706 // -----------------------------------------------------------------------------
707 
BackwardReferences2DLocality(int xsize,const VP8LBackwardRefs * const refs)708 static void BackwardReferences2DLocality(int xsize,
709                                          const VP8LBackwardRefs* const refs) {
710   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
711   while (VP8LRefsCursorOk(&c)) {
712     if (PixOrCopyIsCopy(c.cur_pos)) {
713       const int dist = c.cur_pos->argb_or_distance;
714       const int transformed_dist = VP8LDistanceToPlaneCode(xsize, dist);
715       c.cur_pos->argb_or_distance = transformed_dist;
716     }
717     VP8LRefsCursorNext(&c);
718   }
719 }
720 
721 // Evaluate optimal cache bits for the local color cache.
722 // The input *best_cache_bits sets the maximum cache bits to use (passing 0
723 // implies disabling the local color cache). The local color cache is also
724 // disabled for the lower (<= 25) quality.
725 // Returns 0 in case of memory error.
CalculateBestCacheSize(const uint32_t * argb,int quality,const VP8LBackwardRefs * const refs,int * const best_cache_bits)726 static int CalculateBestCacheSize(const uint32_t* argb, int quality,
727                                   const VP8LBackwardRefs* const refs,
728                                   int* const best_cache_bits) {
729   int i;
730   const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits;
731   double entropy_min = MAX_ENTROPY;
732   int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 };
733   VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1];
734   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
735   VP8LHistogram* histos[MAX_COLOR_CACHE_BITS + 1] = { NULL };
736   int ok = 0;
737 
738   assert(cache_bits_max >= 0 && cache_bits_max <= MAX_COLOR_CACHE_BITS);
739 
740   if (cache_bits_max == 0) {
741     *best_cache_bits = 0;
742     // Local color cache is disabled.
743     return 1;
744   }
745 
746   // Allocate data.
747   for (i = 0; i <= cache_bits_max; ++i) {
748     histos[i] = VP8LAllocateHistogram(i);
749     if (histos[i] == NULL) goto Error;
750     VP8LHistogramInit(histos[i], i, /*init_arrays=*/ 1);
751     if (i == 0) continue;
752     cc_init[i] = VP8LColorCacheInit(&hashers[i], i);
753     if (!cc_init[i]) goto Error;
754   }
755 
756   // Find the cache_bits giving the lowest entropy. The search is done in a
757   // brute-force way as the function (entropy w.r.t cache_bits) can be
758   // anything in practice.
759   while (VP8LRefsCursorOk(&c)) {
760     const PixOrCopy* const v = c.cur_pos;
761     if (PixOrCopyIsLiteral(v)) {
762       const uint32_t pix = *argb++;
763       const uint32_t a = (pix >> 24) & 0xff;
764       const uint32_t r = (pix >> 16) & 0xff;
765       const uint32_t g = (pix >>  8) & 0xff;
766       const uint32_t b = (pix >>  0) & 0xff;
767       // The keys of the caches can be derived from the longest one.
768       int key = VP8LHashPix(pix, 32 - cache_bits_max);
769       // Do not use the color cache for cache_bits = 0.
770       ++histos[0]->blue_[b];
771       ++histos[0]->literal_[g];
772       ++histos[0]->red_[r];
773       ++histos[0]->alpha_[a];
774       // Deal with cache_bits > 0.
775       for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
776         if (VP8LColorCacheLookup(&hashers[i], key) == pix) {
777           ++histos[i]->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
778         } else {
779           VP8LColorCacheSet(&hashers[i], key, pix);
780           ++histos[i]->blue_[b];
781           ++histos[i]->literal_[g];
782           ++histos[i]->red_[r];
783           ++histos[i]->alpha_[a];
784         }
785       }
786     } else {
787       int code, extra_bits, extra_bits_value;
788       // We should compute the contribution of the (distance,length)
789       // histograms but those are the same independently from the cache size.
790       // As those constant contributions are in the end added to the other
791       // histogram contributions, we can ignore them, except for the length
792       // prefix that is part of the literal_ histogram.
793       int len = PixOrCopyLength(v);
794       uint32_t argb_prev = *argb ^ 0xffffffffu;
795       VP8LPrefixEncode(len, &code, &extra_bits, &extra_bits_value);
796       for (i = 0; i <= cache_bits_max; ++i) {
797         ++histos[i]->literal_[NUM_LITERAL_CODES + code];
798       }
799       // Update the color caches.
800       do {
801         if (*argb != argb_prev) {
802           // Efficiency: insert only if the color changes.
803           int key = VP8LHashPix(*argb, 32 - cache_bits_max);
804           for (i = cache_bits_max; i >= 1; --i, key >>= 1) {
805             hashers[i].colors_[key] = *argb;
806           }
807           argb_prev = *argb;
808         }
809         argb++;
810       } while (--len != 0);
811     }
812     VP8LRefsCursorNext(&c);
813   }
814 
815   for (i = 0; i <= cache_bits_max; ++i) {
816     const double entropy = VP8LHistogramEstimateBits(histos[i]);
817     if (i == 0 || entropy < entropy_min) {
818       entropy_min = entropy;
819       *best_cache_bits = i;
820     }
821   }
822   ok = 1;
823 Error:
824   for (i = 0; i <= cache_bits_max; ++i) {
825     if (cc_init[i]) VP8LColorCacheClear(&hashers[i]);
826     VP8LFreeHistogram(histos[i]);
827   }
828   return ok;
829 }
830 
831 // Update (in-place) backward references for specified cache_bits.
BackwardRefsWithLocalCache(const uint32_t * const argb,int cache_bits,VP8LBackwardRefs * const refs)832 static int BackwardRefsWithLocalCache(const uint32_t* const argb,
833                                       int cache_bits,
834                                       VP8LBackwardRefs* const refs) {
835   int pixel_index = 0;
836   VP8LColorCache hashers;
837   VP8LRefsCursor c = VP8LRefsCursorInit(refs);
838   if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0;
839 
840   while (VP8LRefsCursorOk(&c)) {
841     PixOrCopy* const v = c.cur_pos;
842     if (PixOrCopyIsLiteral(v)) {
843       const uint32_t argb_literal = v->argb_or_distance;
844       const int ix = VP8LColorCacheContains(&hashers, argb_literal);
845       if (ix >= 0) {
846         // hashers contains argb_literal
847         *v = PixOrCopyCreateCacheIdx(ix);
848       } else {
849         VP8LColorCacheInsert(&hashers, argb_literal);
850       }
851       ++pixel_index;
852     } else {
853       // refs was created without local cache, so it can not have cache indexes.
854       int k;
855       assert(PixOrCopyIsCopy(v));
856       for (k = 0; k < v->len; ++k) {
857         VP8LColorCacheInsert(&hashers, argb[pixel_index++]);
858       }
859     }
860     VP8LRefsCursorNext(&c);
861   }
862   VP8LColorCacheClear(&hashers);
863   return 1;
864 }
865 
GetBackwardReferencesLowEffort(int width,int height,const uint32_t * const argb,int * const cache_bits,const VP8LHashChain * const hash_chain,VP8LBackwardRefs * const refs_lz77)866 static VP8LBackwardRefs* GetBackwardReferencesLowEffort(
867     int width, int height, const uint32_t* const argb,
868     int* const cache_bits, const VP8LHashChain* const hash_chain,
869     VP8LBackwardRefs* const refs_lz77) {
870   *cache_bits = 0;
871   if (!BackwardReferencesLz77(width, height, argb, 0, hash_chain, refs_lz77)) {
872     return NULL;
873   }
874   BackwardReferences2DLocality(width, refs_lz77);
875   return refs_lz77;
876 }
877 
878 extern int VP8LBackwardReferencesTraceBackwards(
879     int xsize, int ysize, const uint32_t* const argb, int cache_bits,
880     const VP8LHashChain* const hash_chain,
881     const VP8LBackwardRefs* const refs_src, VP8LBackwardRefs* const refs_dst);
GetBackwardReferences(int width,int height,const uint32_t * const argb,int quality,int lz77_types_to_try,int cache_bits_max,int do_no_cache,const VP8LHashChain * const hash_chain,VP8LBackwardRefs * const refs,int * const cache_bits_best)882 static int GetBackwardReferences(int width, int height,
883                                  const uint32_t* const argb, int quality,
884                                  int lz77_types_to_try, int cache_bits_max,
885                                  int do_no_cache,
886                                  const VP8LHashChain* const hash_chain,
887                                  VP8LBackwardRefs* const refs,
888                                  int* const cache_bits_best) {
889   VP8LHistogram* histo = NULL;
890   int i, lz77_type;
891   // Index 0 is for a color cache, index 1 for no cache (if needed).
892   int lz77_types_best[2] = {0, 0};
893   double bit_costs_best[2] = {DBL_MAX, DBL_MAX};
894   VP8LHashChain hash_chain_box;
895   VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1];
896   int status = 0;
897   memset(&hash_chain_box, 0, sizeof(hash_chain_box));
898 
899   histo = VP8LAllocateHistogram(MAX_COLOR_CACHE_BITS);
900   if (histo == NULL) goto Error;
901 
902   for (lz77_type = 1; lz77_types_to_try;
903        lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) {
904     int res = 0;
905     double bit_cost = 0.;
906     if ((lz77_types_to_try & lz77_type) == 0) continue;
907     switch (lz77_type) {
908       case kLZ77RLE:
909         res = BackwardReferencesRle(width, height, argb, 0, refs_tmp);
910         break;
911       case kLZ77Standard:
912         // Compute LZ77 with no cache (0 bits), as the ideal LZ77 with a color
913         // cache is not that different in practice.
914         res = BackwardReferencesLz77(width, height, argb, 0, hash_chain,
915                                      refs_tmp);
916         break;
917       case kLZ77Box:
918         if (!VP8LHashChainInit(&hash_chain_box, width * height)) goto Error;
919         res = BackwardReferencesLz77Box(width, height, argb, 0, hash_chain,
920                                         &hash_chain_box, refs_tmp);
921         break;
922       default:
923         assert(0);
924     }
925     if (!res) goto Error;
926 
927     // Start with the no color cache case.
928     for (i = 1; i >= 0; --i) {
929       int cache_bits = (i == 1) ? 0 : cache_bits_max;
930 
931       if (i == 1 && !do_no_cache) continue;
932 
933       if (i == 0) {
934         // Try with a color cache.
935         if (!CalculateBestCacheSize(argb, quality, refs_tmp, &cache_bits)) {
936           goto Error;
937         }
938         if (cache_bits > 0) {
939           if (!BackwardRefsWithLocalCache(argb, cache_bits, refs_tmp)) {
940             goto Error;
941           }
942         }
943       }
944 
945       if (i == 0 && do_no_cache && cache_bits == 0) {
946         // No need to re-compute bit_cost as it was computed at i == 1.
947       } else {
948         VP8LHistogramCreate(histo, refs_tmp, cache_bits);
949         bit_cost = VP8LHistogramEstimateBits(histo);
950       }
951 
952       if (bit_cost < bit_costs_best[i]) {
953         if (i == 1) {
954           // Do not swap as the full cache analysis would have the wrong
955           // VP8LBackwardRefs to start with.
956           if (!BackwardRefsClone(refs_tmp, &refs[1])) goto Error;
957         } else {
958           BackwardRefsSwap(refs_tmp, &refs[0]);
959         }
960         bit_costs_best[i] = bit_cost;
961         lz77_types_best[i] = lz77_type;
962         if (i == 0) *cache_bits_best = cache_bits;
963       }
964     }
965   }
966   assert(lz77_types_best[0] > 0);
967   assert(!do_no_cache || lz77_types_best[1] > 0);
968 
969   // Improve on simple LZ77 but only for high quality (TraceBackwards is
970   // costly).
971   for (i = 1; i >= 0; --i) {
972     if (i == 1 && !do_no_cache) continue;
973     if ((lz77_types_best[i] == kLZ77Standard ||
974          lz77_types_best[i] == kLZ77Box) &&
975         quality >= 25) {
976       const VP8LHashChain* const hash_chain_tmp =
977           (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box;
978       const int cache_bits = (i == 1) ? 0 : *cache_bits_best;
979       if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits,
980                                                hash_chain_tmp, &refs[i],
981                                                refs_tmp)) {
982         double bit_cost_trace;
983         VP8LHistogramCreate(histo, refs_tmp, cache_bits);
984         bit_cost_trace = VP8LHistogramEstimateBits(histo);
985         if (bit_cost_trace < bit_costs_best[i]) {
986           BackwardRefsSwap(refs_tmp, &refs[i]);
987         }
988       }
989     }
990 
991     BackwardReferences2DLocality(width, &refs[i]);
992 
993     if (i == 1 && lz77_types_best[0] == lz77_types_best[1] &&
994         *cache_bits_best == 0) {
995       // If the best cache size is 0 and we have the same best LZ77, just copy
996       // the data over and stop here.
997       if (!BackwardRefsClone(&refs[1], &refs[0])) goto Error;
998       break;
999     }
1000   }
1001   status = 1;
1002 
1003 Error:
1004   VP8LHashChainClear(&hash_chain_box);
1005   VP8LFreeHistogram(histo);
1006   return status;
1007 }
1008 
VP8LGetBackwardReferences(int width,int height,const uint32_t * const argb,int quality,int low_effort,int lz77_types_to_try,int cache_bits_max,int do_no_cache,const VP8LHashChain * const hash_chain,VP8LBackwardRefs * const refs,int * const cache_bits_best)1009 WebPEncodingError VP8LGetBackwardReferences(
1010     int width, int height, const uint32_t* const argb, int quality,
1011     int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache,
1012     const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs,
1013     int* const cache_bits_best) {
1014   if (low_effort) {
1015     VP8LBackwardRefs* refs_best;
1016     *cache_bits_best = cache_bits_max;
1017     refs_best = GetBackwardReferencesLowEffort(
1018         width, height, argb, cache_bits_best, hash_chain, refs);
1019     if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
1020     // Set it in first position.
1021     BackwardRefsSwap(refs_best, &refs[0]);
1022   } else {
1023     if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try,
1024                                cache_bits_max, do_no_cache, hash_chain, refs,
1025                                cache_bits_best)) {
1026       return VP8_ENC_ERROR_OUT_OF_MEMORY;
1027     }
1028   }
1029   return VP8_ENC_OK;
1030 }
1031