1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // Image transforms and color space conversion methods for lossless decoder.
11 //
12 // Authors: Vikas Arora (vikaas.arora@gmail.com)
13 //          Jyrki Alakuijala (jyrki@google.com)
14 //          Urvang Joshi (urvang@google.com)
15 
16 #include "src/dsp/dsp.h"
17 
18 #include <assert.h>
19 #include <math.h>
20 #include <stdlib.h>
21 #include "src/dec/vp8li_dec.h"
22 #include "src/utils/endian_inl_utils.h"
23 #include "src/dsp/lossless.h"
24 #include "src/dsp/lossless_common.h"
25 
// Very large floating-point cost constant.
// NOTE(review): nothing in the visible part of this file references
// MAX_DIFF_COST -- confirm whether it is still needed here.
#define MAX_DIFF_COST (1e30f)
27 
28 //------------------------------------------------------------------------------
29 // Image transforms.
30 
// Averages two packed 32-bit pixels byte lane by byte lane, rounding each
// 8-bit lane down. Relies on (x + y) / 2 == (x & y) + ((x ^ y) >> 1); the
// 0xfefefefe mask clears each lane's low bit before the shift so it cannot
// spill into the lane below.
static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
  const uint32_t shared_bits = a0 & a1;
  const uint32_t half_of_differing_bits = ((a0 ^ a1) & 0xfefefefeu) >> 1;
  return half_of_differing_bits + shared_bits;
}
34 
Average3(uint32_t a0,uint32_t a1,uint32_t a2)35 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
36   return Average2(Average2(a0, a2), a1);
37 }
38 
Average4(uint32_t a0,uint32_t a1,uint32_t a2,uint32_t a3)39 static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
40                                      uint32_t a2, uint32_t a3) {
41   return Average2(Average2(a0, a1), Average2(a2, a3));
42 }
43 
// Clamps 'a' to the range [0, 255]. 'a' is an unsigned view of a (possibly
// negative) small signed value, as produced by the 8-bit component arithmetic
// of the callers in this file.
static WEBP_INLINE uint32_t Clip255(uint32_t a) {
  // Out-of-range values: a wrapped negative has its top byte set, so
  // ~a >> 24 yields 0; a positive overflow has a clear top byte, so it
  // yields 255.
  return (a < 256) ? a : (~a >> 24);
}
52 
AddSubtractComponentFull(int a,int b,int c)53 static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
54   return Clip255(a + b - c);
55 }
56 
ClampedAddSubtractFull(uint32_t c0,uint32_t c1,uint32_t c2)57 static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
58                                                    uint32_t c2) {
59   const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
60   const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
61                                          (c1 >> 16) & 0xff,
62                                          (c2 >> 16) & 0xff);
63   const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
64                                          (c1 >> 8) & 0xff,
65                                          (c2 >> 8) & 0xff);
66   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
67   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
68 }
69 
AddSubtractComponentHalf(int a,int b)70 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
71   return Clip255(a + (a - b) / 2);
72 }
73 
ClampedAddSubtractHalf(uint32_t c0,uint32_t c1,uint32_t c2)74 static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
75                                                    uint32_t c2) {
76   const uint32_t ave = Average2(c0, c1);
77   const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
78   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
79   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
80   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
81   return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
82 }
83 
// gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
// inlined, so inlining is explicitly disabled for that configuration.
#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
# define LOCAL_INLINE __attribute__ ((noinline))
#else
# define LOCAL_INLINE WEBP_INLINE
#endif

// Returns |b - c| - |a - c| for one colour component: negative when 'b' is
// closer to 'c' than 'a' is, positive when 'a' is closer.
static LOCAL_INLINE int Sub3(int a, int b, int c) {
  const int dist_a = abs(a - c);
  const int dist_b = abs(b - c);
  return dist_b - dist_a;
}

#undef LOCAL_INLINE
99 
Select(uint32_t a,uint32_t b,uint32_t c)100 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
101   const int pa_minus_pb =
102       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
103       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
104       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
105       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
106   return (pa_minus_pb <= 0) ? a : b;
107 }
108 
109 //------------------------------------------------------------------------------
110 // Predictors
111 
Predictor0_C(uint32_t left,const uint32_t * const top)112 static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) {
113   (void)top;
114   (void)left;
115   return ARGB_BLACK;
116 }
// Mode 1: predicts the left neighbour. 'top' is unused.
static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = left;
  (void)top;
  return pred;
}
// Mode 2: predicts the pixel directly above (top[0]). 'left' is unused.
static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = top[0];
  (void)left;
  return pred;
}
// Mode 3: predicts the pixel above and one to the right (top[1]).
// 'left' is unused.
static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = top[1];
  (void)left;
  return pred;
}
// Mode 4: predicts the pixel above and one to the left (top[-1]).
// 'left' is unused.
static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = top[-1];
  (void)left;
  return pred;
}
// Mode 5: Average3 of the left, top (top[0]) and top-right (top[1]) pixels.
static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) {
  return Average3(left, top[0], top[1]);
}
// Mode 6: average of the left and top-left (top[-1]) pixels.
static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) {
  return Average2(left, top[-1]);
}
// Mode 7: average of the left and top (top[0]) pixels.
static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) {
  return Average2(left, top[0]);
}
// Mode 8: average of the top-left (top[-1]) and top (top[0]) pixels.
// 'left' is unused.
static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) {
  (void)left;
  return Average2(top[-1], top[0]);
}
// Mode 9: average of the top (top[0]) and top-right (top[1]) pixels.
// 'left' is unused.
static uint32_t Predictor9_C(uint32_t left, const uint32_t* const top) {
  (void)left;
  return Average2(top[0], top[1]);
}
// Mode 10: Average4 of the left, top-left, top and top-right pixels.
static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) {
  return Average4(left, top[-1], top[0], top[1]);
}
// Mode 11: Select() between the top pixel (top[0]) and the left pixel, using
// the top-left pixel (top[-1]) as the reference.
static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) {
  return Select(top[0], left, top[-1]);
}
// Mode 12: per-channel clamped 'left + top - top-left'.
static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) {
  return ClampedAddSubtractFull(left, top[0], top[-1]);
}
// Mode 13: per-channel clamped 'avg + (avg - top-left) / 2', where avg is
// the average of the left and top pixels.
static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
  return ClampedAddSubtractHalf(left, top[0], top[-1]);
}
171 
// Instantiates PredictorAdd0_C from Predictor0_C. GENERATE_PREDICTOR_ADD is
// defined outside this file (presumably in lossless_common.h -- confirm).
// The generated function is called in this file with the argument list
// (in, upper, num_pixels, out).
GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
// Inverse of the 'left' predictor (mode 1) over a run of pixels.
//   in:         residual pixels to decode.
//   upper:      unused by this mode.
//   num_pixels: number of pixels to process.
//   out:        destination; out[-1] must already hold the decoded pixel to
//               the left of the run, since it seeds the prediction chain.
// Each output pixel is VP8LAddPixels(residual, previous output pixel).
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
                            int num_pixels, uint32_t* out) {
  int x;
  uint32_t left = out[-1];
  (void)upper;
  for (x = 0; x < num_pixels; ++x) {
    left = VP8LAddPixels(in[x], left);
    out[x] = left;
  }
}
// Instantiates PredictorAdd<N>_C from Predictor<N>_C for modes 2..13.
// GENERATE_PREDICTOR_ADD is defined outside this file (presumably in
// lossless_common.h -- confirm). Mode 1 is not generated here; it has a
// hand-written PredictorAdd1_C.
GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C)
GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C)
GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C)
GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C)
GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C)
GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C)
GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C)
GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C)
GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C)
GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C)
194 
195 //------------------------------------------------------------------------------
196 
197 // Inverse prediction.
198 static void PredictorInverseTransform_C(const VP8LTransform* const transform,
199                                         int y_start, int y_end,
200                                         const uint32_t* in, uint32_t* out) {
201   const int width = transform->xsize_;
202   if (y_start == 0) {  // First Row follows the L (mode=1) mode.
203     PredictorAdd0_C(in, NULL, 1, out);
204     PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
205     in += width;
206     out += width;
207     ++y_start;
208   }
209 
210   {
211     int y = y_start;
212     const int tile_width = 1 << transform->bits_;
213     const int mask = tile_width - 1;
214     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
215     const uint32_t* pred_mode_base =
216         transform->data_ + (y >> transform->bits_) * tiles_per_row;
217 
218     while (y < y_end) {
219       const uint32_t* pred_mode_src = pred_mode_base;
220       int x = 1;
221       // First pixel follows the T (mode=2) mode.
222       PredictorAdd2_C(in, out - width, 1, out);
223       // .. the rest:
224       while (x < width) {
225         const VP8LPredictorAddSubFunc pred_func =
226             VP8LPredictorsAdd[((*pred_mode_src++) >> 8) & 0xf];
227         int x_end = (x & ~mask) + tile_width;
228         if (x_end > width) x_end = width;
229         pred_func(in + x, out + x - width, x_end - x, out + x);
230         x = x_end;
231       }
232       in += width;
233       out += width;
234       ++y;
235       if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
236         pred_mode_base += tiles_per_row;
237       }
238     }
239   }
240 }
241 
// Inverse of the 'subtract green' transform: adds the green channel of each
// pixel to its red and blue channels (modulo 256). Alpha and green are left
// unchanged. Reads src[i] before writing dst[i], so src and dst may be the
// same buffer.
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
                                uint32_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t pixel = src[i];
    const uint32_t green = (pixel >> 8) & 0xff;
    const uint32_t alpha_green = pixel & 0xff00ff00u;
    // Red and blue are updated together; the final mask discards the carry
    // out of each 8-bit lane.
    const uint32_t red_blue =
        ((pixel & 0x00ff00ffu) + ((green << 16) | green)) & 0x00ff00ffu;
    dst[i] = alpha_green | red_blue;
  }
}
256 
// Multiplies two signed 8-bit values in 'int' precision and shifts the
// product right by 5 bits (i.e. 'color_pred' acts as a multiplier with five
// fractional bits).
static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
                                           int8_t color) {
  const int product = (int)color_pred * color;
  return product >> 5;
}
261 
ColorCodeToMultipliers(uint32_t color_code,VP8LMultipliers * const m)262 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
263                                                VP8LMultipliers* const m) {
264   m->green_to_red_  = (color_code >>  0) & 0xff;
265   m->green_to_blue_ = (color_code >>  8) & 0xff;
266   m->red_to_blue_   = (color_code >> 16) & 0xff;
267 }
268 
VP8LTransformColorInverse_C(const VP8LMultipliers * const m,const uint32_t * src,int num_pixels,uint32_t * dst)269 void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
270                                  const uint32_t* src, int num_pixels,
271                                  uint32_t* dst) {
272   int i;
273   for (i = 0; i < num_pixels; ++i) {
274     const uint32_t argb = src[i];
275     const uint32_t green = argb >> 8;
276     const uint32_t red = argb >> 16;
277     int new_red = red & 0xff;
278     int new_blue = argb & 0xff;
279     new_red += ColorTransformDelta(m->green_to_red_, green);
280     new_red &= 0xff;
281     new_blue += ColorTransformDelta(m->green_to_blue_, green);
282     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
283     new_blue &= 0xff;
284     dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
285   }
286 }
287 
288 // Color space inverse transform.
ColorSpaceInverseTransform_C(const VP8LTransform * const transform,int y_start,int y_end,const uint32_t * src,uint32_t * dst)289 static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
290                                          int y_start, int y_end,
291                                          const uint32_t* src, uint32_t* dst) {
292   const int width = transform->xsize_;
293   const int tile_width = 1 << transform->bits_;
294   const int mask = tile_width - 1;
295   const int safe_width = width & ~mask;
296   const int remaining_width = width - safe_width;
297   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
298   int y = y_start;
299   const uint32_t* pred_row =
300       transform->data_ + (y >> transform->bits_) * tiles_per_row;
301 
302   while (y < y_end) {
303     const uint32_t* pred = pred_row;
304     VP8LMultipliers m = { 0, 0, 0 };
305     const uint32_t* const src_safe_end = src + safe_width;
306     const uint32_t* const src_end = src + width;
307     while (src < src_safe_end) {
308       ColorCodeToMultipliers(*pred++, &m);
309       VP8LTransformColorInverse(&m, src, tile_width, dst);
310       src += tile_width;
311       dst += tile_width;
312     }
313     if (src < src_end) {  // Left-overs using C-version.
314       ColorCodeToMultipliers(*pred++, &m);
315       VP8LTransformColorInverse(&m, src, remaining_width, dst);
316       src += remaining_width;
317       dst += remaining_width;
318     }
319     ++y;
320     if ((y & mask) == 0) pred_row += tiles_per_row;
321   }
322 }
323 
// Separate out pixels packed together using pixel-bundling.
// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
//
// COLOR_INDEX_INVERSE generates two functions:
//  - F_NAME: a static helper that maps every source element through
//    'color_map' one-to-one (no bundling), for rows [y_start, y_end).
//  - FUNC_NAME: the transform entry point. When fewer than 8 bits are used
//    per pixel (transform->bits_ > 0) it unpacks several indices from each
//    source element, lowest bits first, restarting with a fresh source
//    element at the beginning of every row. Otherwise it dispatches to
//    VP8LMapColor##BIT_SUFFIX.
// Macro parameters:
//   FUNC_NAME    name of the generated entry point.
//   F_NAME       name of the generated static one-to-one mapper.
//   STATIC_DECL  storage-class specifier for FUNC_NAME ('static' or empty).
//   TYPE         element type of the src/dst buffers.
//   BIT_SUFFIX   suffix selecting the VP8LMapColor<suffix> function pointer.
//   GET_INDEX    extracts the (packed) index value from a source element.
//   GET_VALUE    converts a color_map entry into a TYPE value.
#define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
                            GET_INDEX, GET_VALUE)                              \
static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
                   TYPE* dst, int y_start, int y_end, int width) {             \
  int y;                                                                       \
  for (y = y_start; y < y_end; ++y) {                                          \
    int x;                                                                     \
    for (x = 0; x < width; ++x) {                                              \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    }                                                                          \
  }                                                                            \
}                                                                              \
STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
                           int y_start, int y_end, const TYPE* src,            \
                           TYPE* dst) {                                        \
  int y;                                                                       \
  const int bits_per_pixel = 8 >> transform->bits_;                            \
  const int width = transform->xsize_;                                         \
  const uint32_t* const color_map = transform->data_;                          \
  if (bits_per_pixel < 8) {                                                    \
    const int pixels_per_byte = 1 << transform->bits_;                         \
    const int count_mask = pixels_per_byte - 1;                                \
    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
    for (y = y_start; y < y_end; ++y) {                                        \
      uint32_t packed_pixels = 0;                                              \
      int x;                                                                   \
      for (x = 0; x < width; ++x) {                                            \
        /* We need to load fresh 'packed_pixels' once every                */  \
        /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
        /* is a power of 2, so can just use a mask for that, instead of    */  \
        /* decrementing a counter.                                         */  \
        if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
        *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
        packed_pixels >>= bits_per_pixel;                                      \
      }                                                                        \
    }                                                                          \
  } else {                                                                     \
    VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
  }                                                                            \
}

// ARGB variant: static entry point ColorIndexInverseTransform_C plus the
// MapARGB_C helper.
COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
                    uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
// Alpha-only variant: externally visible VP8LColorIndexInverseTransformAlpha
// plus the MapAlpha_C helper.
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
                    uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef COLOR_INDEX_INVERSE
373 
VP8LInverseTransform(const VP8LTransform * const transform,int row_start,int row_end,const uint32_t * const in,uint32_t * const out)374 void VP8LInverseTransform(const VP8LTransform* const transform,
375                           int row_start, int row_end,
376                           const uint32_t* const in, uint32_t* const out) {
377   const int width = transform->xsize_;
378   assert(row_start < row_end);
379   assert(row_end <= transform->ysize_);
380   switch (transform->type_) {
381     case SUBTRACT_GREEN:
382       VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
383       break;
384     case PREDICTOR_TRANSFORM:
385       PredictorInverseTransform_C(transform, row_start, row_end, in, out);
386       if (row_end != transform->ysize_) {
387         // The last predicted row in this iteration will be the top-pred row
388         // for the first row in next iteration.
389         memcpy(out - width, out + (row_end - row_start - 1) * width,
390                width * sizeof(*out));
391       }
392       break;
393     case CROSS_COLOR_TRANSFORM:
394       ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
395       break;
396     case COLOR_INDEXING_TRANSFORM:
397       if (in == out && transform->bits_ > 0) {
398         // Move packed pixels to the end of unpacked region, so that unpacking
399         // can occur seamlessly.
400         // Also, note that this is the only transform that applies on
401         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
402         // transforms work on effective width of xsize_.
403         const int out_stride = (row_end - row_start) * width;
404         const int in_stride = (row_end - row_start) *
405             VP8LSubSampleSize(transform->xsize_, transform->bits_);
406         uint32_t* const src = out + out_stride - in_stride;
407         memmove(src, out, in_stride * sizeof(*src));
408         ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
409       } else {
410         ColorIndexInverseTransform_C(transform, row_start, row_end, in, out);
411       }
412       break;
413   }
414 }
415 
416 //------------------------------------------------------------------------------
417 // Color space conversion.
418 
// Returns non-zero when the host stores the most significant byte first.
// A 16-bit value of 1 is inspected through its first byte: little-endian
// hosts store the 1 there, big-endian hosts store 0.
static int is_big_endian(void) {
  static const union {
    uint16_t w;
    uint8_t b[2];
  } probe = { 1 };
  const int low_byte_first = (probe.b[0] == 1);
  return !low_byte_first;
}
426 
// Converts 'num_pixels' packed 0xAARRGGBB pixels into 3 bytes per pixel in
// R, G, B order. Alpha is dropped. 'dst' must hold 3 * num_pixels bytes.
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
                            int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    uint8_t* const px = dst + 3 * i;
    px[0] = (argb >> 16) & 0xff;
    px[1] = (argb >>  8) & 0xff;
    px[2] = (argb >>  0) & 0xff;
  }
}
437 
// Converts 'num_pixels' packed 0xAARRGGBB pixels into 4 bytes per pixel in
// R, G, B, A order. 'dst' must hold 4 * num_pixels bytes.
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    uint8_t* const px = dst + 4 * i;
    px[0] = (argb >> 16) & 0xff;
    px[1] = (argb >>  8) & 0xff;
    px[2] = (argb >>  0) & 0xff;
    px[3] = (argb >> 24) & 0xff;
  }
}
449 
// Converts 'num_pixels' packed 0xAARRGGBB pixels into 2 bytes per pixel,
// keeping the top 4 bits of each channel: one byte holds R (high nibble) and
// G (low nibble), the other holds B (high nibble) and A (low nibble).
// The RG byte is written first unless WEBP_SWAP_16BIT_CSP == 1, in which
// case the two bytes are swapped.
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
                                 int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
    uint8_t* const px = dst + 2 * i;
#if (WEBP_SWAP_16BIT_CSP == 1)
    px[0] = ba;
    px[1] = rg;
#else
    px[0] = rg;
    px[1] = ba;
#endif
  }
}
466 
// Converts 'num_pixels' packed 0xAARRGGBB pixels into 2 bytes per pixel in
// 5-6-5 layout: one byte holds the top 5 bits of R and the top 3 bits of G,
// the other holds the next 3 bits of G and the top 5 bits of B. Alpha is
// dropped. The RG byte is written first unless WEBP_SWAP_16BIT_CSP == 1, in
// which case the two bytes are swapped.
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
                               int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
    uint8_t* const px = dst + 2 * i;
#if (WEBP_SWAP_16BIT_CSP == 1)
    px[0] = gb;
    px[1] = rg;
#else
    px[0] = rg;
    px[1] = gb;
#endif
  }
}
483 
// Converts 'num_pixels' packed 0xAARRGGBB pixels into 3 bytes per pixel in
// B, G, R order. Alpha is dropped. 'dst' must hold 3 * num_pixels bytes.
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
                            int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    uint8_t* const px = dst + 3 * i;
    px[0] = (argb >>  0) & 0xff;
    px[1] = (argb >>  8) & 0xff;
    px[2] = (argb >> 16) & 0xff;
  }
}
494 
// Writes 'num_pixels' 32-bit pixels from 'src' to the byte buffer 'dst'.
// If the host endianness (as reported by is_big_endian()) equals
// 'swap_on_big_endian', every pixel is byte-swapped before being stored;
// otherwise the pixels are copied verbatim.
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
                       int swap_on_big_endian) {
  int i;
  if (is_big_endian() != swap_on_big_endian) {
    // Memory layout already matches the requested byte order.
    memcpy(dst, src, num_pixels * sizeof(*src));
    return;
  }
  for (i = 0; i < num_pixels; ++i) {
    const uint32_t argb = src[i];
    WebPUint32ToMem(dst, BSwap32(argb));
    dst += sizeof(argb);
  }
}
508 
// Converts 'num_pixels' decoded pixels from 'in_data' to the byte layout
// requested by 'out_colorspace', writing the result to 'rgba'.
// The premultiplied modes (MODE_rgbA, MODE_bgrA, MODE_Argb, MODE_rgbA_4444)
// perform the same conversion as their plain counterparts and then run an
// alpha-multiply pass over the output, treated as one row of 'num_pixels'.
// NOTE(review): the second argument of WebPApplyAlphaMultiply is 1 only for
// the ARGB layout -- presumably an 'alpha first' flag; confirm against its
// declaration.
// Any other colorspace value triggers assert(0).
void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
  switch (out_colorspace) {
    case MODE_RGB:
      VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
      break;
    case MODE_BGR:
      VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
      break;
    case MODE_RGB_565:
      VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
      break;
    case MODE_RGBA:
    case MODE_rgbA:
      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
      if (out_colorspace == MODE_rgbA) {
        WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
      }
      break;
    case MODE_BGRA:
    case MODE_bgrA:
      CopyOrSwap(in_data, num_pixels, rgba, 1);
      if (out_colorspace == MODE_bgrA) {
        WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
      }
      break;
    case MODE_ARGB:
    case MODE_Argb:
      CopyOrSwap(in_data, num_pixels, rgba, 0);
      if (out_colorspace == MODE_Argb) {
        WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
      }
      break;
    case MODE_RGBA_4444:
    case MODE_rgbA_4444:
      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
      if (out_colorspace == MODE_rgbA_4444) {
        WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
      }
      break;
    default:
      assert(0);          // Code flow should not reach here.
  }
}
553 
554 //------------------------------------------------------------------------------
555 
// Definitions of the function pointers and predictor tables that
// VP8LDspInit() fills in. Each pointer receives the plain-C implementation
// by default (unless WEBP_NEON_OMIT_C_CODE is set for some of them) and may
// then be overwritten by a platform-specific init function.

// Inverse 'subtract green' transform.
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
// Predictor tables. Only 14 modes have dedicated functions; entries 14 and
// 15 are filled with the mode-0 function, so any 4-bit mode index is valid.
VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
VP8LPredictorFunc VP8LPredictors[16];

// exposed plain-C implementations
VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
VP8LPredictorFunc VP8LPredictors_C[16];

// Inverse cross-colour transform.
VP8LTransformColorInverseFunc VP8LTransformColorInverse;

// Output colour-space converters used by VP8LConvertFromBGRA().
VP8LConvertFunc VP8LConvertBGRAToRGB;
VP8LConvertFunc VP8LConvertBGRAToRGBA;
VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
VP8LConvertFunc VP8LConvertBGRAToRGB565;
VP8LConvertFunc VP8LConvertBGRAToBGR;

// One-to-one palette mappers for ARGB (32b) and alpha-only (8b) data.
VP8LMapARGBFunc VP8LMapColor32b;
VP8LMapAlphaFunc VP8LMapColor8b;

// Platform-specific initialisers, defined in other translation units and
// called from VP8LDspInit() under the matching WEBP_USE_* guards.
extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitNEON(void);
extern void VP8LDspInitMIPSdspR2(void);
extern void VP8LDspInitMSA(void);
579 
580 #define COPY_PREDICTOR_ARRAY(IN, OUT) do {                \
581   (OUT)[0] = IN##0_C;                                     \
582   (OUT)[1] = IN##1_C;                                     \
583   (OUT)[2] = IN##2_C;                                     \
584   (OUT)[3] = IN##3_C;                                     \
585   (OUT)[4] = IN##4_C;                                     \
586   (OUT)[5] = IN##5_C;                                     \
587   (OUT)[6] = IN##6_C;                                     \
588   (OUT)[7] = IN##7_C;                                     \
589   (OUT)[8] = IN##8_C;                                     \
590   (OUT)[9] = IN##9_C;                                     \
591   (OUT)[10] = IN##10_C;                                   \
592   (OUT)[11] = IN##11_C;                                   \
593   (OUT)[12] = IN##12_C;                                   \
594   (OUT)[13] = IN##13_C;                                   \
595   (OUT)[14] = IN##0_C; /* <- padding security sentinels*/ \
596   (OUT)[15] = IN##0_C;                                    \
597 } while (0);
598 
WEBP_DSP_INIT_FUNC(VP8LDspInit)599 WEBP_DSP_INIT_FUNC(VP8LDspInit) {
600   COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors)
601   COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C)
602   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
603   COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
604 
605 #if !WEBP_NEON_OMIT_C_CODE
606   VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
607 
608   VP8LTransformColorInverse = VP8LTransformColorInverse_C;
609 
610   VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
611   VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
612   VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
613 #endif
614 
615   VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
616   VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
617 
618   VP8LMapColor32b = MapARGB_C;
619   VP8LMapColor8b = MapAlpha_C;
620 
621   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
622   if (VP8GetCPUInfo != NULL) {
623 #if defined(WEBP_USE_SSE2)
624     if (VP8GetCPUInfo(kSSE2)) {
625       VP8LDspInitSSE2();
626     }
627 #endif
628 #if defined(WEBP_USE_MIPS_DSP_R2)
629     if (VP8GetCPUInfo(kMIPSdspR2)) {
630       VP8LDspInitMIPSdspR2();
631     }
632 #endif
633 #if defined(WEBP_USE_MSA)
634     if (VP8GetCPUInfo(kMSA)) {
635       VP8LDspInitMSA();
636     }
637 #endif
638   }
639 
640 #if defined(WEBP_USE_NEON)
641   if (WEBP_NEON_OMIT_C_CODE ||
642       (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
643     VP8LDspInitNEON();
644   }
645 #endif
646 
647   assert(VP8LAddGreenToBlueAndRed != NULL);
648   assert(VP8LTransformColorInverse != NULL);
649   assert(VP8LConvertBGRAToRGBA != NULL);
650   assert(VP8LConvertBGRAToRGB != NULL);
651   assert(VP8LConvertBGRAToBGR != NULL);
652   assert(VP8LConvertBGRAToRGBA4444 != NULL);
653   assert(VP8LConvertBGRAToRGB565 != NULL);
654   assert(VP8LMapColor32b != NULL);
655   assert(VP8LMapColor8b != NULL);
656 }
657 #undef COPY_PREDICTOR_ARRAY
658 
659 //------------------------------------------------------------------------------
660