1 // Copyright 2014 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // WebPPicture utils for colorspace conversion
11 //
12 // Author: Skal (pascal.massimino@gmail.com)
13 
14 #include <assert.h>
15 #include <stdlib.h>
16 #include <math.h>
17 
18 #include "src/enc/vp8i_enc.h"
19 #include "src/utils/random_utils.h"
20 #include "src/utils/utils.h"
21 #include "src/dsp/dsp.h"
22 #include "src/dsp/lossless.h"
23 #include "src/dsp/yuv.h"
24 
// Comment out to disable gamma-compression during RGB->U/V averaging
26 #define USE_GAMMA_COMPRESSION
27 
28 // If defined, use table to compute x / alpha.
29 #define USE_INVERSE_ALPHA_TABLE
30 
31 #ifdef WORDS_BIGENDIAN
32 // uint32_t 0xff000000 is 0xff,00,00,00 in memory
33 #define CHANNEL_OFFSET(i) (i)
34 #else
35 // uint32_t 0xff000000 is 0x00,00,00,ff in memory
36 #define CHANNEL_OFFSET(i) (3-(i))
37 #endif
38 
39 #define ALPHA_OFFSET CHANNEL_OFFSET(0)
40 
41 //------------------------------------------------------------------------------
42 // Detection of non-trivial transparency
43 
// Returns true if alpha[] has non-0xff values.
// 'height' rows are scanned, 'y_step' bytes apart. Rows are tested with
// WebPHasAlpha8b() when x_step is 1 and with WebPHasAlpha32b() for any other
// x_step. A NULL 'alpha' is reported as fully opaque (returns 0).
static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
                          int x_step, int y_step) {
  const int is_packed = (x_step == 1);
  int rows_left;
  if (alpha == NULL) return 0;
  WebPInitAlphaProcessing();
  for (rows_left = height; rows_left > 0; --rows_left, alpha += y_step) {
    const int row_has_alpha = is_packed ? WebPHasAlpha8b(alpha, width)
                                        : WebPHasAlpha32b(alpha, width);
    if (row_has_alpha) return 1;   // no need to look any further
  }
  return 0;
}
60 
61 // Checking for the presence of non-opaque alpha.
WebPPictureHasTransparency(const WebPPicture * picture)62 int WebPPictureHasTransparency(const WebPPicture* picture) {
63   if (picture == NULL) return 0;
64   if (picture->use_argb) {
65     const int alpha_offset = ALPHA_OFFSET;
66     return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset,
67                           picture->width, picture->height,
68                           4, picture->argb_stride * sizeof(*picture->argb));
69   }
70   return CheckNonOpaque(picture->a, picture->width, picture->height,
71                         1, picture->a_stride);
72 }
73 
74 //------------------------------------------------------------------------------
75 // Code for gamma correction
76 
77 #if defined(USE_GAMMA_COMPRESSION)
78 
79 // gamma-compensates loss of resolution during chroma subsampling
80 #define kGamma 0.80      // for now we use a different gamma value than kGammaF
81 #define kGammaFix 12     // fixed-point precision for linear values
82 #define kGammaScale ((1 << kGammaFix) - 1)
83 #define kGammaTabFix 7   // fixed-point fractional bits precision
84 #define kGammaTabScale (1 << kGammaTabFix)
85 #define kGammaTabRounder (kGammaTabScale >> 1)
86 #define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
87 
88 static int kLinearToGammaTab[kGammaTabSize + 1];
89 static uint16_t kGammaToLinearTab[256];
90 static volatile int kGammaTablesOk = 0;
91 static void InitGammaTables(void);
92 
WEBP_DSP_INIT_FUNC(InitGammaTables)93 WEBP_DSP_INIT_FUNC(InitGammaTables) {
94   if (!kGammaTablesOk) {
95     int v;
96     const double scale = (double)(1 << kGammaTabFix) / kGammaScale;
97     const double norm = 1. / 255.;
98     for (v = 0; v <= 255; ++v) {
99       kGammaToLinearTab[v] =
100           (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5);
101     }
102     for (v = 0; v <= kGammaTabSize; ++v) {
103       kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5);
104     }
105     kGammaTablesOk = 1;
106   }
107 }
108 
GammaToLinear(uint8_t v)109 static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {
110   return kGammaToLinearTab[v];
111 }
112 
Interpolate(int v)113 static WEBP_INLINE int Interpolate(int v) {
114   const int tab_pos = v >> (kGammaTabFix + 2);    // integer part
115   const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
116   const int v0 = kLinearToGammaTab[tab_pos];
117   const int v1 = kLinearToGammaTab[tab_pos + 1];
118   const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
119   assert(tab_pos + 1 < kGammaTabSize + 1);
120   return y;
121 }
122 
123 // Convert a linear value 'v' to YUV_FIX+2 fixed-point precision
124 // U/V value, suitable for RGBToU/V calls.
LinearToGamma(uint32_t base_value,int shift)125 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
126   const int y = Interpolate(base_value << shift);   // final uplifted value
127   return (y + kGammaTabRounder) >> kGammaTabFix;    // descale
128 }
129 
130 #else
131 
InitGammaTables(void)132 static void InitGammaTables(void) {}
GammaToLinear(uint8_t v)133 static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }
LinearToGamma(uint32_t base_value,int shift)134 static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
135   return (int)(base_value << shift);
136 }
137 
138 #endif    // USE_GAMMA_COMPRESSION
139 
140 //------------------------------------------------------------------------------
141 // RGB -> YUV conversion
142 
// Converts one R/G/B triplet to luma with VP8RGBToY(). The rounding term is
// YUV_HALF when 'rg' is NULL, or YUV_FIX random bits drawn from 'rg'
// otherwise.
static int RGBToY(int r, int g, int b, VP8Random* const rg) {
  if (rg != NULL) {
    return VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));
  }
  return VP8RGBToY(r, g, b, YUV_HALF);
}
147 
// Converts one R/G/B triplet to U with VP8RGBToU(). The rounding term is
// (YUV_HALF << 2) when 'rg' is NULL, or YUV_FIX + 2 random bits drawn from
// 'rg' otherwise.
static int RGBToU(int r, int g, int b, VP8Random* const rg) {
  if (rg != NULL) {
    return VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
  }
  return VP8RGBToU(r, g, b, YUV_HALF << 2);
}
152 
// Converts one R/G/B triplet to V with VP8RGBToV(). The rounding term is
// (YUV_HALF << 2) when 'rg' is NULL, or YUV_FIX + 2 random bits drawn from
// 'rg' otherwise.
static int RGBToV(int r, int g, int b, VP8Random* const rg) {
  if (rg != NULL) {
    return VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));
  }
  return VP8RGBToV(r, g, b, YUV_HALF << 2);
}
157 
158 //------------------------------------------------------------------------------
159 // Sharp RGB->YUV conversion
160 
161 static const int kNumIterations = 4;
162 static const int kMinDimensionIterativeConversion = 4;
163 
164 // We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
165 // banding sometimes. Better use extra precision.
166 #define SFIX 2                // fixed-point precision of RGB and Y/W
167 typedef int16_t fixed_t;      // signed type with extra SFIX precision for UV
168 typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W
169 
170 #define SHALF (1 << SFIX >> 1)
171 #define MAX_Y_T ((256 << SFIX) - 1)
172 #define SROUNDER (1 << (YUV_FIX + SFIX - 1))
173 
174 #if defined(USE_GAMMA_COMPRESSION)
175 
176 // We use tables of different size and precision for the Rec709 / BT2020
177 // transfer function.
178 #define kGammaF (1./0.45)
179 static uint32_t kLinearToGammaTabS[kGammaTabSize + 2];
180 #define GAMMA_TO_LINEAR_BITS 14
181 static uint32_t kGammaToLinearTabS[MAX_Y_T + 1];   // size scales with Y_FIX
182 static volatile int kGammaTablesSOk = 0;
183 static void InitGammaTablesS(void);
184 
WEBP_DSP_INIT_FUNC(InitGammaTablesS)185 WEBP_DSP_INIT_FUNC(InitGammaTablesS) {
186   assert(2 * GAMMA_TO_LINEAR_BITS < 32);  // we use uint32_t intermediate values
187   if (!kGammaTablesSOk) {
188     int v;
189     const double norm = 1. / MAX_Y_T;
190     const double scale = 1. / kGammaTabSize;
191     const double a = 0.09929682680944;
192     const double thresh = 0.018053968510807;
193     const double final_scale = 1 << GAMMA_TO_LINEAR_BITS;
194     for (v = 0; v <= MAX_Y_T; ++v) {
195       const double g = norm * v;
196       double value;
197       if (g <= thresh * 4.5) {
198         value = g / 4.5;
199       } else {
200         const double a_rec = 1. / (1. + a);
201         value = pow(a_rec * (g + a), kGammaF);
202       }
203       kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5);
204     }
205     for (v = 0; v <= kGammaTabSize; ++v) {
206       const double g = scale * v;
207       double value;
208       if (g <= thresh) {
209         value = 4.5 * g;
210       } else {
211         value = (1. + a) * pow(g, 1. / kGammaF) - a;
212       }
213       // we already incorporate the 1/2 rounding constant here
214       kLinearToGammaTabS[v] =
215           (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1);
216     }
217     // to prevent small rounding errors to cause read-overflow:
218     kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize];
219     kGammaTablesSOk = 1;
220   }
221 }
222 
223 // return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS
GammaToLinearS(int v)224 static WEBP_INLINE uint32_t GammaToLinearS(int v) {
225   return kGammaToLinearTabS[v];
226 }
227 
LinearToGammaS(uint32_t value)228 static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
229   // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision
230   const uint32_t v = value * kGammaTabSize;
231   const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS;
232   // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision
233   const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS);  // fractional part
234   // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1])
235   const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0];
236   const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1];
237   // Final interpolation. Note that rounding is already included.
238   const uint32_t v2 = (v1 - v0) * x;    // note: v1 >= v0.
239   const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS);
240   return result;
241 }
242 
243 #else
244 
InitGammaTablesS(void)245 static void InitGammaTablesS(void) {}
GammaToLinearS(int v)246 static WEBP_INLINE uint32_t GammaToLinearS(int v) {
247   return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T;
248 }
LinearToGammaS(uint32_t value)249 static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) {
250   return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS;
251 }
252 
253 #endif    // USE_GAMMA_COMPRESSION
254 
255 //------------------------------------------------------------------------------
256 
// Clamps 'v' to the [0, 255] range.
static uint8_t clip_8b(fixed_t v) {
  if (v < 0) return 0u;
  if (v > 0xff) return 255u;
  return (uint8_t)v;
}
260 
clip_y(int y)261 static fixed_y_t clip_y(int y) {
262   return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T;
263 }
264 
265 //------------------------------------------------------------------------------
266 
RGBToGray(int r,int g,int b)267 static int RGBToGray(int r, int g, int b) {
268   const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF;
269   return (luma >> YUV_FIX);
270 }
271 
// Averages four gamma-domain samples in linear space (with rounding) and
// converts the average back to the gamma domain.
static uint32_t ScaleDown(int a, int b, int c, int d) {
  uint32_t linear_sum = GammaToLinearS(a);
  linear_sum += GammaToLinearS(b);
  linear_sum += GammaToLinearS(c);
  linear_sum += GammaToLinearS(d);
  return LinearToGammaS((linear_sum + 2) >> 2);
}
279 
UpdateW(const fixed_y_t * src,fixed_y_t * dst,int w)280 static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) {
281   int i;
282   for (i = 0; i < w; ++i) {
283     const uint32_t R = GammaToLinearS(src[0 * w + i]);
284     const uint32_t G = GammaToLinearS(src[1 * w + i]);
285     const uint32_t B = GammaToLinearS(src[2 * w + i]);
286     const uint32_t Y = RGBToGray(R, G, B);
287     dst[i] = (fixed_y_t)LinearToGammaS(Y);
288   }
289 }
290 
// Computes one row of down-sampled chroma differences from two rows of
// planar R/G/B samples.
// 'src1' and 'src2' each hold three planes of 2 * uv_w samples. For every
// 2x2 block, the four samples of each channel are averaged with ScaleDown(),
// and the gray level W of the averaged triplet is subtracted. 'dst' receives
// three planes of uv_w values: r - W, g - W and b - W.
static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2,
                         fixed_t* dst, int uv_w) {
  const int g_plane = 2 * uv_w;   // plane offsets inside src1 / src2
  const int b_plane = 4 * uv_w;
  int x;
  for (x = 0; x < uv_w; ++x) {
    const int left = 2 * x;
    const int right = left + 1;
    const int r = ScaleDown(src1[left], src1[right],
                            src2[left], src2[right]);
    const int g = ScaleDown(src1[g_plane + left], src1[g_plane + right],
                            src2[g_plane + left], src2[g_plane + right]);
    const int b = ScaleDown(src1[b_plane + left], src1[b_plane + right],
                            src2[b_plane + left], src2[b_plane + right]);
    const int W = RGBToGray(r, g, b);
    dst[x + 0 * uv_w] = (fixed_t)(r - W);
    dst[x + 1 * uv_w] = (fixed_t)(g - W);
    dst[x + 2 * uv_w] = (fixed_t)(b - W);
  }
}
310 
// Writes into y[0..w-1] the gray level of each sample of a planar R/G/B row
// ('rgb' holds three planes of 'w' samples). Unlike UpdateW(), the samples
// are used as-is, without going through linear space.
static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) {
  const int g_off = w;
  const int b_off = 2 * w;
  int x;
  for (x = 0; x < w; ++x) {
    y[x] = RGBToGray(rgb[x], rgb[g_off + x], rgb[b_off + x]);
  }
}
317 
318 //------------------------------------------------------------------------------
319 
Filter2(int A,int B,int W0)320 static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) {
321   const int v0 = (A * 3 + B + 2) >> 2;
322   return clip_y(v0 + W0);
323 }
324 
325 //------------------------------------------------------------------------------
326 
UpLift(uint8_t a)327 static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {  // 8bit -> SFIX
328   return ((fixed_y_t)a << SFIX) | SHALF;
329 }
330 
// Imports one row of 8-bit R/G/B samples into planar fixed_y_t form.
// Source samples are read 'step' bytes apart from r_ptr / g_ptr / b_ptr.
// 'dst' receives three planes (R, G, B) whose width is pic_width rounded up
// to an even value; when pic_width is odd, the last sample of each plane is
// duplicated to fill the extra column.
static void ImportOneRow(const uint8_t* const r_ptr,
                         const uint8_t* const g_ptr,
                         const uint8_t* const b_ptr,
                         int step,
                         int pic_width,
                         fixed_y_t* const dst) {
  const int padded_w = (pic_width + 1) & ~1;
  const int g_base = 1 * padded_w;
  const int b_base = 2 * padded_w;
  int x;
  for (x = 0; x < pic_width; ++x) {
    const int src_off = x * step;
    dst[x] = UpLift(r_ptr[src_off]);
    dst[g_base + x] = UpLift(g_ptr[src_off]);
    dst[b_base + x] = UpLift(b_ptr[src_off]);
  }
  if (pic_width & 1) {  // replicate rightmost pixel
    const int last = pic_width - 1;
    dst[pic_width] = dst[last];
    dst[g_base + pic_width] = dst[g_base + last];
    dst[b_base + pic_width] = dst[b_base + last];
  }
}
351 
// Reconstructs two full-resolution rows of planar R/G/B samples from two
// rows of luma ('best_y', 'w' samples per row) and three rows of chroma
// differences (previous, current and next).
// out1 is built from cur_uv/prev_uv and the first luma row, out2 from
// cur_uv/next_uv and the second luma row. The three channels are processed
// one after the other; the same luma rows are used for all of them.
static void InterpolateTwoRows(const fixed_y_t* const best_y,
                               const fixed_t* prev_uv,
                               const fixed_t* cur_uv,
                               const fixed_t* next_uv,
                               int w,
                               fixed_y_t* out1,
                               fixed_y_t* out2) {
  const int uv_w = w >> 1;
  const int len = (w - 1) >> 1;   // length to filter
  const int w_is_even = !(w & 1);
  int channel;
  for (channel = 0; channel < 3; ++channel) {   // R, G and B segments in turn
    // special boundary case for i==0
    out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]);
    out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]);

    WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1);
    WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1);

    // special boundary case for i == w - 1 when w is even
    if (w_is_even) {
      const int last = w - 1;
      const int last_uv = uv_w - 1;
      out1[last] = Filter2(cur_uv[last_uv], prev_uv[last_uv], best_y[last]);
      out2[last] = Filter2(cur_uv[last_uv], next_uv[last_uv],
                           best_y[last + w]);
    }
    // move on to the next channel
    out1 += w;
    out2 += w;
    prev_uv += uv_w;
    cur_uv  += uv_w;
    next_uv += uv_w;
  }
}
384 
ConvertRGBToY(int r,int g,int b)385 static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
386   const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER;
387   return clip_8b(16 + (luma >> (YUV_FIX + SFIX)));
388 }
389 
ConvertRGBToU(int r,int g,int b)390 static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
391   const int u =  -9719 * r - 19081 * g + 28800 * b + SROUNDER;
392   return clip_8b(128 + (u >> (YUV_FIX + SFIX)));
393 }
394 
ConvertRGBToV(int r,int g,int b)395 static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
396   const int v = +28800 * r - 24116 * g -  4684 * b + SROUNDER;
397   return clip_8b(128 + (v >> (YUV_FIX + SFIX)));
398 }
399 
// Final reconstruction: writes the Y, U and V planes of 'picture' from the
// W (luma) samples in 'best_y' and the chroma differences in 'best_uv'.
// 'best_y' rows are as wide as the picture width rounded up to an even
// value; each 'best_uv' row holds three planes of half that width.
// Always returns 1.
static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv,
                            WebPPicture* const picture) {
  const int width = picture->width;
  const int height = picture->height;
  const int w = (width + 1) & ~1;
  const int uv_w = w >> 1;
  const int uv_h = ((height + 1) & ~1) >> 1;
  const fixed_t* uv_row = best_uv;
  uint8_t* y_out = picture->y;
  uint8_t* u_out = picture->u;
  uint8_t* v_out = picture->v;
  int x, row;

  // Luma: rebuild R/G/B of every pixel as W + chroma difference.
  for (row = 0; row < height; ++row) {
    for (x = 0; x < width; ++x) {
      const int uv_x = x >> 1;
      const int W = best_y[x];
      const int r = uv_row[uv_x + 0 * uv_w] + W;
      const int g = uv_row[uv_x + 1 * uv_w] + W;
      const int b = uv_row[uv_x + 2 * uv_w] + W;
      y_out[x] = ConvertRGBToY(r, g, b);
    }
    best_y += w;
    if (row & 1) uv_row += 3 * uv_w;   // one chroma row per two luma rows
    y_out += picture->y_stride;
  }

  // Chroma: the differences are converted directly.
  uv_row = best_uv;
  for (row = 0; row < uv_h; ++row) {
    for (x = 0; x < uv_w; ++x) {
      const int r = uv_row[x + 0 * uv_w];
      const int g = uv_row[x + 1 * uv_w];
      const int b = uv_row[x + 2 * uv_w];
      u_out[x] = ConvertRGBToU(r, g, b);
      v_out[x] = ConvertRGBToV(r, g, b);
    }
    uv_row += 3 * uv_w;
    u_out += picture->uv_stride;
    v_out += picture->uv_stride;
  }
  return 1;
}
439 
440 //------------------------------------------------------------------------------
441 // Main function
442 
443 #define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
444 
PreprocessARGB(const uint8_t * r_ptr,const uint8_t * g_ptr,const uint8_t * b_ptr,int step,int rgb_stride,WebPPicture * const picture)445 static int PreprocessARGB(const uint8_t* r_ptr,
446                           const uint8_t* g_ptr,
447                           const uint8_t* b_ptr,
448                           int step, int rgb_stride,
449                           WebPPicture* const picture) {
450   // we expand the right/bottom border if needed
451   const int w = (picture->width + 1) & ~1;
452   const int h = (picture->height + 1) & ~1;
453   const int uv_w = w >> 1;
454   const int uv_h = h >> 1;
455   uint64_t prev_diff_y_sum = ~0;
456   int j, iter;
457 
458   // TODO(skal): allocate one big memory chunk. But for now, it's easier
459   // for valgrind debugging to have several chunks.
460   fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
461   fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t);
462   fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t);
463   fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);
464   fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
465   fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);
466   fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);
467   fixed_y_t* best_y = best_y_base;
468   fixed_y_t* target_y = target_y_base;
469   fixed_t* best_uv = best_uv_base;
470   fixed_t* target_uv = target_uv_base;
471   const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h);
472   int ok;
473 
474   if (best_y_base == NULL || best_uv_base == NULL ||
475       target_y_base == NULL || target_uv_base == NULL ||
476       best_rgb_y == NULL || best_rgb_uv == NULL ||
477       tmp_buffer == NULL) {
478     ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
479     goto End;
480   }
481   assert(picture->width >= kMinDimensionIterativeConversion);
482   assert(picture->height >= kMinDimensionIterativeConversion);
483 
484   WebPInitConvertARGBToYUV();
485 
486   // Import RGB samples to W/RGB representation.
487   for (j = 0; j < picture->height; j += 2) {
488     const int is_last_row = (j == picture->height - 1);
489     fixed_y_t* const src1 = tmp_buffer + 0 * w;
490     fixed_y_t* const src2 = tmp_buffer + 3 * w;
491 
492     // prepare two rows of input
493     ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1);
494     if (!is_last_row) {
495       ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride,
496                    step, picture->width, src2);
497     } else {
498       memcpy(src2, src1, 3 * w * sizeof(*src2));
499     }
500     StoreGray(src1, best_y + 0, w);
501     StoreGray(src2, best_y + w, w);
502 
503     UpdateW(src1, target_y, w);
504     UpdateW(src2, target_y + w, w);
505     UpdateChroma(src1, src2, target_uv, uv_w);
506     memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv));
507     best_y += 2 * w;
508     best_uv += 3 * uv_w;
509     target_y += 2 * w;
510     target_uv += 3 * uv_w;
511     r_ptr += 2 * rgb_stride;
512     g_ptr += 2 * rgb_stride;
513     b_ptr += 2 * rgb_stride;
514   }
515 
516   // Iterate and resolve clipping conflicts.
517   for (iter = 0; iter < kNumIterations; ++iter) {
518     const fixed_t* cur_uv = best_uv_base;
519     const fixed_t* prev_uv = best_uv_base;
520     uint64_t diff_y_sum = 0;
521 
522     best_y = best_y_base;
523     best_uv = best_uv_base;
524     target_y = target_y_base;
525     target_uv = target_uv_base;
526     for (j = 0; j < h; j += 2) {
527       fixed_y_t* const src1 = tmp_buffer + 0 * w;
528       fixed_y_t* const src2 = tmp_buffer + 3 * w;
529       {
530         const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
531         InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2);
532         prev_uv = cur_uv;
533         cur_uv = next_uv;
534       }
535 
536       UpdateW(src1, best_rgb_y + 0 * w, w);
537       UpdateW(src2, best_rgb_y + 1 * w, w);
538       UpdateChroma(src1, src2, best_rgb_uv, uv_w);
539 
540       // update two rows of Y and one row of RGB
541       diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w);
542       WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w);
543 
544       best_y += 2 * w;
545       best_uv += 3 * uv_w;
546       target_y += 2 * w;
547       target_uv += 3 * uv_w;
548     }
549     // test exit condition
550     if (iter > 0) {
551       if (diff_y_sum < diff_y_threshold) break;
552       if (diff_y_sum > prev_diff_y_sum) break;
553     }
554     prev_diff_y_sum = diff_y_sum;
555   }
556   // final reconstruction
557   ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture);
558 
559  End:
560   WebPSafeFree(best_y_base);
561   WebPSafeFree(best_uv_base);
562   WebPSafeFree(target_y_base);
563   WebPSafeFree(target_uv_base);
564   WebPSafeFree(best_rgb_y);
565   WebPSafeFree(best_rgb_uv);
566   WebPSafeFree(tmp_buffer);
567   return ok;
568 }
569 #undef SAFE_ALLOC
570 
571 //------------------------------------------------------------------------------
572 // "Fast" regular RGB->YUV
573 
// Down-sampling helpers. All of them read the row below the current one
// through a variable named 'rgb_stride', which must be in scope wherever the
// macros are expanded.
// SUM4: gamma-corrected sum of a 2x2 block ('step' bytes between columns).
#define SUM4(ptr, step) LinearToGamma(                     \
    GammaToLinear((ptr)[0]) +                              \
    GammaToLinear((ptr)[(step)]) +                         \
    GammaToLinear((ptr)[rgb_stride]) +                     \
    GammaToLinear((ptr)[rgb_stride + (step)]), 0)

// SUM2: same for a 2x1 block; the extra shift of 1 doubles the two-sample sum.
#define SUM2(ptr) \
    LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)

// Plain sums of alpha samples. SUM4ALPHA uses a fixed column step of 4 bytes.
#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
585 
586 #if defined(USE_INVERSE_ALPHA_TABLE)
587 
588 static const int kAlphaFix = 19;
// The following table holds (1 << kAlphaFix) / a for every a in
// [1 .. 4 * 0xff]; entry 0 is only a placeholder for alpha = 0.
// The (v * kInvAlpha[a]) >> kAlphaFix formula is then equal to v / a in most
// (99.6%) cases. Note that this table and constant are adjusted very tightly
// to fit 32b arithmetic. In particular, they use the fact that the operands
// for 'v / a' are actually derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and
// a = a0 + a1 + a2 + a3, with ai in [0..255] and pi in [0..1<<kGammaFix).
// The constraint to avoid overflow is: kGammaFix + kAlphaFix <= 31.
596 static const uint32_t kInvAlpha[4 * 0xff + 1] = {
597   0,  /* alpha = 0 */
598   524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
599   58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
600   30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
601   20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
602   15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
603   12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
604   10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
605   9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
606   8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
607   7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
608   6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
609   5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
610   5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
611   4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
612   4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
613   4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
614   4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
615   3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
616   3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
617   3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
618   3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
619   3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
620   2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
621   2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
622   2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
623   2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
624   2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
625   2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
626   2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
627   2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
628   2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
629   2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
630   2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
631   1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
632   1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
633   1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
634   1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
635   1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
636   1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
637   1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
638   1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
639   1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
640   1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
641   1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
642   1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
643   1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
644   1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
645   1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
646   1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
647   1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
648   1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
649   1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
650   1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
651   1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
652   1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
653   1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
654   1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
655   1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
656   1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
657   1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
658   1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
659   1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
660   1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
661   1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
662   1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
663   1006, 1004, 1002, 1000, 998, 996, 994, 992,
664   991, 989, 987, 985, 983, 981, 979, 978,
665   976, 974, 972, 970, 969, 967, 965, 963,
666   961, 960, 958, 956, 954, 953, 951, 949,
667   948, 946, 944, 942, 941, 939, 937, 936,
668   934, 932, 931, 929, 927, 926, 924, 923,
669   921, 919, 918, 916, 914, 913, 911, 910,
670   908, 907, 905, 903, 902, 900, 899, 897,
671   896, 894, 893, 891, 890, 888, 887, 885,
672   884, 882, 881, 879, 878, 876, 875, 873,
673   872, 870, 869, 868, 866, 865, 863, 862,
674   860, 859, 858, 856, 855, 853, 852, 851,
675   849, 848, 846, 845, 844, 842, 841, 840,
676   838, 837, 836, 834, 833, 832, 830, 829,
677   828, 826, 825, 824, 823, 821, 820, 819,
678   817, 816, 815, 814, 812, 811, 810, 809,
679   807, 806, 805, 804, 802, 801, 800, 799,
680   798, 796, 795, 794, 793, 791, 790, 789,
681   788, 787, 786, 784, 783, 782, 781, 780,
682   779, 777, 776, 775, 774, 773, 772, 771,
683   769, 768, 767, 766, 765, 764, 763, 762,
684   760, 759, 758, 757, 756, 755, 754, 753,
685   752, 751, 750, 748, 747, 746, 745, 744,
686   743, 742, 741, 740, 739, 738, 737, 736,
687   735, 734, 733, 732, 731, 730, 729, 728,
688   727, 726, 725, 724, 723, 722, 721, 720,
689   719, 718, 717, 716, 715, 714, 713, 712,
690   711, 710, 709, 708, 707, 706, 705, 704,
691   703, 702, 701, 700, 699, 699, 698, 697,
692   696, 695, 694, 693, 692, 691, 690, 689,
693   688, 688, 687, 686, 685, 684, 683, 682,
694   681, 680, 680, 679, 678, 677, 676, 675,
695   674, 673, 673, 672, 671, 670, 669, 668,
696   667, 667, 666, 665, 664, 663, 662, 661,
697   661, 660, 659, 658, 657, 657, 656, 655,
698   654, 653, 652, 652, 651, 650, 649, 648,
699   648, 647, 646, 645, 644, 644, 643, 642,
700   641, 640, 640, 639, 638, 637, 637, 636,
701   635, 634, 633, 633, 632, 631, 630, 630,
702   629, 628, 627, 627, 626, 625, 624, 624,
703   623, 622, 621, 621, 620, 619, 618, 618,
704   617, 616, 616, 615, 614, 613, 613, 612,
705   611, 611, 610, 609, 608, 608, 607, 606,
706   606, 605, 604, 604, 603, 602, 601, 601,
707   600, 599, 599, 598, 597, 597, 596, 595,
708   595, 594, 593, 593, 592, 591, 591, 590,
709   589, 589, 588, 587, 587, 586, 585, 585,
710   584, 583, 583, 582, 581, 581, 580, 579,
711   579, 578, 578, 577, 576, 576, 575, 574,
712   574, 573, 572, 572, 571, 571, 570, 569,
713   569, 568, 568, 567, 566, 566, 565, 564,
714   564, 563, 563, 562, 561, 561, 560, 560,
715   559, 558, 558, 557, 557, 556, 555, 555,
716   554, 554, 553, 553, 552, 551, 551, 550,
717   550, 549, 548, 548, 547, 547, 546, 546,
718   545, 544, 544, 543, 543, 542, 542, 541,
719   541, 540, 539, 539, 538, 538, 537, 537,
720   536, 536, 535, 534, 534, 533, 533, 532,
721   532, 531, 531, 530, 530, 529, 529, 528,
722   527, 527, 526, 526, 525, 525, 524, 524,
723   523, 523, 522, 522, 521, 521, 520, 520,
724   519, 519, 518, 518, 517, 517, 516, 516,
725   515, 515, 514, 514
726 };
727 
728 // Note that LinearToGamma() expects the values to be premultiplied by 4,
729 // so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
730 #define DIVIDE_BY_ALPHA(sum, a)  (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
731 
732 #else
733 
734 #define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
735 
736 #endif  // USE_INVERSE_ALPHA_TABLE
737 
LinearToGammaWeighted(const uint8_t * src,const uint8_t * a_ptr,uint32_t total_a,int step,int rgb_stride)738 static WEBP_INLINE int LinearToGammaWeighted(const uint8_t* src,
739                                              const uint8_t* a_ptr,
740                                              uint32_t total_a, int step,
741                                              int rgb_stride) {
742   const uint32_t sum =
743       a_ptr[0] * GammaToLinear(src[0]) +
744       a_ptr[step] * GammaToLinear(src[step]) +
745       a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]) +
746       a_ptr[rgb_stride + step] * GammaToLinear(src[rgb_stride + step]);
747   assert(total_a > 0 && total_a <= 4 * 0xff);
748 #if defined(USE_INVERSE_ALPHA_TABLE)
749   assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
750 #endif
751   return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
752 }
753 
ConvertRowToY(const uint8_t * const r_ptr,const uint8_t * const g_ptr,const uint8_t * const b_ptr,int step,uint8_t * const dst_y,int width,VP8Random * const rg)754 static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
755                                       const uint8_t* const g_ptr,
756                                       const uint8_t* const b_ptr,
757                                       int step,
758                                       uint8_t* const dst_y,
759                                       int width,
760                                       VP8Random* const rg) {
761   int i, j;
762   for (i = 0, j = 0; i < width; i += 1, j += step) {
763     dst_y[i] = RGBToY(r_ptr[j], g_ptr[j], b_ptr[j], rg);
764   }
765 }
766 
AccumulateRGBA(const uint8_t * const r_ptr,const uint8_t * const g_ptr,const uint8_t * const b_ptr,const uint8_t * const a_ptr,int rgb_stride,uint16_t * dst,int width)767 static WEBP_INLINE void AccumulateRGBA(const uint8_t* const r_ptr,
768                                        const uint8_t* const g_ptr,
769                                        const uint8_t* const b_ptr,
770                                        const uint8_t* const a_ptr,
771                                        int rgb_stride,
772                                        uint16_t* dst, int width) {
773   int i, j;
774   // we loop over 2x2 blocks and produce one R/G/B/A value for each.
775   for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * 4, dst += 4) {
776     const uint32_t a = SUM4ALPHA(a_ptr + j);
777     int r, g, b;
778     if (a == 4 * 0xff || a == 0) {
779       r = SUM4(r_ptr + j, 4);
780       g = SUM4(g_ptr + j, 4);
781       b = SUM4(b_ptr + j, 4);
782     } else {
783       r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 4, rgb_stride);
784       g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 4, rgb_stride);
785       b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 4, rgb_stride);
786     }
787     dst[0] = r;
788     dst[1] = g;
789     dst[2] = b;
790     dst[3] = a;
791   }
792   if (width & 1) {
793     const uint32_t a = 2u * SUM2ALPHA(a_ptr + j);
794     int r, g, b;
795     if (a == 4 * 0xff || a == 0) {
796       r = SUM2(r_ptr + j);
797       g = SUM2(g_ptr + j);
798       b = SUM2(b_ptr + j);
799     } else {
800       r = LinearToGammaWeighted(r_ptr + j, a_ptr + j, a, 0, rgb_stride);
801       g = LinearToGammaWeighted(g_ptr + j, a_ptr + j, a, 0, rgb_stride);
802       b = LinearToGammaWeighted(b_ptr + j, a_ptr + j, a, 0, rgb_stride);
803     }
804     dst[0] = r;
805     dst[1] = g;
806     dst[2] = b;
807     dst[3] = a;
808   }
809 }
810 
AccumulateRGB(const uint8_t * const r_ptr,const uint8_t * const g_ptr,const uint8_t * const b_ptr,int step,int rgb_stride,uint16_t * dst,int width)811 static WEBP_INLINE void AccumulateRGB(const uint8_t* const r_ptr,
812                                       const uint8_t* const g_ptr,
813                                       const uint8_t* const b_ptr,
814                                       int step, int rgb_stride,
815                                       uint16_t* dst, int width) {
816   int i, j;
817   for (i = 0, j = 0; i < (width >> 1); i += 1, j += 2 * step, dst += 4) {
818     dst[0] = SUM4(r_ptr + j, step);
819     dst[1] = SUM4(g_ptr + j, step);
820     dst[2] = SUM4(b_ptr + j, step);
821   }
822   if (width & 1) {
823     dst[0] = SUM2(r_ptr + j);
824     dst[1] = SUM2(g_ptr + j);
825     dst[2] = SUM2(b_ptr + j);
826   }
827 }
828 
ConvertRowsToUV(const uint16_t * rgb,uint8_t * const dst_u,uint8_t * const dst_v,int width,VP8Random * const rg)829 static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
830                                         uint8_t* const dst_u,
831                                         uint8_t* const dst_v,
832                                         int width,
833                                         VP8Random* const rg) {
834   int i;
835   for (i = 0; i < width; i += 1, rgb += 4) {
836     const int r = rgb[0], g = rgb[1], b = rgb[2];
837     dst_u[i] = RGBToU(r, g, b, rg);
838     dst_v[i] = RGBToV(r, g, b, rg);
839   }
840 }
841 
// Converts the samples addressed by r_ptr/g_ptr/b_ptr (and a_ptr, which may be
// NULL when there is no alpha channel) into the Y/U/V(/A) planes of 'picture'.
// The planes are allocated here with WebPPictureAllocYUVA(), using
// picture->width and picture->height.
//   step:       distance in bytes between two consecutive pixels
//   rgb_stride: distance in bytes between two consecutive scanlines
//   dithering:  if > 0, a VP8Random generator is initialized with this value
//               and used by the per-pixel conversions (disables the dsp path)
//   use_iterative_conversion: if non-zero (and the picture is at least
//               kMinDimensionIterativeConversion in both dimensions), the
//               conversion is delegated to PreprocessARGB()
// Returns 1 on success, 0 on failure. A failure to allocate the temporary
// accumulation buffer is reported in 'picture' as VP8_ENC_ERROR_OUT_OF_MEMORY.
static int ImportYUVAFromRGBA(const uint8_t* r_ptr,
                              const uint8_t* g_ptr,
                              const uint8_t* b_ptr,
                              const uint8_t* a_ptr,
                              int step,         // bytes per pixel
                              int rgb_stride,   // bytes per scanline
                              float dithering,
                              int use_iterative_conversion,
                              WebPPicture* const picture) {
  int y;
  const int width = picture->width;
  const int height = picture->height;
  const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
  const int is_rgb = (r_ptr < b_ptr);  // otherwise it's bgr

  picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
  picture->use_argb = 0;

  // disable smart conversion if source is too small (overkill).
  if (width < kMinDimensionIterativeConversion ||
      height < kMinDimensionIterativeConversion) {
    use_iterative_conversion = 0;
  }

  if (!WebPPictureAllocYUVA(picture, width, height)) {
    return 0;
  }
  if (has_alpha) {
    assert(step == 4);
#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
    assert(kAlphaFix + kGammaFix <= 31);
#endif
  }

  if (use_iterative_conversion) {
    InitGammaTablesS();
    if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
      return 0;
    }
    if (has_alpha) {
      WebPExtractAlpha(a_ptr, rgb_stride, width, height,
                       picture->a, picture->a_stride);
    }
  } else {
    const int uv_width = (width + 1) >> 1;
    int use_dsp = (step == 3);  // use special function in this case
    // temporary storage for accumulated R/G/B values during conversion to U/V
    uint16_t* const tmp_rgb =
        (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb));
    uint8_t* dst_y = picture->y;
    uint8_t* dst_u = picture->u;
    uint8_t* dst_v = picture->v;
    uint8_t* dst_a = picture->a;

    VP8Random base_rg;
    VP8Random* rg = NULL;
    if (dithering > 0.) {
      VP8InitRandom(&base_rg, dithering);
      rg = &base_rg;
      use_dsp = 0;   // can't use dsp in this case
    }
    WebPInitConvertARGBToYUV();
    InitGammaTables();

    if (tmp_rgb == NULL) {
      // Record the failure so that the caller can tell it from other errors.
      return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
    }

    // Downsample Y/U/V planes, two rows at a time
    for (y = 0; y < (height >> 1); ++y) {
      int rows_have_alpha = has_alpha;
      if (use_dsp) {
        if (is_rgb) {
          WebPConvertRGB24ToY(r_ptr, dst_y, width);
          WebPConvertRGB24ToY(r_ptr + rgb_stride,
                              dst_y + picture->y_stride, width);
        } else {
          WebPConvertBGR24ToY(b_ptr, dst_y, width);
          WebPConvertBGR24ToY(b_ptr + rgb_stride,
                              dst_y + picture->y_stride, width);
        }
      } else {
        ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
        ConvertRowToY(r_ptr + rgb_stride,
                      g_ptr + rgb_stride,
                      b_ptr + rgb_stride, step,
                      dst_y + picture->y_stride, width, rg);
      }
      dst_y += 2 * picture->y_stride;
      if (has_alpha) {
        // The alpha-weighted path is skipped for this row pair whenever
        // WebPExtractAlpha() returns non-zero (presumably: rows fully opaque).
        rows_have_alpha &= !WebPExtractAlpha(a_ptr, rgb_stride, width, 2,
                                             dst_a, picture->a_stride);
        dst_a += 2 * picture->a_stride;
      }
      // Collect averaged R/G/B(/A)
      if (!rows_have_alpha) {
        AccumulateRGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, tmp_rgb, width);
      } else {
        AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, rgb_stride, tmp_rgb, width);
      }
      // Convert to U/V
      if (rg == NULL) {
        WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
      } else {
        ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
      }
      dst_u += picture->uv_stride;
      dst_v += picture->uv_stride;
      r_ptr += 2 * rgb_stride;
      b_ptr += 2 * rgb_stride;
      g_ptr += 2 * rgb_stride;
      if (has_alpha) a_ptr += 2 * rgb_stride;
    }
    if (height & 1) {    // extra last row
      int row_has_alpha = has_alpha;
      if (use_dsp) {
        if (is_rgb) {
          WebPConvertRGB24ToY(r_ptr, dst_y, width);
        } else {
          WebPConvertBGR24ToY(b_ptr, dst_y, width);
        }
      } else {
        ConvertRowToY(r_ptr, g_ptr, b_ptr, step, dst_y, width, rg);
      }
      if (row_has_alpha) {
        row_has_alpha &= !WebPExtractAlpha(a_ptr, 0, width, 1, dst_a, 0);
      }
      // Collect averaged R/G/B(/A). A stride of 0 makes the single remaining
      // row stand in for both rows of the pair.
      if (!row_has_alpha) {
        AccumulateRGB(r_ptr, g_ptr, b_ptr, step, /* rgb_stride = */ 0,
                      tmp_rgb, width);
      } else {
        AccumulateRGBA(r_ptr, g_ptr, b_ptr, a_ptr, /* rgb_stride = */ 0,
                       tmp_rgb, width);
      }
      if (rg == NULL) {
        WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
      } else {
        ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
      }
    }
    WebPSafeFree(tmp_rgb);
  }
  return 1;
}
986 
987 #undef SUM4
988 #undef SUM2
989 #undef SUM4ALPHA
990 #undef SUM2ALPHA
991 
992 //------------------------------------------------------------------------------
993 // call for ARGB->YUVA conversion
994 
// Shared implementation of the public ARGB -> YUVA entry points.
// Returns 0 if 'picture' is NULL. Records VP8_ENC_ERROR_NULL_PARAMETER if
// picture->argb is NULL, and VP8_ENC_ERROR_INVALID_CONFIGURATION if the U/V
// part of 'colorspace' is not WEBP_YUV420. Otherwise the ARGB samples are
// handed over to ImportYUVAFromRGBA() as 4-byte pixels.
static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace,
                             float dithering, int use_iterative_conversion) {
  const uint8_t* argb_bytes;

  if (picture == NULL) return 0;
  if (picture->argb == NULL) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
  }
  if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }

  // View the uint32_t samples as bytes; CHANNEL_OFFSET() gives the position
  // of each channel within a pixel for the current endianness.
  argb_bytes = (const uint8_t*)picture->argb;
  picture->colorspace = WEBP_YUV420;
  return ImportYUVAFromRGBA(argb_bytes + CHANNEL_OFFSET(1),   // r
                            argb_bytes + CHANNEL_OFFSET(2),   // g
                            argb_bytes + CHANNEL_OFFSET(3),   // b
                            argb_bytes + CHANNEL_OFFSET(0),   // a
                            4, 4 * picture->argb_stride,
                            dithering, use_iterative_conversion, picture);
}
1014 
// ARGB -> YUVA conversion with the given 'dithering' value and without the
// iterative conversion. Returns the result of PictureARGBToYUVA().
int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
                                  float dithering) {
  const int use_iterative_conversion = 0;
  return PictureARGBToYUVA(picture, colorspace, dithering,
                           use_iterative_conversion);
}
1019 
// ARGB -> YUVA conversion with a dithering value of 0 and without the
// iterative conversion. Returns the result of PictureARGBToYUVA().
int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
  const float no_dithering = 0.f;
  const int use_iterative_conversion = 0;
  return PictureARGBToYUVA(picture, colorspace, no_dithering,
                           use_iterative_conversion);
}
1023 
// ARGB -> YUV420(A) conversion requesting the iterative conversion, with a
// dithering value of 0. Returns the result of PictureARGBToYUVA().
int WebPPictureSharpARGBToYUVA(WebPPicture* picture) {
  const float no_dithering = 0.f;
  const int use_iterative_conversion = 1;
  return PictureARGBToYUVA(picture, WEBP_YUV420, no_dithering,
                           use_iterative_conversion);
}
1027 // for backward compatibility
// Alias: forwards to WebPPictureSharpARGBToYUVA() and returns its result.
int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
  const int ok = WebPPictureSharpARGBToYUVA(picture);
  return ok;
}
1031 
1032 //------------------------------------------------------------------------------
1033 // call for YUVA -> ARGB conversion
1034 
// Converts the Y/U/V(/A) planes of 'picture' into a newly allocated ARGB
// buffer and sets picture->use_argb to 1.
// Returns 0 if 'picture' is NULL or if WebPPictureAllocARGB() fails. Records
// VP8_ENC_ERROR_NULL_PARAMETER if a required plane is missing, and
// VP8_ENC_ERROR_INVALID_CONFIGURATION if the U/V part of the colorspace is not
// WEBP_YUV420. Returns 1 on success.
int WebPPictureYUVAToARGB(WebPPicture* picture) {
  if (picture == NULL) return 0;
  // The Y, U and V planes are always needed; the alpha plane only when the
  // colorspace advertises it.
  if (picture->y == NULL || picture->u == NULL || picture->v == NULL ||
      ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL)) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
  }
  if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
  }
  // Allocate a new argb buffer (discarding the previous one).
  if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
  picture->use_argb = 1;

  // Convert
  {
    int y;
    const int width = picture->width;
    const int height = picture->height;
    const int y_stride = picture->y_stride;
    const int uv_stride = picture->uv_stride;
    const int out_stride = 4 * picture->argb_stride;   // in bytes
    const uint8_t* y_row = picture->y;
    const uint8_t* u_row = picture->u;
    const uint8_t* v_row = picture->v;
    uint8_t* out = (uint8_t*)picture->argb;
    WebPUpsampleLinePairFunc upsample =
        WebPGetLinePairConverter(ALPHA_OFFSET > 0);

    // Row #0 is emitted alone: the same chroma row is passed as both 'top'
    // and 'current'.
    upsample(y_row, NULL, u_row, v_row, u_row, v_row, out, NULL, width);
    y_row += y_stride;
    out += out_stride;

    // Following rows are emitted in pairs, using the previous and the next
    // chroma rows.
    for (y = 1; y + 1 < height; y += 2) {
      const uint8_t* const prev_u = u_row;
      const uint8_t* const prev_v = v_row;
      u_row += uv_stride;
      v_row += uv_stride;
      upsample(y_row, y_row + y_stride, prev_u, prev_v, u_row, v_row,
               out, out + out_stride, width);
      y_row += 2 * y_stride;
      out += 2 * out_stride;
    }

    // For an even height, one final row remains; it reuses the last chroma
    // row for both 'top' and 'current'.
    if (height > 1 && (height & 1) == 0) {
      upsample(y_row, NULL, u_row, v_row, u_row, v_row, out, NULL, width);
    }

    // Overwrite the top byte of every ARGB sample with the alpha plane value,
    // if the colorspace has one.
    if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
      for (y = 0; y < height; ++y) {
        uint32_t* const argb_row = picture->argb + y * picture->argb_stride;
        const uint8_t* const alpha_row = picture->a + y * picture->a_stride;
        int x;
        for (x = 0; x < width; ++x) {
          const uint32_t rgb = argb_row[x] & 0x00ffffffu;
          argb_row[x] = rgb | ((uint32_t)alpha_row[x] << 24);
        }
      }
    }
  }
  return 1;
}
1094 
1095 //------------------------------------------------------------------------------
1096 // automatic import / conversion
1097 
// Imports packed R/G/B(/A) samples into 'picture', using picture->width and
// picture->height as dimensions.
//   rgb:          first byte of the source samples
//   rgb_stride:   distance in bytes between two scanlines. Its magnitude must
//                 be at least 4 * width when 'import_alpha' is set, and
//                 3 * width otherwise.
//   step:         bytes per pixel (3 or 4)
//   swap_rb:      non-zero if the source order is b,g,r(,a) rather than
//                 r,g,b(,a)
//   import_alpha: non-zero if the fourth byte of each pixel is an alpha value
// If picture->use_argb is 0, the samples are converted to YUV(A) through
// ImportYUVAFromRGBA(); otherwise they are packed into picture->argb.
// Returns 1 on success, 0 on failure (invalid stride or allocation error).
static int Import(WebPPicture* const picture,
                  const uint8_t* rgb, int rgb_stride,
                  int step, int swap_rb, int import_alpha) {
  int y;
  // swap_rb -> b,g,r,a , !swap_rb -> r,g,b,a
  const uint8_t* r_ptr = rgb + (swap_rb ? 2 : 0);
  const uint8_t* g_ptr = rgb + 1;
  const uint8_t* b_ptr = rgb + (swap_rb ? 0 : 2);
  const int width = picture->width;
  const int height = picture->height;
  const int min_stride = (import_alpha ? 4 : 3) * width;

  // Reject strides too small to hold one row of pixels. Negative strides are
  // compared by magnitude.
  if (rgb_stride > -min_stride && rgb_stride < min_stride) return 0;

  if (!picture->use_argb) {
    const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL;
    return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
                              0.f /* no dithering */, 0, picture);
  }
  if (!WebPPictureAlloc(picture)) return 0;

  VP8LDspInit();
  WebPInitAlphaProcessing();

  if (import_alpha) {
    // dst[] byte order is {a,r,g,b} for big-endian, {b,g,r,a} for little endian
    uint32_t* dst = picture->argb;
    // The source bytes already match the destination layout: plain row copy.
    const int do_copy = (ALPHA_OFFSET == 3) && swap_rb;
    assert(step == 4);
    if (do_copy) {
      for (y = 0; y < height; ++y) {
        memcpy(dst, rgb, width * 4);
        rgb += rgb_stride;
        dst += picture->argb_stride;
      }
    } else {
      for (y = 0; y < height; ++y) {
#ifdef WORDS_BIGENDIAN
        // BGRA or RGBA input order.
        const uint8_t* a_ptr = rgb + 3;
        WebPPackARGB(a_ptr, r_ptr, g_ptr, b_ptr, width, dst);
        r_ptr += rgb_stride;
        g_ptr += rgb_stride;
        b_ptr += rgb_stride;
#else
        // RGBA input order. Need to swap R and B.
        VP8LConvertBGRAToRGBA((const uint32_t*)rgb, width, (uint8_t*)dst);
#endif
        rgb += rgb_stride;
        dst += picture->argb_stride;
      }
    }
  } else {
    uint32_t* dst = picture->argb;
    assert(step >= 3);
    for (y = 0; y < height; ++y) {
      WebPPackRGB(r_ptr, g_ptr, b_ptr, width, step, dst);
      r_ptr += rgb_stride;
      g_ptr += rgb_stride;
      b_ptr += rgb_stride;
      dst += picture->argb_stride;
    }
  }
  return 1;
}
1160 
1161 // Public API
1162 
1163 #if !defined(WEBP_REDUCE_CSP)
1164 
// Imports 3-bytes-per-pixel samples in b,g,r order (no alpha).
// Returns 0 if 'picture' or 'rgb' is NULL, else the result of Import().
int WebPPictureImportBGR(WebPPicture* picture,
                         const uint8_t* rgb, int rgb_stride) {
  if (picture == NULL || rgb == NULL) return 0;
  return Import(picture, rgb, rgb_stride,
                /* step = */ 3, /* swap_rb = */ 1, /* import_alpha = */ 0);
}
1171 
// Imports 4-bytes-per-pixel samples in b,g,r,a order, alpha included.
// Returns 0 if 'picture' or 'rgba' is NULL, else the result of Import().
int WebPPictureImportBGRA(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  if (picture == NULL || rgba == NULL) return 0;
  return Import(picture, rgba, rgba_stride,
                /* step = */ 4, /* swap_rb = */ 1, /* import_alpha = */ 1);
}
1178 
1179 
// Imports 4-bytes-per-pixel samples in b,g,r order; the fourth byte of each
// pixel is not imported as alpha.
// Returns 0 if 'picture' or 'rgba' is NULL, else the result of Import().
int WebPPictureImportBGRX(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  if (picture == NULL || rgba == NULL) return 0;
  return Import(picture, rgba, rgba_stride,
                /* step = */ 4, /* swap_rb = */ 1, /* import_alpha = */ 0);
}
1186 
1187 #endif   // WEBP_REDUCE_CSP
1188 
// Imports 3-bytes-per-pixel samples in r,g,b order (no alpha).
// Returns 0 if 'picture' or 'rgb' is NULL, else the result of Import().
int WebPPictureImportRGB(WebPPicture* picture,
                         const uint8_t* rgb, int rgb_stride) {
  if (picture == NULL || rgb == NULL) return 0;
  return Import(picture, rgb, rgb_stride,
                /* step = */ 3, /* swap_rb = */ 0, /* import_alpha = */ 0);
}
1195 
// Imports 4-bytes-per-pixel samples in r,g,b,a order, alpha included.
// Returns 0 if 'picture' or 'rgba' is NULL, else the result of Import().
int WebPPictureImportRGBA(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  if (picture == NULL || rgba == NULL) return 0;
  return Import(picture, rgba, rgba_stride,
                /* step = */ 4, /* swap_rb = */ 0, /* import_alpha = */ 1);
}
1202 
// Imports 4-bytes-per-pixel samples in r,g,b order; the fourth byte of each
// pixel is not imported as alpha.
// Returns 0 if 'picture' or 'rgba' is NULL, else the result of Import().
int WebPPictureImportRGBX(WebPPicture* picture,
                          const uint8_t* rgba, int rgba_stride) {
  if (picture == NULL || rgba == NULL) return 0;
  return Import(picture, rgba, rgba_stride,
                /* step = */ 4, /* swap_rb = */ 0, /* import_alpha = */ 0);
}
1209 
1210 //------------------------------------------------------------------------------
1211