1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vpx_config.h"
12 #include "./vpx_dsp_rtcd.h"
13 
14 #include "vpx_ports/mem.h"
15 #include "vpx/vpx_integer.h"
16 
17 #include "vpx_dsp/variance.h"
18 
// Two-tap bilinear filter coefficients, selected by the xoffset / yoffset
// value passed to the sub-pixel variance functions. Row k holds
// { 128 - 16 * k, 16 * k }, so each pair of taps sums to 128. The first tap
// weights the current sample and the second tap the sample one pixel_step
// further on.
static const uint8_t bilinear_filters[8][2] = {
  { 128, 0 },
  { 112, 16 },
  { 96, 32 },
  { 80, 48 },
  { 64, 64 },
  { 48, 80 },
  { 32, 96 },
  { 16, 112 },
};
23 
// Returns the sum of squared differences between two 4x4 blocks of 8-bit
// samples. a_stride and b_stride are the distances, in samples, between the
// starts of consecutive rows of a and b.
uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int sse = 0;
  int rows_left;

  for (rows_left = 4; rows_left > 0; --rows_left) {
    int col;

    for (col = 0; col < 4; ++col) {
      const int delta = a[col] - b[col];
      sse += delta * delta;
    }

    // Step both blocks down to their next row.
    a += a_stride;
    b += b_stride;
  }

  return sse;
}
41 
// Returns the sum of the squares of the 256 consecutive 16-bit values that
// start at a.
uint32_t vpx_get_mb_ss_c(const int16_t *a) {
  const int16_t *const end = a + 256;
  unsigned int total = 0;

  while (a != end) {
    const int value = *a++;
    total += value * value;
  }

  return total;
}
51 
// Walks a w x h block of 8-bit samples and accumulates the per-sample
// differences (a - b) into *sum and their squares into *sse. Both outputs are
// reset to zero first, so an empty block yields zero for each. a_stride and
// b_stride are the row strides of a and b in samples.
static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int y;

  *sum = 0;
  *sse = 0;

  for (y = 0; y < h; ++y, a += a_stride, b += b_stride) {
    int x;

    for (x = 0; x < w; ++x) {
      const int delta = a[x] - b[x];
      *sum += delta;
      *sse += delta * delta;
    }
  }
}
70 
71 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
72 // or vertical direction to produce the filtered output block. Used to implement
73 // the first-pass of 2-D separable filter.
74 //
75 // Produces int16_t output to retain precision for the next pass. Two filter
76 // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
77 // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
78 // It defines the offset required to move from one input to the next.
var_filter_block2d_bil_first_pass(const uint8_t * a,uint16_t * b,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)79 static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
80                                               unsigned int src_pixels_per_line,
81                                               int pixel_step,
82                                               unsigned int output_height,
83                                               unsigned int output_width,
84                                               const uint8_t *filter) {
85   unsigned int i, j;
86 
87   for (i = 0; i < output_height; ++i) {
88     for (j = 0; j < output_width; ++j) {
89       b[j] = ROUND_POWER_OF_TWO(
90           (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
91 
92       ++a;
93     }
94 
95     a += src_pixels_per_line - output_width;
96     b += output_width;
97   }
98 }
99 
100 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
101 // or vertical direction to produce the filtered output block. Used to implement
102 // the second-pass of 2-D separable filter.
103 //
104 // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
105 // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
106 // filter is applied horizontally (pixel_step = 1) or vertically
107 // (pixel_step = stride). It defines the offset required to move from one input
108 // to the next. Output is 8-bit.
var_filter_block2d_bil_second_pass(const uint16_t * a,uint8_t * b,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)109 static void var_filter_block2d_bil_second_pass(const uint16_t *a, uint8_t *b,
110                                                unsigned int src_pixels_per_line,
111                                                unsigned int pixel_step,
112                                                unsigned int output_height,
113                                                unsigned int output_width,
114                                                const uint8_t *filter) {
115   unsigned int i, j;
116 
117   for (i = 0; i < output_height; ++i) {
118     for (j = 0; j < output_width; ++j) {
119       b[j] = ROUND_POWER_OF_TWO(
120           (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
121       ++a;
122     }
123 
124     a += src_pixels_per_line - output_width;
125     b += output_width;
126   }
127 }
128 
/* VAR(W, H) defines vpx_variance<W>x<H>_c(). The generated function stores
 * the sum of squared differences between the W x H blocks a and b in *sse and
 * returns sse - sum^2 / (W * H), where sum is the sum of the differences.
 */
#define VAR(W, H) \
  uint32_t vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) { \
    int sum; \
    int64_t sum_sq; \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
    sum_sq = (int64_t)sum * sum; \
    return *sse - (uint32_t)(sum_sq / (W * H)); \
  }
137 
/* SUBPIX_VAR(W, H) defines vpx_sub_pixel_variance<W>x<H>_c(). The generated
 * function filters a horizontally with bilinear_filters[xoffset], then
 * vertically with bilinear_filters[yoffset], and returns the variance of the
 * filtered W x H block against b (setting *sse through the variance call).
 * The horizontal pass produces H + 1 rows because the vertical pass reads each
 * sample together with the one a full row (W entries) after it.
 */
#define SUBPIX_VAR(W, H) \
  uint32_t vpx_sub_pixel_variance##W##x##H##_c( \
      const uint8_t *a, int a_stride, int xoffset, int yoffset, \
      const uint8_t *b, int b_stride, uint32_t *sse) { \
    uint16_t hfilt[(H + 1) * W]; \
    uint8_t vfilt[H * W]; \
    const uint8_t *const htaps = bilinear_filters[xoffset]; \
    const uint8_t *const vtaps = bilinear_filters[yoffset]; \
    \
    var_filter_block2d_bil_first_pass(a, hfilt, a_stride, 1, H + 1, W, \
                                      htaps); \
    var_filter_block2d_bil_second_pass(hfilt, vfilt, W, W, H, W, vtaps); \
    \
    return vpx_variance##W##x##H##_c(vfilt, W, b, b_stride, sse); \
  }
152 
/* SUBPIX_AVG_VAR(W, H) defines vpx_sub_pixel_avg_variance<W>x<H>_c(). The
 * generated function applies the same horizontal-then-vertical bilinear
 * filtering of a as the sub-pixel variance, combines the filtered block with
 * second_pred through vpx_comp_avg_pred(), and returns the variance of that
 * combined W x H block against b (setting *sse through the variance call).
 */
#define SUBPIX_AVG_VAR(W, H) \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c( \
      const uint8_t *a, int a_stride, int xoffset, int yoffset, \
      const uint8_t *b, int b_stride, uint32_t *sse, \
      const uint8_t *second_pred) { \
    uint16_t hfilt[(H + 1) * W]; \
    uint8_t vfilt[H * W]; \
    DECLARE_ALIGNED(16, uint8_t, avg[H * W]); \
    const uint8_t *const htaps = bilinear_filters[xoffset]; \
    const uint8_t *const vtaps = bilinear_filters[yoffset]; \
    \
    var_filter_block2d_bil_first_pass(a, hfilt, a_stride, 1, H + 1, W, \
                                      htaps); \
    var_filter_block2d_bil_second_pass(hfilt, vfilt, W, W, H, W, vtaps); \
    \
    vpx_comp_avg_pred(avg, second_pred, W, H, vfilt, W); \
    \
    return vpx_variance##W##x##H##_c(avg, W, b, b_stride, sse); \
  }
171 
/* GET_VAR(W, H) defines vpx_get<W>x<H>var_c(). It runs the same computation
 * as the variance functions but takes an extra output parameter, sum, and
 * hands back sse and sum by reference instead of returning
 * sse - sum^2 / (W * H).
 */
#define GET_VAR(W, H) \
  void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \
                               const uint8_t *b, int b_stride, \
                               uint32_t *sse, int *sum) { \
    variance(a, a_stride, b, b_stride, W, H, sse, sum); \
  }
182 
/* MSE(W, H) defines vpx_mse<W>x<H>_c(). It runs the same computation as the
 * variance functions but does not subtract sum^2 / (W * H): the sum of squared
 * differences is returned in addition to being stored in *sse.
 */
#define MSE(W, H) \
  uint32_t vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) { \
    int unused_sum; \
    variance(a, a_stride, b, b_stride, W, H, sse, &unused_sum); \
    return *sse; \
  }
195 
/* VARIANCES(W, H) emits all three variance forms for one block size: plain
 * (VAR), sub-pixel (SUBPIX_VAR) and sub-pixel averaged (SUBPIX_AVG_VAR).
 */
#define VARIANCES(W, H) \
  VAR(W, H) \
  SUBPIX_VAR(W, H) \
  SUBPIX_AVG_VAR(W, H)
201 
202 VARIANCES(64, 64)
203 VARIANCES(64, 32)
204 VARIANCES(32, 64)
205 VARIANCES(32, 32)
206 VARIANCES(32, 16)
207 VARIANCES(16, 32)
208 VARIANCES(16, 16)
209 VARIANCES(16, 8)
210 VARIANCES(8, 16)
211 VARIANCES(8, 8)
212 VARIANCES(8, 4)
213 VARIANCES(4, 8)
214 VARIANCES(4, 4)
215 
216 GET_VAR(16, 16)
217 GET_VAR(8, 8)
218 
219 MSE(16, 16)
220 MSE(16, 8)
221 MSE(8, 16)
222 MSE(8, 8)
223 
// Writes to comp_pred the per-sample combination of pred and ref, computed as
// ROUND_POWER_OF_TWO(pred + ref, 1), over a width x height block. comp_pred
// and pred are stored densely (their row stride is width); ref rows are
// ref_stride samples apart.
void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    ref += ref_stride;
    pred += width;
    comp_pred += width;
  }
}
238 
239 #if CONFIG_VP9_HIGHBITDEPTH
// Accumulates, over a w x h block of 16-bit samples, the sum of the per-sample
// differences (a - b) into *sum and the sum of their squares into *sse, using
// 64-bit accumulators. Both outputs are reset to zero first.
//
// a8 and b8 are converted to 16-bit sample pointers with CONVERT_TO_SHORTPTR;
// a_stride and b_stride are row strides counted in 16-bit samples.
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;

  const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      // Square in 64 bits. diff spans [-65535, 65535] for uint16_t inputs, and
      // an int multiplication overflows (undefined behavior) once
      // |diff| > 46340.
      *sse += (uint64_t)((int64_t)diff * diff);
    }
    a += a_stride;
    b += b_stride;
  }
}
260 
// 8-bit-depth wrapper around highbd_variance64: computes sse and sum with
// 64-bit accumulators and stores them unscaled, narrowed to uint32_t and int.
static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse, &wide_sum);

  *sse = (uint32_t)wide_sse;
  *sum = (int)wide_sum;
}
270 
// 10-bit-depth wrapper around highbd_variance64: the 64-bit sse is reduced
// with ROUND_POWER_OF_TWO(sse, 4) and the 64-bit sum with
// ROUND_POWER_OF_TWO(sum, 2) before being narrowed to uint32_t and int.
// NOTE(review): presumably this brings 10-bit results onto the 8-bit scale;
// confirm against callers.
static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse, &wide_sum);

  *sse = (uint32_t)ROUND_POWER_OF_TWO(wide_sse, 4);
  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 2);
}
280 
// 12-bit-depth wrapper around highbd_variance64: the 64-bit sse is reduced
// with ROUND_POWER_OF_TWO(sse, 8) and the 64-bit sum with
// ROUND_POWER_OF_TWO(sum, 4) before being narrowed to uint32_t and int.
// NOTE(review): presumably this brings 12-bit results onto the 8-bit scale;
// confirm against callers.
static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t wide_sse = 0;
  int64_t wide_sum = 0;

  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &wide_sse, &wide_sum);

  *sse = (uint32_t)ROUND_POWER_OF_TWO(wide_sse, 8);
  *sum = (int)ROUND_POWER_OF_TWO(wide_sum, 4);
}
290 
/* HIGHBD_VAR(W, H) defines vpx_highbd_{8,10,12}_variance<W>x<H>_c(). Each
 * generated function stores the (bit-depth scaled) sum of squared differences
 * in *sse and returns sse - sum^2 / (W * H). The 10- and 12-bit versions
 * clamp a negative result to 0; the 8-bit version converts the 64-bit result
 * to uint32_t without clamping.
 */
#define HIGHBD_VAR(W, H) \
  uint32_t vpx_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                              const uint8_t *b, int b_stride, \
                                              uint32_t *sse) { \
    int sum; \
    int64_t var; \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
    return (uint32_t)var; \
  } \
  \
  uint32_t vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, \
                                               int a_stride, \
                                               const uint8_t *b, \
                                               int b_stride, \
                                               uint32_t *sse) { \
    int sum; \
    int64_t var; \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
    if (var < 0) return 0; \
    return (uint32_t)var; \
  } \
  \
  uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, \
                                               int a_stride, \
                                               const uint8_t *b, \
                                               int b_stride, \
                                               uint32_t *sse) { \
    int sum; \
    int64_t var; \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \
    if (var < 0) return 0; \
    return (uint32_t)var; \
  }
319 
/* HIGHBD_GET_VAR(S) defines vpx_highbd_{8,10,12}_get<S>x<S>var_c(). Each
 * generated function forwards to the matching highbd_*_variance helper for an
 * S x S block and hands back sse and sum by reference.
 */
#define HIGHBD_GET_VAR(S) \
  void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                        const uint8_t *ref, int ref_stride, \
                                        uint32_t *sse, int *sum) { \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
  } \
  \
  void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) { \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
  } \
  \
  void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) { \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
  }
338 
/* HIGHBD_MSE(W, H) defines vpx_highbd_{8,10,12}_mse<W>x<H>_c(). Each generated
 * function calls the matching highbd_*_variance helper, ignores the sum, and
 * returns the value it stored in *sse.
 */
#define HIGHBD_MSE(W, H) \
  uint32_t vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse) { \
    int unused_sum; \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, \
                      &unused_sum); \
    return *sse; \
  } \
  \
  uint32_t vpx_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) { \
    int unused_sum; \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, \
                       &unused_sum); \
    return *sse; \
  } \
  \
  uint32_t vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) { \
    int unused_sum; \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, \
                       &unused_sum); \
    return *sse; \
  }
363 
highbd_var_filter_block2d_bil_first_pass(const uint8_t * src_ptr8,uint16_t * output_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)364 static void highbd_var_filter_block2d_bil_first_pass(
365     const uint8_t *src_ptr8, uint16_t *output_ptr,
366     unsigned int src_pixels_per_line, int pixel_step,
367     unsigned int output_height, unsigned int output_width,
368     const uint8_t *filter) {
369   unsigned int i, j;
370   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
371   for (i = 0; i < output_height; ++i) {
372     for (j = 0; j < output_width; ++j) {
373       output_ptr[j] = ROUND_POWER_OF_TWO(
374           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
375           FILTER_BITS);
376 
377       ++src_ptr;
378     }
379 
380     // Next row...
381     src_ptr += src_pixels_per_line - output_width;
382     output_ptr += output_width;
383   }
384 }
385 
highbd_var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint16_t * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)386 static void highbd_var_filter_block2d_bil_second_pass(
387     const uint16_t *src_ptr, uint16_t *output_ptr,
388     unsigned int src_pixels_per_line, unsigned int pixel_step,
389     unsigned int output_height, unsigned int output_width,
390     const uint8_t *filter) {
391   unsigned int i, j;
392 
393   for (i = 0; i < output_height; ++i) {
394     for (j = 0; j < output_width; ++j) {
395       output_ptr[j] = ROUND_POWER_OF_TWO(
396           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
397           FILTER_BITS);
398       ++src_ptr;
399     }
400 
401     src_ptr += src_pixels_per_line - output_width;
402     output_ptr += output_width;
403   }
404 }
405 
/* HIGHBD_SUBPIX_VAR(W, H) defines
 * vpx_highbd_{8,10,12}_sub_pixel_variance<W>x<H>_c(). Each generated function
 * filters src horizontally with bilinear_filters[xoffset] (producing H + 1
 * rows), then vertically with bilinear_filters[yoffset], and returns the
 * matching-bit-depth variance of the filtered W x H block against dst. The
 * filtered 16-bit buffer is handed to the variance function through
 * CONVERT_TO_BYTEPTR.
 */
#define HIGHBD_SUBPIX_VAR(W, H) \
  uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \
      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
      const uint8_t *dst, int dst_stride, uint32_t *sse) { \
    uint16_t hfilt[(H + 1) * W]; \
    uint16_t vfilt[H * W]; \
    const uint8_t *const htaps = bilinear_filters[xoffset]; \
    const uint8_t *const vtaps = bilinear_filters[yoffset]; \
    \
    highbd_var_filter_block2d_bil_first_pass(src, hfilt, src_stride, 1, \
                                             H + 1, W, htaps); \
    highbd_var_filter_block2d_bil_second_pass(hfilt, vfilt, W, W, H, W, \
                                              vtaps); \
    \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(vfilt), W, \
                                              dst, dst_stride, sse); \
  } \
  \
  uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \
      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
      const uint8_t *dst, int dst_stride, uint32_t *sse) { \
    uint16_t hfilt[(H + 1) * W]; \
    uint16_t vfilt[H * W]; \
    const uint8_t *const htaps = bilinear_filters[xoffset]; \
    const uint8_t *const vtaps = bilinear_filters[yoffset]; \
    \
    highbd_var_filter_block2d_bil_first_pass(src, hfilt, src_stride, 1, \
                                             H + 1, W, htaps); \
    highbd_var_filter_block2d_bil_second_pass(hfilt, vfilt, W, W, H, W, \
                                              vtaps); \
    \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(vfilt), W, \
                                               dst, dst_stride, sse); \
  } \
  \
  uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \
      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
      const uint8_t *dst, int dst_stride, uint32_t *sse) { \
    uint16_t hfilt[(H + 1) * W]; \
    uint16_t vfilt[H * W]; \
    const uint8_t *const htaps = bilinear_filters[xoffset]; \
    const uint8_t *const vtaps = bilinear_filters[yoffset]; \
    \
    highbd_var_filter_block2d_bil_first_pass(src, hfilt, src_stride, 1, \
                                             H + 1, W, htaps); \
    highbd_var_filter_block2d_bil_second_pass(hfilt, vfilt, W, W, H, W, \
                                              vtaps); \
    \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(vfilt), W, \
                                               dst, dst_stride, sse); \
  }
451 
/* HIGHBD_SUBPIX_AVG_VAR(W, H) defines
 * vpx_highbd_{8,10,12}_sub_pixel_avg_variance<W>x<H>_c(). Each generated
 * function applies the same horizontal-then-vertical bilinear filtering of
 * src as the high-bit-depth sub-pixel variance, combines the filtered block
 * with second_pred through vpx_highbd_comp_avg_pred(), and returns the
 * matching-bit-depth variance of that combined W x H block against dst.
 */
#define HIGHBD_SUBPIX_AVG_VAR(W, H) \
  uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \
      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
      const uint8_t *dst, int dst_stride, uint32_t *sse, \
      const uint8_t *second_pred) { \
    uint16_t hfilt[(H + 1) * W]; \
    uint16_t vfilt[H * W]; \
    DECLARE_ALIGNED(16, uint16_t, avg[H * W]); \
    const uint8_t *const htaps = bilinear_filters[xoffset]; \
    const uint8_t *const vtaps = bilinear_filters[yoffset]; \
    \
    highbd_var_filter_block2d_bil_first_pass(src, hfilt, src_stride, 1, \
                                             H + 1, W, htaps); \
    highbd_var_filter_block2d_bil_second_pass(hfilt, vfilt, W, W, H, W, \
                                              vtaps); \
    \
    vpx_highbd_comp_avg_pred(avg, second_pred, W, H, \
                             CONVERT_TO_BYTEPTR(vfilt), W); \
    \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(avg), W, \
                                              dst, dst_stride, sse); \
  } \
  \
  uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
      const uint8_t *dst, int dst_stride, uint32_t *sse, \
      const uint8_t *second_pred) { \
    uint16_t hfilt[(H + 1) * W]; \
    uint16_t vfilt[H * W]; \
    DECLARE_ALIGNED(16, uint16_t, avg[H * W]); \
    const uint8_t *const htaps = bilinear_filters[xoffset]; \
    const uint8_t *const vtaps = bilinear_filters[yoffset]; \
    \
    highbd_var_filter_block2d_bil_first_pass(src, hfilt, src_stride, 1, \
                                             H + 1, W, htaps); \
    highbd_var_filter_block2d_bil_second_pass(hfilt, vfilt, W, W, H, W, \
                                              vtaps); \
    \
    vpx_highbd_comp_avg_pred(avg, second_pred, W, H, \
                             CONVERT_TO_BYTEPTR(vfilt), W); \
    \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(avg), W, \
                                               dst, dst_stride, sse); \
  } \
  \
  uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
      const uint8_t *src, int src_stride, int xoffset, int yoffset, \
      const uint8_t *dst, int dst_stride, uint32_t *sse, \
      const uint8_t *second_pred) { \
    uint16_t hfilt[(H + 1) * W]; \
    uint16_t vfilt[H * W]; \
    DECLARE_ALIGNED(16, uint16_t, avg[H * W]); \
    const uint8_t *const htaps = bilinear_filters[xoffset]; \
    const uint8_t *const vtaps = bilinear_filters[yoffset]; \
    \
    highbd_var_filter_block2d_bil_first_pass(src, hfilt, src_stride, 1, \
                                             H + 1, W, htaps); \
    highbd_var_filter_block2d_bil_second_pass(hfilt, vfilt, W, W, H, W, \
                                              vtaps); \
    \
    vpx_highbd_comp_avg_pred(avg, second_pred, W, H, \
                             CONVERT_TO_BYTEPTR(vfilt), W); \
    \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(avg), W, \
                                               dst, dst_stride, sse); \
  }
512 
/* HIGHBD_VARIANCES(W, H) emits all three high-bit-depth variance forms for one
 * block size: plain (HIGHBD_VAR), sub-pixel (HIGHBD_SUBPIX_VAR) and sub-pixel
 * averaged (HIGHBD_SUBPIX_AVG_VAR).
 */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H) \
  HIGHBD_SUBPIX_VAR(W, H) \
  HIGHBD_SUBPIX_AVG_VAR(W, H)
518 
519 HIGHBD_VARIANCES(64, 64)
520 HIGHBD_VARIANCES(64, 32)
521 HIGHBD_VARIANCES(32, 64)
522 HIGHBD_VARIANCES(32, 32)
523 HIGHBD_VARIANCES(32, 16)
524 HIGHBD_VARIANCES(16, 32)
525 HIGHBD_VARIANCES(16, 16)
526 HIGHBD_VARIANCES(16, 8)
527 HIGHBD_VARIANCES(8, 16)
528 HIGHBD_VARIANCES(8, 8)
529 HIGHBD_VARIANCES(8, 4)
530 HIGHBD_VARIANCES(4, 8)
531 HIGHBD_VARIANCES(4, 4)
532 
533 HIGHBD_GET_VAR(8)
534 HIGHBD_GET_VAR(16)
535 
536 HIGHBD_MSE(16, 16)
537 HIGHBD_MSE(16, 8)
538 HIGHBD_MSE(8, 16)
539 HIGHBD_MSE(8, 8)
540 
// High-bit-depth counterpart of the 8-bit compound average. pred8 and ref8
// are converted to 16-bit sample pointers with CONVERT_TO_SHORTPTR, and each
// output sample is ROUND_POWER_OF_TWO(pred + ref, 1). comp_pred and pred are
// stored densely (their row stride is width); ref rows are ref_stride samples
// apart.
void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
                              int width, int height, const uint8_t *ref8,
                              int ref_stride) {
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  int row;

  for (row = 0; row < height; ++row) {
    int col;

    for (col = 0; col < width; ++col) {
      const int pair_sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    ref += ref_stride;
    pred += width;
    comp_pred += width;
  }
}
557 #endif  // CONFIG_VP9_HIGHBITDEPTH
558