1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "./vpx_config.h"
12 #include "./vpx_dsp_rtcd.h"
13 
14 #include "vpx_ports/mem.h"
15 #include "vpx/vpx_integer.h"
16 
17 #include "vpx_dsp/variance.h"
18 
// 2-tap bilinear filter coefficients, one { tap0, tap1 } pair per row. Every
// pair sums to 128, and row k gives the second tap a weight of 16 * k. Rows
// are selected by the x_offset / y_offset arguments of the sub-pixel variance
// functions generated in this file.
static const uint8_t bilinear_filters[8][2] = {
  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
  { 64, 64 }, { 48, 80 },  { 32, 96 }, { 16, 112 },
};
23 
// Returns the sum of squared differences between a 4x4 block of source
// pixels and a 4x4 block of reference pixels. src_stride and ref_stride are
// the distances, in bytes, between the starts of consecutive rows.
uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride,
                            const uint8_t *ref_ptr, int ref_stride) {
  int total = 0;
  int rows_left = 4;

  while (rows_left-- > 0) {
    int col;

    for (col = 0; col < 4; ++col) {
      const int delta = (int)src_ptr[col] - (int)ref_ptr[col];
      total += delta * delta;
    }

    // Step both blocks down to their next row.
    src_ptr += src_stride;
    ref_ptr += ref_stride;
  }

  return (uint32_t)total;
}
41 
// Returns the sum of the squares of the 256 int16_t values starting at
// src_ptr. The total is accumulated in unsigned int arithmetic.
uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *const end = src_ptr + 256;
  unsigned int total = 0;

  while (src_ptr != end) {
    const int value = *src_ptr++;
    total += value * value;
  }

  return total;
}
51 
// Accumulates, over a w x h block, the sum of the per-pixel differences
// (src - ref) into *sum and the sum of their squares into *sse. Both outputs
// are reset to zero first, so they stay zero when w or h is not positive.
static void variance(const uint8_t *src_ptr, int src_stride,
                     const uint8_t *ref_ptr, int ref_stride, int w, int h,
                     uint32_t *sse, int *sum) {
  int row, col;

  *sse = 0;
  *sum = 0;

  for (row = 0; row < h; ++row, src_ptr += src_stride, ref_ptr += ref_stride) {
    for (col = 0; col < w; ++col) {
      const int delta = (int)src_ptr[col] - (int)ref_ptr[col];
      *sum += delta;
      *sse += delta * delta;
    }
  }
}
71 
72 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
73 // or vertical direction to produce the filtered output block. Used to implement
74 // the first-pass of 2-D separable filter.
75 //
76 // Produces int16_t output to retain precision for the next pass. Two filter
77 // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is
78 // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride).
79 // It defines the offset required to move from one input to the next.
var_filter_block2d_bil_first_pass(const uint8_t * src_ptr,uint16_t * ref_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)80 static void var_filter_block2d_bil_first_pass(
81     const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line,
82     int pixel_step, unsigned int output_height, unsigned int output_width,
83     const uint8_t *filter) {
84   unsigned int i, j;
85 
86   for (i = 0; i < output_height; ++i) {
87     for (j = 0; j < output_width; ++j) {
88       ref_ptr[j] = ROUND_POWER_OF_TWO(
89           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
90           FILTER_BITS);
91 
92       ++src_ptr;
93     }
94 
95     src_ptr += src_pixels_per_line - output_width;
96     ref_ptr += output_width;
97   }
98 }
99 
100 // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
101 // or vertical direction to produce the filtered output block. Used to implement
102 // the second-pass of 2-D separable filter.
103 //
104 // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two
105 // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
106 // filter is applied horizontally (pixel_step = 1) or vertically
107 // (pixel_step = stride). It defines the offset required to move from one input
108 // to the next. Output is 8-bit.
var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint8_t * ref_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)109 static void var_filter_block2d_bil_second_pass(
110     const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line,
111     unsigned int pixel_step, unsigned int output_height,
112     unsigned int output_width, const uint8_t *filter) {
113   unsigned int i, j;
114 
115   for (i = 0; i < output_height; ++i) {
116     for (j = 0; j < output_width; ++j) {
117       ref_ptr[j] = ROUND_POWER_OF_TWO(
118           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
119           FILTER_BITS);
120       ++src_ptr;
121     }
122 
123     src_ptr += src_pixels_per_line - output_width;
124     ref_ptr += output_width;
125   }
126 }
127 
/* Defines vpx_variance<W>x<H>_c(): stores the sum of squared differences of a
 * WxH block in *sse and returns *sse - sum^2 / (W * H), where sum is the sum
 * of the pixel differences. The sum^2 term is computed in 64-bit arithmetic.
 */
#define VAR(W, H)                                                            \
  uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
                                     const uint8_t *ref_ptr, int ref_stride, \
                                     uint32_t *sse) {                        \
    int diff_sum;                                                            \
    int64_t sum_sq;                                                          \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,            \
             &diff_sum);                                                     \
    sum_sq = (int64_t)diff_sum * diff_sum;                                   \
    return *sse - (uint32_t)(sum_sq / (W * H));                              \
  }
136 
/* Defines vpx_sub_pixel_variance<W>x<H>_c(): bilinearly filters the source
 * block (first pass along rows with the x_offset taps over H + 1 rows, second
 * pass along columns with the y_offset taps) and returns the WxH variance of
 * the filtered block against ref_ptr, storing the squared error in *sse.
 */
#define SUBPIX_VAR(W, H)                                                     \
  uint32_t vpx_sub_pixel_variance##W##x##H##_c(                              \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t horiz[(H + 1) * W];                                             \
    uint8_t filtered[H * W];                                                 \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, horiz, src_stride, 1, H + 1,  \
                                      W, x_taps);                            \
    var_filter_block2d_bil_second_pass(horiz, filtered, W, W, H, W, y_taps); \
                                                                             \
    return vpx_variance##W##x##H##_c(filtered, W, ref_ptr, ref_stride, sse); \
  }
151 
/* Defines vpx_sub_pixel_avg_variance<W>x<H>_c(): bilinearly filters the source
 * block as the sub-pixel variance functions do, averages the filtered block
 * with second_pred via vpx_comp_avg_pred_c(), and returns the WxH variance of
 * that average against ref_ptr, storing the squared error in *sse.
 */
#define SUBPIX_AVG_VAR(W, H)                                                 \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                          \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t horiz[(H + 1) * W];                                             \
    uint8_t filtered[H * W];                                                 \
    DECLARE_ALIGNED(16, uint8_t, averaged[H * W]);                           \
    const uint8_t *const x_taps = bilinear_filters[x_offset];                \
    const uint8_t *const y_taps = bilinear_filters[y_offset];                \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, horiz, src_stride, 1, H + 1,  \
                                      W, x_taps);                            \
    var_filter_block2d_bil_second_pass(horiz, filtered, W, W, H, W, y_taps); \
                                                                             \
    vpx_comp_avg_pred_c(averaged, second_pred, W, H, filtered, W);           \
                                                                             \
    return vpx_variance##W##x##H##_c(averaged, W, ref_ptr, ref_stride, sse); \
  }
170 
/* Defines vpx_get<W>x<H>var_c(): like the variance functions, but instead of
 * returning sse - sum^2 / (w * h) it hands both raw accumulators back to the
 * caller through the sse and sum pointers.
 */
#define GET_VAR(W, H)                                                  \
  void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride, \
                               const uint8_t *ref_ptr, int ref_stride, \
                               uint32_t *sse, int *sum) {              \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,      \
             sum);                                                     \
  }
181 
/* Defines vpx_mse<W>x<H>_c(): like the variance functions, but without the
 * sum^2 / (w * h) correction. The sum of squared differences is stored in
 * *sse and also returned; the sum of differences is computed and discarded.
 */
#define MSE(W, H)                                                       \
  uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
                                const uint8_t *ref_ptr, int ref_stride, \
                                uint32_t *sse) {                        \
    int unused_sum;                                                     \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,       \
             &unused_sum);                                              \
    return *sse;                                                        \
  }
194 
/* Emits the plain, sub-pixel and sub-pixel-average variance functions for one
 * WxH block size, so all three forms exist for the same set of sizes.
 */
#define VARIANCES(W, H) VAR(W, H) SUBPIX_VAR(W, H) SUBPIX_AVG_VAR(W, H)
200 
// Instantiate the plain, sub-pixel and sub-pixel-average variance functions
// for each block size listed below.
VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)

// Instantiate the sse/sum getters vpx_get16x16var_c and vpx_get8x8var_c.
GET_VAR(16, 16)
GET_VAR(8, 8)

// Instantiate the MSE functions for the block sizes listed below.
MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)
222 
// Writes to comp_pred the rounded average of pred and ref for a width x height
// block, computed as ROUND_POWER_OF_TWO(pred + ref, 1) per pixel. comp_pred
// and pred both advance by width per row; ref advances by ref_stride.
void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int rows_left;

  for (rows_left = height; rows_left > 0; --rows_left) {
    int x;

    for (x = 0; x < width; ++x) {
      const int pair_sum = pred[x] + ref[x];
      comp_pred[x] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    ref += ref_stride;
    pred += width;
    comp_pred += width;
  }
}
237 
238 #if CONFIG_VP9_HIGHBITDEPTH
// Accumulates, over a w x h block of 16-bit samples, the sum of the
// per-sample differences (src - ref) into *sum and the sum of their squares
// into *sse. Both outputs are reset to zero first.
//
// src8_ptr and ref8_ptr are byte-pointer handles that CONVERT_TO_SHORTPTR
// turns back into uint16_t pointers. The strides are added to those uint16_t
// pointers, so they count samples rather than bytes.
static void highbd_variance64(const uint8_t *src8_ptr, int src_stride,
                              const uint8_t *ref8_ptr, int ref_stride, int w,
                              int h, uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8_ptr);
  uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref8_ptr);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = src_ptr[j] - ref_ptr[j];
      *sum += diff;
      // Square in 64 bits: uint16_t samples can differ by up to 65535, and
      // 65535 * 65535 does not fit in a 32-bit int (signed overflow is
      // undefined behavior). The result is unchanged for smaller differences.
      *sse += (uint64_t)((int64_t)diff * diff);
    }
    src_ptr += src_stride;
    ref_ptr += ref_stride;
  }
}
259 
// 8-bit flavour of the high bit-depth variance accumulators: runs
// highbd_variance64() over the w x h block and narrows its 64-bit totals to
// the 32-bit *sse and int *sum outputs without any scaling.
static void highbd_8_variance(const uint8_t *src8_ptr, int src_stride,
                              const uint8_t *ref8_ptr, int ref_stride, int w,
                              int h, uint32_t *sse, int *sum) {
  uint64_t sse64 = 0;
  int64_t sum64 = 0;

  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse64,
                    &sum64);

  *sum = (int)sum64;
  *sse = (uint32_t)sse64;
}
270 
// 10-bit flavour of the high bit-depth variance accumulators: runs
// highbd_variance64() over the w x h block, then reduces the totals with
// ROUND_POWER_OF_TWO, the squared error by 4 bits and the sum by 2 bits,
// before narrowing them to *sse and *sum.
// NOTE(review): presumably this rescales 10-bit results to the 8-bit range;
// confirm against the callers.
static void highbd_10_variance(const uint8_t *src8_ptr, int src_stride,
                               const uint8_t *ref8_ptr, int ref_stride, int w,
                               int h, uint32_t *sse, int *sum) {
  uint64_t sse64 = 0;
  int64_t sum64 = 0;

  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse64,
                    &sum64);

  *sum = (int)ROUND_POWER_OF_TWO(sum64, 2);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 4);
}
281 
// 12-bit flavour of the high bit-depth variance accumulators: runs
// highbd_variance64() over the w x h block, then reduces the totals with
// ROUND_POWER_OF_TWO, the squared error by 8 bits and the sum by 4 bits,
// before narrowing them to *sse and *sum.
// NOTE(review): presumably this rescales 12-bit results to the 8-bit range;
// confirm against the callers.
static void highbd_12_variance(const uint8_t *src8_ptr, int src_stride,
                               const uint8_t *ref8_ptr, int ref_stride, int w,
                               int h, uint32_t *sse, int *sum) {
  uint64_t sse64 = 0;
  int64_t sum64 = 0;

  highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse64,
                    &sum64);

  *sum = (int)ROUND_POWER_OF_TWO(sum64, 4);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse64, 8);
}
292 
/* Defines the 8-, 10- and 12-bit vpx_highbd_<bd>_variance<W>x<H>_c()
 * functions. Each stores the squared error from highbd_<bd>_variance() in
 * *sse and returns *sse - sum^2 / (W * H). The 10- and 12-bit versions
 * compute that difference in 64 bits and return 0 when it is negative; the
 * 8-bit version subtracts directly in uint32_t.
 */
#define HIGHBD_VAR(W, H)                                                    \
  uint32_t vpx_highbd_8_variance##W##x##H##_c(                              \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int diff_sum;                                                           \
    int64_t mean_sq;                                                        \
    highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse,  \
                      &diff_sum);                                           \
    mean_sq = ((int64_t)diff_sum * diff_sum) / (W * H);                     \
    return *sse - (uint32_t)mean_sq;                                        \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_10_variance##W##x##H##_c(                             \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int diff_sum;                                                           \
    int64_t result;                                                         \
    highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &diff_sum);                                          \
    result = (int64_t)(*sse) - (((int64_t)diff_sum * diff_sum) / (W * H));  \
    if (result < 0) return 0;                                               \
    return (uint32_t)result;                                                \
  }                                                                         \
                                                                            \
  uint32_t vpx_highbd_12_variance##W##x##H##_c(                             \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, uint32_t *sse) {                                      \
    int diff_sum;                                                           \
    int64_t result;                                                         \
    highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \
                       &diff_sum);                                          \
    result = (int64_t)(*sse) - (((int64_t)diff_sum * diff_sum) / (W * H));  \
    if (result < 0) return 0;                                               \
    return (uint32_t)result;                                                \
  }
324 
/* Emits vpx_highbd_<BD>_get<S>x<S>var_c() for one bit depth BD: a thin wrapper
 * that forwards to highbd_<BD>_variance() for an SxS block and returns the
 * squared error and the sum of differences through the sse and sum pointers.
 */
#define HIGHBD_GET_VAR_BD(BD, S)                                           \
  void vpx_highbd_##BD##_get##S##x##S##var_c(                              \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse, int *sum) {                           \
    highbd_##BD##_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, \
                           sse, sum);                                      \
  }

/* Emits the 8-, 10- and 12-bit getters for an SxS block. */
#define HIGHBD_GET_VAR(S)  \
  HIGHBD_GET_VAR_BD(8, S)  \
  HIGHBD_GET_VAR_BD(10, S) \
  HIGHBD_GET_VAR_BD(12, S)
346 
/* Emits vpx_highbd_<BD>_mse<W>x<H>_c() for one bit depth BD: calls
 * highbd_<BD>_variance() for a WxH block, stores the squared error in *sse and
 * returns it. The sum of differences is computed and discarded.
 */
#define HIGHBD_MSE_BD(BD, W, H)                                            \
  uint32_t vpx_highbd_##BD##_mse##W##x##H##_c(                             \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,      \
      int ref_stride, uint32_t *sse) {                                     \
    int unused_sum;                                                        \
    highbd_##BD##_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, \
                           sse, &unused_sum);                              \
    return *sse;                                                           \
  }

/* Emits the 8-, 10- and 12-bit MSE functions for a WxH block. */
#define HIGHBD_MSE(W, H)   \
  HIGHBD_MSE_BD(8, W, H)   \
  HIGHBD_MSE_BD(10, W, H)  \
  HIGHBD_MSE_BD(12, W, H)
374 
highbd_var_filter_block2d_bil_first_pass(const uint8_t * src_ptr8,uint16_t * output_ptr,unsigned int src_pixels_per_line,int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)375 static void highbd_var_filter_block2d_bil_first_pass(
376     const uint8_t *src_ptr8, uint16_t *output_ptr,
377     unsigned int src_pixels_per_line, int pixel_step,
378     unsigned int output_height, unsigned int output_width,
379     const uint8_t *filter) {
380   unsigned int i, j;
381   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
382   for (i = 0; i < output_height; ++i) {
383     for (j = 0; j < output_width; ++j) {
384       output_ptr[j] = ROUND_POWER_OF_TWO(
385           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
386           FILTER_BITS);
387 
388       ++src_ptr;
389     }
390 
391     // Next row...
392     src_ptr += src_pixels_per_line - output_width;
393     output_ptr += output_width;
394   }
395 }
396 
highbd_var_filter_block2d_bil_second_pass(const uint16_t * src_ptr,uint16_t * output_ptr,unsigned int src_pixels_per_line,unsigned int pixel_step,unsigned int output_height,unsigned int output_width,const uint8_t * filter)397 static void highbd_var_filter_block2d_bil_second_pass(
398     const uint16_t *src_ptr, uint16_t *output_ptr,
399     unsigned int src_pixels_per_line, unsigned int pixel_step,
400     unsigned int output_height, unsigned int output_width,
401     const uint8_t *filter) {
402   unsigned int i, j;
403 
404   for (i = 0; i < output_height; ++i) {
405     for (j = 0; j < output_width; ++j) {
406       output_ptr[j] = ROUND_POWER_OF_TWO(
407           (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
408           FILTER_BITS);
409       ++src_ptr;
410     }
411 
412     src_ptr += src_pixels_per_line - output_width;
413     output_ptr += output_width;
414   }
415 }
416 
/* Emits vpx_highbd_<BD>_sub_pixel_variance<W>x<H>_c() for one bit depth BD.
 * The function bilinearly filters the source block (first pass along rows
 * with the x_offset taps over H + 1 rows, second pass along columns with the
 * y_offset taps) and returns vpx_highbd_<BD>_variance<W>x<H>_c() of the
 * filtered block against ref_ptr, which also stores the squared error in
 * *sse. The three bit depths differ only in the BD token, so they share this
 * single body.
 */
#define HIGHBD_SUBPIX_VAR_BD(BD, W, H)                                         \
  uint32_t vpx_highbd_##BD##_sub_pixel_variance##W##x##H##_c(                  \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {                 \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
                                                                               \
    highbd_var_filter_block2d_bil_first_pass(                                  \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                              bilinear_filters[y_offset]);     \
                                                                               \
    return vpx_highbd_##BD##_variance##W##x##H##_c(                            \
        CONVERT_TO_BYTEPTR(temp2), W, ref_ptr, ref_stride, sse);               \
  }

/* Emits the 8-, 10- and 12-bit sub-pixel variance functions for a WxH block. */
#define HIGHBD_SUBPIX_VAR(W, H)   \
  HIGHBD_SUBPIX_VAR_BD(8, W, H)   \
  HIGHBD_SUBPIX_VAR_BD(10, W, H)  \
  HIGHBD_SUBPIX_VAR_BD(12, W, H)
462 
/* Emits vpx_highbd_<BD>_sub_pixel_avg_variance<W>x<H>_c() for one bit depth
 * BD. The function bilinearly filters the source block as the sub-pixel
 * variance functions do, averages the filtered block with second_pred via
 * vpx_highbd_comp_avg_pred_c(), and returns
 * vpx_highbd_<BD>_variance<W>x<H>_c() of that average against ref_ptr, which
 * also stores the squared error in *sse. The three bit depths differ only in
 * the BD token, so they share this single body.
 */
#define HIGHBD_SUBPIX_AVG_VAR_BD(BD, W, H)                                     \
  uint32_t vpx_highbd_##BD##_sub_pixel_avg_variance##W##x##H##_c(              \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,      \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                   \
      const uint8_t *second_pred) {                                            \
    uint16_t fdata3[(H + 1) * W];                                              \
    uint16_t temp2[H * W];                                                     \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                               \
                                                                               \
    highbd_var_filter_block2d_bil_first_pass(                                  \
        src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                              bilinear_filters[y_offset]);     \
                                                                               \
    vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H,  \
                               temp2, W);                                      \
                                                                               \
    return vpx_highbd_##BD##_variance##W##x##H##_c(                            \
        CONVERT_TO_BYTEPTR(temp3), W, ref_ptr, ref_stride, sse);               \
  }

/* Emits the 8-, 10- and 12-bit sub-pixel average variance functions for a WxH
 * block.
 */
#define HIGHBD_SUBPIX_AVG_VAR(W, H)   \
  HIGHBD_SUBPIX_AVG_VAR_BD(8, W, H)   \
  HIGHBD_SUBPIX_AVG_VAR_BD(10, W, H)  \
  HIGHBD_SUBPIX_AVG_VAR_BD(12, W, H)
523 
/* Emits the plain, sub-pixel and sub-pixel-average high bit-depth variance
 * functions for one WxH block size, so all three forms exist for the same set
 * of sizes.
 */
#define HIGHBD_VARIANCES(W, H)               \
  HIGHBD_VAR(W, H) HIGHBD_SUBPIX_VAR(W, H)   \
  HIGHBD_SUBPIX_AVG_VAR(W, H)
529 
// Instantiate the 8-, 10- and 12-bit plain, sub-pixel and sub-pixel-average
// variance functions for each block size listed below.
HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)

// Instantiate the high bit-depth sse/sum getters for 8x8 and 16x16 blocks.
HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

// Instantiate the high bit-depth MSE functions for the block sizes listed
// below.
HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)
551 
// Writes to comp_pred the rounded average of pred and ref for a width x height
// block of 16-bit samples, computed as ROUND_POWER_OF_TWO(pred + ref, 1) per
// sample. comp_pred and pred both advance by width per row; ref advances by
// ref_stride.
// NOTE(review): the sub-pixel average macros in this file call
// vpx_highbd_comp_avg_pred_c; presumably vpx_dsp_rtcd.h maps this name to that
// symbol. Confirm against the generated header.
void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint16_t *pred,
                              int width, int height, const uint16_t *ref,
                              int ref_stride) {
  int rows_left;

  for (rows_left = height; rows_left > 0; --rows_left) {
    int x;

    for (x = 0; x < width; ++x) {
      const int pair_sum = pred[x] + ref[x];
      comp_pred[x] = ROUND_POWER_OF_TWO(pair_sum, 1);
    }

    ref += ref_stride;
    pred += width;
    comp_pred += width;
  }
}
566 #endif  // CONFIG_VP9_HIGHBITDEPTH
567