/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"

#include "vpx_dsp/variance.h"

static const uint8_t bilinear_filters[8][2] = {
  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
  { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 },
};
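
// Each row of bilinear_filters above holds the two taps for one of the eight
// sub-pel positions selected by the xoffset/yoffset arguments of the sub-pixel
// variance functions below. The taps in every row sum to 128 (FILTER_WEIGHT),
// so the filter output is a weighted average of two neighbouring pixels; for
// example, offset 3 selects { 80, 48 }, weighting the nearer pixel by 80/128
// and its right (or lower) neighbour by 48/128.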

uint32_t vpx_get4x4sse_cs_c(const uint8_t *a, int a_stride, const uint8_t *b,
                            int b_stride) {
  int distortion = 0;
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      int diff = a[c] - b[c];
      distortion += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }

  return distortion;
}

uint32_t vpx_get_mb_ss_c(const int16_t *a) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; ++i) {
    sum += a[i] * a[i];
  }

  return sum;
}

static void variance(const uint8_t *a, int a_stride, const uint8_t *b,
                     int b_stride, int w, int h, uint32_t *sse, int *sum) {
  int i, j;

  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }

    a += a_stride;
    b += b_stride;
  }
}
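
// variance() only accumulates the raw sum of differences and the sum of
// squared differences. The per-size wrappers below (see the VAR() macro)
// convert these into a block variance using the identity
//   N * Var = sum(d^2) - (sum(d))^2 / N,   with N = W * H.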

// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to
// implement the first pass of the 2-D separable filter.
//
// Produces uint16_t output to retain precision for the next pass. The two
// filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one
// input to the next.
static void var_filter_block2d_bil_first_pass(const uint8_t *a, uint16_t *b,
                                              unsigned int src_pixels_per_line,
                                              int pixel_step,
                                              unsigned int output_height,
                                              unsigned int output_width,
                                              const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);

      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

// Applies a 1-D 2-tap bilinear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to
// implement the second pass of the 2-D separable filter.
//
// Requires 16-bit input as produced by var_filter_block2d_bil_first_pass. The
// two filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the
// filter is applied horizontally (pixel_step = 1) or vertically
// (pixel_step = stride). It defines the offset required to move from one
// input to the next. Output is 8-bit.
static void var_filter_block2d_bil_second_pass(
    const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
    unsigned int pixel_step, unsigned int output_height,
    unsigned int output_width, const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      b[j] = ROUND_POWER_OF_TWO(
          (int)a[0] * filter[0] + (int)a[pixel_step] * filter[1], FILTER_BITS);
      ++a;
    }

    a += src_pixels_per_line - output_width;
    b += output_width;
  }
}

#define VAR(W, H)                                                    \
  uint32_t vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                     const uint8_t *b, int b_stride, \
                                     uint32_t *sse) {                \
    int sum;                                                         \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));        \
  }
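
// Worked example for VAR(4, 4): if the two blocks are identical except for one
// pixel that differs by 2, then sse == 4 and sum == 2, so the returned value
// is 4 - (2 * 2) / 16 == 4 (the division is integer division).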

#define SUBPIX_VAR(W, H)                                                \
  uint32_t vpx_sub_pixel_variance##W##x##H##_c(                         \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse) {                  \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters[xoffset]);       \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters[yoffset]);      \
                                                                        \
    return vpx_variance##W##x##H##_c(temp2, W, b, b_stride, sse);       \
  }
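
// fdata3 holds (H + 1) rows because the vertical (second) pass applies a 2-tap
// filter: producing H filtered output rows requires H + 1 intermediate rows
// from the horizontal pass. The filtered block in temp2 is then compared
// against the reference block with the plain variance function above.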

#define SUBPIX_AVG_VAR(W, H)                                            \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                     \
      const uint8_t *a, int a_stride, int xoffset, int yoffset,         \
      const uint8_t *b, int b_stride, uint32_t *sse,                    \
      const uint8_t *second_pred) {                                     \
    uint16_t fdata3[(H + 1) * W];                                       \
    uint8_t temp2[H * W];                                               \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                         \
                                                                        \
    var_filter_block2d_bil_first_pass(a, fdata3, a_stride, 1, H + 1, W, \
                                      bilinear_filters[xoffset]);       \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,       \
                                       bilinear_filters[yoffset]);      \
                                                                        \
    vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W);              \
                                                                        \
    return vpx_variance##W##x##H##_c(temp3, W, b, b_stride, sse);       \
  }

/* Identical to the variance call except it takes an additional parameter, sum,
 * and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / w*h
 */
#define GET_VAR(W, H)                                                         \
  void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride,                \
                               const uint8_t *b, int b_stride, uint32_t *sse, \
                               int *sum) {                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, sum);                       \
  }

/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / w*h and returns sse in addition to modifying the passed-in
 * variable.
 */
#define MSE(W, H)                                               \
  uint32_t vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
                                const uint8_t *b, int b_stride, \
                                uint32_t *sse) {                \
    int sum;                                                    \
    variance(a, a_stride, b, b_stride, W, H, sse, &sum);        \
    return *sse;                                                \
  }
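
// Note: despite the name, these functions return the raw sum of squared
// errors over the W x H block (the value accumulated in *sse); no division by
// the number of pixels is performed.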

/* All three forms of the variance are available in the same sizes. */
#define VARIANCES(W, H) \
  VAR(W, H)             \
  SUBPIX_VAR(W, H)      \
  SUBPIX_AVG_VAR(W, H)

VARIANCES(64, 64)
VARIANCES(64, 32)
VARIANCES(32, 64)
VARIANCES(32, 32)
VARIANCES(32, 16)
VARIANCES(16, 32)
VARIANCES(16, 16)
VARIANCES(16, 8)
VARIANCES(8, 16)
VARIANCES(8, 8)
VARIANCES(8, 4)
VARIANCES(4, 8)
VARIANCES(4, 4)

GET_VAR(16, 16)
GET_VAR(8, 8)

MSE(16, 16)
MSE(16, 8)
MSE(8, 16)
MSE(8, 8)

void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
                         int height, const uint8_t *ref, int ref_stride) {
  int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
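
// ROUND_POWER_OF_TWO(tmp, 1) above is a rounding average, (tmp + 1) >> 1 with
// the usual definition of that macro, so comp_pred becomes the pixel-wise
// rounded mean of pred and ref. This is the averaging step used by the
// SUBPIX_AVG_VAR() wrappers before the final variance computation.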

#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_variance64(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint64_t *sse, int64_t *sum) {
  int i, j;

  uint16_t *a = CONVERT_TO_SHORTPTR(a8);
  uint16_t *b = CONVERT_TO_SHORTPTR(b8);
  *sum = 0;
  *sse = 0;

  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = a[j] - b[j];
      *sum += diff;
      *sse += diff * diff;
    }
    a += a_stride;
    b += b_stride;
  }
}

static void highbd_8_variance(const uint8_t *a8, int a_stride,
                              const uint8_t *b8, int b_stride, int w, int h,
                              uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)sse_long;
  *sum = (int)sum_long;
}

static void highbd_10_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
}

static void highbd_12_variance(const uint8_t *a8, int a_stride,
                               const uint8_t *b8, int b_stride, int w, int h,
                               uint32_t *sse, int *sum) {
  uint64_t sse_long = 0;
  int64_t sum_long = 0;
  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
}
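
// The 10- and 12-bit wrappers normalize the accumulated values back towards an
// 8-bit scale before narrowing: sse is shifted right by 2 * (bit_depth - 8)
// and sum by (bit_depth - 8), which is where the shifts of 4/2 and 8/4 above
// come from.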

#define HIGHBD_VAR(W, H)                                                       \
  uint32_t vpx_highbd_8_variance##W##x##H##_c(const uint8_t *a, int a_stride,  \
                                              const uint8_t *b, int b_stride,  \
                                              uint32_t *sse) {                 \
    int sum;                                                                   \
    highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum);              \
    return *sse - (((int64_t)sum * sum) / (W * H));                            \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }                                                                            \
                                                                               \
  uint32_t vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
                                               const uint8_t *b, int b_stride, \
                                               uint32_t *sse) {                \
    int sum;                                                                   \
    int64_t var;                                                               \
    highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum);             \
    var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H));                  \
    return (var >= 0) ? (uint32_t)var : 0;                                     \
  }
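
// For 10- and 12-bit input the rounding applied in highbd_10_variance() and
// highbd_12_variance() can leave sse - sum^2 / (W * H) slightly negative, so
// the result is clamped to zero before being returned as an unsigned value.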

#define HIGHBD_GET_VAR(S)                                                    \
  void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride,  \
                                        const uint8_t *ref, int ref_stride,  \
                                        uint32_t *sse, int *sum) {           \
    highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);     \
  }                                                                          \
                                                                             \
  void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }                                                                          \
                                                                             \
  void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
                                         const uint8_t *ref, int ref_stride, \
                                         uint32_t *sse, int *sum) {          \
    highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum);    \
  }

#define HIGHBD_MSE(W, H)                                                      \
  uint32_t vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, int src_stride,  \
                                         const uint8_t *ref, int ref_stride,  \
                                         uint32_t *sse) {                     \
    int sum;                                                                  \
    highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);     \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t vpx_highbd_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }                                                                           \
                                                                              \
  uint32_t vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
                                          const uint8_t *ref, int ref_stride, \
                                          uint32_t *sse) {                    \
    int sum;                                                                  \
    highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum);    \
    return *sse;                                                              \
  }

static void highbd_var_filter_block2d_bil_first_pass(
    const uint8_t *src_ptr8, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;
  uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);

      ++src_ptr;
    }

    // Next row...
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

static void highbd_var_filter_block2d_bil_second_pass(
    const uint16_t *src_ptr, uint16_t *output_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      output_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}
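
// Unlike the 8-bit path, both high-bitdepth filter passes read and write
// uint16_t samples, so no narrowing takes place until the final variance or
// averaging step.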

#define HIGHBD_SUBPIX_VAR(W, H)                                              \
  uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c(                     \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[yoffset]);    \
                                                                             \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[yoffset]);    \
                                                                             \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c(                    \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse) {                   \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[yoffset]);    \
                                                                             \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \
                                               dst, dst_stride, sse);        \
  }
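
// In HIGHBD_SUBPIX_VAR() above and HIGHBD_SUBPIX_AVG_VAR() below the
// intermediate uint16_t blocks are handed to the variance kernels through
// CONVERT_TO_BYTEPTR() because the high-bitdepth functions share the
// uint8_t*-based signatures; the callee converts back with
// CONVERT_TO_SHORTPTR().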

#define HIGHBD_SUBPIX_AVG_VAR(W, H)                                          \
  uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c(                 \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[yoffset]);    \
                                                                             \
    vpx_highbd_comp_avg_pred(temp3, second_pred, W, H,                       \
                             CONVERT_TO_BYTEPTR(temp2), W);                  \
                                                                             \
    return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W,  \
                                              dst, dst_stride, sse);         \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[yoffset]);    \
                                                                             \
    vpx_highbd_comp_avg_pred(temp3, second_pred, W, H,                       \
                             CONVERT_TO_BYTEPTR(temp2), W);                  \
                                                                             \
    return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }                                                                          \
                                                                             \
  uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c(                \
      const uint8_t *src, int src_stride, int xoffset, int yoffset,          \
      const uint8_t *dst, int dst_stride, uint32_t *sse,                     \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint16_t temp2[H * W];                                                   \
    DECLARE_ALIGNED(16, uint16_t, temp3[H * W]);                             \
                                                                             \
    highbd_var_filter_block2d_bil_first_pass(                                \
        src, fdata3, src_stride, 1, H + 1, W, bilinear_filters[xoffset]);    \
    highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,     \
                                              bilinear_filters[yoffset]);    \
                                                                             \
    vpx_highbd_comp_avg_pred(temp3, second_pred, W, H,                       \
                             CONVERT_TO_BYTEPTR(temp2), W);                  \
                                                                             \
    return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \
                                               dst, dst_stride, sse);        \
  }

/* All three forms of the variance are available in the same sizes. */
#define HIGHBD_VARIANCES(W, H) \
  HIGHBD_VAR(W, H)             \
  HIGHBD_SUBPIX_VAR(W, H)      \
  HIGHBD_SUBPIX_AVG_VAR(W, H)

HIGHBD_VARIANCES(64, 64)
HIGHBD_VARIANCES(64, 32)
HIGHBD_VARIANCES(32, 64)
HIGHBD_VARIANCES(32, 32)
HIGHBD_VARIANCES(32, 16)
HIGHBD_VARIANCES(16, 32)
HIGHBD_VARIANCES(16, 16)
HIGHBD_VARIANCES(16, 8)
HIGHBD_VARIANCES(8, 16)
HIGHBD_VARIANCES(8, 8)
HIGHBD_VARIANCES(8, 4)
HIGHBD_VARIANCES(4, 8)
HIGHBD_VARIANCES(4, 4)

HIGHBD_GET_VAR(8)
HIGHBD_GET_VAR(16)

HIGHBD_MSE(16, 16)
HIGHBD_MSE(16, 8)
HIGHBD_MSE(8, 16)
HIGHBD_MSE(8, 8)

void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
                              int width, int height, const uint8_t *ref8,
                              int ref_stride) {
  int i, j;
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      const int tmp = pred[j] + ref[j];
      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH