1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10 #include "./vp9_rtcd.h"
11 #include "./vpx_config.h"
12
13 #include "vp9/encoder/vp9_variance.h"
14 #include "vpx_ports/mem.h"
15
// Helper used by the sub-pixel variance wrappers in this file; only the
// declaration is visible here (the definition presumably lives in the AVX2
// intrinsics source -- TODO confirm).
//
// NOTE(review): going by the name and by how the wrappers call it, this
// appears to process a block 32 pixels wide and `height` rows tall. The
// wrappers treat the return value as a signed sum and the value stored
// through `sse` as the matching sum of squares -- confirm against the
// implementation.
unsigned int vp9_sub_pixel_variance32xh_avx2(
    const uint8_t *src, int src_stride,
    int x_offset, int y_offset,
    const uint8_t *dst, int dst_stride,
    int height, unsigned int *sse);
21
// Helper used by the sub-pixel "avg" variance wrappers in this file; only the
// declaration is visible here (the definition presumably lives in the AVX2
// intrinsics source -- TODO confirm).
//
// Takes the same arguments as vp9_sub_pixel_variance32xh_avx2() plus a second
// buffer `sec` with its own stride `sec_stride`.
//
// NOTE(review): `sec` is presumably a second predictor that is averaged in
// before the variance is measured -- confirm against the implementation. The
// wrappers treat the return value as a signed sum and the value stored
// through `sseptr` as the matching sum of squares.
unsigned int vp9_sub_pixel_avg_variance32xh_avx2(
    const uint8_t *src, int src_stride,
    int x_offset, int y_offset,
    const uint8_t *dst, int dst_stride,
    const uint8_t *sec, int sec_stride,
    int height, unsigned int *sseptr);
32
// 64x64 sub-pixel variance built from two calls to
// vp9_sub_pixel_variance32xh_avx2(): one at pointer offset 0 and one at
// pointer offset 32 of both `src` and `dst`, each covering 64 rows.
//
// On return *sse holds the sum of the two values the helper stored through
// its sse argument. The return value is *sse minus the squared total of the
// helper's two return values, divided by 4096 (64 * 64, hence the shift
// by 12).
unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src,
                                              int src_stride,
                                              int x_offset,
                                              int y_offset,
                                              const uint8_t *dst,
                                              int dst_stride,
                                              unsigned int *sse) {
  unsigned int sse_total = 0;
  int sum_total = 0;
  int col;

  // Left half first, then right half.
  for (col = 0; col < 64; col += 32) {
    unsigned int half_sse;
    const int half_sum = vp9_sub_pixel_variance32xh_avx2(
        src + col, src_stride, x_offset, y_offset, dst + col, dst_stride, 64,
        &half_sse);

    sum_total += half_sum;
    sse_total += half_sse;
  }

  *sse = sse_total;

  // Widen before multiplying: the square of the sum can exceed 32 bits.
  return *sse - (((int64_t)sum_total * sum_total) >> 12);
}
53
// 32x32 sub-pixel variance: a single call to
// vp9_sub_pixel_variance32xh_avx2() with a height of 32.
//
// The helper writes its sum-of-squares result straight into *sse. The return
// value is *sse minus the square of the helper's return value divided by
// 1024 (32 * 32, hence the shift by 10).
unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src,
                                              int src_stride,
                                              int x_offset,
                                              int y_offset,
                                              const uint8_t *dst,
                                              int dst_stride,
                                              unsigned int *sse) {
  const int sum = vp9_sub_pixel_variance32xh_avx2(
      src, src_stride, x_offset, y_offset, dst, dst_stride, 32, sse);
  // Widen before multiplying: the square of the sum can exceed 32 bits.
  const int64_t sum_sq = (int64_t)sum * sum;

  return *sse - (sum_sq >> 10);
}
66
// 64x64 sub-pixel "avg" variance built from two calls to
// vp9_sub_pixel_avg_variance32xh_avx2(): one at pointer offset 0 and one at
// pointer offset 32 of `src`, `dst` and `sec`, each covering 64 rows.
// `sec` is always passed with a stride of 64.
//
// On return *sse holds the sum of the two values the helper stored through
// its sse argument. The return value is *sse minus the squared total of the
// helper's two return values, divided by 4096 (64 * 64, hence the shift
// by 12).
unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src,
                                                  int src_stride,
                                                  int x_offset,
                                                  int y_offset,
                                                  const uint8_t *dst,
                                                  int dst_stride,
                                                  unsigned int *sse,
                                                  const uint8_t *sec) {
  unsigned int sse_total = 0;
  int sum_total = 0;
  int col;

  // Left half first, then right half.
  for (col = 0; col < 64; col += 32) {
    unsigned int half_sse;
    const int half_sum = vp9_sub_pixel_avg_variance32xh_avx2(
        src + col, src_stride, x_offset, y_offset, dst + col, dst_stride,
        sec + col, 64, 64, &half_sse);

    sum_total += half_sum;
    sse_total += half_sse;
  }

  *sse = sse_total;

  // Widen before multiplying: the square of the sum can exceed 32 bits.
  return *sse - (((int64_t)sum_total * sum_total) >> 12);
}
90
// 32x32 sub-pixel "avg" variance: a single call to
// vp9_sub_pixel_avg_variance32xh_avx2() with a height of 32 and a `sec`
// stride of 32.
//
// The helper writes its sum-of-squares result straight into *sse. The return
// value is *sse minus the square of the helper's return value divided by
// 1024 (32 * 32, hence the shift by 10).
unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src,
                                                  int src_stride,
                                                  int x_offset,
                                                  int y_offset,
                                                  const uint8_t *dst,
                                                  int dst_stride,
                                                  unsigned int *sse,
                                                  const uint8_t *sec) {
  // The helper processes 32 elements in parallel, so one call covers the
  // full block width.
  const int sum = vp9_sub_pixel_avg_variance32xh_avx2(
      src, src_stride, x_offset, y_offset, dst, dst_stride, sec, 32, 32, sse);
  // Widen before multiplying: the square of the sum can exceed 32 bits.
  const int64_t sum_sq = (int64_t)sum * sum;

  return *sse - (sum_sq >> 10);
}
105