/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "./vp9_rtcd.h"
#include "./vpx_config.h"

#include "vp9/encoder/vp9_variance.h"
#include "vpx_ports/mem.h"

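// Helper kernels operating on a 32-pixel-wide column of the given height.
// They are declared here and defined elsewhere in the AVX2 implementation
// (exact file not shown here). As used by the wrappers below, the return
// value is the sum of differences and *sse receives the sum of squared
// differences.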
unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
                                             int x_offset, int y_offset,
                                             const uint8_t *dst, int dst_stride,
                                             int height,
                                             unsigned int *sse);

unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
                                                 int src_stride,
                                                 int x_offset,
                                                 int y_offset,
                                                 const uint8_t *dst,
                                                 int dst_stride,
                                                 const uint8_t *sec,
                                                 int sec_stride,
                                                 int height,
                                                 unsigned int *sseptr);

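// 64x64 sub-pixel variance: split the block into two 32-wide halves, run the
// 32xh kernel over 64 rows of each half, then combine the partial sums.
// variance = sse - sum^2 / (64 * 64), where 64 * 64 = 2^12.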
unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src,
                                              int src_stride,
                                              int x_offset,
                                              int y_offset,
                                              const uint8_t *dst,
                                              int dst_stride,
                                              unsigned int *sse) {
  unsigned int sse1;
  const int se1 = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset,
                                                  y_offset, dst, dst_stride,
                                                  64, &sse1);
  unsigned int sse2;
  const int se2 = vp9_sub_pixel_variance32xh_avx2(src + 32, src_stride,
                                                  x_offset, y_offset,
                                                  dst + 32, dst_stride,
                                                  64, &sse2);
  const int se = se1 + se2;
  *sse = sse1 + sse2;
  return *sse - (((int64_t)se * se) >> 12);
}

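// 32x32 sub-pixel variance: a single 32xh kernel call covers the whole block.
// variance = sse - sum^2 / (32 * 32), where 32 * 32 = 2^10.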
unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src,
                                              int src_stride,
                                              int x_offset,
                                              int y_offset,
                                              const uint8_t *dst,
                                              int dst_stride,
                                              unsigned int *sse) {
  const int se = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset,
                                                 y_offset, dst, dst_stride,
                                                 32, sse);
  return *sse - (((int64_t)se * se) >> 10);
}

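// 64x64 sub-pixel average variance: same 32-wide split as above, but each
// half also passes the second predictor `sec` (used for compound/averaged
// prediction) into the kernel along with its stride of 64.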
unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src,
                                                  int src_stride,
                                                  int x_offset,
                                                  int y_offset,
                                                  const uint8_t *dst,
                                                  int dst_stride,
                                                  unsigned int *sse,
                                                  const uint8_t *sec) {
  unsigned int sse1;
  const int se1 = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset,
                                                      y_offset, dst, dst_stride,
                                                      sec, 64, 64, &sse1);
  unsigned int sse2;
  const int se2 =
      vp9_sub_pixel_avg_variance32xh_avx2(src + 32, src_stride, x_offset,
                                          y_offset, dst + 32, dst_stride,
                                          sec + 32, 64, 64, &sse2);
  const int se = se1 + se2;

  *sse = sse1 + sse2;

  return *sse - (((int64_t)se * se) >> 12);
}

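// 32x32 sub-pixel average variance: one 32xh kernel call with the second
// predictor `sec`; same sum^2 / 2^10 normalization as the plain 32x32 case.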
unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src,
                                                  int src_stride,
                                                  int x_offset,
                                                  int y_offset,
                                                  const uint8_t *dst,
                                                  int dst_stride,
                                                  unsigned int *sse,
                                                  const uint8_t *sec) {
  // Process 32 elements in parallel.
  const int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset,
                                                     y_offset, dst, dst_stride,
                                                     sec, 32, 32, sse);
  return *sse - (((int64_t)se * se) >> 10);
}