1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10  */
11 
12 #include "EbDefinitions.h"
13 #include "aom_dsp_rtcd.h"
14 
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Common signature of the fixed-size block kernels declared below.
 * A kernel takes a source block and a reference block of 16-bit samples,
 * each with its own stride, and reports its results through `sse` and `sum`.
 * The callers visible in this file read both outputs after every call and
 * ignore the returned value.
 */
typedef uint32_t (*HighVarianceFn)(const uint16_t *src,
                                   int32_t         src_stride,
                                   const uint16_t *ref,
                                   int32_t         ref_stride,
                                   uint32_t       *sse,
                                   int32_t        *sum);

/* Kernel for one 4x4 block; the implementation is not part of this section. */
uint32_t svt_aom_highbd_calc4x4var_sse2(const uint16_t *src,
                                        int32_t         src_stride,
                                        const uint16_t *ref,
                                        int32_t         ref_stride,
                                        uint32_t       *sse,
                                        int32_t        *sum);

/* Kernel for one 8x8 block; the implementation is not part of this section. */
uint32_t svt_aom_highbd_calc8x8var_sse2(const uint16_t *src,
                                        int32_t         src_stride,
                                        const uint16_t *ref,
                                        int32_t         ref_stride,
                                        uint32_t       *sse,
                                        int32_t        *sum);

/* Kernel for one 16x16 block; the implementation is not part of this section. */
uint32_t svt_aom_highbd_calc16x16var_sse2(const uint16_t *src,
                                          int32_t         src_stride,
                                          const uint16_t *ref,
                                          int32_t         ref_stride,
                                          uint32_t       *sse,
                                          int32_t        *sum);

#ifdef __cplusplus
}
#endif // __cplusplus
/*
 * Walks a w x h area in block_size x block_size tiles, runs var_fn on every
 * tile and adds the per-tile results into *sse and *sum.
 *
 * Both outputs are reset to zero before the first tile and are left
 * unscaled: no rounding is applied to the accumulated totals.
 */
static void highbd_8_variance_sse2(const uint16_t *src, int32_t src_stride, const uint16_t *ref,
                                   int32_t ref_stride, int32_t w, int32_t h, uint32_t *sse,
                                   int32_t *sum, HighVarianceFn var_fn, int32_t block_size) {
    *sse = 0;
    *sum = 0;

    for (int32_t row = 0; row < h; row += block_size) {
        /* First sample of the current tile row in each picture. */
        const uint16_t *const src_row = src + src_stride * row;
        const uint16_t *const ref_row = ref + ref_stride * row;

        for (int32_t col = 0; col < w; col += block_size) {
            uint32_t tile_sse;
            int32_t  tile_sum;

            var_fn(src_row + col, src_stride, ref_row + col, ref_stride, &tile_sse, &tile_sum);
            *sse += tile_sse;
            *sum += tile_sum;
        }
    }
}
59 
/*
 * Walks a w x h area in block_size x block_size tiles, runs var_fn on every
 * tile and accumulates the per-tile results (sse in 64 bits, sum in 32 bits).
 *
 * The totals are then reduced with ROUND_POWER_OF_TWO: the sum by 2 bits and
 * the sse by 4 bits, and stored in *sum and *sse.
 * NOTE(review): presumably this rescales 10-bit statistics to the 8-bit
 * range - confirm against the callers' expectations.
 */
static void highbd_10_variance_sse2(const uint16_t *src, int32_t src_stride, const uint16_t *ref,
                                    int32_t ref_stride, int32_t w, int32_t h, uint32_t *sse,
                                    int32_t *sum, HighVarianceFn var_fn, int32_t block_size) {
    uint64_t total_sse = 0;
    int32_t  total_sum = 0;

    for (int32_t row = 0; row < h; row += block_size) {
        /* First sample of the current tile row in each picture. */
        const uint16_t *const src_row = src + src_stride * row;
        const uint16_t *const ref_row = ref + ref_stride * row;

        for (int32_t col = 0; col < w; col += block_size) {
            uint32_t tile_sse;
            int32_t  tile_sum;

            var_fn(src_row + col, src_stride, ref_row + col, ref_stride, &tile_sse, &tile_sum);
            total_sse += tile_sse;
            total_sum += tile_sum;
        }
    }

    *sum = ROUND_POWER_OF_TWO(total_sum, 2);
    *sse = (uint32_t)ROUND_POWER_OF_TWO(total_sse, 4);
}
84 
/*
 * Walks a w x h area in block_size x block_size tiles, runs var_fn on every
 * tile and accumulates the per-tile results (sse in 64 bits, sum in 32 bits).
 *
 * The totals are then reduced with ROUND_POWER_OF_TWO: the sum by 4 bits and
 * the sse by 8 bits, and stored in *sum and *sse.
 * NOTE(review): presumably this rescales 12-bit statistics to the 8-bit
 * range - confirm against the callers' expectations.
 */
static void highbd_12_variance_sse2(const uint16_t *src, int32_t src_stride, const uint16_t *ref,
                                    int32_t ref_stride, int32_t w, int32_t h, uint32_t *sse,
                                    int32_t *sum, HighVarianceFn var_fn, int32_t block_size) {
    uint64_t total_sse = 0;
    int32_t  total_sum = 0;

    for (int32_t row = 0; row < h; row += block_size) {
        /* First sample of the current tile row in each picture. */
        const uint16_t *const src_row = src + src_stride * row;
        const uint16_t *const ref_row = ref + ref_stride * row;

        for (int32_t col = 0; col < w; col += block_size) {
            uint32_t tile_sse;
            int32_t  tile_sum;

            var_fn(src_row + col, src_stride, ref_row + col, ref_stride, &tile_sse, &tile_sum);
            total_sse += tile_sse;
            total_sum += tile_sum;
        }
    }

    *sum = ROUND_POWER_OF_TWO(total_sum, 4);
    *sse = (uint32_t)ROUND_POWER_OF_TWO(total_sse, 8);
}
109 
/*
 * HIGH_GET_VAR(S) defines three public entry points for a single SxS block.
 * Each converts the uint8_t-typed picture pointers with CONVERT_TO_SHORTPTR
 * and calls svt_aom_highbd_calc<S>x<S>var_sse2, which fills *sse and *sum:
 *
 *   svt_aom_highbd_get<S>x<S>var_sse2     leaves both outputs as produced
 *   svt_aom_highbd_10_get<S>x<S>var_sse2  rounds *sum by 2 bits, *sse by 4 bits
 *   svt_aom_highbd_12_get<S>x<S>var_sse2  rounds *sum by 4 bits, *sse by 8 bits
 *
 * Rounding uses ROUND_POWER_OF_TWO. The kernel's return value is not used.
 *
 * The macro expands to complete function definitions, so it is invoked
 * without a trailing ';' (a lone ';' at file scope is not valid ISO C).
 */
#define HIGH_GET_VAR(S)                                                                     \
    void svt_aom_highbd_get##S##x##S##var_sse2(const uint8_t *src8,                         \
                                               int32_t        src_stride,                   \
                                               const uint8_t *ref8,                         \
                                               int32_t        ref_stride,                   \
                                               uint32_t      *sse,                          \
                                               int32_t       *sum) {                        \
        const uint16_t *src = CONVERT_TO_SHORTPTR(src8);                                    \
        const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);                                    \
        svt_aom_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, sum); \
    }                                                                                       \
                                                                                            \
    void svt_aom_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8,                      \
                                                  int32_t        src_stride,                \
                                                  const uint8_t *ref8,                      \
                                                  int32_t        ref_stride,                \
                                                  uint32_t      *sse,                       \
                                                  int32_t       *sum) {                     \
        const uint16_t *src = CONVERT_TO_SHORTPTR(src8);                                    \
        const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);                                    \
        svt_aom_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, sum); \
        *sum = ROUND_POWER_OF_TWO(*sum, 2);                                                 \
        *sse = ROUND_POWER_OF_TWO(*sse, 4);                                                 \
    }                                                                                       \
                                                                                            \
    void svt_aom_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8,                      \
                                                  int32_t        src_stride,                \
                                                  const uint8_t *ref8,                      \
                                                  int32_t        ref_stride,                \
                                                  uint32_t      *sse,                       \
                                                  int32_t       *sum) {                     \
        const uint16_t *src = CONVERT_TO_SHORTPTR(src8);                                    \
        const uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);                                    \
        svt_aom_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, sum); \
        *sum = ROUND_POWER_OF_TWO(*sum, 4);                                                 \
        *sse = ROUND_POWER_OF_TWO(*sse, 8);                                                 \
    }

HIGH_GET_VAR(16)
HIGH_GET_VAR(8)

#undef HIGH_GET_VAR
152 
153 #define VAR_FN(w, h, block_size, shift)                                                   \
154     uint32_t svt_aom_highbd_8_variance##w##x##h##_sse2(const uint8_t *src8,               \
155                                                        int32_t        src_stride,         \
156                                                        const uint8_t *ref8,               \
157                                                        int32_t        ref_stride,         \
158                                                        uint32_t *     sse) {                   \
159         int32_t   sum;                                                                    \
160         uint16_t *src = CONVERT_TO_SHORTPTR(src8);                                        \
161         uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);                                        \
162         highbd_8_variance_sse2(src,                                                       \
163                                src_stride,                                                \
164                                ref,                                                       \
165                                ref_stride,                                                \
166                                w,                                                         \
167                                h,                                                         \
168                                sse,                                                       \
169                                &sum,                                                      \
170                                svt_aom_highbd_calc##block_size##x##block_size##var_sse2,  \
171                                block_size);                                               \
172         return *sse - (uint32_t)(((int64_t)sum * sum) >> shift);                          \
173     }                                                                                     \
174                                                                                           \
175     uint32_t svt_aom_highbd_10_variance##w##x##h##_sse2(const uint8_t *src8,              \
176                                                         int32_t        src_stride,        \
177                                                         const uint8_t *ref8,              \
178                                                         int32_t        ref_stride,        \
179                                                         uint32_t *     sse) {                  \
180         int32_t   sum;                                                                    \
181         int64_t   var;                                                                    \
182         uint16_t *src = CONVERT_TO_SHORTPTR(src8);                                        \
183         uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);                                        \
184         highbd_10_variance_sse2(src,                                                      \
185                                 src_stride,                                               \
186                                 ref,                                                      \
187                                 ref_stride,                                               \
188                                 w,                                                        \
189                                 h,                                                        \
190                                 sse,                                                      \
191                                 &sum,                                                     \
192                                 svt_aom_highbd_calc##block_size##x##block_size##var_sse2, \
193                                 block_size);                                              \
194         var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift);                          \
195         return (var >= 0) ? (uint32_t)var : 0;                                            \
196     }                                                                                     \
197                                                                                           \
198     uint32_t svt_aom_highbd_12_variance##w##x##h##_sse2(const uint8_t *src8,              \
199                                                         int32_t        src_stride,        \
200                                                         const uint8_t *ref8,              \
201                                                         int32_t        ref_stride,        \
202                                                         uint32_t *     sse) {                  \
203         int32_t   sum;                                                                    \
204         int64_t   var;                                                                    \
205         uint16_t *src = CONVERT_TO_SHORTPTR(src8);                                        \
206         uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);                                        \
207         highbd_12_variance_sse2(src,                                                      \
208                                 src_stride,                                               \
209                                 ref,                                                      \
210                                 ref_stride,                                               \
211                                 w,                                                        \
212                                 h,                                                        \
213                                 sse,                                                      \
214                                 &sum,                                                     \
215                                 svt_aom_highbd_calc##block_size##x##block_size##var_sse2, \
216                                 block_size);                                              \
217         var = (int64_t)(*sse) - (((int64_t)sum * sum) >> shift);                          \
218         return (var >= 0) ? (uint32_t)var : 0;                                            \
219     }
220 
221 VAR_FN(64, 64, 16, 12);
222 VAR_FN(64, 32, 16, 11);
223 VAR_FN(32, 64, 16, 11);
224 VAR_FN(32, 32, 16, 10);
225 VAR_FN(32, 16, 16, 9);
226 VAR_FN(16, 32, 16, 9);
227 VAR_FN(16, 16, 16, 8);
228 VAR_FN(16, 8, 8, 7);
229 VAR_FN(8, 16, 8, 7);
230 VAR_FN(8, 8, 8, 6);
231 VAR_FN(16, 4, 4, 6);
232 VAR_FN(8, 32, 8, 8);
233 VAR_FN(32, 8, 8, 8);
234 VAR_FN(16, 64, 16, 10);
235 VAR_FN(64, 16, 16, 10);
236 
237 #undef VAR_FN
238 
svt_aom_highbd_8_mse16x16_sse2(const uint8_t * src8,int32_t src_stride,const uint8_t * ref8,int32_t ref_stride,uint32_t * sse)239 void svt_aom_highbd_8_mse16x16_sse2(const uint8_t *src8, int32_t src_stride, const uint8_t *ref8,
240                                     int32_t ref_stride, uint32_t *sse) {
241     int32_t   sum;
242     uint16_t *src = CONVERT_TO_SHORTPTR(src8);
243     uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
244 
245     /*TODO: Remove calculate unused sum.*/
246     highbd_8_variance_sse2(
247         src, src_stride, ref, ref_stride, 16, 16, sse, &sum, svt_aom_highbd_calc16x16var_sse2, 16);
248 }
249