1 #include <gtest/gtest.h>
2 #include "cpu.h"
3 #include "cpu_core.h"
4 #include "util.h"
5 #include "macros.h"
6 #include "IWelsVP.h"
7 #include "vaacalculation.h"
8
9 using namespace WelsVP;
10
// Per-8x8-block statistics used by VAACalcSadSsd_ref: SAD of cur vs ref,
// sum of squared differences, pixel sum and sum of squared pixels (the
// last two over the current frame only).
static void CalcBlockSadSsd8x8 (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
                                int32_t& iSad, int32_t& iSqDiff, int32_t& iSum, int32_t& iSqSum) {
  iSad = iSqDiff = iSum = iSqSum = 0;
  for (int32_t y = 0; y < 8; y ++) {
    for (int32_t x = 0; x < 8; x ++) {
      const int32_t iDiff = pCur[x] - pRef[x];
      const int32_t iAbsDiff = (iDiff >= 0) ? iDiff : -iDiff;
      iSad += iAbsDiff;
      iSqDiff += iAbsDiff * iAbsDiff;
      iSum += pCur[x];
      iSqSum += pCur[x] * pCur[x];
    }
    pCur += iStride;
    pRef += iStride;
  }
}

// Reference (plain C++) implementation of the SAD+SSD analysis kernel,
// used to validate the optimized VAACalcSadSsd variants.  For every 16x16
// macroblock mb it writes:
//   pSad8x8[4*mb+q]    - SAD of 8x8 quadrant q (TL, TR, BL, BR order),
//   pSum16x16[mb]      - sum of current-frame pixels,
//   psqsum16x16[mb]    - sum of squared current-frame pixels,
//   psqdiff16x16[mb]   - sum of squared cur/ref differences,
// and accumulates the total SAD into *pFrameSad.  Only whole macroblocks
// are processed (width/height truncated to multiples of 16); iPicStride
// is the line pitch in bytes.
void VAACalcSadSsd_ref (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                        int32_t iPicStride,
                        int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16) {
  const uint8_t* pCurMb = pCurData;
  const uint8_t* pRefMb = pRefData;
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iStrideX8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  const int32_t iRowStep = (iPicStride << 4) - iPicWidth;
  // Offsets of the four 8x8 quadrants inside a macroblock (TL, TR, BL, BR).
  const int32_t kQuadOffset[4] = {0, 8, iStrideX8, iStrideX8 + 8};
  int32_t iMbIndex = 0;

  *pFrameSad = 0;
  for (int32_t i = 0; i < iMbHeight; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      pSum16x16[iMbIndex] = 0;
      psqsum16x16[iMbIndex] = 0;
      psqdiff16x16[iMbIndex] = 0;
      for (int32_t q = 0; q < 4; q ++) {
        int32_t iSad, iSqDiff, iSum, iSqSum;
        CalcBlockSadSsd8x8 (pCurMb + kQuadOffset[q], pRefMb + kQuadOffset[q], iPicStride,
                            iSad, iSqDiff, iSum, iSqSum);
        *pFrameSad += iSad;
        pSad8x8[ (iMbIndex << 2) + q] = iSad;
        pSum16x16[iMbIndex] += iSum;
        psqsum16x16[iMbIndex] += iSqSum;
        psqdiff16x16[iMbIndex] += iSqDiff;
      }
      pCurMb += 16;
      pRefMb += 16;
      ++iMbIndex;
    }
    pCurMb += iRowStep;
    pRefMb += iRowStep;
  }
}
// Per-8x8-block statistics used by VAACalcSadVar_ref: SAD of cur vs ref,
// plus pixel sum and sum of squared pixels of the current frame.
static void CalcBlockSadVar8x8 (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
                                int32_t& iSad, int32_t& iSum, int32_t& iSqSum) {
  iSad = iSum = iSqSum = 0;
  for (int32_t y = 0; y < 8; y ++) {
    for (int32_t x = 0; x < 8; x ++) {
      const int32_t iDiff = pCur[x] - pRef[x];
      iSad += (iDiff >= 0) ? iDiff : -iDiff;
      iSum += pCur[x];
      iSqSum += pCur[x] * pCur[x];
    }
    pCur += iStride;
    pRef += iStride;
  }
}

// Reference (plain C++) implementation of the SAD+variance analysis kernel,
// used to validate the optimized VAACalcSadVar variants.  For every 16x16
// macroblock mb it writes:
//   pSad8x8[4*mb+q]   - SAD of 8x8 quadrant q (TL, TR, BL, BR order),
//   pSum16x16[mb]     - sum of current-frame pixels,
//   psqsum16x16[mb]   - sum of squared current-frame pixels,
// and accumulates the total SAD into *pFrameSad.  Only whole macroblocks
// are processed; iPicStride is the line pitch in bytes.
void VAACalcSadVar_ref (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                        int32_t iPicStride,
                        int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16) {
  const uint8_t* pCurMb = pCurData;
  const uint8_t* pRefMb = pRefData;
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iStrideX8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  const int32_t iRowStep = (iPicStride << 4) - iPicWidth;
  // Offsets of the four 8x8 quadrants inside a macroblock (TL, TR, BL, BR).
  const int32_t kQuadOffset[4] = {0, 8, iStrideX8, iStrideX8 + 8};
  int32_t iMbIndex = 0;

  *pFrameSad = 0;
  for (int32_t i = 0; i < iMbHeight; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      pSum16x16[iMbIndex] = 0;
      psqsum16x16[iMbIndex] = 0;
      for (int32_t q = 0; q < 4; q ++) {
        int32_t iSad, iSum, iSqSum;
        CalcBlockSadVar8x8 (pCurMb + kQuadOffset[q], pRefMb + kQuadOffset[q], iPicStride,
                            iSad, iSum, iSqSum);
        *pFrameSad += iSad;
        pSad8x8[ (iMbIndex << 2) + q] = iSad;
        pSum16x16[iMbIndex] += iSum;
        psqsum16x16[iMbIndex] += iSqSum;
      }
      pCurMb += 16;
      pRefMb += 16;
      ++iMbIndex;
    }
    pCurMb += iRowStep;
    pRefMb += iRowStep;
  }
}
226
// SAD (sum of absolute differences) of a single 8x8 block.
static int32_t CalcBlockSad8x8 (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride) {
  int32_t iSad = 0;
  for (int32_t y = 0; y < 8; y ++) {
    for (int32_t x = 0; x < 8; x ++) {
      const int32_t iDiff = pCur[x] - pRef[x];
      iSad += (iDiff >= 0) ? iDiff : -iDiff;
    }
    pCur += iStride;
    pRef += iStride;
  }
  return iSad;
}

// Reference (plain C++) implementation of the SAD analysis kernel, used to
// validate the optimized VAACalcSad variants.  For every 16x16 macroblock
// mb it writes pSad8x8[4*mb+q] (quadrant order TL, TR, BL, BR) and
// accumulates the total SAD into *pFrameSad.  Only whole macroblocks are
// processed; iPicStride is the line pitch in bytes.
void VAACalcSad_ref (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                     int32_t iPicStride,
                     int32_t* pFrameSad, int32_t* pSad8x8) {
  const uint8_t* pCurMb = pCurData;
  const uint8_t* pRefMb = pRefData;
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iStrideX8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  const int32_t iRowStep = (iPicStride << 4) - iPicWidth;
  // Offsets of the four 8x8 quadrants inside a macroblock (TL, TR, BL, BR).
  const int32_t kQuadOffset[4] = {0, 8, iStrideX8, iStrideX8 + 8};
  int32_t iMbIndex = 0;

  *pFrameSad = 0;
  for (int32_t i = 0; i < iMbHeight; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      for (int32_t q = 0; q < 4; q ++) {
        const int32_t iSad = CalcBlockSad8x8 (pCurMb + kQuadOffset[q], pRefMb + kQuadOffset[q], iPicStride);
        *pFrameSad += iSad;
        pSad8x8[ (iMbIndex << 2) + q] = iSad;
      }
      pCurMb += 16;
      pRefMb += 16;
      ++iMbIndex;
    }
    pCurMb += iRowStep;
    pRefMb += iRowStep;
  }
}
310
// Per-8x8-block statistics used by VAACalcSadSsdBgd_ref: SAD, squared
// difference sum, pixel sum, squared pixel sum, signed difference sum (SD)
// and maximum absolute difference (MAD).
static void CalcBlockSadSsdBgd8x8 (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
                                   int32_t& iSad, int32_t& iSqDiff, int32_t& iSum, int32_t& iSqSum,
                                   int32_t& iSd, int32_t& iMad) {
  iSad = iSqDiff = iSum = iSqSum = iSd = iMad = 0;
  for (int32_t y = 0; y < 8; y ++) {
    for (int32_t x = 0; x < 8; x ++) {
      const int32_t iDiff = pCur[x] - pRef[x];
      const int32_t iAbsDiff = (iDiff >= 0) ? iDiff : -iDiff;
      iSd += iDiff;
      if (iAbsDiff > iMad)
        iMad = iAbsDiff;
      iSad += iAbsDiff;
      iSqDiff += iAbsDiff * iAbsDiff;
      iSum += pCur[x];
      iSqSum += pCur[x] * pCur[x];
    }
    pCur += iStride;
    pRef += iStride;
  }
}

// Reference (plain C++) implementation of the SAD+SSD+background-detection
// kernel, used to validate the optimized VAACalcSadSsdBgd variants.  Per
// 8x8 quadrant q (TL, TR, BL, BR) of macroblock mb it writes
// pSad8x8/pSd8x8/pMad8x8[4*mb+q]; per macroblock it writes pSum16x16,
// psqsum16x16 and psqdiff16x16; the total SAD is accumulated in *pFrameSad.
// MAD fits in uint8_t because a pixel difference is at most 255.
void VAACalcSadSsdBgd_ref (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                           int32_t iPicStride,
                           int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16, int32_t* pSd8x8,
                           uint8_t* pMad8x8) {
  const uint8_t* pCurMb = pCurData;
  const uint8_t* pRefMb = pRefData;
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iStrideX8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  const int32_t iRowStep = (iPicStride << 4) - iPicWidth;
  // Offsets of the four 8x8 quadrants inside a macroblock (TL, TR, BL, BR).
  const int32_t kQuadOffset[4] = {0, 8, iStrideX8, iStrideX8 + 8};
  int32_t iMbIndex = 0;

  *pFrameSad = 0;
  for (int32_t i = 0; i < iMbHeight; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      pSum16x16[iMbIndex] = 0;
      psqsum16x16[iMbIndex] = 0;
      psqdiff16x16[iMbIndex] = 0;
      for (int32_t q = 0; q < 4; q ++) {
        int32_t iSad, iSqDiff, iSum, iSqSum, iSd, iMad;
        CalcBlockSadSsdBgd8x8 (pCurMb + kQuadOffset[q], pRefMb + kQuadOffset[q], iPicStride,
                               iSad, iSqDiff, iSum, iSqSum, iSd, iMad);
        *pFrameSad += iSad;
        pSad8x8[ (iMbIndex << 2) + q] = iSad;
        pSum16x16[iMbIndex] += iSum;
        psqsum16x16[iMbIndex] += iSqSum;
        psqdiff16x16[iMbIndex] += iSqDiff;
        pSd8x8[ (iMbIndex << 2) + q] = iSd;
        pMad8x8[ (iMbIndex << 2) + q] = (uint8_t) iMad;
      }
      pCurMb += 16;
      pRefMb += 16;
      ++iMbIndex;
    }
    pCurMb += iRowStep;
    pRefMb += iRowStep;
  }
}
456
// Per-8x8-block statistics used by VAACalcSadBgd_ref: SAD, signed
// difference sum (SD) and maximum absolute difference (MAD).
static void CalcBlockSadBgd8x8 (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
                                int32_t& iSad, int32_t& iSd, int32_t& iMad) {
  iSad = iSd = iMad = 0;
  for (int32_t y = 0; y < 8; y ++) {
    for (int32_t x = 0; x < 8; x ++) {
      const int32_t iDiff = pCur[x] - pRef[x];
      const int32_t iAbsDiff = (iDiff >= 0) ? iDiff : -iDiff;
      iSd += iDiff;
      iSad += iAbsDiff;
      if (iAbsDiff > iMad)
        iMad = iAbsDiff;
    }
    pCur += iStride;
    pRef += iStride;
  }
}

// Reference (plain C++) implementation of the SAD+background-detection
// kernel, used to validate the optimized VAACalcSadBgd variants.  Per 8x8
// quadrant q (TL, TR, BL, BR) of macroblock mb it writes
// pSad8x8/pSd8x8/pMad8x8[4*mb+q]; the total SAD is accumulated in
// *pFrameSad.  MAD fits in uint8_t because a pixel difference is at most
// 255.  Only whole macroblocks are processed; iPicStride is the line pitch.
void VAACalcSadBgd_ref (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                        int32_t iPicStride,
                        int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSd8x8, uint8_t* pMad8x8) {
  const uint8_t* pCurMb = pCurData;
  const uint8_t* pRefMb = pRefData;
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iStrideX8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  const int32_t iRowStep = (iPicStride << 4) - iPicWidth;
  // Offsets of the four 8x8 quadrants inside a macroblock (TL, TR, BL, BR).
  const int32_t kQuadOffset[4] = {0, 8, iStrideX8, iStrideX8 + 8};
  int32_t iMbIndex = 0;

  *pFrameSad = 0;
  for (int32_t i = 0; i < iMbHeight; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      for (int32_t q = 0; q < 4; q ++) {
        int32_t iSad, iSd, iMad;
        CalcBlockSadBgd8x8 (pCurMb + kQuadOffset[q], pRefMb + kQuadOffset[q], iPicStride,
                            iSad, iSd, iMad);
        *pFrameSad += iSad;
        pSad8x8[ (iMbIndex << 2) + q] = iSad;
        pSd8x8[ (iMbIndex << 2) + q] = iSd;
        pMad8x8[ (iMbIndex << 2) + q] = (uint8_t) iMad;
      }
      pCurMb += 16;
      pRefMb += 16;
      ++iMbIndex;
    }
    pCurMb += iRowStep;
    pRefMb += iRowStep;
  }
}
568
// Size in bytes/pixels of the largest test frame: one 320x320 luma plane.
#define BUFFER_SIZE (320*320)
570
// Generates a gtest case that cross-checks an optimized VAACalcSad
// implementation (func) against VAACalcSad_ref on random frames.  When ASM
// is non-zero the test returns early unless WelsCPUFeatureDetect reports
// the bits in CPUFLAGS.  Four widths (320, 304, 288, 272) with a fixed
// 320-byte stride exercise the width != stride path; output buffers are
// pre-filled with random values so stale data cannot mask a mismatch.
#define GENERATE_VAACalcSad_UT(func, ASM, CPUFLAGS) \
TEST (VAACalcFuncTest, func) { \
if (ASM) {\
int32_t iCpuCores = 0; \
uint32_t m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores); \
if (0 == (m_uiCpuFeatureFlag & CPUFLAGS)) \
return; \
} \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_c, BUFFER_SIZE/64, 16); \
int32_t pic_width_c; \
int32_t pic_height_c; \
int32_t pic_stride_c; \
int32_t psadframe_c; \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_a, BUFFER_SIZE/64, 16); \
int32_t pic_width_a; \
int32_t pic_height_a; \
int32_t pic_stride_a; \
int32_t psadframe_a; \
for (int i=0; i<4; i++) { \
pic_width_c = pic_width_a = 320-16*i; \
pic_height_c = pic_height_a = 320; \
pic_stride_c = pic_stride_a = 320; \
psadframe_c = psadframe_a = 0; \
for (int j=0; j<BUFFER_SIZE; j++) { \
cur_data_c[j] = cur_data_a[j] = (rand()%256); \
ref_data_c[j] = ref_data_a[j] = (rand()%256); \
psad8x8_c[j%(BUFFER_SIZE/64)] = psad8x8_a[j%(BUFFER_SIZE/64)] = (rand()%256); \
} \
VAACalcSad_ref (cur_data_c, ref_data_c, pic_width_c, pic_height_c, pic_stride_c, &psadframe_c, psad8x8_c); \
func (cur_data_a, ref_data_a, pic_width_a, pic_height_a, pic_stride_a, &psadframe_a, psad8x8_a); \
ASSERT_EQ (psadframe_a, psadframe_c); \
for (int j=0; j<(BUFFER_SIZE/64); j++) \
ASSERT_EQ (psad8x8_a[j], psad8x8_c[j]); \
} \
}
610
611
// Generates a gtest case that cross-checks an optimized VAACalcSadBgd
// implementation (func) against VAACalcSadBgd_ref, comparing the frame
// SAD and the per-8x8 SAD, SD and MAD outputs.  CPU-feature gating and
// random-frame setup are the same as GENERATE_VAACalcSad_UT above;
// repeated here so the macro stands alone.
#define GENERATE_VAACalcSadBgd_UT(func, ASM, CPUFLAGS) \
TEST (VAACalcFuncTest, func) { \
if (ASM) {\
int32_t iCpuCores = 0; \
uint32_t m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores); \
if (0 == (m_uiCpuFeatureFlag & CPUFLAGS)) \
return; \
} \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_c, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psd8x8_c, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, pmad8x8_c, BUFFER_SIZE/64, 16); \
int32_t pic_width_c; \
int32_t pic_height_c; \
int32_t pic_stride_c; \
int32_t psadframe_c; \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_a, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psd8x8_a, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, pmad8x8_a, BUFFER_SIZE/64, 16); \
int32_t pic_width_a; \
int32_t pic_height_a; \
int32_t pic_stride_a; \
int32_t psadframe_a; \
for (int i=0; i<4; i++) { \
pic_width_c = pic_width_a = 320-16*i; \
pic_height_c = pic_height_a = 320; \
pic_stride_c = pic_stride_a = 320; \
psadframe_c = psadframe_a = 0; \
for (int j=0; j<BUFFER_SIZE; j++) { \
cur_data_c[j] = cur_data_a[j] = (rand()%256); \
ref_data_c[j] = ref_data_a[j] = (rand()%256); \
psad8x8_c[j%(BUFFER_SIZE/64)] = psad8x8_a[j%(BUFFER_SIZE/64)] = (rand()%256); \
psd8x8_c[j%(BUFFER_SIZE/64)] = psd8x8_a[j%(BUFFER_SIZE/64)] = (rand()%256); \
pmad8x8_c[j%(BUFFER_SIZE/64)] = pmad8x8_a[j%(BUFFER_SIZE/64)] = (rand()%256); \
} \
VAACalcSadBgd_ref (cur_data_c, ref_data_c, pic_width_c, pic_height_c, pic_stride_c, &psadframe_c, psad8x8_c, psd8x8_c, pmad8x8_c); \
func (cur_data_a, ref_data_a, pic_width_a, pic_height_a, pic_stride_a, &psadframe_a, psad8x8_a, psd8x8_a, pmad8x8_a); \
ASSERT_EQ (psadframe_a, psadframe_c); \
for (int j=0; j<(BUFFER_SIZE/64); j++) {\
ASSERT_EQ (psad8x8_a[j], psad8x8_c[j]); \
ASSERT_EQ (psd8x8_a[j], psd8x8_c[j]); \
ASSERT_EQ (pmad8x8_a[j], pmad8x8_c[j]); \
} \
} \
}
660
// Generates a gtest case that cross-checks an optimized VAACalcSadSsd
// implementation (func) against VAACalcSadSsd_ref, comparing the frame
// SAD, per-8x8 SAD, and per-16x16 sum/sqsum/sqdiff outputs.  CPU-feature
// gating and random-frame setup match GENERATE_VAACalcSad_UT above.
#define GENERATE_VAACalcSadSsd_UT(func, ASM, CPUFLAGS) \
TEST (VAACalcFuncTest, func) { \
if (ASM) {\
int32_t iCpuCores = 0; \
uint32_t m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores); \
if (0 == (m_uiCpuFeatureFlag & CPUFLAGS)) \
return; \
} \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_c, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psum16x16_c, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqsum16x16_c, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqdiff16x16_c, BUFFER_SIZE/256, 16); \
int32_t pic_width_c; \
int32_t pic_height_c; \
int32_t pic_stride_c; \
int32_t psadframe_c; \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_a, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psum16x16_a, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqsum16x16_a, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqdiff16x16_a, BUFFER_SIZE/256, 16); \
int32_t pic_width_a; \
int32_t pic_height_a; \
int32_t pic_stride_a; \
int32_t psadframe_a; \
for (int i=0; i<4; i++) { \
pic_width_c = pic_width_a = 320-16*i; \
pic_height_c = pic_height_a = 320; \
pic_stride_c = pic_stride_a = 320; \
psadframe_c = psadframe_a = 0; \
for (int j=0; j<BUFFER_SIZE; j++) { \
cur_data_c[j] = cur_data_a[j] = (rand()%256); \
ref_data_c[j] = ref_data_a[j] = (rand()%256); \
psad8x8_c[j%(BUFFER_SIZE/64)] = psad8x8_a[j%(BUFFER_SIZE/64)] = (rand()%256); \
psum16x16_c[j%(BUFFER_SIZE/256)] = psum16x16_a[j%(BUFFER_SIZE/256)] = (rand()%256); \
psqsum16x16_c[j%(BUFFER_SIZE/256)] = psqsum16x16_a[j%(BUFFER_SIZE/256)] = (rand()%256); \
psqdiff16x16_c[j%(BUFFER_SIZE/256)] = psqdiff16x16_a[j%(BUFFER_SIZE/256)] = (rand()%256); \
} \
VAACalcSadSsd_ref (cur_data_c, ref_data_c, pic_width_c, pic_height_c, pic_stride_c, &psadframe_c, psad8x8_c, psum16x16_c, psqsum16x16_c, psqdiff16x16_c); \
func (cur_data_a, ref_data_a, pic_width_a, pic_height_a, pic_stride_a, &psadframe_a, psad8x8_a, psum16x16_a, psqsum16x16_a, psqdiff16x16_a); \
ASSERT_EQ (psadframe_a, psadframe_c); \
for (int j=0; j<(BUFFER_SIZE/64); j++) {\
ASSERT_EQ (psad8x8_a[j], psad8x8_c[j]); \
} \
for (int j=0; j<(BUFFER_SIZE/256); j++) {\
ASSERT_EQ (psum16x16_a[j], psum16x16_c[j]); \
ASSERT_EQ (psqsum16x16_a[j], psqsum16x16_c[j]); \
ASSERT_EQ (psqdiff16x16_a[j], psqdiff16x16_c[j]); \
} \
} \
}
715
// Generates a gtest case that cross-checks an optimized VAACalcSadVar
// implementation (func) against VAACalcSadVar_ref, comparing the frame
// SAD, per-8x8 SAD, and per-16x16 sum/sqsum outputs.  CPU-feature gating
// and random-frame setup match GENERATE_VAACalcSad_UT above.
#define GENERATE_VAACalcSadVar_UT(func, ASM, CPUFLAGS) \
TEST (VAACalcFuncTest, func) { \
if (ASM) {\
int32_t iCpuCores = 0; \
uint32_t m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores); \
if (0 == (m_uiCpuFeatureFlag & CPUFLAGS)) \
return; \
} \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_c, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psum16x16_c, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqsum16x16_c, BUFFER_SIZE/256, 16); \
int32_t pic_width_c; \
int32_t pic_height_c; \
int32_t pic_stride_c; \
int32_t psadframe_c; \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_a, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psum16x16_a, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqsum16x16_a, BUFFER_SIZE/256, 16); \
int32_t pic_width_a; \
int32_t pic_height_a; \
int32_t pic_stride_a; \
int32_t psadframe_a; \
for (int i=0; i<4; i++) { \
pic_width_c = pic_width_a = 320-16*i; \
pic_height_c = pic_height_a = 320; \
pic_stride_c = pic_stride_a = 320; \
psadframe_c = psadframe_a = 0; \
for (int j=0; j<BUFFER_SIZE; j++) { \
cur_data_c[j] = cur_data_a[j] = (rand()%256); \
ref_data_c[j] = ref_data_a[j] = (rand()%256); \
psad8x8_c[j%(BUFFER_SIZE/64)] = psad8x8_a[j%(BUFFER_SIZE/64)] = (rand()%256); \
psum16x16_c[j%(BUFFER_SIZE/256)] = psum16x16_a[j%(BUFFER_SIZE/256)] = (rand()%256); \
psqsum16x16_c[j%(BUFFER_SIZE/256)] = psqsum16x16_a[j%(BUFFER_SIZE/256)] = (rand()%256); \
} \
VAACalcSadVar_ref (cur_data_c, ref_data_c, pic_width_c, pic_height_c, pic_stride_c, &psadframe_c, psad8x8_c, psum16x16_c, psqsum16x16_c); \
func (cur_data_a, ref_data_a, pic_width_a, pic_height_a, pic_stride_a, &psadframe_a, psad8x8_a, psum16x16_a, psqsum16x16_a); \
ASSERT_EQ (psadframe_a, psadframe_c); \
for (int j=0; j<(BUFFER_SIZE/64); j++) {\
ASSERT_EQ (psad8x8_a[j], psad8x8_c[j]); \
} \
for (int j=0; j<(BUFFER_SIZE/256); j++) {\
ASSERT_EQ (psum16x16_a[j], psum16x16_c[j]); \
ASSERT_EQ (psqsum16x16_a[j], psqsum16x16_c[j]); \
} \
} \
}
766
// Generates a gtest case that cross-checks an optimized VAACalcSadSsdBgd
// implementation (func) against VAACalcSadSsdBgd_ref, comparing the frame
// SAD, per-8x8 SAD/SD/MAD, and per-16x16 sum/sqsum/sqdiff outputs.
// CPU-feature gating and random-frame setup match GENERATE_VAACalcSad_UT.
#define GENERATE_VAACalcSadSsdBgd_UT(func, ASM, CPUFLAGS) \
TEST (VAACalcFuncTest, func) { \
if (ASM) {\
int32_t iCpuCores = 0; \
uint32_t m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores); \
if (0 == (m_uiCpuFeatureFlag & CPUFLAGS)) \
return; \
} \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_c, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_c, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psd8x8_c, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, pmad8x8_c, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psum16x16_c, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqsum16x16_c, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqdiff16x16_c, BUFFER_SIZE/256, 16); \
int32_t pic_width_c; \
int32_t pic_height_c; \
int32_t pic_stride_c; \
int32_t psadframe_c; \
ENFORCE_STACK_ALIGN_1D (uint8_t, cur_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, ref_data_a, BUFFER_SIZE, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psad8x8_a, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psd8x8_a, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (uint8_t, pmad8x8_a, BUFFER_SIZE/64, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psum16x16_a, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqsum16x16_a, BUFFER_SIZE/256, 16); \
ENFORCE_STACK_ALIGN_1D (int32_t, psqdiff16x16_a, BUFFER_SIZE/256, 16); \
int32_t pic_width_a; \
int32_t pic_height_a; \
int32_t pic_stride_a; \
int32_t psadframe_a; \
for (int i=0; i<4; i++) { \
pic_width_c = pic_width_a = 320-16*i; \
pic_height_c = pic_height_a = 320; \
pic_stride_c = pic_stride_a = 320; \
psadframe_c = psadframe_a = 0; \
for (int j=0; j<BUFFER_SIZE; j++) { \
cur_data_c[j] = cur_data_a[j] = (rand()%256); \
ref_data_c[j] = ref_data_a[j] = (rand()%256); \
psad8x8_c[j%(BUFFER_SIZE/64)] = psad8x8_a[j%(BUFFER_SIZE/64)] = (rand()%256); \
psd8x8_c[j%(BUFFER_SIZE/64)] = psd8x8_a[j%(BUFFER_SIZE/64)] = (rand()%256); \
pmad8x8_c[j%(BUFFER_SIZE/64)] = pmad8x8_a[j%(BUFFER_SIZE/64)] = (rand()%256); \
psum16x16_c[j%(BUFFER_SIZE/256)] = psum16x16_a[j%(BUFFER_SIZE/256)] = (rand()%256); \
psqsum16x16_c[j%(BUFFER_SIZE/256)] = psqsum16x16_a[j%(BUFFER_SIZE/256)] = (rand()%256); \
psqdiff16x16_c[j%(BUFFER_SIZE/256)] = psqdiff16x16_a[j%(BUFFER_SIZE/256)] = (rand()%256); \
} \
VAACalcSadSsdBgd_ref (cur_data_c, ref_data_c, pic_width_c, pic_height_c, pic_stride_c, &psadframe_c, psad8x8_c, psum16x16_c, psqsum16x16_c, psqdiff16x16_c, psd8x8_c, pmad8x8_c); \
func (cur_data_a, ref_data_a, pic_width_a, pic_height_a, pic_stride_a, &psadframe_a, psad8x8_a, psum16x16_a, psqsum16x16_a, psqdiff16x16_a, psd8x8_a, pmad8x8_a); \
ASSERT_EQ (psadframe_a, psadframe_c); \
for (int j=0; j<(BUFFER_SIZE/64); j++) {\
ASSERT_EQ (psad8x8_a[j], psad8x8_c[j]); \
ASSERT_EQ (psd8x8_a[j], psd8x8_c[j]); \
ASSERT_EQ (pmad8x8_a[j], pmad8x8_c[j]); \
} \
for (int j=0; j<(BUFFER_SIZE/256); j++) {\
ASSERT_EQ (psum16x16_a[j], psum16x16_c[j]); \
ASSERT_EQ (psqsum16x16_a[j], psqsum16x16_c[j]); \
ASSERT_EQ (psqdiff16x16_a[j], psqdiff16x16_c[j]); \
} \
} \
}
829
// Plain C implementations: always run, no CPU-feature gating.
GENERATE_VAACalcSad_UT (VAACalcSad_c, 0, 0)
GENERATE_VAACalcSadBgd_UT (VAACalcSadBgd_c, 0, 0)
GENERATE_VAACalcSadSsdBgd_UT (VAACalcSadSsdBgd_c, 0, 0)
GENERATE_VAACalcSadSsd_UT (VAACalcSadSsd_c, 0, 0)
GENERATE_VAACalcSadVar_UT (VAACalcSadVar_c, 0, 0)
#if defined(X86_ASM)
// x86 SSE2 implementations, gated on runtime SSE2 detection.
GENERATE_VAACalcSad_UT (VAACalcSad_sse2, 1, WELS_CPU_SSE2)
GENERATE_VAACalcSadBgd_UT (VAACalcSadBgd_sse2, 1, WELS_CPU_SSE2)
GENERATE_VAACalcSadSsdBgd_UT (VAACalcSadSsdBgd_sse2, 1, WELS_CPU_SSE2)
GENERATE_VAACalcSadSsd_UT (VAACalcSadSsd_sse2, 1, WELS_CPU_SSE2)
GENERATE_VAACalcSadVar_UT (VAACalcSadVar_sse2, 1, WELS_CPU_SSE2)

// x86 AVX2 implementations (only when built with AVX2 support).
#if defined(HAVE_AVX2)
GENERATE_VAACalcSad_UT (VAACalcSad_avx2, 1, WELS_CPU_AVX2)
GENERATE_VAACalcSadBgd_UT (VAACalcSadBgd_avx2, 1, WELS_CPU_AVX2)
GENERATE_VAACalcSadSsdBgd_UT (VAACalcSadSsdBgd_avx2, 1, WELS_CPU_AVX2)
GENERATE_VAACalcSadSsd_UT (VAACalcSadSsd_avx2, 1, WELS_CPU_AVX2)
GENERATE_VAACalcSadVar_UT (VAACalcSadVar_avx2, 1, WELS_CPU_AVX2)
#endif //HAVE_AVX2
#endif

// 32-bit ARM NEON implementations, gated on runtime NEON detection.
#if defined(HAVE_NEON)
GENERATE_VAACalcSad_UT (VAACalcSad_neon, 1, WELS_CPU_NEON)
GENERATE_VAACalcSadBgd_UT (VAACalcSadBgd_neon, 1, WELS_CPU_NEON)
GENERATE_VAACalcSadSsdBgd_UT (VAACalcSadSsdBgd_neon, 1, WELS_CPU_NEON)
GENERATE_VAACalcSadSsd_UT (VAACalcSadSsd_neon, 1, WELS_CPU_NEON)
GENERATE_VAACalcSadVar_UT (VAACalcSadVar_neon, 1, WELS_CPU_NEON)
#endif

// AArch64 NEON implementations.
#if defined(HAVE_NEON_AARCH64)
GENERATE_VAACalcSad_UT (VAACalcSad_AArch64_neon, 1, WELS_CPU_NEON)
GENERATE_VAACalcSadBgd_UT (VAACalcSadBgd_AArch64_neon, 1, WELS_CPU_NEON)
GENERATE_VAACalcSadSsdBgd_UT (VAACalcSadSsdBgd_AArch64_neon, 1, WELS_CPU_NEON)
GENERATE_VAACalcSadSsd_UT (VAACalcSadSsd_AArch64_neon, 1, WELS_CPU_NEON)
GENERATE_VAACalcSadVar_UT (VAACalcSadVar_AArch64_neon, 1, WELS_CPU_NEON)
#endif

// MIPS Loongson MMI implementations.
#if defined(HAVE_MMI)
GENERATE_VAACalcSad_UT (VAACalcSad_mmi, 1, WELS_CPU_MMI)
GENERATE_VAACalcSadBgd_UT (VAACalcSadBgd_mmi, 1, WELS_CPU_MMI)
GENERATE_VAACalcSadSsdBgd_UT (VAACalcSadSsdBgd_mmi, 1, WELS_CPU_MMI)
GENERATE_VAACalcSadSsd_UT (VAACalcSadSsd_mmi, 1, WELS_CPU_MMI)
GENERATE_VAACalcSadVar_UT (VAACalcSadVar_mmi, 1, WELS_CPU_MMI)
#endif
874