1 /*!
2 * \copy
3 * Copyright (c) 2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 */
32
33 #include "util.h"
34
35 WELSVP_NAMESPACE_BEGIN
36
/*!
 * \brief  Compute per-8x8 SAD and per-16x16 sum / square-sum / square-diff
 *         statistics of the current picture against the reference picture.
 *
 * \param pCurData     current picture plane
 * \param pRefData     reference picture plane
 * \param iPicWidth    picture width in pixels (assumed a multiple of 16)
 * \param iPicHeight   picture height in pixels (assumed a multiple of 16)
 * \param iPicStride   line stride of both planes, in bytes
 * \param pFrameSad    [out] total SAD over the whole frame
 * \param pSad8x8      [out] 4 SAD values per macroblock (8x8 blocks in TL, TR, BL, BR order)
 * \param pSum16x16    [out] per-MB sum of current-picture pixels
 * \param psqsum16x16  [out] per-MB sum of squared current-picture pixels
 * \param psqdiff16x16 [out] per-MB sum of squared cur/ref differences
 */
void VAACalcSadSsd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16,
                      int32_t* psqdiff16x16) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  int32_t step = (iPicStride << 4) - iPicWidth;

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      pSum16x16[mb_index] = 0;
      psqsum16x16[mb_index] = 0;
      psqdiff16x16[mb_index] = 0;

      // Visit the four 8x8 sub-blocks of the macroblock in raster order:
      // blk 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
      for (int32_t blk = 0; blk < 4; blk ++) {
        int32_t offset = ((blk & 1) << 3) + ((blk & 2) ? pic_stride_x8 : 0);
        const uint8_t* tmp_cur_row = tmp_cur + offset;
        const uint8_t* tmp_ref_row = tmp_ref + offset;
        int32_t l_sad = 0, l_sqdiff = 0, l_sum = 0, l_sqsum = 0;

        for (int32_t k = 0; k < 8; k ++) {
          for (int32_t l = 0; l < 8; l ++) {
            int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
            if (diff < 0) diff = -diff;       // |cur - ref|
            l_sad += diff;
            l_sqdiff += diff * diff;
            l_sum += tmp_cur_row[l];
            l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
          }
          tmp_cur_row += iPicStride;
          tmp_ref_row += iPicStride;
        }

        *pFrameSad += l_sad;
        pSad8x8[ (mb_index << 2) + blk] = l_sad;
        pSum16x16[mb_index] += l_sum;
        psqsum16x16[mb_index] += l_sqsum;
        psqdiff16x16[mb_index] += l_sqdiff;
      }

      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    tmp_ref += step;
    tmp_cur += step;
  }
}
/*!
 * \brief  Compute per-8x8 SAD and per-16x16 sum / square-sum statistics of
 *         the current picture against the reference picture.
 *
 * \param pCurData     current picture plane
 * \param pRefData     reference picture plane
 * \param iPicWidth    picture width in pixels (assumed a multiple of 16)
 * \param iPicHeight   picture height in pixels (assumed a multiple of 16)
 * \param iPicStride   line stride of both planes, in bytes
 * \param pFrameSad    [out] total SAD over the whole frame
 * \param pSad8x8      [out] 4 SAD values per macroblock (8x8 blocks in TL, TR, BL, BR order)
 * \param pSum16x16    [out] per-MB sum of current-picture pixels
 * \param psqsum16x16  [out] per-MB sum of squared current-picture pixels
 */
void VAACalcSadVar_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  int32_t step = (iPicStride << 4) - iPicWidth;

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      pSum16x16[mb_index] = 0;
      psqsum16x16[mb_index] = 0;

      // Visit the four 8x8 sub-blocks of the macroblock in raster order:
      // blk 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
      for (int32_t blk = 0; blk < 4; blk ++) {
        int32_t offset = ((blk & 1) << 3) + ((blk & 2) ? pic_stride_x8 : 0);
        const uint8_t* tmp_cur_row = tmp_cur + offset;
        const uint8_t* tmp_ref_row = tmp_ref + offset;
        int32_t l_sad = 0, l_sum = 0, l_sqsum = 0;

        for (int32_t k = 0; k < 8; k ++) {
          for (int32_t l = 0; l < 8; l ++) {
            int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
            if (diff < 0) diff = -diff;       // |cur - ref|
            l_sad += diff;
            l_sum += tmp_cur_row[l];
            l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
          }
          tmp_cur_row += iPicStride;
          tmp_ref_row += iPicStride;
        }

        *pFrameSad += l_sad;
        pSad8x8[ (mb_index << 2) + blk] = l_sad;
        pSum16x16[mb_index] += l_sum;
        psqsum16x16[mb_index] += l_sqsum;
      }

      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    tmp_ref += step;
    tmp_cur += step;
  }
}
252
253
/*!
 * \brief  Compute per-8x8 SAD of the current picture against the reference
 *         picture, plus the frame-total SAD.
 *
 * \param pCurData   current picture plane
 * \param pRefData   reference picture plane
 * \param iPicWidth  picture width in pixels (assumed a multiple of 16)
 * \param iPicHeight picture height in pixels (assumed a multiple of 16)
 * \param iPicStride line stride of both planes, in bytes
 * \param pFrameSad  [out] total SAD over the whole frame
 * \param pSad8x8    [out] 4 SAD values per macroblock (8x8 blocks in TL, TR, BL, BR order)
 */
void VAACalcSad_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                   int32_t iPicStride,
                   int32_t* pFrameSad, int32_t* pSad8x8) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  int32_t step = (iPicStride << 4) - iPicWidth;

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      // Visit the four 8x8 sub-blocks of the macroblock in raster order:
      // blk 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
      for (int32_t blk = 0; blk < 4; blk ++) {
        int32_t offset = ((blk & 1) << 3) + ((blk & 2) ? pic_stride_x8 : 0);
        const uint8_t* tmp_cur_row = tmp_cur + offset;
        const uint8_t* tmp_ref_row = tmp_ref + offset;
        int32_t l_sad = 0;

        for (int32_t k = 0; k < 8; k ++) {
          for (int32_t l = 0; l < 8; l ++) {
            int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
            if (diff < 0) diff = -diff;       // |cur - ref|
            l_sad += diff;
          }
          tmp_cur_row += iPicStride;
          tmp_ref_row += iPicStride;
        }

        *pFrameSad += l_sad;
        pSad8x8[ (mb_index << 2) + blk] = l_sad;
      }

      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    tmp_ref += step;
    tmp_cur += step;
  }
}
337
/*!
 * \brief  Compute per-8x8 SAD / signed-difference / max-abs-difference and
 *         per-16x16 sum / square-sum / square-diff statistics of the current
 *         picture against the reference picture (background-detection variant).
 *
 * \param pCurData     current picture plane
 * \param pRefData     reference picture plane
 * \param iPicWidth    picture width in pixels (assumed a multiple of 16)
 * \param iPicHeight   picture height in pixels (assumed a multiple of 16)
 * \param iPicStride   line stride of both planes, in bytes
 * \param pFrameSad    [out] total SAD over the whole frame
 * \param pSad8x8      [out] 4 SAD values per macroblock (8x8 blocks in TL, TR, BL, BR order)
 * \param pSum16x16    [out] per-MB sum of current-picture pixels
 * \param psqsum16x16  [out] per-MB sum of squared current-picture pixels
 * \param psqdiff16x16 [out] per-MB sum of squared cur/ref differences
 * \param pSd8x8       [out] 4 signed difference sums per macroblock
 * \param pMad8x8      [out] 4 max absolute differences per macroblock
 */
void VAACalcSadSsdBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                         int32_t iPicStride,
                         int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16,
                         int32_t* psqdiff16x16, int32_t* pSd8x8, uint8_t* pMad8x8) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  int32_t step = (iPicStride << 4) - iPicWidth;

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      pSum16x16[mb_index] = 0;
      psqsum16x16[mb_index] = 0;
      psqdiff16x16[mb_index] = 0;

      // Visit the four 8x8 sub-blocks of the macroblock in raster order:
      // blk 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
      for (int32_t blk = 0; blk < 4; blk ++) {
        int32_t offset = ((blk & 1) << 3) + ((blk & 2) ? pic_stride_x8 : 0);
        const uint8_t* tmp_cur_row = tmp_cur + offset;
        const uint8_t* tmp_ref_row = tmp_ref + offset;
        int32_t l_sad = 0, l_sqdiff = 0, l_sum = 0, l_sqsum = 0, l_sd = 0, l_mad = 0;

        for (int32_t k = 0; k < 8; k ++) {
          for (int32_t l = 0; l < 8; l ++) {
            int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
            int32_t abs_diff = (diff < 0) ? -diff : diff;

            l_sd += diff;                     // signed sum keeps direction of change
            if (abs_diff > l_mad) {
              l_mad = abs_diff;               // track the block maximum
            }
            l_sad += abs_diff;
            l_sqdiff += abs_diff * abs_diff;
            l_sum += tmp_cur_row[l];
            l_sqsum += tmp_cur_row[l] * tmp_cur_row[l];
          }
          tmp_cur_row += iPicStride;
          tmp_ref_row += iPicStride;
        }

        *pFrameSad += l_sad;
        pSad8x8[ (mb_index << 2) + blk] = l_sad;
        pSum16x16[mb_index] += l_sum;
        psqsum16x16[mb_index] += l_sqsum;
        psqdiff16x16[mb_index] += l_sqdiff;
        pSd8x8[ (mb_index << 2) + blk] = l_sd;
        // MAD of two 8-bit pixels fits in uint8_t (max 255); narrowing is safe.
        pMad8x8[ (mb_index << 2) + blk] = (uint8_t) l_mad;
      }

      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    tmp_ref += step;
    tmp_cur += step;
  }
}
485
/*!
 * \brief  Compute per-8x8 SAD / signed-difference / max-abs-difference of the
 *         current picture against the reference picture (background-detection
 *         variant without the 16x16 statistics).
 *
 * \param pCurData   current picture plane
 * \param pRefData   reference picture plane
 * \param iPicWidth  picture width in pixels (assumed a multiple of 16)
 * \param iPicHeight picture height in pixels (assumed a multiple of 16)
 * \param iPicStride line stride of both planes, in bytes
 * \param pFrameSad  [out] total SAD over the whole frame
 * \param pSad8x8    [out] 4 SAD values per macroblock (8x8 blocks in TL, TR, BL, BR order)
 * \param pSd8x8     [out] 4 signed difference sums per macroblock
 * \param pMad8x8    [out] 4 max absolute differences per macroblock
 */
void VAACalcSadBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSd8x8, uint8_t* pMad8x8) {
  const uint8_t* tmp_ref = pRefData;
  const uint8_t* tmp_cur = pCurData;
  int32_t iMbWidth = (iPicWidth >> 4);
  int32_t mb_height = (iPicHeight >> 4);
  int32_t mb_index = 0;
  int32_t pic_stride_x8 = iPicStride << 3;
  // Advance from the end of one macroblock row to the start of the next.
  int32_t step = (iPicStride << 4) - iPicWidth;

  *pFrameSad = 0;
  for (int32_t i = 0; i < mb_height; i ++) {
    for (int32_t j = 0; j < iMbWidth; j ++) {
      // Visit the four 8x8 sub-blocks of the macroblock in raster order:
      // blk 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
      for (int32_t blk = 0; blk < 4; blk ++) {
        int32_t offset = ((blk & 1) << 3) + ((blk & 2) ? pic_stride_x8 : 0);
        const uint8_t* tmp_cur_row = tmp_cur + offset;
        const uint8_t* tmp_ref_row = tmp_ref + offset;
        int32_t l_sad = 0, l_sd = 0, l_mad = 0;

        for (int32_t k = 0; k < 8; k ++) {
          for (int32_t l = 0; l < 8; l ++) {
            int32_t diff = tmp_cur_row[l] - tmp_ref_row[l];
            int32_t abs_diff = (diff < 0) ? -diff : diff;

            l_sd += diff;                     // signed sum keeps direction of change
            l_sad += abs_diff;
            if (abs_diff > l_mad) {
              l_mad = abs_diff;               // track the block maximum
            }
          }
          tmp_cur_row += iPicStride;
          tmp_ref_row += iPicStride;
        }

        *pFrameSad += l_sad;
        pSad8x8[ (mb_index << 2) + blk] = l_sad;
        pSd8x8[ (mb_index << 2) + blk] = l_sd;
        // MAD of two 8-bit pixels fits in uint8_t (max 255); narrowing is safe.
        pMad8x8[ (mb_index << 2) + blk] = (uint8_t) l_mad;
      }

      tmp_ref += 16;
      tmp_cur += 16;
      ++mb_index;
    }
    tmp_ref += step;
    tmp_cur += step;
  }
}
597
598 WELSVP_NAMESPACE_END
599