1 /*!
2  * \copy
3  *     Copyright (c)  2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
33 #include "util.h"
34 
35 WELSVP_NAMESPACE_BEGIN
36 
/*!
 * \brief  Measure one 8x8 block: SAD against the reference plus sum, squared-sum and
 *         squared-difference statistics.
 *
 * \param  pCur     top-left pixel of the block in the current picture
 * \param  pRef     top-left pixel of the co-located block in the reference picture
 * \param  iStride  offset between two consecutive pixel rows (same for both pictures)
 * \param  pSum     [in/out] incremented by the sum of the current pixels
 * \param  pSqSum   [in/out] incremented by the sum of the squared current pixels
 * \param  pSqDiff  [in/out] incremented by the sum of the squared differences
 * \return sum of absolute differences over the 64 pixels
 */
static inline int32_t VAABlock8x8SadSsd (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
    int32_t* pSum, int32_t* pSqSum, int32_t* pSqDiff) {
  int32_t iSad = 0;
  int32_t iSum = 0;
  int32_t iSqSum = 0;
  int32_t iSqDiff = 0;

  for (int32_t iRow = 0; iRow < 8; iRow++) {
    // Rows are addressed from the block origin so no pointer is ever stepped past the block.
    const uint8_t* pCurRow = pCur + iRow * iStride;
    const uint8_t* pRefRow = pRef + iRow * iStride;
    for (int32_t iCol = 0; iCol < 8; iCol++) {
      const int32_t iPixel = pCurRow[iCol];
      // The difference is formed once; its magnitude and its square both derive from it.
      const int32_t iDiff = iPixel - pRefRow[iCol];
      iSad += (iDiff < 0) ? -iDiff : iDiff;
      iSqDiff += iDiff * iDiff;
      iSum += iPixel;
      iSqSum += iPixel * iPixel;
    }
  }

  *pSum += iSum;
  *pSqSum += iSqSum;
  *pSqDiff += iSqDiff;
  return iSad;
}

/*!
 * \brief  Per-macroblock SAD / sum / squared-sum / squared-difference analysis of a picture
 *         against a reference picture.
 *
 * Only complete 16x16 macroblocks are processed: (iPicWidth >> 4) per row and
 * (iPicHeight >> 4) rows. Pixels right of / below the last complete macroblock are ignored.
 * Macroblocks are numbered in raster order.
 *
 * \param  pCurData      current picture plane
 * \param  pRefData      reference picture plane, same stride as the current one
 * \param  iPicWidth     picture width in pixels
 * \param  iPicHeight    picture height in pixels
 * \param  iPicStride    offset between two consecutive pixel rows
 * \param  pFrameSad     [out] sum of all 8x8 SADs
 * \param  pSad8x8       [out] 4 entries per macroblock: top-left, top-right, bottom-left,
 *                       bottom-right 8x8 SAD
 * \param  pSum16x16     [out] per macroblock: sum of the current pixels
 * \param  psqsum16x16   [out] per macroblock: sum of the squared current pixels
 * \param  psqdiff16x16  [out] per macroblock: sum of the squared differences
 *
 * \note   All accumulators are int32_t, as dictated by the interface.
 */
void VAACalcSadSsd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16) {
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iMbRowStep = iPicStride * 16;   // one macroblock row further down
  const int32_t iHalfMbStep = iPicStride * 8;   // bottom half of a macroblock
  int32_t iMbIndex = 0;
  int32_t iFrameSad = 0;

  for (int32_t i = 0; i < iMbHeight; i++) {
    // The row base is derived from the row index, so a width that is not a multiple of 16
    // cannot shift the following macroblock rows.
    const uint8_t* pCurRow = pCurData + i * iMbRowStep;
    const uint8_t* pRefRow = pRefData + i * iMbRowStep;

    for (int32_t j = 0; j < iMbWidth; j++) {
      const uint8_t* pCurMb = pCurRow + j * 16;
      const uint8_t* pRefMb = pRefRow + j * 16;
      int32_t iSum = 0;
      int32_t iSqSum = 0;
      int32_t iSqDiff = 0;

      for (int32_t iBlk = 0; iBlk < 4; iBlk++) {
        // Bit 0 of iBlk selects the right half, bit 1 the bottom half of the macroblock.
        const int32_t iOffset = (iBlk >> 1) * iHalfMbStep + (iBlk & 1) * 8;
        const int32_t iSad = VAABlock8x8SadSsd (pCurMb + iOffset, pRefMb + iOffset, iPicStride,
                                                &iSum, &iSqSum, &iSqDiff);
        pSad8x8[ (iMbIndex << 2) + iBlk] = iSad;
        iFrameSad += iSad;
      }

      pSum16x16[iMbIndex] = iSum;
      psqsum16x16[iMbIndex] = iSqSum;
      psqdiff16x16[iMbIndex] = iSqDiff;
      ++iMbIndex;
    }
  }

  *pFrameSad = iFrameSad;
}
/*!
 * \brief  Measure one 8x8 block: SAD against the reference plus sum and squared-sum of the
 *         current pixels.
 *
 * \param  pCur     top-left pixel of the block in the current picture
 * \param  pRef     top-left pixel of the co-located block in the reference picture
 * \param  iStride  offset between two consecutive pixel rows (same for both pictures)
 * \param  pSum     [in/out] incremented by the sum of the current pixels
 * \param  pSqSum   [in/out] incremented by the sum of the squared current pixels
 * \return sum of absolute differences over the 64 pixels
 */
static inline int32_t VAABlock8x8SadVar (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
    int32_t* pSum, int32_t* pSqSum) {
  int32_t iSad = 0;
  int32_t iSum = 0;
  int32_t iSqSum = 0;

  for (int32_t iRow = 0; iRow < 8; iRow++) {
    // Rows are addressed from the block origin so no pointer is ever stepped past the block.
    const uint8_t* pCurRow = pCur + iRow * iStride;
    const uint8_t* pRefRow = pRef + iRow * iStride;
    for (int32_t iCol = 0; iCol < 8; iCol++) {
      const int32_t iPixel = pCurRow[iCol];
      // The difference is formed once and its magnitude taken from that value.
      const int32_t iDiff = iPixel - pRefRow[iCol];
      iSad += (iDiff < 0) ? -iDiff : iDiff;
      iSum += iPixel;
      iSqSum += iPixel * iPixel;
    }
  }

  *pSum += iSum;
  *pSqSum += iSqSum;
  return iSad;
}

/*!
 * \brief  Per-macroblock SAD / sum / squared-sum analysis of a picture against a reference
 *         picture.
 *
 * Only complete 16x16 macroblocks are processed: (iPicWidth >> 4) per row and
 * (iPicHeight >> 4) rows. Pixels right of / below the last complete macroblock are ignored.
 * Macroblocks are numbered in raster order.
 *
 * \param  pCurData     current picture plane
 * \param  pRefData     reference picture plane, same stride as the current one
 * \param  iPicWidth    picture width in pixels
 * \param  iPicHeight   picture height in pixels
 * \param  iPicStride   offset between two consecutive pixel rows
 * \param  pFrameSad    [out] sum of all 8x8 SADs
 * \param  pSad8x8      [out] 4 entries per macroblock: top-left, top-right, bottom-left,
 *                      bottom-right 8x8 SAD
 * \param  pSum16x16    [out] per macroblock: sum of the current pixels
 * \param  psqsum16x16  [out] per macroblock: sum of the squared current pixels
 *
 * \note   All accumulators are int32_t, as dictated by the interface.
 */
void VAACalcSadVar_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16) {
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iMbRowStep = iPicStride * 16;   // one macroblock row further down
  const int32_t iHalfMbStep = iPicStride * 8;   // bottom half of a macroblock
  int32_t iMbIndex = 0;
  int32_t iFrameSad = 0;

  for (int32_t i = 0; i < iMbHeight; i++) {
    // The row base is derived from the row index, so a width that is not a multiple of 16
    // cannot shift the following macroblock rows.
    const uint8_t* pCurRow = pCurData + i * iMbRowStep;
    const uint8_t* pRefRow = pRefData + i * iMbRowStep;

    for (int32_t j = 0; j < iMbWidth; j++) {
      const uint8_t* pCurMb = pCurRow + j * 16;
      const uint8_t* pRefMb = pRefRow + j * 16;
      int32_t iSum = 0;
      int32_t iSqSum = 0;

      for (int32_t iBlk = 0; iBlk < 4; iBlk++) {
        // Bit 0 of iBlk selects the right half, bit 1 the bottom half of the macroblock.
        const int32_t iOffset = (iBlk >> 1) * iHalfMbStep + (iBlk & 1) * 8;
        const int32_t iSad = VAABlock8x8SadVar (pCurMb + iOffset, pRefMb + iOffset, iPicStride,
                                                &iSum, &iSqSum);
        pSad8x8[ (iMbIndex << 2) + iBlk] = iSad;
        iFrameSad += iSad;
      }

      pSum16x16[iMbIndex] = iSum;
      psqsum16x16[iMbIndex] = iSqSum;
      ++iMbIndex;
    }
  }

  *pFrameSad = iFrameSad;
}
252 
253 
/*!
 * \brief  Sum of absolute differences of one 8x8 block.
 *
 * \param  pCur     top-left pixel of the block in the current picture
 * \param  pRef     top-left pixel of the co-located block in the reference picture
 * \param  iStride  offset between two consecutive pixel rows (same for both pictures)
 * \return sum of absolute differences over the 64 pixels
 */
static inline int32_t VAABlock8x8Sad (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride) {
  int32_t iSad = 0;

  for (int32_t iRow = 0; iRow < 8; iRow++) {
    // Rows are addressed from the block origin so no pointer is ever stepped past the block.
    const uint8_t* pCurRow = pCur + iRow * iStride;
    const uint8_t* pRefRow = pRef + iRow * iStride;
    for (int32_t iCol = 0; iCol < 8; iCol++) {
      // The difference is formed once and its magnitude taken from that value.
      const int32_t iDiff = pCurRow[iCol] - pRefRow[iCol];
      iSad += (iDiff < 0) ? -iDiff : iDiff;
    }
  }
  return iSad;
}

/*!
 * \brief  Per-8x8-block SAD analysis of a picture against a reference picture.
 *
 * Only complete 16x16 macroblocks are processed: (iPicWidth >> 4) per row and
 * (iPicHeight >> 4) rows. Pixels right of / below the last complete macroblock are ignored.
 * Macroblocks are numbered in raster order.
 *
 * \param  pCurData    current picture plane
 * \param  pRefData    reference picture plane, same stride as the current one
 * \param  iPicWidth   picture width in pixels
 * \param  iPicHeight  picture height in pixels
 * \param  iPicStride  offset between two consecutive pixel rows
 * \param  pFrameSad   [out] sum of all 8x8 SADs
 * \param  pSad8x8     [out] 4 entries per macroblock: top-left, top-right, bottom-left,
 *                     bottom-right 8x8 SAD
 *
 * \note   The frame SAD is accumulated in int32_t, as dictated by the interface.
 */
void VAACalcSad_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                   int32_t iPicStride,
                   int32_t* pFrameSad, int32_t* pSad8x8) {
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iMbRowStep = iPicStride * 16;   // one macroblock row further down
  const int32_t iHalfMbStep = iPicStride * 8;   // bottom half of a macroblock
  int32_t iMbIndex = 0;
  int32_t iFrameSad = 0;

  for (int32_t i = 0; i < iMbHeight; i++) {
    // The row base is derived from the row index, so a width that is not a multiple of 16
    // cannot shift the following macroblock rows.
    const uint8_t* pCurRow = pCurData + i * iMbRowStep;
    const uint8_t* pRefRow = pRefData + i * iMbRowStep;

    for (int32_t j = 0; j < iMbWidth; j++) {
      const uint8_t* pCurMb = pCurRow + j * 16;
      const uint8_t* pRefMb = pRefRow + j * 16;

      for (int32_t iBlk = 0; iBlk < 4; iBlk++) {
        // Bit 0 of iBlk selects the right half, bit 1 the bottom half of the macroblock.
        const int32_t iOffset = (iBlk >> 1) * iHalfMbStep + (iBlk & 1) * 8;
        const int32_t iSad = VAABlock8x8Sad (pCurMb + iOffset, pRefMb + iOffset, iPicStride);
        pSad8x8[ (iMbIndex << 2) + iBlk] = iSad;
        iFrameSad += iSad;
      }
      ++iMbIndex;
    }
  }

  *pFrameSad = iFrameSad;
}
337 
/*!
 * \brief  Measure one 8x8 block: SAD, signed difference sum and maximum absolute difference
 *         against the reference, plus sum / squared-sum / squared-difference statistics.
 *
 * \param  pCur     top-left pixel of the block in the current picture
 * \param  pRef     top-left pixel of the co-located block in the reference picture
 * \param  iStride  offset between two consecutive pixel rows (same for both pictures)
 * \param  pSum     [in/out] incremented by the sum of the current pixels
 * \param  pSqSum   [in/out] incremented by the sum of the squared current pixels
 * \param  pSqDiff  [in/out] incremented by the sum of the squared differences
 * \param  pSd      [out] sum of the signed differences (current - reference)
 * \param  pMad     [out] largest absolute difference found in the block
 * \return sum of absolute differences over the 64 pixels
 */
static inline int32_t VAABlock8x8SadSsdBgd (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
    int32_t* pSum, int32_t* pSqSum, int32_t* pSqDiff, int32_t* pSd, uint8_t* pMad) {
  int32_t iSad = 0;
  int32_t iSd = 0;
  int32_t iMad = 0;
  int32_t iSum = 0;
  int32_t iSqSum = 0;
  int32_t iSqDiff = 0;

  for (int32_t iRow = 0; iRow < 8; iRow++) {
    // Rows are addressed from the block origin so no pointer is ever stepped past the block.
    const uint8_t* pCurRow = pCur + iRow * iStride;
    const uint8_t* pRefRow = pRef + iRow * iStride;
    for (int32_t iCol = 0; iCol < 8; iCol++) {
      const int32_t iPixel = pCurRow[iCol];
      const int32_t iDiff = iPixel - pRefRow[iCol];
      const int32_t iAbsDiff = (iDiff < 0) ? -iDiff : iDiff;

      iSd += iDiff;
      if (iAbsDiff > iMad) {
        iMad = iAbsDiff;
      }
      iSad += iAbsDiff;
      iSqDiff += iDiff * iDiff;
      iSum += iPixel;
      iSqSum += iPixel * iPixel;
    }
  }

  *pSum += iSum;
  *pSqSum += iSqSum;
  *pSqDiff += iSqDiff;
  *pSd = iSd;
  // Differences of two 8-bit pixels never exceed 255, so the narrowing is lossless.
  *pMad = (uint8_t) iMad;
  return iSad;
}

/*!
 * \brief  Per-macroblock SAD / sum / squared-sum / squared-difference analysis plus per-8x8
 *         signed difference sum and maximum absolute difference, of a picture against a
 *         reference picture.
 *
 * Only complete 16x16 macroblocks are processed: (iPicWidth >> 4) per row and
 * (iPicHeight >> 4) rows. Pixels right of / below the last complete macroblock are ignored.
 * Macroblocks are numbered in raster order; every per-8x8 array holds 4 entries per
 * macroblock in the order top-left, top-right, bottom-left, bottom-right.
 *
 * \param  pCurData      current picture plane
 * \param  pRefData      reference picture plane, same stride as the current one
 * \param  iPicWidth     picture width in pixels
 * \param  iPicHeight    picture height in pixels
 * \param  iPicStride    offset between two consecutive pixel rows
 * \param  pFrameSad     [out] sum of all 8x8 SADs
 * \param  pSad8x8       [out] per 8x8 block: sum of absolute differences
 * \param  pSum16x16     [out] per macroblock: sum of the current pixels
 * \param  psqsum16x16   [out] per macroblock: sum of the squared current pixels
 * \param  psqdiff16x16  [out] per macroblock: sum of the squared differences
 * \param  pSd8x8        [out] per 8x8 block: sum of signed differences (current - reference)
 * \param  pMad8x8       [out] per 8x8 block: maximum absolute difference
 *
 * \note   All accumulators are int32_t, as dictated by the interface.
 */
void VAACalcSadSsdBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                         int32_t iPicStride,
                         int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSum16x16, int32_t* psqsum16x16, int32_t* psqdiff16x16, int32_t* pSd8x8,
                         uint8_t* pMad8x8) {
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iMbRowStep = iPicStride * 16;   // one macroblock row further down
  const int32_t iHalfMbStep = iPicStride * 8;   // bottom half of a macroblock
  int32_t iMbIndex = 0;
  int32_t iFrameSad = 0;

  for (int32_t i = 0; i < iMbHeight; i++) {
    // The row base is derived from the row index, so a width that is not a multiple of 16
    // cannot shift the following macroblock rows.
    const uint8_t* pCurRow = pCurData + i * iMbRowStep;
    const uint8_t* pRefRow = pRefData + i * iMbRowStep;

    for (int32_t j = 0; j < iMbWidth; j++) {
      const uint8_t* pCurMb = pCurRow + j * 16;
      const uint8_t* pRefMb = pRefRow + j * 16;
      int32_t iSum = 0;
      int32_t iSqSum = 0;
      int32_t iSqDiff = 0;

      for (int32_t iBlk = 0; iBlk < 4; iBlk++) {
        // Bit 0 of iBlk selects the right half, bit 1 the bottom half of the macroblock.
        const int32_t iOffset = (iBlk >> 1) * iHalfMbStep + (iBlk & 1) * 8;
        const int32_t iBlkIndex = (iMbIndex << 2) + iBlk;
        const int32_t iSad = VAABlock8x8SadSsdBgd (pCurMb + iOffset, pRefMb + iOffset, iPicStride,
                             &iSum, &iSqSum, &iSqDiff, &pSd8x8[iBlkIndex], &pMad8x8[iBlkIndex]);
        pSad8x8[iBlkIndex] = iSad;
        iFrameSad += iSad;
      }

      pSum16x16[iMbIndex] = iSum;
      psqsum16x16[iMbIndex] = iSqSum;
      psqdiff16x16[iMbIndex] = iSqDiff;
      ++iMbIndex;
    }
  }

  *pFrameSad = iFrameSad;
}
485 
/*!
 * \brief  Measure one 8x8 block against the reference: SAD, signed difference sum and
 *         maximum absolute difference.
 *
 * \param  pCur     top-left pixel of the block in the current picture
 * \param  pRef     top-left pixel of the co-located block in the reference picture
 * \param  iStride  offset between two consecutive pixel rows (same for both pictures)
 * \param  pSd      [out] sum of the signed differences (current - reference)
 * \param  pMad     [out] largest absolute difference found in the block
 * \return sum of absolute differences over the 64 pixels
 */
static inline int32_t VAABlock8x8SadBgd (const uint8_t* pCur, const uint8_t* pRef, int32_t iStride,
    int32_t* pSd, uint8_t* pMad) {
  int32_t iSad = 0;
  int32_t iSd = 0;
  int32_t iMad = 0;

  for (int32_t iRow = 0; iRow < 8; iRow++) {
    // Rows are addressed from the block origin so no pointer is ever stepped past the block.
    const uint8_t* pCurRow = pCur + iRow * iStride;
    const uint8_t* pRefRow = pRef + iRow * iStride;
    for (int32_t iCol = 0; iCol < 8; iCol++) {
      const int32_t iDiff = pCurRow[iCol] - pRefRow[iCol];
      const int32_t iAbsDiff = (iDiff < 0) ? -iDiff : iDiff;

      iSd += iDiff;
      iSad += iAbsDiff;
      if (iAbsDiff > iMad) {
        iMad = iAbsDiff;
      }
    }
  }

  *pSd = iSd;
  // Differences of two 8-bit pixels never exceed 255, so the narrowing is lossless.
  *pMad = (uint8_t) iMad;
  return iSad;
}

/*!
 * \brief  Per-8x8-block SAD, signed difference sum and maximum absolute difference of a
 *         picture against a reference picture.
 *
 * Only complete 16x16 macroblocks are processed: (iPicWidth >> 4) per row and
 * (iPicHeight >> 4) rows. Pixels right of / below the last complete macroblock are ignored.
 * Macroblocks are numbered in raster order; every per-8x8 array holds 4 entries per
 * macroblock in the order top-left, top-right, bottom-left, bottom-right.
 *
 * \param  pCurData    current picture plane
 * \param  pRefData    reference picture plane, same stride as the current one
 * \param  iPicWidth   picture width in pixels
 * \param  iPicHeight  picture height in pixels
 * \param  iPicStride  offset between two consecutive pixel rows
 * \param  pFrameSad   [out] sum of all 8x8 SADs
 * \param  pSad8x8     [out] per 8x8 block: sum of absolute differences
 * \param  pSd8x8      [out] per 8x8 block: sum of signed differences (current - reference)
 * \param  pMad8x8     [out] per 8x8 block: maximum absolute difference
 *
 * \note   The frame SAD is accumulated in int32_t, as dictated by the interface.
 */
void VAACalcSadBgd_c (const uint8_t* pCurData, const uint8_t* pRefData, int32_t iPicWidth, int32_t iPicHeight,
                      int32_t iPicStride,
                      int32_t* pFrameSad, int32_t* pSad8x8, int32_t* pSd8x8, uint8_t* pMad8x8) {
  const int32_t iMbWidth = iPicWidth >> 4;
  const int32_t iMbHeight = iPicHeight >> 4;
  const int32_t iMbRowStep = iPicStride * 16;   // one macroblock row further down
  const int32_t iHalfMbStep = iPicStride * 8;   // bottom half of a macroblock
  int32_t iMbIndex = 0;
  int32_t iFrameSad = 0;

  for (int32_t i = 0; i < iMbHeight; i++) {
    // The row base is derived from the row index, so a width that is not a multiple of 16
    // cannot shift the following macroblock rows.
    const uint8_t* pCurRow = pCurData + i * iMbRowStep;
    const uint8_t* pRefRow = pRefData + i * iMbRowStep;

    for (int32_t j = 0; j < iMbWidth; j++) {
      const uint8_t* pCurMb = pCurRow + j * 16;
      const uint8_t* pRefMb = pRefRow + j * 16;

      for (int32_t iBlk = 0; iBlk < 4; iBlk++) {
        // Bit 0 of iBlk selects the right half, bit 1 the bottom half of the macroblock.
        const int32_t iOffset = (iBlk >> 1) * iHalfMbStep + (iBlk & 1) * 8;
        const int32_t iBlkIndex = (iMbIndex << 2) + iBlk;
        const int32_t iSad = VAABlock8x8SadBgd (pCurMb + iOffset, pRefMb + iOffset, iPicStride,
                                                &pSd8x8[iBlkIndex], &pMad8x8[iBlkIndex]);
        pSad8x8[iBlkIndex] = iSad;
        iFrameSad += iSad;
      }
      ++iMbIndex;
    }
  }

  *pFrameSad = iFrameSad;
}
597 
598 WELSVP_NAMESPACE_END
599