/*****************************************************************************
 * This file is part of Kvazaar HEVC encoder.
 *
 * Copyright (c) 2021, Tampere University, ITU/ISO/IEC, project contributors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice, this
 *   list of conditions and the following disclaimer in the documentation and/or
 *   other materials provided with the distribution.
 *
 * * Neither the name of the Tampere University or ITU/ISO/IEC nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ****************************************************************************/

#include "strategies/generic/picture-generic.h"

#include <stdlib.h>

#include "strategies/strategies-picture.h"
#include "strategyselector.h"

// Function to clip int16_t to pixel. (0-255 or 0-1023)
// Assumes PIXEL_MAX to be 2^n-1
kvz_pixel kvz_fast_clip_16bit_to_pixel(int16_t value)
{
  // Ensure that compiler generates arithmetic shift from ">>"
#if defined(_MSC_VER) || defined(__GNUC__) || defined(__clang__)

  if (value & ~PIXEL_MAX) {
    int16_t temp = (-value) >> 15;
#if KVZ_BIT_DEPTH == 10
    temp &= PIXEL_MAX;
#endif
    return temp;
  }
  else {
    return value;
  }
#else
  return CLIP(PIXEL_MIN, PIXEL_MAX, value);
#endif
}
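
/*
 * Worked illustration of the branchless clip above (8-bit case,
 * PIXEL_MAX == 255); documentation only, not compiled:
 *
 *   value = 300:  300 & ~255 is nonzero, so the clipping branch is taken.
 *                 temp = (-300) >> 15 == -1 (arithmetic shift, all bits set),
 *                 which truncates to 255 when returned as an 8-bit kvz_pixel.
 *   value = -5:   -5 & ~255 is nonzero, so the clipping branch is taken.
 *                 temp = 5 >> 15 == 0, i.e. the value clips to PIXEL_MIN.
 *   value = 100:  100 & ~255 == 0, so the value is returned unchanged.
 *
 * At 10-bit depth the extra "temp &= PIXEL_MAX" masks the all-ones pattern
 * down to 1023, because kvz_pixel is then 16 bits wide and would not
 * truncate on its own.
 */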

// Function to clip int32_t to pixel. (0-255 or 0-1023)
// Assumes PIXEL_MAX to be 2^n-1
kvz_pixel kvz_fast_clip_32bit_to_pixel(int32_t value)
{
  // Ensure that compiler generates arithmetic shift from ">>"
#if defined(_MSC_VER) || defined(__GNUC__) || defined(__clang__)

  if (value & ~PIXEL_MAX) {
    int32_t temp = (-value) >> 31;
#if KVZ_BIT_DEPTH == 10
    temp &= PIXEL_MAX;
#endif
    return temp;
  }
  else {
    return value;
  }
#else
  return CLIP(PIXEL_MIN, PIXEL_MAX, value);
#endif
}

/**
 * \brief Calculate Sum of Absolute Differences (SAD)
 *
 * Calculate Sum of Absolute Differences (SAD) between two rectangular regions
 * located at arbitrary positions in the picture.
 *
 * \param data1   Starting point of the first picture.
 * \param data2   Starting point of the second picture.
 * \param width   Width of the region for which SAD is calculated.
 * \param height  Height of the region for which SAD is calculated.
 * \param stride1 Stride of the first pixel array.
 * \param stride2 Stride of the second pixel array.
 *
 * \returns Sum of Absolute Differences
 */
static unsigned reg_sad_generic(const kvz_pixel * const data1, const kvz_pixel * const data2,
                                const int width, const int height,
                                const unsigned stride1, const unsigned stride2)
{
  int y, x;
  unsigned sad = 0;

  for (y = 0; y < height; ++y) {
    for (x = 0; x < width; ++x) {
      sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
    }
  }

  return sad;
}
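
/*
 * Usage sketch (illustrative only; frame_cur, frame_ref and stride are
 * hypothetical names): SAD of an 8x8 block at offset (16, 16) in two
 * frames that share the same stride.
 *
 * \code
 *   const kvz_pixel *cur = frame_cur + 16 * stride + 16;
 *   const kvz_pixel *ref = frame_ref + 16 * stride + 16;
 *   unsigned sad = reg_sad_generic(cur, ref, 8, 8, stride, stride);
 * \endcode
 *
 * In the encoder this function is reached through the "reg_sad" strategy
 * pointer registered at the bottom of this file.
 */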

/**
 * \brief Transform differences between two 4x4 blocks.
 * From HM 13.0
 */
static int32_t hadamard_4x4_generic(int32_t diff[4*4])
{
  int32_t m[4 * 4];
  m[0] = diff[0] + diff[12];
  m[1] = diff[1] + diff[13];
  m[2] = diff[2] + diff[14];
  m[3] = diff[3] + diff[15];
  m[4] = diff[4] + diff[8];
  m[5] = diff[5] + diff[9];
  m[6] = diff[6] + diff[10];
  m[7] = diff[7] + diff[11];
  m[8] = diff[4] - diff[8];
  m[9] = diff[5] - diff[9];
  m[10] = diff[6] - diff[10];
  m[11] = diff[7] - diff[11];
  m[12] = diff[0] - diff[12];
  m[13] = diff[1] - diff[13];
  m[14] = diff[2] - diff[14];
  m[15] = diff[3] - diff[15];

  int32_t d[4 * 4];
  d[0] = m[0] + m[4];
  d[1] = m[1] + m[5];
  d[2] = m[2] + m[6];
  d[3] = m[3] + m[7];
  d[4] = m[8] + m[12];
  d[5] = m[9] + m[13];
  d[6] = m[10] + m[14];
  d[7] = m[11] + m[15];
  d[8] = m[0] - m[4];
  d[9] = m[1] - m[5];
  d[10] = m[2] - m[6];
  d[11] = m[3] - m[7];
  d[12] = m[12] - m[8];
  d[13] = m[13] - m[9];
  d[14] = m[14] - m[10];
  d[15] = m[15] - m[11];

  m[0] = d[0] + d[3];
  m[1] = d[1] + d[2];
  m[2] = d[1] - d[2];
  m[3] = d[0] - d[3];
  m[4] = d[4] + d[7];
  m[5] = d[5] + d[6];
  m[6] = d[5] - d[6];
  m[7] = d[4] - d[7];
  m[8] = d[8] + d[11];
  m[9] = d[9] + d[10];
  m[10] = d[9] - d[10];
  m[11] = d[8] - d[11];
  m[12] = d[12] + d[15];
  m[13] = d[13] + d[14];
  m[14] = d[13] - d[14];
  m[15] = d[12] - d[15];

  d[0] = m[0] + m[1];
  d[1] = m[0] - m[1];
  d[2] = m[2] + m[3];
  d[3] = m[3] - m[2];
  d[4] = m[4] + m[5];
  d[5] = m[4] - m[5];
  d[6] = m[6] + m[7];
  d[7] = m[7] - m[6];
  d[8] = m[8] + m[9];
  d[9] = m[8] - m[9];
  d[10] = m[10] + m[11];
  d[11] = m[11] - m[10];
  d[12] = m[12] + m[13];
  d[13] = m[12] - m[13];
  d[14] = m[14] + m[15];
  d[15] = m[15] - m[14];

  int32_t satd = 0;
  for (int i = 0; i < 16; i++) {
    satd += abs(d[i]);
  }
  satd = ((satd + 1) >> 1);

  return satd;
}
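
/*
 * Background note: the butterfly stages above in effect apply the 4-point
 * Hadamard transform
 *
 *        | 1  1  1  1 |
 *   H4 = | 1  1 -1 -1 |
 *        | 1 -1 -1  1 |
 *        | 1 -1  1 -1 |
 *
 * to both the rows and the columns of the difference block (up to row
 * ordering and signs, which do not affect the sum of absolute values).
 * The final ((satd + 1) >> 1) halves the coefficient sum with rounding,
 * matching the normalization used by HM.
 */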

/**
 * \brief Calculate SATD between two 4x4 blocks.
 */
static unsigned satd_4x4_generic(const kvz_pixel *piOrg, const kvz_pixel *piCur)
{
  int32_t diff[4 * 4];
  for (int i = 0; i < 4 * 4; i++) {
    diff[i] = piOrg[i] - piCur[i];
  }
  return hadamard_4x4_generic(diff);
}

/**
 * \brief Calculate SATD between two 4x4 blocks inside bigger arrays.
 */
unsigned kvz_satd_4x4_subblock_generic(const kvz_pixel *buf1,
                                       const int32_t stride1,
                                       const kvz_pixel *buf2,
                                       const int32_t stride2)
{
  int32_t diff[4 * 4];
  for (int y = 0; y < 4; y++) {
    for (int x = 0; x < 4; x++) {
      diff[x + y * 4] = buf1[x + y * stride1] - buf2[x + y * stride2];
    }
  }
  return hadamard_4x4_generic(diff);
}

void kvz_satd_4x4_subblock_quad_generic(const kvz_pixel *preds[4],
                                        const int stride,
                                        const kvz_pixel *orig,
                                        const int orig_stride,
                                        unsigned costs[4])
{
  int32_t diff[4][4 * 4];
  for (int y = 0; y < 4; y++) {
    for (int x = 0; x < 4; x++) {
      diff[0][x + y * 4] = orig[x + y * orig_stride] - preds[0][x + y * stride];
      diff[1][x + y * 4] = orig[x + y * orig_stride] - preds[1][x + y * stride];
      diff[2][x + y * 4] = orig[x + y * orig_stride] - preds[2][x + y * stride];
      diff[3][x + y * 4] = orig[x + y * orig_stride] - preds[3][x + y * stride];
    }
  }

  costs[0] = hadamard_4x4_generic(diff[0]);
  costs[1] = hadamard_4x4_generic(diff[1]);
  costs[2] = hadamard_4x4_generic(diff[2]);
  costs[3] = hadamard_4x4_generic(diff[3]);
}

/**
 * \brief Calculate SATD between two 8x8 blocks inside bigger arrays.
 */
static unsigned satd_8x8_subblock_generic(const kvz_pixel * piOrg, const int32_t iStrideOrg,
                                          const kvz_pixel * piCur, const int32_t iStrideCur)
{
  int32_t k, i, j, jj, sad = 0;
  int32_t diff[64], m1[8][8], m2[8][8], m3[8][8];

  for (k = 0; k < 64; k += 8) {
    diff[k + 0] = piOrg[0] - piCur[0];
    diff[k + 1] = piOrg[1] - piCur[1];
    diff[k + 2] = piOrg[2] - piCur[2];
    diff[k + 3] = piOrg[3] - piCur[3];
    diff[k + 4] = piOrg[4] - piCur[4];
    diff[k + 5] = piOrg[5] - piCur[5];
    diff[k + 6] = piOrg[6] - piCur[6];
    diff[k + 7] = piOrg[7] - piCur[7];

    piCur += iStrideCur;
    piOrg += iStrideOrg;
  }

  // horizontal
  for (j = 0; j < 8; ++j) {
    jj = j << 3;
    m2[j][0] = diff[jj] + diff[jj + 4];
    m2[j][1] = diff[jj + 1] + diff[jj + 5];
    m2[j][2] = diff[jj + 2] + diff[jj + 6];
    m2[j][3] = diff[jj + 3] + diff[jj + 7];
    m2[j][4] = diff[jj] - diff[jj + 4];
    m2[j][5] = diff[jj + 1] - diff[jj + 5];
    m2[j][6] = diff[jj + 2] - diff[jj + 6];
    m2[j][7] = diff[jj + 3] - diff[jj + 7];

    m1[j][0] = m2[j][0] + m2[j][2];
    m1[j][1] = m2[j][1] + m2[j][3];
    m1[j][2] = m2[j][0] - m2[j][2];
    m1[j][3] = m2[j][1] - m2[j][3];
    m1[j][4] = m2[j][4] + m2[j][6];
    m1[j][5] = m2[j][5] + m2[j][7];
    m1[j][6] = m2[j][4] - m2[j][6];
    m1[j][7] = m2[j][5] - m2[j][7];

    m2[j][0] = m1[j][0] + m1[j][1];
    m2[j][1] = m1[j][0] - m1[j][1];
    m2[j][2] = m1[j][2] + m1[j][3];
    m2[j][3] = m1[j][2] - m1[j][3];
    m2[j][4] = m1[j][4] + m1[j][5];
    m2[j][5] = m1[j][4] - m1[j][5];
    m2[j][6] = m1[j][6] + m1[j][7];
    m2[j][7] = m1[j][6] - m1[j][7];
  }

  // vertical
  for (i = 0; i < 8; ++i) {
    m3[0][i] = m2[0][i] + m2[4][i];
    m3[1][i] = m2[1][i] + m2[5][i];
    m3[2][i] = m2[2][i] + m2[6][i];
    m3[3][i] = m2[3][i] + m2[7][i];
    m3[4][i] = m2[0][i] - m2[4][i];
    m3[5][i] = m2[1][i] - m2[5][i];
    m3[6][i] = m2[2][i] - m2[6][i];
    m3[7][i] = m2[3][i] - m2[7][i];

    m1[0][i] = m3[0][i] + m3[2][i];
    m1[1][i] = m3[1][i] + m3[3][i];
    m1[2][i] = m3[0][i] - m3[2][i];
    m1[3][i] = m3[1][i] - m3[3][i];
    m1[4][i] = m3[4][i] + m3[6][i];
    m1[5][i] = m3[5][i] + m3[7][i];
    m1[6][i] = m3[4][i] - m3[6][i];
    m1[7][i] = m3[5][i] - m3[7][i];

    m2[0][i] = m1[0][i] + m1[1][i];
    m2[1][i] = m1[0][i] - m1[1][i];
    m2[2][i] = m1[2][i] + m1[3][i];
    m2[3][i] = m1[2][i] - m1[3][i];
    m2[4][i] = m1[4][i] + m1[5][i];
    m2[5][i] = m1[4][i] - m1[5][i];
    m2[6][i] = m1[6][i] + m1[7][i];
    m2[7][i] = m1[6][i] - m1[7][i];
  }

  for (i = 0; i < 64; ++i) {
    sad += abs(((int*)m2)[i]);
  }

  sad = (sad + 2) >> 2;

  return sad;
}

static void satd_8x8_subblock_quad_generic(const kvz_pixel **preds,
                                           const int stride,
                                           const kvz_pixel *orig,
                                           const int orig_stride,
                                           unsigned *costs)
{
  costs[0] = satd_8x8_subblock_generic(orig, orig_stride, preds[0], stride);
  costs[1] = satd_8x8_subblock_generic(orig, orig_stride, preds[1], stride);
  costs[2] = satd_8x8_subblock_generic(orig, orig_stride, preds[2], stride);
  costs[3] = satd_8x8_subblock_generic(orig, orig_stride, preds[3], stride);
}

// These macros define satd_NxN_generic for N = 8, 16, 32, 64,
// plus satd_any_size_generic for arbitrary block sizes.
SATD_NxN(generic, 8)
SATD_NxN(generic, 16)
SATD_NxN(generic, 32)
SATD_NxN(generic, 64)
SATD_ANY_SIZE(generic)
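
/*
 * The generated functions have the cost_pixel_nxn_func signature, e.g.
 * (illustrative call, hypothetical buffer names):
 *
 * \code
 *   unsigned cost = satd_16x16_generic(orig_block, pred_block);
 * \endcode
 *
 * Internally they tile the block with satd_8x8_subblock_generic() calls and
 * normalize the total by >> (KVZ_BIT_DEPTH - 8); see SATD_NxN in
 * strategies-picture.h for the exact definition.
 */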


// Declare these functions to make sure the signature of the macro matches.
static cost_pixel_nxn_multi_func satd_4x4_dual_generic;
static cost_pixel_nxn_multi_func satd_8x8_dual_generic;
static cost_pixel_nxn_multi_func satd_16x16_dual_generic;
static cost_pixel_nxn_multi_func satd_32x32_dual_generic;
static cost_pixel_nxn_multi_func satd_64x64_dual_generic;

#define SATD_DUAL_NXN(n, pixel_type) \
static void satd_ ## n ## x ## n ## _dual_generic( \
  const pred_buffer preds, const pixel_type * const orig, unsigned num_modes, unsigned *costs_out) \
{ \
  unsigned x, y; \
  unsigned sum = 0; \
  for (y = 0; y < (n); y += 8) { \
    unsigned row = y * (n); \
    for (x = 0; x < (n); x += 8) { \
      sum += satd_8x8_subblock_generic(&preds[0][row + x], (n), &orig[row + x], (n)); \
    } \
  } \
  costs_out[0] = sum>>(KVZ_BIT_DEPTH-8); \
  \
  sum = 0; \
  for (y = 0; y < (n); y += 8) { \
    unsigned row = y * (n); \
    for (x = 0; x < (n); x += 8) { \
      sum += satd_8x8_subblock_generic(&preds[1][row + x], (n), &orig[row + x], (n)); \
    } \
  } \
  costs_out[1] = sum>>(KVZ_BIT_DEPTH-8); \
}

static void satd_4x4_dual_generic(const pred_buffer preds, const kvz_pixel * const orig, unsigned num_modes, unsigned *costs_out)
{
  costs_out[0] = satd_4x4_generic(orig, preds[0]);
  costs_out[1] = satd_4x4_generic(orig, preds[1]);
}

SATD_DUAL_NXN(8, kvz_pixel)
SATD_DUAL_NXN(16, kvz_pixel)
SATD_DUAL_NXN(32, kvz_pixel)
SATD_DUAL_NXN(64, kvz_pixel)
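
/*
 * Illustrative call (hypothetical names): the dual variants score two
 * candidate predictions against the same original block in one call.
 *
 * \code
 *   pred_buffer preds;      // preds[0] and preds[1] hold two predictions
 *   unsigned costs[2];
 *   satd_8x8_dual_generic(preds, orig_block, 2, costs);
 *   // costs[0] and costs[1] now hold the bit-depth-normalized SATD of
 *   // each prediction against orig_block.
 * \endcode
 */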

#define SATD_ANY_SIZE_MULTI_GENERIC(suffix, num_parallel_blocks) \
  static cost_pixel_any_size_multi_func satd_any_size_## suffix; \
  static void satd_any_size_ ## suffix ( \
      int width, int height, \
      const kvz_pixel **preds, \
      const int stride, \
      const kvz_pixel *orig, \
      const int orig_stride, \
      unsigned num_modes, \
      unsigned *costs_out, \
      int8_t *valid) \
  { \
    unsigned sums[num_parallel_blocks] = { 0 }; \
    const kvz_pixel *pred_ptrs[4] = { preds[0], preds[1], preds[2], preds[3] }; \
    const kvz_pixel *orig_ptr = orig; \
    costs_out[0] = 0; costs_out[1] = 0; costs_out[2] = 0; costs_out[3] = 0; \
    if (width % 8 != 0) { \
      /* Process the first column using 4x4 blocks. */ \
      for (int y = 0; y < height; y += 4) { \
        kvz_satd_4x4_subblock_ ## suffix(preds, stride, orig, orig_stride, sums); \
      } \
      orig_ptr += 4; \
      for (int blk = 0; blk < num_parallel_blocks; ++blk) { \
        pred_ptrs[blk] += 4; \
      } \
      width -= 4; \
    } \
    if (height % 8 != 0) { \
      /* Process the first row using 4x4 blocks. */ \
      for (int x = 0; x < width; x += 4) { \
        kvz_satd_4x4_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \
      } \
      orig_ptr += 4 * orig_stride; \
      for (int blk = 0; blk < num_parallel_blocks; ++blk) { \
        pred_ptrs[blk] += 4 * stride; \
      } \
      height -= 4; \
    } \
    /* The rest can now be processed with 8x8 blocks. */ \
    for (int y = 0; y < height; y += 8) { \
      orig_ptr = &orig[y * orig_stride]; \
      pred_ptrs[0] = &preds[0][y * stride]; \
      pred_ptrs[1] = &preds[1][y * stride]; \
      pred_ptrs[2] = &preds[2][y * stride]; \
      pred_ptrs[3] = &preds[3][y * stride]; \
      for (int x = 0; x < width; x += 8) { \
        satd_8x8_subblock_ ## suffix(pred_ptrs, stride, orig_ptr, orig_stride, sums); \
        orig_ptr += 8; \
        pred_ptrs[0] += 8; \
        pred_ptrs[1] += 8; \
        pred_ptrs[2] += 8; \
        pred_ptrs[3] += 8; \
        costs_out[0] += sums[0]; \
        costs_out[1] += sums[1]; \
        costs_out[2] += sums[2]; \
        costs_out[3] += sums[3]; \
      } \
    } \
    for (int i = 0; i < num_parallel_blocks; ++i) { \
      costs_out[i] = costs_out[i] >> (KVZ_BIT_DEPTH - 8); \
    } \
    return; \
  }

SATD_ANY_SIZE_MULTI_GENERIC(quad_generic, 4)

// Function macro for defining SAD calculating functions
// for fixed size blocks.
#define SAD_NXN(n, pixel_type) \
static unsigned sad_ ## n ## x ## n ## _generic( \
  const pixel_type * const block1, const pixel_type * const block2) \
{ \
  unsigned i; \
  unsigned sum = 0; \
  for (i = 0; i < (n)*(n); ++i) { \
    sum += abs(block1[i] - block2[i]); \
  } \
  return sum>>(KVZ_BIT_DEPTH-8); \
}

// Declare these functions to make sure the signature of the macro matches.
static cost_pixel_nxn_func sad_4x4_generic;
static cost_pixel_nxn_func sad_8x8_generic;
static cost_pixel_nxn_func sad_16x16_generic;
static cost_pixel_nxn_func sad_32x32_generic;
static cost_pixel_nxn_func sad_64x64_generic;

// These macros define sad_NxN_generic functions for N = 4, 8, 16, 32, 64
// with the signature of cost_pixel_nxn_func. They are reached through the
// "sad_NxN" strategy pointers registered below.
SAD_NXN(4, kvz_pixel)
SAD_NXN(8, kvz_pixel)
SAD_NXN(16, kvz_pixel)
SAD_NXN(32, kvz_pixel)
SAD_NXN(64, kvz_pixel)
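
/*
 * Note on the normalization: sum >> (KVZ_BIT_DEPTH - 8) is a no-op at 8-bit
 * depth and a >> 2 at 10-bit depth, keeping SAD costs on a comparable scale
 * regardless of the configured bit depth. For example, a uniform per-pixel
 * difference of 4 over a 4x4 block gives sum = 64, which is reported as 64
 * at 8-bit depth and as 16 at 10-bit depth.
 */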

// Declare these functions to make sure the signature of the macro matches.
static cost_pixel_nxn_multi_func sad_4x4_dual_generic;
static cost_pixel_nxn_multi_func sad_8x8_dual_generic;
static cost_pixel_nxn_multi_func sad_16x16_dual_generic;
static cost_pixel_nxn_multi_func sad_32x32_dual_generic;
static cost_pixel_nxn_multi_func sad_64x64_dual_generic;

// Function macro for defining SAD calculating functions
// for fixed size blocks.
#define SAD_DUAL_NXN(n, pixel_type) \
static void sad_ ## n ## x ## n ## _dual_generic( \
  const pred_buffer preds, const pixel_type * const orig, unsigned num_modes, unsigned *costs_out) \
{ \
  unsigned i; \
  unsigned sum = 0; \
  for (i = 0; i < (n)*(n); ++i) { \
    sum += abs(preds[0][i] - orig[i]); \
  } \
  costs_out[0] = sum>>(KVZ_BIT_DEPTH-8); \
  \
  sum = 0; \
  for (i = 0; i < (n)*(n); ++i) { \
    sum += abs(preds[1][i] - orig[i]); \
  } \
  costs_out[1] = sum>>(KVZ_BIT_DEPTH-8); \
}

SAD_DUAL_NXN(4, kvz_pixel)
SAD_DUAL_NXN(8, kvz_pixel)
SAD_DUAL_NXN(16, kvz_pixel)
SAD_DUAL_NXN(32, kvz_pixel)
SAD_DUAL_NXN(64, kvz_pixel)

static unsigned pixels_calc_ssd_generic(const kvz_pixel *const ref, const kvz_pixel *const rec,
                                        const int ref_stride, const int rec_stride,
                                        const int width)
{
  int ssd = 0;
  int y, x;

  for (y = 0; y < width; ++y) {
    for (x = 0; x < width; ++x) {
      int diff = ref[x + y * ref_stride] - rec[x + y * rec_stride];
      ssd += diff * diff;
    }
  }

  return ssd >> (2*(KVZ_BIT_DEPTH-8));
}
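
/*
 * Example of the scaling (assuming 10-bit depth): a constant difference of 4
 * over an 8x8 block gives ssd = 64 * 16 = 1024, and the final shift
 * >> (2*(KVZ_BIT_DEPTH-8)) == >> 4 reports it as 64, the same value an
 * 8-bit encode would produce for a proportionally equal error.
 */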

static void inter_recon_bipred_generic(const int hi_prec_luma_rec0,
                                       const int hi_prec_luma_rec1,
                                       const int hi_prec_chroma_rec0,
                                       const int hi_prec_chroma_rec1,
                                       int32_t height,
                                       int32_t width,
                                       int32_t ypos,
                                       int32_t xpos,
                                       const hi_prec_buf_t *high_precision_rec0,
                                       const hi_prec_buf_t *high_precision_rec1,
                                       lcu_t *lcu,
                                       kvz_pixel *temp_lcu_y,
                                       kvz_pixel *temp_lcu_u,
                                       kvz_pixel *temp_lcu_v,
                                       bool predict_luma,
                                       bool predict_chroma)
{
  int shift = 15 - KVZ_BIT_DEPTH;
  int offset = 1 << (shift - 1);

  int y_in_lcu;
  int x_in_lcu;

  // After reconstruction, merge the predictors by taking an average of each pixel.
  for (int temp_y = 0; temp_y < height; ++temp_y) {
    for (int temp_x = 0; temp_x < width; ++temp_x) {
      y_in_lcu = ((ypos + temp_y) & ((LCU_WIDTH)-1));
      x_in_lcu = ((xpos + temp_x) & ((LCU_WIDTH)-1));

      if (predict_luma) {
        int16_t sample0_y = (hi_prec_luma_rec0 ? high_precision_rec0->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (temp_lcu_y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
        int16_t sample1_y = (hi_prec_luma_rec1 ? high_precision_rec1->y[y_in_lcu * LCU_WIDTH + x_in_lcu] : (lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));

        lcu->rec.y[y_in_lcu * LCU_WIDTH + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_y + sample1_y + offset) >> shift);
      }

      if (predict_chroma && (temp_x < width >> 1 && temp_y < height >> 1)) {
        y_in_lcu = (((ypos >> 1) + temp_y) & (LCU_WIDTH_C - 1));
        x_in_lcu = (((xpos >> 1) + temp_x) & (LCU_WIDTH_C - 1));

        int16_t sample0_u = (hi_prec_chroma_rec0 ? high_precision_rec0->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
        int16_t sample1_u = (hi_prec_chroma_rec1 ? high_precision_rec1->u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
        lcu->rec.u[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_u + sample1_u + offset) >> shift);

        int16_t sample0_v = (hi_prec_chroma_rec0 ? high_precision_rec0->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (temp_lcu_v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
        int16_t sample1_v = (hi_prec_chroma_rec1 ? high_precision_rec1->v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] : (lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] << (14 - KVZ_BIT_DEPTH)));
        lcu->rec.v[y_in_lcu * LCU_WIDTH_C + x_in_lcu] = (kvz_pixel)kvz_fast_clip_32bit_to_pixel((sample0_v + sample1_v + offset) >> shift);
      }
    }
  }
}
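
/*
 * The merge step above is the usual HEVC-style bi-prediction average: both
 * inputs are brought to 14-bit intermediate precision (taken directly from
 * the high-precision buffers, or shifted up from pixel values), then
 * combined as
 *
 *   rec = clip((sample0 + sample1 + offset) >> shift)
 *
 * with shift = 15 - KVZ_BIT_DEPTH and offset = 1 << (shift - 1). At 8-bit
 * depth, for example, shift == 7, and two equal samples s == p << 6 give
 * (2*s + 64) >> 7 == p, i.e. averaging identical predictions reproduces
 * the original pixel value.
 */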


static optimized_sad_func_ptr_t get_optimized_sad_generic(int32_t width)
{
  return NULL;
}

/**
 * \brief Calculate SAD when the reference block extends vertically outside the frame.
 *
 * A single reference row is repeated for every row of the block, matching
 * the edge-pixel extension of the frame border.
 *
 * \param pic_data     Starting point of the block in the current picture.
 * \param ref_data     Pointer to the replicated reference row.
 * \param block_width  Width of the region for which SAD is calculated.
 * \param block_height Height of the region for which SAD is calculated.
 * \param pic_stride   Stride of the picture data.
 *
 * \returns Sum of Absolute Differences
 */
static uint32_t ver_sad_generic(const kvz_pixel *pic_data, const kvz_pixel *ref_data,
                                int block_width, int block_height, unsigned pic_stride)
{
  int x, y;
  unsigned sad = 0;

  for (y = 0; y < block_height; ++y) {
    for (x = 0; x < block_width; ++x) {
      sad += abs(pic_data[y * pic_stride + x] - ref_data[x]);
    }
  }

  return sad;
}

/**
 * \brief Calculate SAD when the reference block extends horizontally outside the frame.
 *
 * A single reference column is repeated for every column of the block,
 * matching the edge-pixel extension of the frame border.
 *
 * \param pic_data     Starting point of the block in the current picture.
 * \param ref_data     Pointer to the replicated reference column.
 * \param block_width  Width of the region for which SAD is calculated.
 * \param block_height Height of the region for which SAD is calculated.
 * \param pic_stride   Stride of the picture data.
 * \param ref_stride   Stride of the reference data.
 *
 * \returns Sum of Absolute Differences
 */
static unsigned hor_sad(const kvz_pixel *pic_data, const kvz_pixel *ref_data,
                        int block_width, int block_height, unsigned pic_stride, unsigned ref_stride)
{
  int x, y;
  unsigned sad = 0;

  for (y = 0; y < block_height; ++y) {
    for (x = 0; x < block_width; ++x) {
      sad += abs(pic_data[y * pic_stride + x] - ref_data[y * ref_stride]);
    }
  }

  return sad;
}


static uint32_t hor_sad_generic(const kvz_pixel *pic_data, const kvz_pixel *ref_data,
                                int32_t width, int32_t height, uint32_t pic_stride,
                                uint32_t ref_stride, uint32_t left, uint32_t right)
{
  uint32_t result = 0;
  if (left) {
    result += hor_sad    (pic_data, ref_data + left, left,
                          height, pic_stride, ref_stride);

    result += kvz_reg_sad(pic_data + left, ref_data + left, width - left,
                          height, pic_stride, ref_stride);
  } else if (right) {
    result += kvz_reg_sad(pic_data, ref_data, width - right,
                          height, pic_stride, ref_stride);

    result += hor_sad    (pic_data + width - right,
                          ref_data + width - right - 1,
                          right, height, pic_stride, ref_stride);
  } else {
    result += kvz_reg_sad(pic_data, ref_data, width,
                          height, pic_stride, ref_stride);
  }
  return result;
}
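
/*
 * Illustration of the border handling above (hypothetical numbers): with
 * width = 16 and left = 4, the leftmost 4 reference columns fall outside the
 * frame, so hor_sad() scores them against the replicated edge column at
 * ref_data + left, and kvz_reg_sad() covers the remaining 12 in-frame
 * columns. The `right` case mirrors this, replicating the last valid column
 * (ref_data + width - right - 1) for the rightmost `right` columns.
 */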

// Calculate pixel value variance. Takes in arrays of kvz_pixel
static double pixel_var_generic(const kvz_pixel *arr, const uint32_t len)
{
  double var = 0;
  double arr_mean = 0;

  // Calculate array mean
  int i = 0;
  double sum = 0;

  for (; i < len; ++i) {
    sum += arr[i];
  }
  arr_mean = sum / (double)len;

  // Calculate array variance
  for (i = 0; i < len; ++i) {
    double tmp = (double)arr[i] - arr_mean;
    var += tmp*tmp;
  }

  var /= len;

  return var;
}
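
/*
 * Usage sketch (illustrative; block_pixels is a hypothetical name):
 * variance of one contiguous 32x32 luma block.
 *
 * \code
 *   double var = pixel_var_generic(block_pixels, 32 * 32);
 * \endcode
 *
 * The function expects a contiguous array, so a strided region has to be
 * copied (or measured row by row) before calling it.
 */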

int kvz_strategy_register_picture_generic(void* opaque, uint8_t bitdepth)
{
  bool success = true;

  success &= kvz_strategyselector_register(opaque, "reg_sad", "generic", 0, &reg_sad_generic);

  success &= kvz_strategyselector_register(opaque, "sad_4x4", "generic", 0, &sad_4x4_generic);
  success &= kvz_strategyselector_register(opaque, "sad_8x8", "generic", 0, &sad_8x8_generic);
  success &= kvz_strategyselector_register(opaque, "sad_16x16", "generic", 0, &sad_16x16_generic);
  success &= kvz_strategyselector_register(opaque, "sad_32x32", "generic", 0, &sad_32x32_generic);
  success &= kvz_strategyselector_register(opaque, "sad_64x64", "generic", 0, &sad_64x64_generic);

  success &= kvz_strategyselector_register(opaque, "satd_4x4", "generic", 0, &satd_4x4_generic);
  success &= kvz_strategyselector_register(opaque, "satd_8x8", "generic", 0, &satd_8x8_generic);
  success &= kvz_strategyselector_register(opaque, "satd_16x16", "generic", 0, &satd_16x16_generic);
  success &= kvz_strategyselector_register(opaque, "satd_32x32", "generic", 0, &satd_32x32_generic);
  success &= kvz_strategyselector_register(opaque, "satd_64x64", "generic", 0, &satd_64x64_generic);

  success &= kvz_strategyselector_register(opaque, "sad_4x4_dual", "generic", 0, &sad_4x4_dual_generic);
  success &= kvz_strategyselector_register(opaque, "sad_8x8_dual", "generic", 0, &sad_8x8_dual_generic);
  success &= kvz_strategyselector_register(opaque, "sad_16x16_dual", "generic", 0, &sad_16x16_dual_generic);
  success &= kvz_strategyselector_register(opaque, "sad_32x32_dual", "generic", 0, &sad_32x32_dual_generic);
  success &= kvz_strategyselector_register(opaque, "sad_64x64_dual", "generic", 0, &sad_64x64_dual_generic);

  success &= kvz_strategyselector_register(opaque, "satd_4x4_dual", "generic", 0, &satd_4x4_dual_generic);
  success &= kvz_strategyselector_register(opaque, "satd_8x8_dual", "generic", 0, &satd_8x8_dual_generic);
  success &= kvz_strategyselector_register(opaque, "satd_16x16_dual", "generic", 0, &satd_16x16_dual_generic);
  success &= kvz_strategyselector_register(opaque, "satd_32x32_dual", "generic", 0, &satd_32x32_dual_generic);
  success &= kvz_strategyselector_register(opaque, "satd_64x64_dual", "generic", 0, &satd_64x64_dual_generic);
  success &= kvz_strategyselector_register(opaque, "satd_any_size", "generic", 0, &satd_any_size_generic);
  success &= kvz_strategyselector_register(opaque, "satd_any_size_quad", "generic", 0, &satd_any_size_quad_generic);

  success &= kvz_strategyselector_register(opaque, "pixels_calc_ssd", "generic", 0, &pixels_calc_ssd_generic);
  success &= kvz_strategyselector_register(opaque, "inter_recon_bipred", "generic", 0, &inter_recon_bipred_generic);

  success &= kvz_strategyselector_register(opaque, "get_optimized_sad", "generic", 0, &get_optimized_sad_generic);
  success &= kvz_strategyselector_register(opaque, "ver_sad", "generic", 0, &ver_sad_generic);
  success &= kvz_strategyselector_register(opaque, "hor_sad", "generic", 0, &hor_sad_generic);

  success &= kvz_strategyselector_register(opaque, "pixel_var", "generic", 0, &pixel_var_generic);

  return success;
}