1 /*
2 * Copyright(c) 2018 Intel Corporation
* SPDX-License-Identifier: BSD-2-Clause-Patent
4 */
5
6 #ifndef EbTransforms_h
7 #define EbTransforms_h
8
9 #include "EbTransforms_C.h"
10 #include "EbTransforms_SSE2.h"
11 #include "EbTransforms_SSSE3.h"
12 #include "EbTransforms_SSE4_1.h"
13 #include "EbTransforms_AVX2.h"
14 #include "EbSequenceControlSet.h"
15 #include "EbPictureControlSet.h"
16
17
18 #include "EbEncDecProcess.h"
19
20 #include "EbDefinitions.h"
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24
25
26 static const EB_U32 QFunc[] = {26214,23302,20560,18396,16384,14564};
27 static const EB_U32 FFunc[] = {40,45,51,57,64,72};
28
29 //QP in [0..63]
30 static const EB_U8 QpModSix[]=
31 {
32 0, 1, 2, 3, 4, 5,
33 0, 1, 2, 3, 4, 5,
34 0, 1, 2, 3, 4, 5,
35 0, 1, 2, 3, 4, 5,
36 0, 1, 2, 3, 4, 5,
37 0, 1, 2, 3, 4, 5,
38 0, 1, 2, 3, 4, 5,
39 0, 1, 2, 3, 4, 5,
40 0, 1, 2, 3, 4, 5,
41 0, 1, 2, 3, 4, 5,
42 0, 1, 2, 3
43 };
44
45 static const EB_U8 QpDivSix[]=
46 {
47 0, 0, 0, 0, 0, 0,
48 1, 1, 1, 1, 1, 1,
49 2, 2, 2, 2, 2, 2,
50 3, 3, 3, 3, 3, 3,
51 4, 4, 4, 4, 4, 4,
52 5, 5, 5, 5, 5, 5,
53 6, 6, 6, 6, 6, 6,
54 7, 7, 7, 7, 7, 7,
55 8, 8, 8, 8, 8, 8,
56 9, 9, 9, 9, 9, 9,
57 10, 10, 10,10
58
59 };
60
/*
 * Do-nothing function used as a placeholder entry in function tables.
 *
 * NOTE(review): QiQ_funcPtrArray stores this function after casting it to
 * EB_QIQ_TYPE. Actually invoking it through that pointer type would be a call
 * through an incompatible function type -- confirm that the placeholder slot
 * is never dispatched.
 */
static void QiQVoidFunc()
{
    /* intentionally empty */
}
62
/*
 * Debug macros.
 *
 * Both switches are defined as 0 in this header.
 */
#define ZERO_COEFF          0
#define ZERO_COEFF_CHROMA   0
68
69 extern EB_ERRORTYPE Quantize(
70 EB_S16 *coeff,
71 const EB_U32 coeffStride,
72 EB_S16 *quantCoeff,
73 const EB_U32 quantCoeffStride,
74 const EB_BITDEPTH bitDepth,
75 const EB_U32 transformSize,
76 const EB_U32 qp,
77 const EB_PICTURE sliceType);
78
79 extern EB_ERRORTYPE InvQuantize(
80 EB_S16 *quantCoeff,
81 const EB_U32 quantCoeffStride,
82 EB_S16 *reconCoeff,
83 const EB_U32 reconCoeffStride,
84 const EB_BITDEPTH bitDepth,
85 const EB_U32 transformSize,
86 const EB_U32 qp);
87
88 extern void UnifiedQuantizeInvQuantize(
89
90 EncDecContext_t *contextPtr,
91
92 PictureControlSet_t *pictureControlSetPtr,
93 EB_S16 *coeff,
94 const EB_U32 coeffStride,
95 EB_S16 *quantCoeff,
96 EB_S16 *reconCoeff,
97 EB_U32 qp,
98 EB_U32 bitDepth,
99 EB_U32 areaSize,
100 EB_PICTURE sliceType,
101 EB_U32 *yCountNonZeroCoeffs,
102 EB_TRANS_COEFF_SHAPE transCoeffShape,
103
104 EB_U8 cleanSparseCeoffPfEncDec,
105 EB_U8 pmpMaskingLevelEncDec,
106 EB_MODETYPE type,
107 EB_U32 enableCbflag,
108 EB_U8 enableContouringQCUpdateFlag,
109 EB_U32 componentType,
110 EB_U32 temporalLayerIndex,
111 EB_U32 dZoffset,
112
113 CabacEncodeContext_t *cabacEncodeCtxPtr,
114 EB_U64 lambda,
115 EB_U32 intraLumaMode,
116 EB_U32 intraChromaMode,
117 CabacCost_t *CabacCost);
118
119
120 extern EB_ERRORTYPE EncodeTransform(
121 EB_S16 *residualBuffer,
122 EB_U32 residualStride,
123 EB_S16 *coeffBuffer,
124 EB_U32 coeffStride,
125 EB_U32 transformSize,
126 EB_S16 *transformInnerArrayPtr,
127 EB_U32 bitIncrement,
128 EB_BOOL dstTransformFlag,
129 EB_TRANS_COEFF_SHAPE transCoeffShape);
130
131 extern EB_ERRORTYPE EstimateTransform(
132 EB_S16 *residualBuffer,
133 EB_U32 residualStride,
134 EB_S16 *coeffBuffer,
135 EB_U32 coeffStride,
136 EB_U32 transformSize,
137 EB_S16 *transformInnerArrayPtr,
138 EB_U32 bitIncrement,
139 EB_BOOL dstTansformFlag,
140 EB_TRANS_COEFF_SHAPE transCoeffShape);
141
142
143 extern EB_ERRORTYPE EstimateInvTransform(
144 EB_S16 *coeffBuffer,
145 EB_U32 coeffStride,
146 EB_S16 *reconBuffer,
147 EB_U32 reconStride,
148 EB_U32 transformSize,
149 EB_S16 *transformInnerArrayPtr,
150 EB_U32 bitIncrement,
151 EB_BOOL dstTransformFlag,
152 EB_U32 partialFrequencyN2Flag);
153
154 extern EB_ERRORTYPE EncodeInvTransform(
155 EB_BOOL isOnlyDc,
156 EB_S16 *coeffBuffer,
157 EB_U32 coeffStride,
158 EB_S16 *reconBuffer,
159 EB_U32 reconStride,
160 EB_U32 transformSize,
161 EB_S16 *transformInnerArrayPtr,
162 EB_U32 bitIncrement,
163 EB_BOOL dstTransformFlag);
164
165 extern EB_ERRORTYPE CalculateCbf(
166 EB_S16 *buffer,
167 EB_U32 stride,
168 EB_U32 cuSize,
169 EB_U32 transformSize,
170 EB_U32 *cbfBuffer);
171 extern EB_U8 MapChromaQp(
172 EB_U8 qp
173 );
174
175
176
177 extern void DecoupledQuantizeInvQuantizeLoops(
178 EB_S16 *coeff,
179 const EB_U32 coeffStride,
180 EB_S16 *quantCoeff,
181 EB_S16 *reconCoeff,
182 CabacEncodeContext_t *cabacEncodeCtxPtr,
183 EB_U64 lambda,
184 EB_MODETYPE type, // Input: CU type (INTRA, INTER)
185 EB_U32 intraLumaMode,
186 EB_U32 intraChromaMode,
187 EB_U32 componentType,
188 EB_U8 temporalLayerIndex,
189 EB_BOOL isUsedAsReferenceFlag,
190 EB_U8 chromaLambda,
191 EB_U16 qp,
192 EB_U32 bitDepth,
193 CabacCost_t *CabacCost,
194 const EB_U32 qFunc,
195 const EB_U32 q_offset,
196 const EB_S32 shiftedQBits,
197 const EB_S32 shiftedFFunc,
198 const EB_S32 iq_offset,
199 const EB_S32 shiftNum,
200 const EB_U32 areaSize,
201 EB_U32 *nonzerocoeff,
202 EB_RDOQ_PMCORE_TYPE rdoType);
203
204
205 extern void PfZeroOutUselessQuadrants(
206 EB_S16* transformCoeffBuffer,
207 EB_U32 transformCoeffStride,
208 EB_U32 quadrantSize);
209
210 /*****************************
211 * Function Pointer Typedef
212 *****************************/
213 typedef void(*EB_QIQ_TYPE)(
214 EB_S16 *coeff,
215 const EB_U32 coeffStride,
216 EB_S16 *quantCoeff,
217 EB_S16 *reconCoeff,
218 const EB_U32 qFunc,
219 const EB_U32 q_offset,
220 const EB_S32 shiftedQBits,
221 const EB_S32 shiftedFFunc,
222 const EB_S32 iq_offset,
223 const EB_S32 shiftNum,
224 const EB_U32 areaSize,
225 EB_U32 *nonzerocoeff);
226
227 typedef void(*EB_MAT_MUL_TYPE)(
228 EB_S16 *coeff,
229 const EB_U32 coeffStride,
230 const EB_U16 *maskingMatrix,
231 const EB_U32 maskingMatrixStride,
232 const EB_U32 computeSize,
233 const EB_S32 offset,
234 const EB_S32 shiftNum,
235 EB_U32 *nonzerocoeff);
236
237 extern void MatMult(
238 EB_S16 *coeff,
239 const EB_U32 coeffStride,
240 const EB_U16 *maskingMatrix,
241 const EB_U32 maskingMatrixStride,
242 const EB_U32 computeSize,
243 const EB_S32 offset,
244 const EB_S32 shiftNum,
245 EB_U32 *nonzerocoeff);
246
247 typedef void(*EB_MAT_OUT_MUL_TYPE)(
248 EB_S16 *coeff,
249 const EB_U32 coeffStride,
250 EB_S16* coeffOut,
251 const EB_U32 coeffOutStride,
252 const EB_U16 *maskingMatrix,
253 const EB_U32 maskingMatrixStride,
254 const EB_U32 computeSize,
255 const EB_S32 offset,
256 const EB_S32 shiftNum,
257 EB_U32 *nonzerocoeff);
258
259 void MatMultOut(
260 EB_S16 *coeff,
261 const EB_U32 coeffStride,
262 EB_S16* coeffOut,
263 const EB_U32 coeffOutStride,
264 const EB_U16 *maskingMatrix,
265 const EB_U32 maskingMatrixStride,
266 const EB_U32 computeSize,
267 const EB_S32 offset,
268 const EB_S32 shiftNum,
269 EB_U32 *nonzerocoeff);
270
271 static EB_MAT_OUT_MUL_TYPE FUNC_TABLE MatMulOut_funcPtrArray[EB_ASM_TYPE_TOTAL] =
272 {
273 MatMultOut,
274 MatMult4x4_OutBuff_AVX2_INTRIN,
275 };
276
277
278
279 typedef void(*EB_TRANSFORM_FUNC)(
280 EB_S16 *residual,
281 const EB_U32 srcStride,
282 EB_S16 *transformCoefficients,
283 const EB_U32 dstStride,
284 EB_S16 *transformInnerArrayPtr,
285 EB_U32 bitIncrement);
286
287 typedef void(*EB_INVTRANSFORM_FUNC)(
288 EB_S16 *transformCoefficients,
289 const EB_U32 srcStride,
290 EB_S16 *residual,
291 const EB_U32 dstStride,
292 EB_S16 *transformInnerArrayPtr,
293 EB_U32 bitIncrement);
294
295 /*****************************
296 * Function Tables
297 *****************************/
298 static EB_QIQ_TYPE FUNC_TABLE QiQ_funcPtrArray[EB_ASM_TYPE_TOTAL][5] = {
299 // C_DEFAULT
300 {
301 /*0 4x4 */ QuantizeInvQuantize,
302 /*1 8x8 */ QuantizeInvQuantize,
303 /*2 16x16 */ QuantizeInvQuantize,
304 /*3 */ (EB_QIQ_TYPE)QiQVoidFunc,
305 /*4 32x32 */ QuantizeInvQuantize
306 },
307 // AVX2
308 {
309 /*0 4x4 */ QuantizeInvQuantize4x4_SSE3,
310 /*1 8x8 */ QuantizeInvQuantize8x8_AVX2_INTRIN,
311 /*2 16x16 */ QuantizeInvQuantizeNxN_AVX2_INTRIN,
312 /*3 */ (EB_QIQ_TYPE)QiQVoidFunc,
313 /*4 32x32 */ QuantizeInvQuantizeNxN_AVX2_INTRIN,
314
315 },
316 };
317
318 static EB_MAT_MUL_TYPE FUNC_TABLE MatMul_funcPtrArray[EB_ASM_TYPE_TOTAL][5] = {
319 // C_DEFAULT
320 {
321 /*0 4x4 */ MatMult,
322 /*1 8x8 */ MatMult,
323 /*2 16x16 */ MatMult,
324 /*3 16x16 */ MatMult,
325 /*4 32x32 */ MatMult
326 },
327 // AVX2
328 {
329 /*0 4x4 */ MatMult4x4_AVX2_INTRIN,
330 /*1 8x8 */ MatMult8x8_AVX2_INTRIN,
331 /*2 16x16 */ MatMultNxN_AVX2_INTRIN,
332 /*3 16x16 */ MatMultNxN_AVX2_INTRIN,
333 /*4 32x32 */ MatMultNxN_AVX2_INTRIN
334 },
335
336
337 };
338
339 static const EB_TRANSFORM_FUNC transformFunctionTableEstimate[EB_ASM_TYPE_TOTAL][5] = {
340 // C_DEFAULT
341 {
342 Transform32x32Estimate,
343 Transform16x16Estimate,
344 Transform8x8,
345 Transform4x4,
346 DstTransform4x4
347 },
348 // AVX2
349 {
350 lowPrecisionTransform32x32_AVX2_INTRIN,
351 lowPrecisionTransform16x16_AVX2_INTRIN,
352 Transform8x8_SSE4_1_INTRIN,
353 Transform4x4_SSE2_INTRIN,
354 DstTransform4x4_SSE2_INTRIN
355 },
356 };
357
358 static const EB_TRANSFORM_FUNC PfreqN2TransformTable0[EB_ASM_TYPE_TOTAL][5] = {
359 // NON_AVX2
360 {
361 Transform32x32Estimate,
362 Transform16x16Estimate,
363 Transform8x8,
364 Transform4x4,
365 DstTransform4x4
366
367 },
368 // AVX2
369 {
370 PfreqTransform32x32_AVX2_INTRIN,
371 PfreqTransform16x16_SSE2,
372 PfreqTransform8x8_SSE4_1_INTRIN,
373 Transform4x4_SSE2_INTRIN,
374 DstTransform4x4_SSE2_INTRIN
375 }
376 };
377
378 static const EB_TRANSFORM_FUNC PfreqN2TransformTable1[EB_ASM_TYPE_TOTAL][5] = {
379 // NON_AVX2
380 {
381 PfreqTransform32x32_SSE2,
382 PfreqTransform16x16_SSE2,
383 PfreqTransform8x8_SSE2_INTRIN,
384 Transform4x4_SSE2_INTRIN,
385 DstTransform4x4_SSE2_INTRIN
386
387 },
388 // AVX2
389 {
390 PfreqTransform32x32_AVX2_INTRIN,
391 PfreqTransform16x16_SSE2,
392 PfreqTransform8x8_SSE4_1_INTRIN,
393 Transform4x4_SSE2_INTRIN,
394 DstTransform4x4_SSE2_INTRIN
395 }
396 };
397
398 static const EB_TRANSFORM_FUNC PfreqN4TransformTable0[EB_ASM_TYPE_TOTAL][5] = {
399 // NON_AVX2
400 {
401 Transform32x32Estimate,
402 Transform16x16Estimate,
403 Transform8x8,
404 Transform4x4,
405 DstTransform4x4
406 },
407 // AVX2
408 {
409 PfreqN4Transform32x32_AVX2_INTRIN,
410 PfreqN4Transform16x16_SSE2,
411 PfreqN4Transform8x8_SSE4_1_INTRIN,
412 Transform4x4_SSE2_INTRIN,
413 DstTransform4x4_SSE2_INTRIN
414 }
415 };
416
417 static const EB_TRANSFORM_FUNC PfreqN4TransformTable1[EB_ASM_TYPE_TOTAL][5] = {
418 // NON_AVX2
419 {
420 PfreqN4Transform32x32_SSE2,
421 PfreqN4Transform16x16_SSE2,
422 PfreqN4Transform8x8_SSE2_INTRIN,
423 Transform4x4_SSE2_INTRIN,
424 DstTransform4x4_SSE2_INTRIN
425 },
426 // AVX2
427 {
428 PfreqN4Transform32x32_AVX2_INTRIN,
429 PfreqN4Transform16x16_SSE2,
430 PfreqN4Transform8x8_SSE4_1_INTRIN,
431 Transform4x4_SSE2_INTRIN,
432 DstTransform4x4_SSE2_INTRIN
433 }
434 };
435
436 static const EB_TRANSFORM_FUNC transformFunctionTableEncode0[EB_ASM_TYPE_TOTAL][5] = {
437 // NON_AVX2
438 {
439 Transform32x32Estimate,
440 Transform16x16Estimate,
441 Transform8x8,
442 Transform4x4,
443 DstTransform4x4
444 },
445 // AVX2
446 {
447 Transform32x32_SSE2,
448 Transform16x16_SSE2,
449 Transform8x8_SSE4_1_INTRIN,
450 Transform4x4_SSE2_INTRIN,
451 DstTransform4x4_SSE2_INTRIN
452 },
453 };
454
455 static const EB_TRANSFORM_FUNC transformFunctionTableEncode1[EB_ASM_TYPE_TOTAL][5] = {
456 // NON_AVX2
457 {
458 Transform32x32_SSE2,
459 Transform16x16_SSE2,
460 Transform8x8_SSE2_INTRIN,
461 Transform4x4_SSE2_INTRIN,
462 DstTransform4x4_SSE2_INTRIN
463 },
464 // AVX2
465 {
466 Transform32x32_SSE2,
467 Transform16x16_SSE2,
468 Transform8x8_SSE4_1_INTRIN,
469 Transform4x4_SSE2_INTRIN,
470 DstTransform4x4_SSE2_INTRIN
471 },
472 };
473
474 static const EB_INVTRANSFORM_FUNC invTransformFunctionTableEstimate[EB_ASM_TYPE_TOTAL][5] = {
475 // C_DEFAULT
476 {
477 InvTransform32x32,
478 InvTransform16x16,
479 InvTransform8x8,
480 InvTransform4x4,
481 InvDstTransform4x4
482 },
483 // AVX2
484 {
485 EstimateInvTransform32x32_SSE2,
486 EstimateInvTransform16x16_SSE2,
487 InvTransform8x8_SSE2_INTRIN,
488 InvTransform4x4_SSE2_INTRIN,
489 InvDstTransform4x4_SSE2_INTRIN
490 },
491 };
492
493 static const EB_INVTRANSFORM_FUNC invTransformFunctionTableEncode[EB_ASM_TYPE_TOTAL][5] = {
494 // C_DEFAULT
495 {
496 InvTransform32x32,
497 InvTransform16x16,
498 InvTransform8x8,
499 InvTransform4x4,
500 InvDstTransform4x4
501 },
502 // AVX2
503 {
504 PFinvTransform32x32_SSSE3,
505 PFinvTransform16x16_SSSE3,
506 InvTransform8x8_SSE2_INTRIN,
507 InvTransform4x4_SSE2_INTRIN,
508 InvDstTransform4x4_SSE2_INTRIN
509 },
510 };
511
512 #ifdef __cplusplus
513 }
514 #endif
515
516 #endif // EbTransforms_h
517
518
519