1 /*
2 * Copyright(c) 2018 Intel Corporation
* SPDX-License-Identifier: BSD-2-Clause-Patent
4 */
5 
6 #ifndef EbTransforms_h
7 #define EbTransforms_h
8 
9 #include "EbTransforms_C.h"
10 #include "EbTransforms_SSE2.h"
11 #include "EbTransforms_SSSE3.h"
12 #include "EbTransforms_SSE4_1.h"
13 #include "EbTransforms_AVX2.h"
14 #include "EbSequenceControlSet.h"
15 #include "EbPictureControlSet.h"
16 
17 
18 #include "EbEncDecProcess.h"
19 
20 #include "EbDefinitions.h"
21 #ifdef __cplusplus
22 extern "C" {
23 #endif
24 
25 
26 static const EB_U32 QFunc[] = {26214,23302,20560,18396,16384,14564};
27 static const EB_U32 FFunc[] = {40,45,51,57,64,72};
28 
29 //QP in [0..63]
30 static const EB_U8 QpModSix[]=
31 {
32   0, 1, 2, 3, 4, 5,
33   0, 1, 2, 3, 4, 5,
34   0, 1, 2, 3, 4, 5,
35   0, 1, 2, 3, 4, 5,
36   0, 1, 2, 3, 4, 5,
37   0, 1, 2, 3, 4, 5,
38   0, 1, 2, 3, 4, 5,
39   0, 1, 2, 3, 4, 5,
40   0, 1, 2, 3, 4, 5,
41   0, 1, 2, 3, 4, 5,
42   0, 1, 2, 3
43 };
44 
45 static const EB_U8 QpDivSix[]=
46 {
47   0, 0, 0, 0, 0, 0,
48   1, 1, 1, 1, 1, 1,
49   2, 2, 2, 2, 2, 2,
50   3, 3, 3, 3, 3, 3,
51   4, 4, 4, 4, 4, 4,
52   5, 5, 5, 5, 5, 5,
53   6, 6, 6, 6, 6, 6,
54   7, 7, 7, 7, 7, 7,
55   8, 8, 8, 8, 8, 8,
56   9, 9, 9, 9, 9, 9,
57   10, 10, 10,10
58 
59 };
60 
/*
 * No-op placeholder.  It is cast to EB_QIQ_TYPE to fill slot 3 of
 * QiQ_funcPtrArray, which has no real kernel assigned in either row.
 *
 * The parameter list is spelled (void) so this is a proper prototype in C
 * (an empty "()" list is an obsolescent, non-prototype declarator).
 * The function takes no arguments, whereas EB_QIQ_TYPE takes twelve, so the
 * placeholder must never actually be called through the table.
 */
static void QiQVoidFunc(void)
{
}
62 
/*****************************
 * DEBUG MACROS
 *****************************/
/* Both switches are defined as 0 in this header.  Presumably a non-zero value
 * enables a debug mode that affects luma / chroma coefficients -- TODO confirm
 * at the use sites, which are not visible here. */
#define ZERO_COEFF 0
#define ZERO_COEFF_CHROMA 0
68 
69 extern EB_ERRORTYPE Quantize(
70     EB_S16            *coeff,
71     const EB_U32       coeffStride,
72     EB_S16            *quantCoeff,
73     const EB_U32       quantCoeffStride,
74     const EB_BITDEPTH  bitDepth,
75     const EB_U32       transformSize,
76     const EB_U32       qp,
77     const EB_PICTURE     sliceType);
78 
79 extern EB_ERRORTYPE InvQuantize(
80     EB_S16            *quantCoeff,
81     const EB_U32       quantCoeffStride,
82     EB_S16            *reconCoeff,
83     const EB_U32       reconCoeffStride,
84     const EB_BITDEPTH  bitDepth,
85     const EB_U32       transformSize,
86     const EB_U32       qp);
87 
88 extern void UnifiedQuantizeInvQuantize(
89 
90 	EncDecContext_t       *contextPtr,
91 
92 	PictureControlSet_t *pictureControlSetPtr,
93 	EB_S16              *coeff,
94 	const EB_U32         coeffStride,
95 	EB_S16              *quantCoeff,
96 	EB_S16              *reconCoeff,
97 	EB_U32               qp,
98 	EB_U32               bitDepth,
99 	EB_U32               areaSize,
100 	EB_PICTURE             sliceType,
101 	EB_U32			    *yCountNonZeroCoeffs,
102 	EB_TRANS_COEFF_SHAPE transCoeffShape,
103 
104 	EB_U8		         cleanSparseCeoffPfEncDec,
105 	EB_U8			     pmpMaskingLevelEncDec,
106 	EB_MODETYPE		     type,
107 	EB_U32               enableCbflag,
108 	EB_U8                enableContouringQCUpdateFlag,
109 	EB_U32               componentType,
110 	EB_U32               temporalLayerIndex,
111 	EB_U32               dZoffset,
112 
113 	CabacEncodeContext_t         *cabacEncodeCtxPtr,
114 	EB_U64                        lambda,
115 	EB_U32                        intraLumaMode,
116 	EB_U32                        intraChromaMode,
117 	CabacCost_t                  *CabacCost);
118 
119 
120 extern EB_ERRORTYPE EncodeTransform(
121     EB_S16              *residualBuffer,
122     EB_U32               residualStride,
123     EB_S16              *coeffBuffer,
124     EB_U32               coeffStride,
125     EB_U32               transformSize,
126     EB_S16              *transformInnerArrayPtr,
127     EB_U32               bitIncrement,
128     EB_BOOL              dstTransformFlag,
129     EB_TRANS_COEFF_SHAPE transCoeffShape);
130 
131 extern EB_ERRORTYPE EstimateTransform(
132 	EB_S16              *residualBuffer,
133 	EB_U32               residualStride,
134 	EB_S16              *coeffBuffer,
135 	EB_U32               coeffStride,
136 	EB_U32               transformSize,
137 	EB_S16              *transformInnerArrayPtr,
138 	EB_U32               bitIncrement,
139 	EB_BOOL              dstTansformFlag,
140     EB_TRANS_COEFF_SHAPE transCoeffShape);
141 
142 
143 extern EB_ERRORTYPE EstimateInvTransform(
144     EB_S16      *coeffBuffer,
145     EB_U32       coeffStride,
146     EB_S16      *reconBuffer,
147     EB_U32       reconStride,
148     EB_U32       transformSize,
149     EB_S16      *transformInnerArrayPtr,
150     EB_U32       bitIncrement,
151     EB_BOOL      dstTransformFlag,
152     EB_U32       partialFrequencyN2Flag);
153 
154 extern EB_ERRORTYPE EncodeInvTransform(
155 	EB_BOOL      isOnlyDc,
156     EB_S16      *coeffBuffer,
157     EB_U32       coeffStride,
158     EB_S16      *reconBuffer,
159     EB_U32       reconStride,
160     EB_U32       transformSize,
161     EB_S16      *transformInnerArrayPtr,
162     EB_U32       bitIncrement,
163     EB_BOOL      dstTransformFlag);
164 
165 extern EB_ERRORTYPE CalculateCbf(
166     EB_S16      *buffer,
167     EB_U32       stride,
168     EB_U32       cuSize,
169     EB_U32       transformSize,
170     EB_U32      *cbfBuffer);
171 extern EB_U8 MapChromaQp(
172 	EB_U8 qp
173 	);
174 
175 
176 
177 extern void DecoupledQuantizeInvQuantizeLoops(
178     EB_S16                        *coeff,
179 	const EB_U32                  coeffStride,
180 	EB_S16                        *quantCoeff,
181 	EB_S16                        *reconCoeff,
182 	CabacEncodeContext_t         *cabacEncodeCtxPtr,
183 	EB_U64                        lambda,
184 	EB_MODETYPE                   type,                 // Input: CU type (INTRA, INTER)
185 	EB_U32                        intraLumaMode,
186 	EB_U32                        intraChromaMode,
187 	EB_U32                        componentType,
188 	EB_U8                         temporalLayerIndex,
189 	EB_BOOL                       isUsedAsReferenceFlag,
190 	EB_U8                         chromaLambda,
191     EB_U16                        qp,
192     EB_U32                        bitDepth,
193 	CabacCost_t                  *CabacCost,
194 	const EB_U32                  qFunc,
195 	const EB_U32                  q_offset,
196 	const EB_S32                  shiftedQBits,
197 	const EB_S32                  shiftedFFunc,
198 	const EB_S32                  iq_offset,
199 	const EB_S32                  shiftNum,
200 	const EB_U32                  areaSize,
201 	EB_U32                        *nonzerocoeff,
202     EB_RDOQ_PMCORE_TYPE            rdoType);
203 
204 
205 extern void PfZeroOutUselessQuadrants(
206     EB_S16* transformCoeffBuffer,
207     EB_U32  transformCoeffStride,
208     EB_U32  quadrantSize);
209 
210 /*****************************
211 * Function Pointer Typedef
212 *****************************/
213 typedef void(*EB_QIQ_TYPE)(
214     EB_S16           *coeff,
215     const EB_U32     coeffStride,
216     EB_S16           *quantCoeff,
217     EB_S16           *reconCoeff,
218     const EB_U32     qFunc,
219     const EB_U32     q_offset,
220     const EB_S32     shiftedQBits,
221     const EB_S32     shiftedFFunc,
222     const EB_S32     iq_offset,
223     const EB_S32     shiftNum,
224     const EB_U32     areaSize,
225     EB_U32			 *nonzerocoeff);
226 
227 typedef void(*EB_MAT_MUL_TYPE)(
228     EB_S16           *coeff,
229     const EB_U32     coeffStride,
230     const EB_U16     *maskingMatrix,
231     const EB_U32     maskingMatrixStride,
232     const EB_U32     computeSize,
233     const EB_S32     offset,
234     const EB_S32     shiftNum,
235     EB_U32			 *nonzerocoeff);
236 
237 extern void MatMult(
238     EB_S16           *coeff,
239     const EB_U32     coeffStride,
240     const EB_U16     *maskingMatrix,
241     const EB_U32     maskingMatrixStride,
242     const EB_U32     computeSize,
243     const EB_S32     offset,
244     const EB_S32     shiftNum,
245     EB_U32			 *nonzerocoeff);
246 
247 typedef void(*EB_MAT_OUT_MUL_TYPE)(
248 	EB_S16           *coeff,
249 	const EB_U32     coeffStride,
250 	EB_S16*          coeffOut,
251 	const EB_U32     coeffOutStride,
252 	const EB_U16     *maskingMatrix,
253 	const EB_U32     maskingMatrixStride,
254 	const EB_U32     computeSize,
255 	const EB_S32     offset,
256 	const EB_S32     shiftNum,
257 	EB_U32			 *nonzerocoeff);
258 
259 void MatMultOut(
260 	EB_S16           *coeff,
261 	const EB_U32     coeffStride,
262 	EB_S16*          coeffOut,
263 	const EB_U32     coeffOutStride,
264 	const EB_U16     *maskingMatrix,
265 	const EB_U32     maskingMatrixStride,
266 	const EB_U32     computeSize,
267 	const EB_S32     offset,
268 	const EB_S32     shiftNum,
269 	EB_U32			 *nonzerocoeff);
270 
271 static EB_MAT_OUT_MUL_TYPE FUNC_TABLE MatMulOut_funcPtrArray[EB_ASM_TYPE_TOTAL] =
272 {
273 	  MatMultOut,
274 	  MatMult4x4_OutBuff_AVX2_INTRIN,
275 };
276 
277 
278 
279 typedef void(*EB_TRANSFORM_FUNC)(
280     EB_S16                  *residual,
281     const EB_U32             srcStride,
282     EB_S16                  *transformCoefficients,
283     const EB_U32             dstStride,
284     EB_S16                  *transformInnerArrayPtr,
285     EB_U32                   bitIncrement);
286 
287 typedef void(*EB_INVTRANSFORM_FUNC)(
288     EB_S16                  *transformCoefficients,
289     const EB_U32             srcStride,
290     EB_S16                  *residual,
291     const EB_U32             dstStride,
292     EB_S16                  *transformInnerArrayPtr,
293     EB_U32                   bitIncrement);
294 
295 /*****************************
296 * Function Tables
297 *****************************/
298 static EB_QIQ_TYPE FUNC_TABLE QiQ_funcPtrArray[EB_ASM_TYPE_TOTAL][5] = {
299 	// C_DEFAULT
300 	{
301         /*0 4x4   */     QuantizeInvQuantize,
302         /*1 8x8   */     QuantizeInvQuantize,
303         /*2 16x16 */     QuantizeInvQuantize,
304         /*3       */     (EB_QIQ_TYPE)QiQVoidFunc,
305         /*4 32x32 */     QuantizeInvQuantize
306 	},
307 	// AVX2
308 	{
309 		/*0 4x4   */    QuantizeInvQuantize4x4_SSE3,
310 		/*1 8x8   */	 QuantizeInvQuantize8x8_AVX2_INTRIN,
311 		/*2 16x16 */	 QuantizeInvQuantizeNxN_AVX2_INTRIN,
312 		/*3       */    (EB_QIQ_TYPE)QiQVoidFunc,
313 		/*4 32x32 */	 QuantizeInvQuantizeNxN_AVX2_INTRIN,
314 
315 	},
316 };
317 
318 static EB_MAT_MUL_TYPE FUNC_TABLE MatMul_funcPtrArray[EB_ASM_TYPE_TOTAL][5] = {
319     // C_DEFAULT
320     {
321         /*0 4x4   */     MatMult,
322         /*1 8x8   */     MatMult,
323         /*2 16x16 */     MatMult,
324         /*3 16x16 */     MatMult,
325         /*4 32x32 */     MatMult
326     },
327     // AVX2
328     {
329         /*0 4x4   */     MatMult4x4_AVX2_INTRIN,
330         /*1 8x8   */     MatMult8x8_AVX2_INTRIN,
331         /*2 16x16 */     MatMultNxN_AVX2_INTRIN,
332         /*3 16x16 */     MatMultNxN_AVX2_INTRIN,
333         /*4 32x32 */     MatMultNxN_AVX2_INTRIN
334     },
335 
336 
337 };
338 
339 static const EB_TRANSFORM_FUNC transformFunctionTableEstimate[EB_ASM_TYPE_TOTAL][5] = {
340         // C_DEFAULT
341         {
342 	    	Transform32x32Estimate,
343 		    Transform16x16Estimate,
344             Transform8x8,
345             Transform4x4,
346             DstTransform4x4
347         },
348         // AVX2
349         {
350             lowPrecisionTransform32x32_AVX2_INTRIN,
351             lowPrecisionTransform16x16_AVX2_INTRIN,
352             Transform8x8_SSE4_1_INTRIN,
353             Transform4x4_SSE2_INTRIN,
354             DstTransform4x4_SSE2_INTRIN
355         },
356 };
357 
358 static const EB_TRANSFORM_FUNC PfreqN2TransformTable0[EB_ASM_TYPE_TOTAL][5] = {
359     // NON_AVX2
360     {
361         Transform32x32Estimate,
362         Transform16x16Estimate,
363         Transform8x8,
364         Transform4x4,
365         DstTransform4x4
366 
367     },
368     // AVX2
369     {
370         PfreqTransform32x32_AVX2_INTRIN,
371         PfreqTransform16x16_SSE2,
372         PfreqTransform8x8_SSE4_1_INTRIN,
373         Transform4x4_SSE2_INTRIN,
374         DstTransform4x4_SSE2_INTRIN
375     }
376 };
377 
378 static const EB_TRANSFORM_FUNC PfreqN2TransformTable1[EB_ASM_TYPE_TOTAL][5] = {
379     // NON_AVX2
380     {
381         PfreqTransform32x32_SSE2,
382         PfreqTransform16x16_SSE2,
383         PfreqTransform8x8_SSE2_INTRIN,
384         Transform4x4_SSE2_INTRIN,
385         DstTransform4x4_SSE2_INTRIN
386 
387     },
388     // AVX2
389     {
390         PfreqTransform32x32_AVX2_INTRIN,
391         PfreqTransform16x16_SSE2,
392         PfreqTransform8x8_SSE4_1_INTRIN,
393         Transform4x4_SSE2_INTRIN,
394         DstTransform4x4_SSE2_INTRIN
395     }
396 };
397 
398 static const EB_TRANSFORM_FUNC PfreqN4TransformTable0[EB_ASM_TYPE_TOTAL][5] = {
399     // NON_AVX2
400     {
401         Transform32x32Estimate,
402         Transform16x16Estimate,
403         Transform8x8,
404         Transform4x4,
405         DstTransform4x4
406     },
407     // AVX2
408     {
409         PfreqN4Transform32x32_AVX2_INTRIN,
410         PfreqN4Transform16x16_SSE2,
411         PfreqN4Transform8x8_SSE4_1_INTRIN,
412         Transform4x4_SSE2_INTRIN,
413         DstTransform4x4_SSE2_INTRIN
414     }
415 };
416 
417 static const EB_TRANSFORM_FUNC PfreqN4TransformTable1[EB_ASM_TYPE_TOTAL][5] = {
418     // NON_AVX2
419     {
420         PfreqN4Transform32x32_SSE2,
421         PfreqN4Transform16x16_SSE2,
422         PfreqN4Transform8x8_SSE2_INTRIN,
423         Transform4x4_SSE2_INTRIN,
424         DstTransform4x4_SSE2_INTRIN
425     },
426     // AVX2
427     {
428         PfreqN4Transform32x32_AVX2_INTRIN,
429         PfreqN4Transform16x16_SSE2,
430         PfreqN4Transform8x8_SSE4_1_INTRIN,
431         Transform4x4_SSE2_INTRIN,
432         DstTransform4x4_SSE2_INTRIN
433     }
434 };
435 
436 static const EB_TRANSFORM_FUNC transformFunctionTableEncode0[EB_ASM_TYPE_TOTAL][5] = {
437     // NON_AVX2
438     {
439         Transform32x32Estimate,
440         Transform16x16Estimate,
441         Transform8x8,
442         Transform4x4,
443         DstTransform4x4
444     },
445     // AVX2
446     {
447         Transform32x32_SSE2,
448         Transform16x16_SSE2,
449         Transform8x8_SSE4_1_INTRIN,
450         Transform4x4_SSE2_INTRIN,
451         DstTransform4x4_SSE2_INTRIN
452     },
453 };
454 
455 static const EB_TRANSFORM_FUNC transformFunctionTableEncode1[EB_ASM_TYPE_TOTAL][5] = {
456     // NON_AVX2
457     {
458         Transform32x32_SSE2,
459         Transform16x16_SSE2,
460         Transform8x8_SSE2_INTRIN,
461         Transform4x4_SSE2_INTRIN,
462         DstTransform4x4_SSE2_INTRIN
463     },
464     // AVX2
465     {
466         Transform32x32_SSE2,
467         Transform16x16_SSE2,
468         Transform8x8_SSE4_1_INTRIN,
469         Transform4x4_SSE2_INTRIN,
470         DstTransform4x4_SSE2_INTRIN
471     },
472 };
473 
474 static const EB_INVTRANSFORM_FUNC invTransformFunctionTableEstimate[EB_ASM_TYPE_TOTAL][5] = {
475         // C_DEFAULT
476         {
477             InvTransform32x32,
478             InvTransform16x16,
479             InvTransform8x8,
480             InvTransform4x4,
481             InvDstTransform4x4
482         },
483         // AVX2
484         {
485             EstimateInvTransform32x32_SSE2,
486             EstimateInvTransform16x16_SSE2,
487             InvTransform8x8_SSE2_INTRIN,
488             InvTransform4x4_SSE2_INTRIN,
489             InvDstTransform4x4_SSE2_INTRIN
490         },
491 };
492 
493 static const EB_INVTRANSFORM_FUNC invTransformFunctionTableEncode[EB_ASM_TYPE_TOTAL][5] = {
494         // C_DEFAULT
495         {
496             InvTransform32x32,
497             InvTransform16x16,
498             InvTransform8x8,
499             InvTransform4x4,
500             InvDstTransform4x4
501         },
502         // AVX2
503         {
504             PFinvTransform32x32_SSSE3,
505             PFinvTransform16x16_SSSE3,
506             InvTransform8x8_SSE2_INTRIN,
507             InvTransform4x4_SSE2_INTRIN,
508             InvDstTransform4x4_SSE2_INTRIN
509         },
510 };
511 
512 #ifdef __cplusplus
513 }
514 #endif
515 
516 #endif // EbTransforms_h
517 
518 
519