/*!
 * \copy
 *     Copyright (c)  2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 */
32 
33 #ifndef ENCODE_MB_AUX_H
34 #define ENCODE_MB_AUX_H
35 
36 #include "typedefs.h"
37 #include "wels_func_ptr_def.h"
38 #include "copy_mb.h"
39 
40 namespace WelsEnc {
41 void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t  uiCpuFlag);
/*!
 * \brief   Declaration of the `_c` variant of the non-zero counter; defined elsewhere.
 * \param   pLevel  pointer to int16_t level data
 *                  NOTE(review): element count is not visible here (presumably one 4x4 block) -- confirm.
 * \return  int32_t result; presumably the number of non-zero entries -- confirm against the definition.
 */
int32_t WelsGetNoneZeroCount_c (int16_t* pLevel);
43 
/****************************************************************************
 * Scan and Score functions
 *
 * Declarations only; the definitions live outside this header.
 * Every scan routine takes a destination pointer first and the source
 * coefficient pointer (pDct) second.
 * NOTE(review): buffer lengths are not expressed in the signatures
 * (presumably 16 coefficients per 4x4 block) -- confirm against the definitions.
 ****************************************************************************/
void    WelsScan4x4Ac_c (int16_t* pZigValue, int16_t* pDct);
void    WelsScan4x4Dc (int16_t* pLevel, int16_t* pDct);
void    WelsScan4x4DcAc_c (int16_t* pLevel, int16_t* pDct);
// Returns an int32_t computed from pDct; NOTE(review): presumably a "single coefficient" score -- confirm.
int32_t WelsCalculateSingleCtr4x4_c (int16_t* pDct);
51 
/****************************************************************************
 * HDM and Quant functions
 *
 * Declarations only; the definitions live outside this header.
 * Parameter naming follows the rest of this header: pFF / iFF and
 * pMF / iMF are the two quantisation inputs (table pointer or scalar).
 * NOTE(review): the exact meaning of FF and MF (presumably rounding offset
 * and multiplication factor) is not shown here -- confirm against the definitions.
 ****************************************************************************/
void WelsHadamardT4Dc_c (int16_t* pLumaDc, int16_t* pDct);
int32_t WelsHadamardQuant2x2_c (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, int16_t* pBlock);
int32_t WelsHadamardQuant2x2Skip_c (int16_t* pRes, int16_t iFF, int16_t iMF);

// pDct is the only non-const pointer in the plain quant routines, so it is the only buffer they may write.
void WelsQuant4x4_c (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuant4x4Dc_c (int16_t* pDct, int16_t iFF, int16_t iMF);
void WelsQuantFour4x4_c (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
// The Max variant has an additional writable output pointer, pMax.
void WelsQuantFour4x4Max_c (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax);

63 
64 
/****************************************************************************
 * DCT functions
 *
 * Declarations only; the definitions live outside this header.
 * Both routines take an int16_t output pointer (pDct) followed by two
 * 8-bit pixel pointers, each paired with its own int32_t stride.
 * NOTE(review): presumably the transform is applied to the difference of
 * the two pixel blocks -- confirm against the definitions.
 ****************************************************************************/
void WelsDctT4_c (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
// NOTE(review): an earlier comment here referred to an unused `dct_data` argument kept for
// interface parity with "dct_save" functions; no such parameter exists in this signature,
// so that remark appears to be stale.
void WelsDctFourT4_c (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
71 
72 #if defined(__cplusplus)
73 extern "C" {
74 #endif//__cplusplus
75 
76 #ifdef X86_ASM
77 
// Non-zero counters declared for the X86_ASM build; definitions are not in this header.
// Parameter list and return type are the same as WelsGetNoneZeroCount_c.
// NOTE(review): the suffixes presumably name the required instruction set (SSE2 / SSE4.2) -- confirm.
int32_t WelsGetNoneZeroCount_sse2 (int16_t* pLevel);
int32_t WelsGetNoneZeroCount_sse42 (int16_t* pLevel);
80 
/****************************************************************************
 * Scan and Score functions
 *
 * X86_ASM variants; declarations only, definitions are not in this header.
 * Signatures match the corresponding _c declarations (destination pointer
 * first, source coefficients second).
 ****************************************************************************/
void WelsScan4x4Ac_sse2 (int16_t* pZigValue, int16_t* pDct);
void WelsScan4x4DcAc_ssse3 (int16_t* pLevel, int16_t* pDct);
void WelsScan4x4DcAc_sse2 (int16_t* pLevel, int16_t* pDct);
int32_t WelsCalculateSingleCtr4x4_sse2 (int16_t* pDct);
88 
/****************************************************************************
 * DCT functions
 *
 * X86_ASM variants; declarations only, definitions are not in this header.
 * All five share one signature: int16_t output pointer, then two 8-bit
 * pixel pointers each followed by its int32_t stride.
 ****************************************************************************/
void WelsDctT4_mmx (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
void WelsDctT4_sse2 (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
void WelsDctFourT4_sse2 (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
void WelsDctT4_avx2 (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
void WelsDctFourT4_avx2 (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
97 
/****************************************************************************
 * HDM and Quant functions
 *
 * X86_ASM variants; declarations only, definitions are not in this header.
 * NOTE(review): FF / MF presumably denote the rounding offset and the
 * multiplication factor of the quantiser -- confirm against the definitions.
 ****************************************************************************/
int32_t WelsHadamardQuant2x2_mmx (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, int16_t* pBlock);
void WelsHadamardT4Dc_sse2 (int16_t* pLumaDc, int16_t* pDct);
int32_t WelsHadamardQuant2x2Skip_mmx (int16_t* pRes, int16_t iFF, int16_t iMF);

// sse2 quantisers: pDct is the only writable buffer, except for the extra pMax output of the Max variant.
void WelsQuant4x4_sse2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuant4x4Dc_sse2 (int16_t* pDct, int16_t iFF, int16_t iMF);
void WelsQuantFour4x4_sse2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuantFour4x4Max_sse2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax);

// avx2 quantisers: same four signatures as the sse2 set above.
void WelsQuant4x4_avx2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuant4x4Dc_avx2 (int16_t* pDct, int16_t iFF, int16_t iMF);
void WelsQuantFour4x4_avx2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuantFour4x4Max_avx2 (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax);
114 
115 #endif
116 
117 #ifdef HAVE_NEON
// Declarations available when HAVE_NEON is defined; definitions are not in this header.

// Hadamard / 2x2 quant routines.
void WelsHadamardT4Dc_neon (int16_t* pLumaDc, int16_t* pDct);
int32_t WelsHadamardQuant2x2_neon (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, int16_t* pBlock);
int32_t WelsHadamardQuant2x2Skip_neon (int16_t* pRes, int16_t iFF, int16_t iMF);
// Takes a single threshold instead of the iFF / iMF pair.
// NOTE(review): how iThreshold is derived from iFF and iMF is not shown in this header.
int32_t WelsHadamardQuant2x2SkipKernel_neon (int16_t* pRes, int16_t iThreshold); // avoid divide operator

// DCT routines: output pointer, then two pixel pointers each with its stride.
void WelsDctT4_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
void WelsDctFourT4_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);

// Non-zero counter.
int32_t WelsGetNoneZeroCount_neon (int16_t* pLevel);

// Quantisers: pDct is the only writable buffer, except for the extra pMax output of the Max variant.
void WelsQuant4x4_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuant4x4Dc_neon (int16_t* pDct, int16_t iFF, int16_t iMF);
void WelsQuantFour4x4_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuantFour4x4Max_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax);
132 #endif
133 
134 #ifdef HAVE_NEON_AARCH64
// Declarations available when HAVE_NEON_AARCH64 is defined; definitions are not in this header.
// The set mirrors the HAVE_NEON declarations one-for-one, with an _AArch64_neon suffix.

// Hadamard / 2x2 quant routines.
void WelsHadamardT4Dc_AArch64_neon (int16_t* pLumaDc, int16_t* pDct);
int32_t WelsHadamardQuant2x2_AArch64_neon (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct, int16_t* pBlock);
int32_t WelsHadamardQuant2x2Skip_AArch64_neon (int16_t* pRes, int16_t iFF, int16_t iMF);
// Takes a single threshold instead of the iFF / iMF pair.
// NOTE(review): how iThreshold is derived from iFF and iMF is not shown in this header.
int32_t WelsHadamardQuant2x2SkipKernel_AArch64_neon (int16_t* pRes, int16_t iThreshold); // avoid divide operator

// DCT routines: output pointer, then two pixel pointers each with its stride.
void WelsDctT4_AArch64_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
void WelsDctFourT4_AArch64_neon (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);

// Non-zero counter.
int32_t WelsGetNoneZeroCount_AArch64_neon (int16_t* pLevel);

// Quantisers: pDct is the only writable buffer, except for the extra pMax output of the Max variant.
void WelsQuant4x4_AArch64_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuant4x4Dc_AArch64_neon (int16_t* pDct, int16_t iFF, int16_t iMF);
void WelsQuantFour4x4_AArch64_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuantFour4x4Max_AArch64_neon (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax);
149 #endif
150 
151 #ifdef HAVE_MMI
// Non-zero counter declared for the HAVE_MMI build; the definition is not in this header.
// Parameter list and return type are the same as WelsGetNoneZeroCount_c.
int32_t WelsGetNoneZeroCount_mmi (int16_t* pLevel);
153 
/****************************************************************************
 * Scan and Score functions
 *
 * HAVE_MMI variants; declarations only, definitions are not in this header.
 * Signatures match the corresponding _c declarations (destination pointer
 * first, source coefficients second).
 ****************************************************************************/
void WelsScan4x4Ac_mmi (int16_t* pZigValue, int16_t* pDct);
void WelsScan4x4DcAc_mmi (int16_t* pLevel, int16_t* pDct);
int32_t WelsCalculateSingleCtr4x4_mmi (int16_t* pDct);
160 
/****************************************************************************
 * DCT functions
 *
 * HAVE_MMI variants; declarations only, definitions are not in this header.
 * Both take an int16_t output pointer, then two 8-bit pixel pointers each
 * followed by its int32_t stride.
 ****************************************************************************/
void WelsDctT4_mmi (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
void WelsDctFourT4_mmi (int16_t* pDct, uint8_t* pPixel1, int32_t iStride1, uint8_t* pPixel2, int32_t iStride2);
166 
/****************************************************************************
 * HDM and Quant functions
 *
 * HAVE_MMI variants; declarations only, definitions are not in this header.
 * Unlike the X86_ASM and NEON sets, no WelsHadamardQuant2x2* routine is
 * declared for MMI here.
 ****************************************************************************/
void WelsHadamardT4Dc_mmi (int16_t* pLumaDc, int16_t* pDct);

// Quantisers: pDct is the only writable buffer, except for the extra pMax output of the Max variant.
void WelsQuant4x4_mmi (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuant4x4Dc_mmi (int16_t* pDct, int16_t iFF, int16_t iMF);
void WelsQuantFour4x4_mmi (int16_t* pDct, const int16_t* pFF, const int16_t* pMF);
void WelsQuantFour4x4Max_mmi (int16_t* pDct, const int16_t* pFF, const int16_t* pMF, int16_t* pMax);
176 #endif//HAVE_MMI
177 #if defined(__cplusplus)
178 }
179 #endif//__cplusplus
180 
181 ALIGNED_DECLARE (extern const int16_t, g_kiQuantInterFF[58][8], 16);
182 #define g_iQuantIntraFF (g_kiQuantInterFF +6 )
183 ALIGNED_DECLARE (extern const int16_t, g_kiQuantMF[52][8], 16);
184 }
185 #endif//ENCODE_MB_AUX_H
186