1 /* 2 * Copyright(c) 2018 Intel Corporation 3 * SPDX - License - Identifier: BSD - 2 - Clause - Patent 4 */ 5 6 #ifndef EbPictureOperators_SSE2_h 7 #define EbPictureOperators_SSE2_h 8 9 #include "EbDefinitions.h" 10 11 #ifdef __cplusplus 12 extern "C" { 13 #endif 14 15 void FullDistortionKernel4x4_32bit_BT_SSE2( 16 EB_S16 *coeff, 17 EB_U32 coeffStride, 18 EB_S16 *reconCoeff, 19 EB_U32 reconCoeffStride, 20 EB_U64 distortionResult[2], 21 EB_U32 areaWidth, 22 EB_U32 areaHeight); 23 24 void FullDistortionKernelCbfZero4x4_32bit_BT_SSE2( 25 EB_S16 *coeff, 26 EB_U32 coeffStride, 27 EB_S16 *reconCoeff, 28 EB_U32 reconCoeffStride, 29 EB_U64 distortionResult[2], 30 EB_U32 areaWidth, 31 EB_U32 areaHeight); 32 33 void FullDistortionKernelIntra4x4_32bit_BT_SSE2( 34 EB_S16 *coeff, 35 EB_U32 coeffStride, 36 EB_S16 *reconCoeff, 37 EB_U32 reconCoeffStride, 38 EB_U64 distortionResult[2], 39 EB_U32 areaWidth, 40 EB_U32 areaHeight); 41 42 void FullDistortionKernel8x8_32bit_BT_SSE2( 43 EB_S16 *coeff, 44 EB_U32 coeffStride, 45 EB_S16 *reconCoeff, 46 EB_U32 reconCoeffStride, 47 EB_U64 distortionResult[2], 48 EB_U32 areaWidth, 49 EB_U32 areaHeight); 50 51 void FullDistortionKernelCbfZero8x8_32bit_BT_SSE2( 52 EB_S16 *coeff, 53 EB_U32 coeffStride, 54 EB_S16 *reconCoeff, 55 EB_U32 reconCoeffStride, 56 EB_U64 distortionResult[2], 57 EB_U32 areaWidth, 58 EB_U32 areaHeight); 59 60 void FullDistortionKernelIntra8x8_32bit_BT_SSE2( 61 EB_S16 *coeff, 62 EB_U32 coeffStride, 63 EB_S16 *reconCoeff, 64 EB_U32 reconCoeffStride, 65 EB_U64 distortionResult[2], 66 EB_U32 areaWidth, 67 EB_U32 areaHeight); 68 69 void FullDistortionKernelIntra16MxN_32bit_BT_SSE2( 70 EB_S16 *coeff, 71 EB_U32 coeffStride, 72 EB_S16 *reconCoeff, 73 EB_U32 reconCoeffStride, 74 EB_U64 distortionResult[2], 75 EB_U32 areaWidth, 76 EB_U32 areaHeight); 77 78 void FullDistortionKernel16MxN_32bit_BT_SSE2( 79 EB_S16 *coeff, 80 EB_U32 coeffStride, 81 EB_S16 *reconCoeff, 82 EB_U32 reconCoeffStride, 83 EB_U64 distortionResult[2], 84 EB_U32 areaWidth, 85 EB_U32 areaHeight); 86 87 88 void FullDistortionKernelCbfZero16MxN_32bit_BT_SSE2( 89 EB_S16 *coeff, 90 EB_U32 coeffStride, 91 EB_S16 *reconCoeff, 92 EB_U32 reconCoeffStride, 93 EB_U64 distortionResult[2], 94 EB_U32 areaWidth, 95 EB_U32 areaHeight); 96 97 //----- 98 extern void ZeroOutCoeff4x4_SSE( 99 EB_S16* coeffbuffer, 100 EB_U32 coeffStride, 101 EB_U32 coeffOriginIndex, 102 EB_U32 areaWidth, 103 EB_U32 areaHeight); 104 extern void ZeroOutCoeff8x8_SSE2( 105 EB_S16* coeffbuffer, 106 EB_U32 coeffStride, 107 EB_U32 coeffOriginIndex, 108 EB_U32 areaWidth, 109 EB_U32 areaHeight); 110 extern void ZeroOutCoeff16x16_SSE2( 111 EB_S16* coeffbuffer, 112 EB_U32 coeffStride, 113 EB_U32 coeffOriginIndex, 114 EB_U32 areaWidth, 115 EB_U32 areaHeight); 116 extern void ZeroOutCoeff32x32_SSE2( 117 EB_S16* coeffbuffer, 118 EB_U32 coeffStride, 119 EB_U32 coeffOriginIndex, 120 EB_U32 areaWidth, 121 EB_U32 areaHeight); 122 123 extern void ResidualKernel16bit_SSE2_INTRIN( 124 EB_U16 *input, 125 EB_U32 inputStride, 126 EB_U16 *pred, 127 EB_U32 predStride, 128 EB_S16 *residual, 129 EB_U32 residualStride, 130 EB_U32 areaWidth, 131 EB_U32 areaHeight); 132 133 void PictureCopyKernel4x4_SSE_INTRIN( 134 EB_BYTE src, 135 EB_U32 srcStride, 136 EB_BYTE dst, 137 EB_U32 dstStride, 138 EB_U32 areaWidth, 139 EB_U32 areaHeight); 140 141 void PictureCopyKernel8x8_SSE2_INTRIN( 142 EB_BYTE src, 143 EB_U32 srcStride, 144 EB_BYTE dst, 145 EB_U32 dstStride, 146 EB_U32 areaWidth, 147 EB_U32 areaHeight); 148 149 void PictureCopyKernel16x16_SSE2_INTRIN( 150 EB_BYTE src, 151 EB_U32 srcStride, 152 EB_BYTE dst, 153 EB_U32 dstStride, 154 EB_U32 areaWidth, 155 EB_U32 areaHeight); 156 157 158 void PictureCopyKernel32x32_SSE2_INTRIN( 159 EB_BYTE src, 160 EB_U32 srcStride, 161 EB_BYTE dst, 162 EB_U32 dstStride, 163 EB_U32 areaWidth, 164 EB_U32 areaHeight); 165 166 void PictureCopyKernel64x64_SSE2_INTRIN( 167 EB_BYTE src, 168 EB_U32 srcStride, 169 EB_BYTE dst, 170 EB_U32 dstStride, 171 EB_U32 areaWidth, 172 EB_U32 areaHeight); 173 174 void PictureAdditionKernel4x4_SSE_INTRIN( 175 EB_U8 *predPtr, 176 EB_U32 predStride, 177 EB_S16 *residualPtr, 178 EB_U32 residualStride, 179 EB_U8 *reconPtr, 180 EB_U32 reconStride, 181 EB_U32 width, 182 EB_U32 height); 183 184 void PictureAdditionKernel8x8_SSE2_INTRIN( 185 EB_U8 *predPtr, 186 EB_U32 predStride, 187 EB_S16 *residualPtr, 188 EB_U32 residualStride, 189 EB_U8 *reconPtr, 190 EB_U32 reconStride, 191 EB_U32 width, 192 EB_U32 height); 193 194 void PictureAdditionKernel16x16_SSE2_INTRIN( 195 EB_U8 *predPtr, 196 EB_U32 predStride, 197 EB_S16 *residualPtr, 198 EB_U32 residualStride, 199 EB_U8 *reconPtr, 200 EB_U32 reconStride, 201 EB_U32 width, 202 EB_U32 height); 203 204 void PictureAdditionKernel32x32_SSE2_INTRIN( 205 EB_U8 *predPtr, 206 EB_U32 predStride, 207 EB_S16 *residualPtr, 208 EB_U32 residualStride, 209 EB_U8 *reconPtr, 210 EB_U32 reconStride, 211 EB_U32 width, 212 EB_U32 height); 213 214 void PictureAdditionKernel64x64_SSE2_INTRIN( 215 EB_U8 *predPtr, 216 EB_U32 predStride, 217 EB_S16 *residualPtr, 218 EB_U32 residualStride, 219 EB_U8 *reconPtr, 220 EB_U32 reconStride, 221 EB_U32 width, 222 EB_U32 height); 223 224 void ResidualKernel4x4_SSE_INTRIN( 225 EB_U8 *input, 226 EB_U32 inputStride, 227 EB_U8 *pred, 228 EB_U32 predStride, 229 EB_S16 *residual, 230 EB_U32 residualStride, 231 EB_U32 areaWidth, 232 EB_U32 areaHeight); 233 234 void ResidualKernel8x8_SSE2_INTRIN( 235 EB_U8 *input, 236 EB_U32 inputStride, 237 EB_U8 *pred, 238 EB_U32 predStride, 239 EB_S16 *residual, 240 EB_U32 residualStride, 241 EB_U32 areaWidth, 242 EB_U32 areaHeight); 243 244 void ResidualKernel16x16_SSE2_INTRIN( 245 EB_U8 *input, 246 EB_U32 inputStride, 247 EB_U8 *pred, 248 EB_U32 predStride, 249 EB_S16 *residual, 250 EB_U32 residualStride, 251 EB_U32 areaWidth, 252 EB_U32 areaHeight); 253 254 void ResidualKernelSubSampled4x4_SSE_INTRIN( 255 EB_U8 *input, 256 EB_U32 inputStride, 257 EB_U8 *pred, 258 EB_U32 predStride, 259 EB_S16 *residual, 260 EB_U32 residualStride, 261 EB_U32 areaWidth, 262 EB_U32 areaHeight, 263 EB_U8 lastLine ); 264 265 void ResidualKernelSubSampled8x8_SSE2_INTRIN( 266 EB_U8 *input, 267 EB_U32 inputStride, 268 EB_U8 *pred, 269 EB_U32 predStride, 270 EB_S16 *residual, 271 EB_U32 residualStride, 272 EB_U32 areaWidth, 273 EB_U32 areaHeight, 274 EB_U8 lastLine); 275 276 void ResidualKernelSubSampled16x16_SSE2_INTRIN( 277 EB_U8 *input, 278 EB_U32 inputStride, 279 EB_U8 *pred, 280 EB_U32 predStride, 281 EB_S16 *residual, 282 EB_U32 residualStride, 283 EB_U32 areaWidth, 284 EB_U32 areaHeight, 285 EB_U8 lastLine); 286 287 void ResidualKernelSubSampled32x32_SSE2_INTRIN( 288 EB_U8 *input, 289 EB_U32 inputStride, 290 EB_U8 *pred, 291 EB_U32 predStride, 292 EB_S16 *residual, 293 EB_U32 residualStride, 294 EB_U32 areaWidth, 295 EB_U32 areaHeight, 296 EB_U8 lastLine); 297 298 void ResidualKernelSubSampled64x64_SSE2_INTRIN( 299 EB_U8 *input, 300 EB_U32 inputStride, 301 EB_U8 *pred, 302 EB_U32 predStride, 303 EB_S16 *residual, 304 EB_U32 residualStride, 305 EB_U32 areaWidth, 306 EB_U32 areaHeight, 307 EB_U8 lastLine); 308 309 void ResidualKernel32x32_SSE2_INTRIN( 310 EB_U8 *input, 311 EB_U32 inputStride, 312 EB_U8 *pred, 313 EB_U32 predStride, 314 EB_S16 *residual, 315 EB_U32 residualStride, 316 EB_U32 areaWidth, 317 EB_U32 areaHeight); 318 319 void ResidualKernel64x64_SSE2_INTRIN( 320 EB_U8 *input, 321 EB_U32 inputStride, 322 EB_U8 *pred, 323 EB_U32 predStride, 324 EB_S16 *residual, 325 EB_U32 residualStride, 326 EB_U32 areaWidth, 327 EB_U32 areaHeight); 328 329 void PictureAdditionKernel16bit_SSE2_INTRIN( 330 EB_U16 *predPtr, 331 EB_U32 predStride, 332 EB_S16 *residualPtr, 333 EB_U32 residualStride, 334 EB_U16 *reconPtr, 335 EB_U32 reconStride, 336 EB_U32 width, 337 EB_U32 height); 338 339 340 341 #ifdef __cplusplus 342 } 343 #endif 344 #endif // EbPictureOperators_SSE2_h 345