1 /*
2 * Copyright(c) 2018 Intel Corporation
3 * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 */
5 
6 #ifndef EbPictureOperators_SSE2_h
7 #define EbPictureOperators_SSE2_h
8 
9 #include "EbDefinitions.h"
10 
11 #ifdef __cplusplus
12 extern "C" {
13 #endif
14 
/*
 * FullDistortionKernel4x4_32bit_BT_SSE2 -- declaration only; the body is
 * defined outside this header.
 *   coeff, coeffStride           : EB_S16 buffer and its EB_U32 stride
 *   reconCoeff, reconCoeffStride : second EB_S16 buffer and its stride
 *   distortionResult             : EB_U64 array declared with two entries
 *                                  (decays to a pointer at the call site)
 *   areaWidth, areaHeight        : EB_U32 extent of the region
 * NOTE(review): the name suggests an SSE2 distortion kernel for 4x4 blocks
 * that writes into distortionResult -- confirm in the implementation.
 * Stride units (elements vs. bytes) are not visible from here.
 */
15 void FullDistortionKernel4x4_32bit_BT_SSE2(
16     EB_S16  *coeff,
17     EB_U32   coeffStride,
18     EB_S16  *reconCoeff,
19     EB_U32   reconCoeffStride,
20     EB_U64   distortionResult[2],
21     EB_U32   areaWidth,
22     EB_U32   areaHeight);
23 
/*
 * FullDistortionKernelCbfZero4x4_32bit_BT_SSE2 -- declaration only.
 * Same parameter list as the other FullDistortionKernel*_32bit_BT_SSE2
 * prototypes in this header: two EB_S16 buffers with EB_U32 strides, a
 * two-entry EB_U64 distortionResult array, and an areaWidth x areaHeight
 * extent.
 * NOTE(review): "CbfZero" presumably denotes the zero-coded-block-flag
 * variant for 4x4 blocks -- confirm against the implementation; which
 * inputs are actually read in that case cannot be told from here.
 */
24 void FullDistortionKernelCbfZero4x4_32bit_BT_SSE2(
25     EB_S16  *coeff,
26     EB_U32   coeffStride,
27     EB_S16  *reconCoeff,
28     EB_U32   reconCoeffStride,
29     EB_U64   distortionResult[2],
30     EB_U32   areaWidth,
31     EB_U32   areaHeight);
32 
/*
 * FullDistortionKernelIntra4x4_32bit_BT_SSE2 -- declaration only.
 * Takes two EB_S16 buffers (coeff, reconCoeff) with their EB_U32 strides,
 * a two-entry EB_U64 distortionResult array, and an areaWidth x areaHeight
 * extent.
 * NOTE(review): "Intra" presumably selects the intra-prediction variant of
 * the 4x4 distortion kernel -- confirm; how it differs from the non-Intra
 * version is not visible in this header.
 */
33 void FullDistortionKernelIntra4x4_32bit_BT_SSE2(
34     EB_S16  *coeff,
35     EB_U32   coeffStride,
36     EB_S16  *reconCoeff,
37     EB_U32   reconCoeffStride,
38     EB_U64   distortionResult[2],
39     EB_U32   areaWidth,
40     EB_U32   areaHeight);
41 
/*
 * FullDistortionKernel8x8_32bit_BT_SSE2 -- declaration only; the body is
 * defined outside this header.
 *   coeff, coeffStride           : EB_S16 buffer and its EB_U32 stride
 *   reconCoeff, reconCoeffStride : second EB_S16 buffer and its stride
 *   distortionResult             : EB_U64 array declared with two entries
 *   areaWidth, areaHeight        : EB_U32 extent of the region
 * NOTE(review): presumably the 8x8 counterpart of the 4x4 distortion
 * kernel -- confirm in the implementation.
 */
42 void FullDistortionKernel8x8_32bit_BT_SSE2(
43     EB_S16  *coeff,
44     EB_U32   coeffStride,
45     EB_S16  *reconCoeff,
46     EB_U32   reconCoeffStride,
47     EB_U64   distortionResult[2],
48     EB_U32   areaWidth,
49     EB_U32   areaHeight);
50 
/*
 * FullDistortionKernelCbfZero8x8_32bit_BT_SSE2 -- declaration only.
 * Parameter list: two EB_S16 buffers with EB_U32 strides, a two-entry
 * EB_U64 distortionResult array, and an areaWidth x areaHeight extent.
 * NOTE(review): presumably the zero-coded-block-flag variant for 8x8
 * blocks -- confirm against the implementation.
 */
51 void FullDistortionKernelCbfZero8x8_32bit_BT_SSE2(
52     EB_S16  *coeff,
53     EB_U32   coeffStride,
54     EB_S16  *reconCoeff,
55     EB_U32   reconCoeffStride,
56     EB_U64   distortionResult[2],
57     EB_U32   areaWidth,
58     EB_U32   areaHeight);
59 
/*
 * FullDistortionKernelIntra8x8_32bit_BT_SSE2 -- declaration only.
 * Parameter list: two EB_S16 buffers with EB_U32 strides, a two-entry
 * EB_U64 distortionResult array, and an areaWidth x areaHeight extent.
 * NOTE(review): presumably the intra variant for 8x8 blocks -- confirm
 * against the implementation.
 */
60 void FullDistortionKernelIntra8x8_32bit_BT_SSE2(
61     EB_S16  *coeff,
62     EB_U32   coeffStride,
63     EB_S16  *reconCoeff,
64     EB_U32   reconCoeffStride,
65     EB_U64   distortionResult[2],
66     EB_U32   areaWidth,
67     EB_U32   areaHeight);
68 
/*
 * FullDistortionKernelIntra16MxN_32bit_BT_SSE2 -- declaration only.
 * Parameter list: two EB_S16 buffers with EB_U32 strides, a two-entry
 * EB_U64 distortionResult array, and an areaWidth x areaHeight extent.
 * NOTE(review): "16MxN" presumably means the width must be a multiple of
 * 16 with an arbitrary height, and "Intra" the intra variant -- confirm
 * both against the implementation.
 */
69 void FullDistortionKernelIntra16MxN_32bit_BT_SSE2(
70     EB_S16  *coeff,
71     EB_U32   coeffStride,
72     EB_S16  *reconCoeff,
73     EB_U32   reconCoeffStride,
74     EB_U64   distortionResult[2],
75     EB_U32   areaWidth,
76     EB_U32   areaHeight);
77 
/*
 * FullDistortionKernel16MxN_32bit_BT_SSE2 -- declaration only; the body is
 * defined outside this header.
 *   coeff, coeffStride           : EB_S16 buffer and its EB_U32 stride
 *   reconCoeff, reconCoeffStride : second EB_S16 buffer and its stride
 *   distortionResult             : EB_U64 array declared with two entries
 *   areaWidth, areaHeight        : EB_U32 extent of the region
 * NOTE(review): "16MxN" presumably means widths that are multiples of 16
 * -- confirm the accepted sizes in the implementation.
 */
78 void FullDistortionKernel16MxN_32bit_BT_SSE2(
79     EB_S16  *coeff,
80     EB_U32   coeffStride,
81     EB_S16  *reconCoeff,
82     EB_U32   reconCoeffStride,
83     EB_U64   distortionResult[2],
84     EB_U32   areaWidth,
85     EB_U32   areaHeight);
86 
87 
/*
 * FullDistortionKernelCbfZero16MxN_32bit_BT_SSE2 -- declaration only.
 * Parameter list: two EB_S16 buffers with EB_U32 strides, a two-entry
 * EB_U64 distortionResult array, and an areaWidth x areaHeight extent.
 * NOTE(review): presumably the zero-coded-block-flag variant for widths
 * that are multiples of 16 -- confirm against the implementation.
 */
88 void FullDistortionKernelCbfZero16MxN_32bit_BT_SSE2(
89     EB_S16  *coeff,
90     EB_U32   coeffStride,
91     EB_S16  *reconCoeff,
92     EB_U32   reconCoeffStride,
93     EB_U64   distortionResult[2],
94     EB_U32   areaWidth,
95     EB_U32   areaHeight);
96 
97 //-----
/*
 * ZeroOutCoeff4x4_SSE -- declaration only; the body is defined elsewhere.
 *   coeffbuffer      : EB_S16 buffer
 *   coeffStride      : EB_U32 stride of coeffbuffer
 *   coeffOriginIndex : EB_U32 index into coeffbuffer
 *   areaWidth/Height : EB_U32 extent of the region
 * NOTE(review): the name suggests it clears a 4x4 area of coeffbuffer
 * starting at coeffOriginIndex -- confirm. Note the "_SSE" suffix, unlike
 * the "_SSE2" suffix of the larger sizes.
 */
98 extern void ZeroOutCoeff4x4_SSE(
99     EB_S16*                  coeffbuffer,
100     EB_U32                   coeffStride,
101     EB_U32                   coeffOriginIndex,
102     EB_U32                   areaWidth,
103     EB_U32                   areaHeight);
/*
 * ZeroOutCoeff8x8_SSE2 -- declaration only; the body is defined elsewhere.
 * Takes an EB_S16 buffer, its EB_U32 stride, an EB_U32 origin index and an
 * areaWidth x areaHeight extent.
 * NOTE(review): presumably clears an 8x8 area of coeffbuffer starting at
 * coeffOriginIndex -- confirm in the implementation.
 */
104 extern void ZeroOutCoeff8x8_SSE2(
105     EB_S16*                  coeffbuffer,
106     EB_U32                   coeffStride,
107     EB_U32                   coeffOriginIndex,
108     EB_U32                   areaWidth,
109     EB_U32                   areaHeight);
/*
 * ZeroOutCoeff16x16_SSE2 -- declaration only; the body is defined elsewhere.
 * Takes an EB_S16 buffer, its EB_U32 stride, an EB_U32 origin index and an
 * areaWidth x areaHeight extent.
 * NOTE(review): presumably clears a 16x16 area of coeffbuffer starting at
 * coeffOriginIndex -- confirm in the implementation.
 */
110 extern void ZeroOutCoeff16x16_SSE2(
111     EB_S16*                  coeffbuffer,
112     EB_U32                   coeffStride,
113     EB_U32                   coeffOriginIndex,
114     EB_U32                   areaWidth,
115     EB_U32                   areaHeight);
/*
 * ZeroOutCoeff32x32_SSE2 -- declaration only; the body is defined elsewhere.
 * Takes an EB_S16 buffer, its EB_U32 stride, an EB_U32 origin index and an
 * areaWidth x areaHeight extent.
 * NOTE(review): presumably clears a 32x32 area of coeffbuffer starting at
 * coeffOriginIndex -- confirm in the implementation.
 */
116 extern void ZeroOutCoeff32x32_SSE2(
117     EB_S16*                  coeffbuffer,
118     EB_U32                   coeffStride,
119     EB_U32                   coeffOriginIndex,
120     EB_U32                   areaWidth,
121     EB_U32                   areaHeight);
122 
/*
 * ResidualKernel16bit_SSE2_INTRIN -- declaration only.
 *   input, inputStride       : EB_U16 buffer and its EB_U32 stride
 *   pred, predStride         : EB_U16 buffer and its EB_U32 stride
 *   residual, residualStride : EB_S16 buffer and its EB_U32 stride
 *   areaWidth, areaHeight    : EB_U32 extent of the region
 * NOTE(review): presumably writes residual = input - pred for 16-bit
 * samples -- confirm in the implementation; supported sizes are not
 * visible here.
 */
123 extern void ResidualKernel16bit_SSE2_INTRIN(
124 	EB_U16   *input,
125 	EB_U32   inputStride,
126 	EB_U16   *pred,
127 	EB_U32   predStride,
128 	EB_S16  *residual,
129 	EB_U32   residualStride,
130 	EB_U32   areaWidth,
131 	EB_U32   areaHeight);
132 
/*
 * PictureCopyKernel4x4_SSE_INTRIN -- declaration only.
 *   src, srcStride        : EB_BYTE buffer and its EB_U32 stride
 *   dst, dstStride        : EB_BYTE buffer and its EB_U32 stride
 *   areaWidth, areaHeight : EB_U32 extent of the region
 * NOTE(review): presumably copies a 4x4 area from src to dst -- confirm.
 * Note the "_SSE" suffix, unlike the "_SSE2" suffix of the larger sizes.
 */
133 void PictureCopyKernel4x4_SSE_INTRIN(
134 	EB_BYTE                  src,
135 	EB_U32                   srcStride,
136 	EB_BYTE                  dst,
137 	EB_U32                   dstStride,
138 	EB_U32                   areaWidth,
139 	EB_U32                   areaHeight);
140 
/*
 * PictureCopyKernel8x8_SSE2_INTRIN -- declaration only.
 * Takes an EB_BYTE src and dst, each with an EB_U32 stride, and an
 * areaWidth x areaHeight extent.
 * NOTE(review): presumably copies an 8x8 area from src to dst -- confirm
 * in the implementation.
 */
141 void PictureCopyKernel8x8_SSE2_INTRIN(
142 	EB_BYTE                  src,
143 	EB_U32                   srcStride,
144 	EB_BYTE                  dst,
145 	EB_U32                   dstStride,
146 	EB_U32                   areaWidth,
147 	EB_U32                   areaHeight);
148 
/*
 * PictureCopyKernel16x16_SSE2_INTRIN -- declaration only.
 * Takes an EB_BYTE src and dst, each with an EB_U32 stride, and an
 * areaWidth x areaHeight extent.
 * NOTE(review): presumably copies a 16x16 area from src to dst -- confirm
 * in the implementation.
 */
149 void PictureCopyKernel16x16_SSE2_INTRIN(
150 	EB_BYTE                  src,
151 	EB_U32                   srcStride,
152 	EB_BYTE                  dst,
153 	EB_U32                   dstStride,
154 	EB_U32                   areaWidth,
155 	EB_U32                   areaHeight);
156 
157 
/*
 * PictureCopyKernel32x32_SSE2_INTRIN -- declaration only.
 * Takes an EB_BYTE src and dst, each with an EB_U32 stride, and an
 * areaWidth x areaHeight extent.
 * NOTE(review): presumably copies a 32x32 area from src to dst -- confirm
 * in the implementation.
 */
158 void PictureCopyKernel32x32_SSE2_INTRIN(
159 	EB_BYTE                  src,
160 	EB_U32                   srcStride,
161 	EB_BYTE                  dst,
162 	EB_U32                   dstStride,
163 	EB_U32                   areaWidth,
164 	EB_U32                   areaHeight);
165 
/*
 * PictureCopyKernel64x64_SSE2_INTRIN -- declaration only.
 * Takes an EB_BYTE src and dst, each with an EB_U32 stride, and an
 * areaWidth x areaHeight extent.
 * NOTE(review): presumably copies a 64x64 area from src to dst -- confirm
 * in the implementation.
 */
166 void PictureCopyKernel64x64_SSE2_INTRIN(
167 	EB_BYTE                  src,
168 	EB_U32                   srcStride,
169 	EB_BYTE                  dst,
170 	EB_U32                   dstStride,
171 	EB_U32                   areaWidth,
172 	EB_U32                   areaHeight);
173 
/*
 * PictureAdditionKernel4x4_SSE_INTRIN -- declaration only.
 *   predPtr, predStride         : EB_U8 buffer and its EB_U32 stride
 *   residualPtr, residualStride : EB_S16 buffer and its EB_U32 stride
 *   reconPtr, reconStride       : EB_U8 buffer and its EB_U32 stride
 *   width, height               : EB_U32 extent of the region
 * NOTE(review): presumably writes recon = pred + residual for a 4x4 area;
 * whether the sum is clamped to the 8-bit range is not visible here --
 * confirm in the implementation.
 */
174 void PictureAdditionKernel4x4_SSE_INTRIN(
175 	EB_U8  *predPtr,
176 	EB_U32  predStride,
177 	EB_S16 *residualPtr,
178 	EB_U32  residualStride,
179 	EB_U8  *reconPtr,
180 	EB_U32  reconStride,
181 	EB_U32  width,
182 	EB_U32  height);
183 
/*
 * PictureAdditionKernel8x8_SSE2_INTRIN -- declaration only.
 * Takes an EB_U8 prediction buffer, an EB_S16 residual buffer and an EB_U8
 * reconstruction buffer, each with an EB_U32 stride, plus width and height.
 * NOTE(review): presumably writes recon = pred + residual for an 8x8 area
 * -- confirm in the implementation (clamping is not visible here).
 */
184 void PictureAdditionKernel8x8_SSE2_INTRIN(
185 	EB_U8  *predPtr,
186 	EB_U32  predStride,
187 	EB_S16 *residualPtr,
188 	EB_U32  residualStride,
189 	EB_U8  *reconPtr,
190 	EB_U32  reconStride,
191 	EB_U32  width,
192 	EB_U32  height);
193 
/*
 * PictureAdditionKernel16x16_SSE2_INTRIN -- declaration only.
 * Takes an EB_U8 prediction buffer, an EB_S16 residual buffer and an EB_U8
 * reconstruction buffer, each with an EB_U32 stride, plus width and height.
 * NOTE(review): presumably writes recon = pred + residual for a 16x16 area
 * -- confirm in the implementation (clamping is not visible here).
 */
194 void PictureAdditionKernel16x16_SSE2_INTRIN(
195 	EB_U8  *predPtr,
196 	EB_U32  predStride,
197 	EB_S16 *residualPtr,
198 	EB_U32  residualStride,
199 	EB_U8  *reconPtr,
200 	EB_U32  reconStride,
201 	EB_U32  width,
202 	EB_U32  height);
203 
/*
 * PictureAdditionKernel32x32_SSE2_INTRIN -- declaration only.
 * Takes an EB_U8 prediction buffer, an EB_S16 residual buffer and an EB_U8
 * reconstruction buffer, each with an EB_U32 stride, plus width and height.
 * NOTE(review): presumably writes recon = pred + residual for a 32x32 area
 * -- confirm in the implementation (clamping is not visible here).
 */
204 void PictureAdditionKernel32x32_SSE2_INTRIN(
205 	EB_U8  *predPtr,
206 	EB_U32  predStride,
207 	EB_S16 *residualPtr,
208 	EB_U32  residualStride,
209 	EB_U8  *reconPtr,
210 	EB_U32  reconStride,
211 	EB_U32  width,
212 	EB_U32  height);
213 
/*
 * PictureAdditionKernel64x64_SSE2_INTRIN -- declaration only.
 * Takes an EB_U8 prediction buffer, an EB_S16 residual buffer and an EB_U8
 * reconstruction buffer, each with an EB_U32 stride, plus width and height.
 * NOTE(review): presumably writes recon = pred + residual for a 64x64 area
 * -- confirm in the implementation (clamping is not visible here).
 */
214 void PictureAdditionKernel64x64_SSE2_INTRIN(
215 	EB_U8  *predPtr,
216 	EB_U32  predStride,
217 	EB_S16 *residualPtr,
218 	EB_U32  residualStride,
219 	EB_U8  *reconPtr,
220 	EB_U32  reconStride,
221 	EB_U32  width,
222 	EB_U32  height);
223 
/*
 * ResidualKernel4x4_SSE_INTRIN -- declaration only.
 *   input, inputStride       : EB_U8 buffer and its EB_U32 stride
 *   pred, predStride         : EB_U8 buffer and its EB_U32 stride
 *   residual, residualStride : EB_S16 buffer and its EB_U32 stride
 *   areaWidth, areaHeight    : EB_U32 extent of the region
 * NOTE(review): presumably writes residual = input - pred for a 4x4 area
 * -- confirm in the implementation. Note the "_SSE" suffix, unlike the
 * "_SSE2" suffix of the larger sizes.
 */
224 void ResidualKernel4x4_SSE_INTRIN(
225 	EB_U8   *input,
226 	EB_U32   inputStride,
227 	EB_U8   *pred,
228 	EB_U32   predStride,
229 	EB_S16  *residual,
230 	EB_U32   residualStride,
231 	EB_U32   areaWidth,
232 	EB_U32   areaHeight);
233 
/*
 * ResidualKernel8x8_SSE2_INTRIN -- declaration only.
 * Takes EB_U8 input and pred buffers and an EB_S16 residual buffer, each
 * with an EB_U32 stride, plus an areaWidth x areaHeight extent.
 * NOTE(review): presumably writes residual = input - pred for an 8x8 area
 * -- confirm in the implementation.
 */
234 void ResidualKernel8x8_SSE2_INTRIN(
235 	EB_U8   *input,
236 	EB_U32   inputStride,
237 	EB_U8   *pred,
238 	EB_U32   predStride,
239 	EB_S16  *residual,
240 	EB_U32   residualStride,
241 	EB_U32   areaWidth,
242 	EB_U32   areaHeight);
243 
/*
 * ResidualKernel16x16_SSE2_INTRIN -- declaration only.
 * Takes EB_U8 input and pred buffers and an EB_S16 residual buffer, each
 * with an EB_U32 stride, plus an areaWidth x areaHeight extent.
 * NOTE(review): presumably writes residual = input - pred for a 16x16 area
 * -- confirm in the implementation.
 */
244 void ResidualKernel16x16_SSE2_INTRIN(
245 	EB_U8   *input,
246 	EB_U32   inputStride,
247 	EB_U8   *pred,
248 	EB_U32   predStride,
249 	EB_S16  *residual,
250 	EB_U32   residualStride,
251 	EB_U32   areaWidth,
252 	EB_U32   areaHeight);
253 
/*
 * ResidualKernelSubSampled4x4_SSE_INTRIN -- declaration only.
 *   input, inputStride       : EB_U8 buffer and its EB_U32 stride
 *   pred, predStride         : EB_U8 buffer and its EB_U32 stride
 *   residual, residualStride : EB_S16 buffer and its EB_U32 stride
 *   areaWidth, areaHeight    : EB_U32 extent of the region
 *   lastLine                 : EB_U8 value; its meaning is not visible here
 * NOTE(review): "SubSampled" presumably means only a subset of rows is
 * processed, with lastLine controlling handling of the final row --
 * confirm both against the implementation.
 */
254 void ResidualKernelSubSampled4x4_SSE_INTRIN(
255 	EB_U8   *input,
256 	EB_U32   inputStride,
257 	EB_U8   *pred,
258 	EB_U32   predStride,
259 	EB_S16  *residual,
260 	EB_U32   residualStride,
261 	EB_U32   areaWidth,
262 	EB_U32   areaHeight,
263     EB_U8    lastLine );
264 
/*
 * ResidualKernelSubSampled8x8_SSE2_INTRIN -- declaration only.
 * Same parameter list as ResidualKernel8x8_SSE2_INTRIN plus a trailing
 * EB_U8 lastLine argument.
 * NOTE(review): presumably a row-subsampled residual for an 8x8 area; the
 * exact role of lastLine is not visible here -- confirm in the
 * implementation.
 */
265 void ResidualKernelSubSampled8x8_SSE2_INTRIN(
266 	EB_U8   *input,
267 	EB_U32   inputStride,
268 	EB_U8   *pred,
269 	EB_U32   predStride,
270 	EB_S16  *residual,
271 	EB_U32   residualStride,
272 	EB_U32   areaWidth,
273 	EB_U32   areaHeight,
274     EB_U8    lastLine);
275 
/*
 * ResidualKernelSubSampled16x16_SSE2_INTRIN -- declaration only.
 * Same parameter list as ResidualKernel16x16_SSE2_INTRIN plus a trailing
 * EB_U8 lastLine argument.
 * NOTE(review): presumably a row-subsampled residual for a 16x16 area; the
 * exact role of lastLine is not visible here -- confirm in the
 * implementation.
 */
276 void ResidualKernelSubSampled16x16_SSE2_INTRIN(
277 	EB_U8   *input,
278 	EB_U32   inputStride,
279 	EB_U8   *pred,
280 	EB_U32   predStride,
281 	EB_S16  *residual,
282 	EB_U32   residualStride,
283 	EB_U32   areaWidth,
284 	EB_U32   areaHeight,
285     EB_U8    lastLine);
286 
/*
 * ResidualKernelSubSampled32x32_SSE2_INTRIN -- declaration only.
 * Same parameter list as ResidualKernel32x32_SSE2_INTRIN plus a trailing
 * EB_U8 lastLine argument.
 * NOTE(review): presumably a row-subsampled residual for a 32x32 area; the
 * exact role of lastLine is not visible here -- confirm in the
 * implementation.
 */
287 void ResidualKernelSubSampled32x32_SSE2_INTRIN(
288 	EB_U8   *input,
289 	EB_U32   inputStride,
290 	EB_U8   *pred,
291 	EB_U32   predStride,
292 	EB_S16  *residual,
293 	EB_U32   residualStride,
294 	EB_U32   areaWidth,
295 	EB_U32   areaHeight,
296     EB_U8    lastLine);
297 
/*
 * ResidualKernelSubSampled64x64_SSE2_INTRIN -- declaration only.
 * Same parameter list as ResidualKernel64x64_SSE2_INTRIN plus a trailing
 * EB_U8 lastLine argument.
 * NOTE(review): presumably a row-subsampled residual for a 64x64 area; the
 * exact role of lastLine is not visible here -- confirm in the
 * implementation.
 */
298 void ResidualKernelSubSampled64x64_SSE2_INTRIN(
299 	EB_U8   *input,
300 	EB_U32   inputStride,
301 	EB_U8   *pred,
302 	EB_U32   predStride,
303 	EB_S16  *residual,
304 	EB_U32   residualStride,
305 	EB_U32   areaWidth,
306 	EB_U32   areaHeight,
307     EB_U8    lastLine);
308 
/*
 * ResidualKernel32x32_SSE2_INTRIN -- declaration only.
 * Takes EB_U8 input and pred buffers and an EB_S16 residual buffer, each
 * with an EB_U32 stride, plus an areaWidth x areaHeight extent.
 * NOTE(review): presumably writes residual = input - pred for a 32x32 area
 * -- confirm in the implementation.
 */
309 void ResidualKernel32x32_SSE2_INTRIN(
310 	EB_U8   *input,
311 	EB_U32   inputStride,
312 	EB_U8   *pred,
313 	EB_U32   predStride,
314 	EB_S16  *residual,
315 	EB_U32   residualStride,
316 	EB_U32   areaWidth,
317 	EB_U32   areaHeight);
318 
/*
 * ResidualKernel64x64_SSE2_INTRIN -- declaration only.
 * Takes EB_U8 input and pred buffers and an EB_S16 residual buffer, each
 * with an EB_U32 stride, plus an areaWidth x areaHeight extent.
 * NOTE(review): presumably writes residual = input - pred for a 64x64 area
 * -- confirm in the implementation.
 */
319 void ResidualKernel64x64_SSE2_INTRIN(
320 	EB_U8   *input,
321 	EB_U32   inputStride,
322 	EB_U8   *pred,
323 	EB_U32   predStride,
324 	EB_S16  *residual,
325 	EB_U32   residualStride,
326 	EB_U32   areaWidth,
327 	EB_U32   areaHeight);
328 
/*
 * PictureAdditionKernel16bit_SSE2_INTRIN -- declaration only.
 *   predPtr, predStride         : EB_U16 buffer and its EB_U32 stride
 *   residualPtr, residualStride : EB_S16 buffer and its EB_U32 stride
 *   reconPtr, reconStride       : EB_U16 buffer and its EB_U32 stride
 *   width, height               : EB_U32 extent of the region
 * NOTE(review): presumably the 16-bit-sample counterpart of the
 * PictureAdditionKernel*x* prototypes (recon = pred + residual); the valid
 * sample range and any clamping are not visible here -- confirm in the
 * implementation.
 */
329 void PictureAdditionKernel16bit_SSE2_INTRIN(
330 	EB_U16  *predPtr,
331 	EB_U32  predStride,
332 	EB_S16 *residualPtr,
333 	EB_U32  residualStride,
334 	EB_U16  *reconPtr,
335 	EB_U32  reconStride,
336 	EB_U32  width,
337 	EB_U32  height);
338 
339 
340 
341 #ifdef __cplusplus
342 }
343 #endif
344 #endif // EbPictureOperators_SSE2_h
345