1 /*
2 * Copyright(c) 2019 Intel Corporation
3 * SPDX - License - Identifier: BSD - 2 - Clause - Patent
4 */
5
6 /*********************************
7 * Includes
8 *********************************/
9
10 #include "EbPictureOperators.h"
11 #define VARIANCE_PRECISION 16
12 #define MEAN_PRECISION (VARIANCE_PRECISION >> 1)
13
14 #include "EbDefinitions.h"
15 #include "EbPackUnPack.h"
16
17 /*********************************
18 * x86 implememtation of Picture Addition
19 *********************************/
eb_vp9_picture_addition(uint8_t * pred_ptr,uint32_t pred_stride,int16_t * residual_ptr,uint32_t residual_stride,uint8_t * recon_ptr,uint32_t recon_stride,uint32_t width,uint32_t height)20 void eb_vp9_picture_addition(
21 uint8_t *pred_ptr,
22 uint32_t pred_stride,
23 int16_t *residual_ptr,
24 uint32_t residual_stride,
25 uint8_t *recon_ptr,
26 uint32_t recon_stride,
27 uint32_t width,
28 uint32_t height)
29 {
30
31 addition_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][width >> 3](
32 pred_ptr,
33 pred_stride,
34 residual_ptr,
35 residual_stride,
36 recon_ptr,
37 recon_stride,
38 width,
39 height
40 );
41
42 return;
43 }
44
45 /*********************************
46 * Picture Copy 8bit Elements
47 *********************************/
picture_copy8_bit(EbPictureBufferDesc * src,uint32_t src_luma_origin_index,uint32_t src_chroma_origin_index,EbPictureBufferDesc * dst,uint32_t dst_luma_origin_index,uint32_t dst_chroma_origin_index,uint32_t area_width,uint32_t area_height,uint32_t chroma_area_width,uint32_t chroma_area_height,uint32_t component_mask)48 EbErrorType picture_copy8_bit(
49 EbPictureBufferDesc *src,
50 uint32_t src_luma_origin_index,
51 uint32_t src_chroma_origin_index,
52 EbPictureBufferDesc *dst,
53 uint32_t dst_luma_origin_index,
54 uint32_t dst_chroma_origin_index,
55 uint32_t area_width,
56 uint32_t area_height,
57 uint32_t chroma_area_width,
58 uint32_t chroma_area_height,
59 uint32_t component_mask)
60 {
61 EbErrorType return_error = EB_ErrorNone;
62
63 // Execute the Kernels
64 if (component_mask & PICTURE_BUFFER_DESC_Y_FLAG) {
65
66 pic_copy_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][area_width>>3](
67 &(src->buffer_y[src_luma_origin_index]),
68 src->stride_y,
69 &(dst->buffer_y[dst_luma_origin_index]),
70 dst->stride_y,
71 area_width,
72 area_height);
73 }
74
75 if (component_mask & PICTURE_BUFFER_DESC_Cb_FLAG) {
76
77 pic_copy_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][chroma_area_width >> 3](
78 &(src->buffer_cb[src_chroma_origin_index]),
79 src->stride_cb,
80 &(dst->buffer_cb[dst_chroma_origin_index]),
81 dst->stride_cb,
82 chroma_area_width,
83 chroma_area_height);
84 }
85
86 if (component_mask & PICTURE_BUFFER_DESC_Cr_FLAG) {
87
88 pic_copy_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][chroma_area_width >> 3](
89 &(src->buffer_cr[src_chroma_origin_index]),
90 src->stride_cr,
91 &(dst->buffer_cr[dst_chroma_origin_index]),
92 dst->stride_cr,
93 chroma_area_width,
94 chroma_area_height);
95 }
96
97 return return_error;
98 }
99
100 /*******************************************
101 * Picture Residue : subsampled version
102 Computes the residual data
103 *******************************************/
picture_sub_sampled_residual(uint8_t * input,uint32_t input_stride,uint8_t * pred,uint32_t pred_stride,int16_t * residual,uint32_t residual_stride,uint32_t area_width,uint32_t area_height,uint8_t last_line)104 void picture_sub_sampled_residual(
105 uint8_t *input,
106 uint32_t input_stride,
107 uint8_t *pred,
108 uint32_t pred_stride,
109 int16_t *residual,
110 uint32_t residual_stride,
111 uint32_t area_width,
112 uint32_t area_height,
113 uint8_t last_line) //the last line has correct prediction data, so no duplication to be done.
114 {
115
116 eb_vp9_residual_kernel_sub_sampled_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][area_width>>3](
117 input,
118 input_stride,
119 pred,
120 pred_stride,
121 residual,
122 residual_stride,
123 area_width,
124 area_height,
125 last_line);
126
127 return;
128 }
129 /*******************************************
130 * Pciture Residue
131 Computes the residual data
132 *******************************************/
picture_residual(uint8_t * input,uint32_t input_stride,uint8_t * pred,uint32_t pred_stride,int16_t * residual,uint32_t residual_stride,uint32_t area_width,uint32_t area_height)133 void picture_residual(
134 uint8_t *input,
135 uint32_t input_stride,
136 uint8_t *pred,
137 uint32_t pred_stride,
138 int16_t *residual,
139 uint32_t residual_stride,
140 uint32_t area_width,
141 uint32_t area_height)
142 {
143
144 eb_vp9_residual_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][area_width>>3](
145 input,
146 input_stride,
147 pred,
148 pred_stride,
149 residual,
150 residual_stride,
151 area_width,
152 area_height);
153
154 return;
155 }
156
157 /*******************************************
158 * Pciture Residue 16bit input
159 Computes the residual data
160 *******************************************/
picture_residual16bit(uint16_t * input,uint32_t input_stride,uint16_t * pred,uint32_t pred_stride,int16_t * residual,uint32_t residual_stride,uint32_t area_width,uint32_t area_height)161 void picture_residual16bit(
162 uint16_t *input,
163 uint32_t input_stride,
164 uint16_t *pred,
165 uint32_t pred_stride,
166 int16_t *residual,
167 uint32_t residual_stride,
168 uint32_t area_width,
169 uint32_t area_height)
170 {
171
172 eb_vp9_residual_kernel_func_ptr_array16_bit[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1](
173 input,
174 input_stride,
175 pred,
176 pred_stride,
177 residual,
178 residual_stride,
179 area_width,
180 area_height);
181
182 return;
183 }
184
185 /*******************************************
186 * Picture Full Distortion
187 * Used in the Full Mode Decision Loop
188 *******************************************/
189
picture_full_distortion(EbPictureBufferDesc * coeff,uint32_t coeff_origin_index,EbPictureBufferDesc * recon_coeff,uint32_t recon_coeff_origin_index,uint32_t area_size,uint64_t distortion[DIST_CALC_TOTAL],uint32_t eob)190 EbErrorType picture_full_distortion(
191 EbPictureBufferDesc *coeff,
192 uint32_t coeff_origin_index,
193 EbPictureBufferDesc *recon_coeff,
194 uint32_t recon_coeff_origin_index,
195 uint32_t area_size,
196 uint64_t distortion[DIST_CALC_TOTAL],
197 uint32_t eob)
198 {
199 EbErrorType return_error = EB_ErrorNone;
200
201 //TODO due to a change in full kernel distortion , ASM has to be updated to not accumulate the input distortion by the output
202 distortion[0] = 0;
203 distortion[1] = 0;
204 // Y
205 full_distortion_intrinsic_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][eob != 0][0][area_size >> 3](
206 &(((int16_t*) coeff->buffer_y)[coeff_origin_index]),
207 coeff->stride_y,
208 &(((int16_t*) recon_coeff->buffer_y)[recon_coeff_origin_index]),
209 recon_coeff->stride_y,
210 distortion,
211 area_size,
212 area_size);
213
214 return return_error;
215 }
216
eb_vp9_extract_8bit_data(uint16_t * in16_bit_buffer,uint32_t in_stride,uint8_t * out8_bit_buffer,uint32_t out8_stride,uint32_t width,uint32_t height)217 void eb_vp9_extract_8bit_data(
218 uint16_t *in16_bit_buffer,
219 uint32_t in_stride,
220 uint8_t *out8_bit_buffer,
221 uint32_t out8_stride,
222 uint32_t width,
223 uint32_t height
224 )
225 {
226
227 unpack_8bit_func_ptr_array_16bit[((width & 3) == 0) && ((height & 1)== 0)][(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1](
228 in16_bit_buffer,
229 in_stride,
230 out8_bit_buffer,
231 out8_stride,
232 width,
233 height);
234 }
eb_vp9_unpack_l0l1_avg(uint16_t * ref16_l0,uint32_t ref_l0_stride,uint16_t * ref16_l1,uint32_t ref_l1_stride,uint8_t * dst_ptr,uint32_t dst_stride,uint32_t width,uint32_t height)235 void eb_vp9_unpack_l0l1_avg(
236 uint16_t *ref16_l0,
237 uint32_t ref_l0_stride,
238 uint16_t *ref16_l1,
239 uint32_t ref_l1_stride,
240 uint8_t *dst_ptr,
241 uint32_t dst_stride,
242 uint32_t width,
243 uint32_t height)
244 {
245
246 eb_vp9_unpack_avg_func_ptr_array[(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
247 ref16_l0,
248 ref_l0_stride,
249 ref16_l1,
250 ref_l1_stride,
251 dst_ptr,
252 dst_stride,
253 width,
254 height);
255
256 }
eb_vp9_extract8_bitdata_safe_sub(uint16_t * in16_bit_buffer,uint32_t in_stride,uint8_t * out8_bit_buffer,uint32_t out8_stride,uint32_t width,uint32_t height)257 void eb_vp9_extract8_bitdata_safe_sub(
258 uint16_t *in16_bit_buffer,
259 uint32_t in_stride,
260 uint8_t *out8_bit_buffer,
261 uint32_t out8_stride,
262 uint32_t width,
263 uint32_t height
264 )
265 {
266
267 unpack_8bit_safe_sub_func_ptr_array_16bit[(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
268 in16_bit_buffer,
269 in_stride,
270 out8_bit_buffer,
271 out8_stride,
272 width,
273 height
274 );
275 }
eb_vp9_unpack_l0l1_avg_safe_sub(uint16_t * ref16_l0,uint32_t ref_l0_stride,uint16_t * ref16_l1,uint32_t ref_l1_stride,uint8_t * dst_ptr,uint32_t dst_stride,uint32_t width,uint32_t height)276 void eb_vp9_unpack_l0l1_avg_safe_sub(
277 uint16_t *ref16_l0,
278 uint32_t ref_l0_stride,
279 uint16_t *ref16_l1,
280 uint32_t ref_l1_stride,
281 uint8_t *dst_ptr,
282 uint32_t dst_stride,
283 uint32_t width,
284 uint32_t height)
285 {
286 //fix C
287
288 eb_vp9_unpack_avg_safe_sub_func_ptr_array[(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
289 ref16_l0,
290 ref_l0_stride,
291 ref16_l1,
292 ref_l1_stride,
293 dst_ptr,
294 dst_stride,
295 width,
296 height);
297
298 }
unpack_2d(uint16_t * in16_bit_buffer,uint32_t in_stride,uint8_t * out8_bit_buffer,uint32_t out8_stride,uint8_t * outn_bit_buffer,uint32_t outn_stride,uint32_t width,uint32_t height)299 void unpack_2d(
300 uint16_t *in16_bit_buffer,
301 uint32_t in_stride,
302 uint8_t *out8_bit_buffer,
303 uint32_t out8_stride,
304 uint8_t *outn_bit_buffer,
305 uint32_t outn_stride,
306 uint32_t width,
307 uint32_t height
308 )
309 {
310
311 unpack2_d_func_ptr_array_16_bit[((width & 3) == 0) && ((height & 1)== 0)][(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
312 in16_bit_buffer,
313 in_stride,
314 out8_bit_buffer,
315 outn_bit_buffer,
316 out8_stride,
317 outn_stride,
318 width,
319 height);
320 }
321
pack_2d_src(uint8_t * in8_bit_buffer,uint32_t in8_stride,uint8_t * inn_bit_buffer,uint32_t inn_stride,uint16_t * out16_bit_buffer,uint32_t out_stride,uint32_t width,uint32_t height)322 void pack_2d_src(
323 uint8_t *in8_bit_buffer,
324 uint32_t in8_stride,
325 uint8_t *inn_bit_buffer,
326 uint32_t inn_stride,
327 uint16_t *out16_bit_buffer,
328 uint32_t out_stride,
329 uint32_t width,
330 uint32_t height
331 )
332 {
333
334 pack2_d_func_ptr_array_16_bit_src[((width & 3) == 0) && ((height & 1)== 0)][(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
335 in8_bit_buffer,
336 in8_stride,
337 inn_bit_buffer,
338 out16_bit_buffer,
339 inn_stride,
340 out_stride,
341 width,
342 height);
343 }
344
compressed_pack_blk(uint8_t * in8_bit_buffer,uint32_t in8_stride,uint8_t * inn_bit_buffer,uint32_t inn_stride,uint16_t * out16_bit_buffer,uint32_t out_stride,uint32_t width,uint32_t height)345 void compressed_pack_blk(
346 uint8_t *in8_bit_buffer,
347 uint32_t in8_stride,
348 uint8_t *inn_bit_buffer,
349 uint32_t inn_stride,
350 uint16_t *out16_bit_buffer,
351 uint32_t out_stride,
352 uint32_t width,
353 uint32_t height
354 )
355 {
356
357 compressed_pack_func_ptr_array[((width == 64 || width == 32 || width == 16 || width == 8) ? ((eb_vp9_ASM_TYPES & AVX2_MASK) && 1) : ASM_NON_AVX2)](
358 in8_bit_buffer,
359 in8_stride,
360 inn_bit_buffer,
361 out16_bit_buffer,
362 inn_stride,
363 out_stride,
364 width,
365 height);
366
367 }
368
/*******************************************
 * eb_vp9_memset16bit
 *
 *   Writes `value` into the first `num_of_elements` 16-bit entries
 *   starting at in_ptr. Does nothing when num_of_elements is 0.
 *******************************************/
void eb_vp9_memset16bit(
    uint16_t *in_ptr,
    uint16_t  value,
    uint64_t  num_of_elements)
{
    uint16_t *cursor    = in_ptr;
    uint64_t  remaining = num_of_elements;

    for (; remaining != 0; --remaining) {
        *cursor++ = value;
    }
}
/*******************************************
 * eb_vp9_memcpy16bit
 *
 *   Copies `num_of_elements` 16-bit entries from in_ptr to out_ptr, one
 *   element at a time in ascending address order. Does nothing when
 *   num_of_elements is 0.
 *******************************************/
void eb_vp9_memcpy16bit(
    uint16_t *out_ptr,
    uint16_t *in_ptr,
    uint64_t  num_of_elements)
{
    uint16_t *dst       = out_ptr;
    uint16_t *src       = in_ptr;
    uint64_t  remaining = num_of_elements;

    for (; remaining != 0; --remaining) {
        *dst++ = *src++;
    }
}
397
/*******************************************
 * sum_residual
 *
 *   Returns the sum of the size x size block of 16-bit values that
 *   starts at in_ptr, where consecutive rows are stride_in elements
 *   apart. Returns 0 when size is 0.
 *******************************************/
int32_t sum_residual(
    int16_t  *in_ptr,
    uint32_t  size,
    uint32_t  stride_in)
{
    int32_t  total = 0;
    uint32_t row;
    uint32_t col;

    for (row = 0; row < size; ++row) {
        const int16_t *line = &in_ptr[row * stride_in];

        for (col = 0; col < size; ++col) {
            total += line[col];
        }
    }

    return total;
}
414
/*******************************************
 * memset_16bit_block
 *
 *   Fills a size x size block of 16-bit values with `value`, one row at
 *   a time via eb_vp9_memset16bit. Rows start stride_in elements apart;
 *   the block is addressed through a uint16_t view of in_ptr.
 *******************************************/
void memset_16bit_block(
    int16_t  *in_ptr,
    uint32_t  stride_in,
    uint32_t  size,
    int16_t   value)
{
    uint16_t *const block_start = (uint16_t *)in_ptr;
    uint32_t        row         = 0;

    while (row < size) {
        eb_vp9_memset16bit(block_start + row * stride_in, value, size);
        ++row;
    }
}
427