/*
* Copyright(c) 2019 Intel Corporation
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/

/*********************************
 * Includes
 *********************************/

#include "EbPictureOperators.h"
#define VARIANCE_PRECISION      16
#define MEAN_PRECISION      (VARIANCE_PRECISION >> 1)

#include "EbDefinitions.h"
#include "EbPackUnPack.h"

/*********************************
 * x86 implementation of Picture Addition
 *********************************/
void eb_vp9_picture_addition(
    uint8_t  *pred_ptr,
    uint32_t  pred_stride,
    int16_t  *residual_ptr,
    uint32_t  residual_stride,
    uint8_t  *recon_ptr,
    uint32_t  recon_stride,
    uint32_t  width,
    uint32_t  height)
{

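    /*
     * Dispatch note: the kernel tables in this file are indexed first by
     * (eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1, which collapses the capability
     * mask to 0 or 1 (presumably the plain-C entry vs. the SIMD-optimized
     * entry), and then by the block width in units of 8 pixels (width >> 3).
     */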
    addition_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][width >> 3](
        pred_ptr,
        pred_stride,
        residual_ptr,
        residual_stride,
        recon_ptr,
        recon_stride,
        width,
        height
    );

    return;
}

/*********************************
 * Picture Copy 8bit Elements
 *********************************/
EbErrorType picture_copy8_bit(
    EbPictureBufferDesc *src,
    uint32_t             src_luma_origin_index,
    uint32_t             src_chroma_origin_index,
    EbPictureBufferDesc *dst,
    uint32_t             dst_luma_origin_index,
    uint32_t             dst_chroma_origin_index,
    uint32_t             area_width,
    uint32_t             area_height,
    uint32_t             chroma_area_width,
    uint32_t             chroma_area_height,
    uint32_t             component_mask)
{
    EbErrorType return_error = EB_ErrorNone;

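    // component_mask selects which planes are processed: the Y copy uses
    // area_width x area_height, while the Cb/Cr copies use chroma_area_width x
    // chroma_area_height.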
    // Execute the Kernels
    if (component_mask & PICTURE_BUFFER_DESC_Y_FLAG) {

        pic_copy_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][area_width >> 3](
            &(src->buffer_y[src_luma_origin_index]),
            src->stride_y,
            &(dst->buffer_y[dst_luma_origin_index]),
            dst->stride_y,
            area_width,
            area_height);
    }

    if (component_mask & PICTURE_BUFFER_DESC_Cb_FLAG) {

        pic_copy_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][chroma_area_width >> 3](
            &(src->buffer_cb[src_chroma_origin_index]),
            src->stride_cb,
            &(dst->buffer_cb[dst_chroma_origin_index]),
            dst->stride_cb,
            chroma_area_width,
            chroma_area_height);
    }

    if (component_mask & PICTURE_BUFFER_DESC_Cr_FLAG) {

        pic_copy_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][chroma_area_width >> 3](
            &(src->buffer_cr[src_chroma_origin_index]),
            src->stride_cr,
            &(dst->buffer_cr[dst_chroma_origin_index]),
            dst->stride_cr,
            chroma_area_width,
            chroma_area_height);
    }

    return return_error;
}

/*******************************************
* Picture Residue: subsampled version
  Computes the residual data
*******************************************/
void picture_sub_sampled_residual(
    uint8_t  *input,
    uint32_t  input_stride,
    uint8_t  *pred,
    uint32_t  pred_stride,
    int16_t  *residual,
    uint32_t  residual_stride,
    uint32_t  area_width,
    uint32_t  area_height,
    uint8_t   last_line)    // the last line has correct prediction data, so no duplication needs to be done
{

    eb_vp9_residual_kernel_sub_sampled_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][area_width >> 3](
        input,
        input_stride,
        pred,
        pred_stride,
        residual,
        residual_stride,
        area_width,
        area_height,
        last_line);

    return;
}
/*******************************************
* Picture Residue
  Computes the residual data
*******************************************/
void picture_residual(
    uint8_t  *input,
    uint32_t  input_stride,
    uint8_t  *pred,
    uint32_t  pred_stride,
    int16_t  *residual,
    uint32_t  residual_stride,
    uint32_t  area_width,
    uint32_t  area_height)
{

    eb_vp9_residual_kernel_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][area_width >> 3](
        input,
        input_stride,
        pred,
        pred_stride,
        residual,
        residual_stride,
        area_width,
        area_height);

    return;
}

/*******************************************
 * Picture Residue 16bit input
   Computes the residual data
 *******************************************/
void picture_residual16bit(
    uint16_t *input,
    uint32_t  input_stride,
    uint16_t *pred,
    uint32_t  pred_stride,
    int16_t  *residual,
    uint32_t  residual_stride,
    uint32_t  area_width,
    uint32_t  area_height)
{

    eb_vp9_residual_kernel_func_ptr_array16_bit[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1](
        input,
        input_stride,
        pred,
        pred_stride,
        residual,
        residual_stride,
        area_width,
        area_height);

    return;
}

/*******************************************
 * Picture Full Distortion
 *  Used in the Full Mode Decision Loop
 *******************************************/

EbErrorType picture_full_distortion(
    EbPictureBufferDesc *coeff,
    uint32_t             coeff_origin_index,
    EbPictureBufferDesc *recon_coeff,
    uint32_t             recon_coeff_origin_index,
    uint32_t             area_size,
    uint64_t             distortion[DIST_CALC_TOTAL],
    uint32_t             eob)
{
    EbErrorType return_error = EB_ErrorNone;

    // TODO: due to a change in the full distortion kernel, the ASM versions have to be
    // updated so that they do not accumulate the input distortion into the output
    distortion[0]   = 0;
    distortion[1]   = 0;
    // Y
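    // Only the luma (Y) coefficient buffers are measured here; the [eob != 0]
    // index presumably selects the kernel variant for blocks that contain at
    // least one non-zero coefficient.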
    full_distortion_intrinsic_func_ptr_array[(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1][eob != 0][0][area_size >> 3](
        &(((int16_t*) coeff->buffer_y)[coeff_origin_index]),
        coeff->stride_y,
        &(((int16_t*) recon_coeff->buffer_y)[recon_coeff_origin_index]),
        recon_coeff->stride_y,
        distortion,
        area_size,
        area_size);

    return return_error;
}

void eb_vp9_extract_8bit_data(
    uint16_t *in16_bit_buffer,
    uint32_t  in_stride,
    uint8_t  *out8_bit_buffer,
    uint32_t  out8_stride,
    uint32_t  width,
    uint32_t  height
    )
{

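    // The first index is 1 only for SIMD-friendly dimensions (width a multiple
    // of 4, even height); other shapes presumably fall back to a more general
    // kernel.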
    unpack_8bit_func_ptr_array_16bit[((width & 3) == 0) && ((height & 1) == 0)][(eb_vp9_ASM_TYPES & PREAVX2_MASK) && 1](
        in16_bit_buffer,
        in_stride,
        out8_bit_buffer,
        out8_stride,
        width,
        height);
}
void eb_vp9_unpack_l0l1_avg(
    uint16_t *ref16_l0,
    uint32_t  ref_l0_stride,
    uint16_t *ref16_l1,
    uint32_t  ref_l1_stride,
    uint8_t  *dst_ptr,
    uint32_t  dst_stride,
    uint32_t  width,
    uint32_t  height)
{

    eb_vp9_unpack_avg_func_ptr_array[(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
        ref16_l0,
        ref_l0_stride,
        ref16_l1,
        ref_l1_stride,
        dst_ptr,
        dst_stride,
        width,
        height);

}
void eb_vp9_extract8_bitdata_safe_sub(
    uint16_t *in16_bit_buffer,
    uint32_t  in_stride,
    uint8_t  *out8_bit_buffer,
    uint32_t  out8_stride,
    uint32_t  width,
    uint32_t  height
    )
{

    unpack_8bit_safe_sub_func_ptr_array_16bit[(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
        in16_bit_buffer,
        in_stride,
        out8_bit_buffer,
        out8_stride,
        width,
        height
        );
}
void eb_vp9_unpack_l0l1_avg_safe_sub(
    uint16_t *ref16_l0,
    uint32_t  ref_l0_stride,
    uint16_t *ref16_l1,
    uint32_t  ref_l1_stride,
    uint8_t  *dst_ptr,
    uint32_t  dst_stride,
    uint32_t  width,
    uint32_t  height)
{
    //fix C

    eb_vp9_unpack_avg_safe_sub_func_ptr_array[(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
        ref16_l0,
        ref_l0_stride,
        ref16_l1,
        ref_l1_stride,
        dst_ptr,
        dst_stride,
        width,
        height);

}
void unpack_2d(
    uint16_t *in16_bit_buffer,
    uint32_t  in_stride,
    uint8_t  *out8_bit_buffer,
    uint32_t  out8_stride,
    uint8_t  *outn_bit_buffer,
    uint32_t  outn_stride,
    uint32_t  width,
    uint32_t  height
    )
{

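    // Unpacks each 16-bit sample into an 8-bit plane (out8_bit_buffer) plus a
    // remaining n-bit plane (outn_bit_buffer); as above, the first index is 1
    // only when width is a multiple of 4 and height is even.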
    unpack2_d_func_ptr_array_16_bit[((width & 3) == 0) && ((height & 1) == 0)][(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
        in16_bit_buffer,
        in_stride,
        out8_bit_buffer,
        outn_bit_buffer,
        out8_stride,
        outn_stride,
        width,
        height);
}

void pack_2d_src(
    uint8_t  *in8_bit_buffer,
    uint32_t  in8_stride,
    uint8_t  *inn_bit_buffer,
    uint32_t  inn_stride,
    uint16_t *out16_bit_buffer,
    uint32_t  out_stride,
    uint32_t  width,
    uint32_t  height
    )
{

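    // Inverse of the unpack above: combines the 8-bit plane and the n-bit
    // plane back into a single 16-bit-per-sample buffer (out16_bit_buffer).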
    pack2_d_func_ptr_array_16_bit_src[((width & 3) == 0) && ((height & 1) == 0)][(eb_vp9_ASM_TYPES & AVX2_MASK) && 1](
        in8_bit_buffer,
        in8_stride,
        inn_bit_buffer,
        out16_bit_buffer,
        inn_stride,
        out_stride,
        width,
        height);
}

void compressed_pack_blk(
    uint8_t  *in8_bit_buffer,
    uint32_t  in8_stride,
    uint8_t  *inn_bit_buffer,
    uint32_t  inn_stride,
    uint16_t *out16_bit_buffer,
    uint32_t  out_stride,
    uint32_t  width,
    uint32_t  height
    )
{

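    // The AVX2 entry is only considered for the block widths it supports
    // (8, 16, 32 or 64); any other width falls back to the ASM_NON_AVX2 kernel.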
    compressed_pack_func_ptr_array[((width == 64 || width == 32 || width == 16 || width == 8) ? ((eb_vp9_ASM_TYPES & AVX2_MASK) && 1) : ASM_NON_AVX2)](
        in8_bit_buffer,
        in8_stride,
        inn_bit_buffer,
        out16_bit_buffer,
        inn_stride,
        out_stride,
        width,
        height);

}

/*******************************************
 * eb_vp9_memset16bit
 *******************************************/
void eb_vp9_memset16bit(
    uint16_t *in_ptr,
    uint16_t  value,
    uint64_t  num_of_elements)
{
    uint64_t i;

    for (i = 0; i < num_of_elements; i++) {
        in_ptr[i] = value;
    }
}
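/*
 * Example (illustrative only; buffer names are hypothetical): zero an
 * n-element 16-bit residual buffer before accumulation:
 *
 *     eb_vp9_memset16bit((uint16_t *)residual_buffer, 0, (uint64_t)n);
 */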
/*******************************************
 * eb_vp9_memcpy16bit
 *******************************************/
void eb_vp9_memcpy16bit(
    uint16_t *out_ptr,
    uint16_t *in_ptr,
    uint64_t  num_of_elements)
{
    uint64_t i;

    for (i = 0; i < num_of_elements; i++) {
        out_ptr[i] = in_ptr[i];
    }
}

int32_t sum_residual(
    int16_t *in_ptr,
    uint32_t size,
    uint32_t stride_in)
{

    int32_t  sum_block = 0;
    uint32_t i, j;

    for (j = 0; j < size; j++)
        for (i = 0; i < size; i++)
            sum_block += in_ptr[j * stride_in + i];

    return sum_block;

}

void memset_16bit_block(
    int16_t *in_ptr,
    uint32_t stride_in,
    uint32_t size,
    int16_t  value)
{

    uint32_t i;
    for (i = 0; i < size; i++)
        eb_vp9_memset16bit((uint16_t *)in_ptr + i * stride_in, value, size);

}