1 // This file is generated. Do not edit.
2 #ifndef AV1_RTCD_H_
3 #define AV1_RTCD_H_
4
/* RTCD_EXTERN expands to nothing when RTCD_C is defined and to `extern`
 * otherwise. The function pointers declared with it below are therefore
 * plain `extern` declarations everywhere except in a translation unit that
 * defines RTCD_C before including this header. */
5 #ifdef RTCD_C
6 #define RTCD_EXTERN
7 #else
8 #define RTCD_EXTERN extern
9 #endif
10
11 /*
12 * AV1
13 */
14
15 #include "aom/aom_integer.h"
16 #include "aom_dsp/txfm_common.h"
17 #include "av1/common/av1_txfm.h"
18 #include "av1/common/common.h"
19 #include "av1/common/convolve.h"
20 #include "av1/common/enums.h"
21 #include "av1/common/filter.h"
22 #include "av1/common/odintrin.h"
23 #include "av1/common/quant_common.h"
24 #include "av1/common/restoration.h"
25
/* Incomplete struct declarations. The prototypes visible in this part of the
 * header that use macroblockd, macroblock, aom_variance_vtable,
 * search_site_config and yv12_buffer_config only take pointers to them;
 * txfm_param and NN_CONFIG are not referenced in this part of the file. */
26 struct macroblockd;
27
28 /* Encoder forward decls */
29 struct macroblock;
30 struct txfm_param;
31 struct aom_variance_vtable;
32 struct search_site_config;
33 struct yv12_buffer_config;
34 struct NN_CONFIG;
35 typedef struct NN_CONFIG NN_CONFIG;
36
/* Anonymous enums passed through UENUM1BYTE, a macro that is not defined in
 * this file (presumably supplied by one of the headers included above -
 * TODO confirm). ACTIVATION is later used as a parameter type by
 * av1_cnn_activate_c. The LOSS enum and the NN_CONFIG_V2 / FC_LAYER forward
 * declarations are only compiled when CONFIG_NN_V2 evaluates non-zero. */
37 enum { NONE, RELU, SOFTSIGN, SIGMOID } UENUM1BYTE(ACTIVATION);
38 #if CONFIG_NN_V2
39 enum { SOFTMAX_CROSS_ENTROPY } UENUM1BYTE(LOSS);
40 struct NN_CONFIG_V2;
41 typedef struct NN_CONFIG_V2 NN_CONFIG_V2;
42 struct FC_LAYER;
43 typedef struct FC_LAYER FC_LAYER;
44 #endif // CONFIG_NN_V2
45
/* Forward declarations plus same-name typedefs for the CNN structs.
 * CNN_CONFIG, CNN_LAYER_CONFIG, CNN_THREAD_DATA and CNN_MULTI_OUT appear as
 * pointer parameters of the av1_cnn_* prototypes below; CNN_BRANCH_CONFIG is
 * not referenced in this part of the file. */
46 struct CNN_CONFIG;
47 typedef struct CNN_CONFIG CNN_CONFIG;
48 struct CNN_LAYER_CONFIG;
49 typedef struct CNN_LAYER_CONFIG CNN_LAYER_CONFIG;
50 struct CNN_THREAD_DATA;
51 typedef struct CNN_THREAD_DATA CNN_THREAD_DATA;
52 struct CNN_BRANCH_CONFIG;
53 typedef struct CNN_BRANCH_CONFIG CNN_BRANCH_CONFIG;
54 struct CNN_MULTI_OUT;
55 typedef struct CNN_MULTI_OUT CNN_MULTI_OUT;
56
57 /* Function pointers returned by CfL functions */
/* Pointer type for a void function taking a read-only uint8_t input buffer,
 * its stride, and a uint16_t output buffer named output_q3. */
58 typedef void (*cfl_subsample_lbd_fn)(const uint8_t* input,
59 int input_stride,
60 uint16_t* output_q3);
61
/* Pointer types that exist only when CONFIG_AV1_HIGHBITDEPTH evaluates
 * non-zero. cfl_subsample_hbd_fn mirrors cfl_subsample_lbd_fn with a
 * uint16_t input; cfl_predict_hbd_fn mirrors cfl_predict_lbd_fn with a
 * uint16_t destination and an extra trailing `bd` argument. */
62 #if CONFIG_AV1_HIGHBITDEPTH
63 typedef void (*cfl_subsample_hbd_fn)(const uint16_t* input,
64 int input_stride,
65 uint16_t* output_q3);
66
67 typedef void (*cfl_predict_hbd_fn)(const int16_t* src,
68 uint16_t* dst,
69 int dst_stride,
70 int alpha_q3,
71 int bd);
72 #endif
73
/* Pointer type for a void function reading uint16_t `src` and writing
 * int16_t `dst`; no size or stride is passed. */
74 typedef void (*cfl_subtract_average_fn)(const uint16_t* src, int16_t* dst);
75
/* Pointer type for a void function taking a read-only int16_t source, a
 * uint8_t destination with its stride, and an integer named alpha_q3. */
76 typedef void (*cfl_predict_lbd_fn)(const int16_t* src,
77 uint8_t* dst,
78 int dst_stride,
79 int alpha_q3);
80
81 #ifdef __cplusplus
82 extern "C" {
83 #endif
84
/* av1_apply_selfguided_restoration: identical prototypes for the `_c`,
 * `_sse4_1` and `_avx2` variants, followed by the function pointer
 * `av1_apply_selfguided_restoration` of the same type. RTCD_EXTERN makes the
 * pointer an `extern` declaration unless RTCD_C is defined.
 * NOTE(review): the suffixes presumably name the plain-C and SIMD builds of
 * the routine; which one the pointer is set to is decided outside this view. */
85 void av1_apply_selfguided_restoration_c(const uint8_t* dat,
86 int width,
87 int height,
88 int stride,
89 int eps,
90 const int* xqd,
91 uint8_t* dst,
92 int dst_stride,
93 int32_t* tmpbuf,
94 int bit_depth,
95 int highbd);
96 void av1_apply_selfguided_restoration_sse4_1(const uint8_t* dat,
97 int width,
98 int height,
99 int stride,
100 int eps,
101 const int* xqd,
102 uint8_t* dst,
103 int dst_stride,
104 int32_t* tmpbuf,
105 int bit_depth,
106 int highbd);
107 void av1_apply_selfguided_restoration_avx2(const uint8_t* dat,
108 int width,
109 int height,
110 int stride,
111 int eps,
112 const int* xqd,
113 uint8_t* dst,
114 int dst_stride,
115 int32_t* tmpbuf,
116 int bit_depth,
117 int highbd);
118 RTCD_EXTERN void (*av1_apply_selfguided_restoration)(const uint8_t* dat,
119 int width,
120 int height,
121 int stride,
122 int eps,
123 const int* xqd,
124 uint8_t* dst,
125 int dst_stride,
126 int32_t* tmpbuf,
127 int bit_depth,
128 int highbd);
129
/* av1_apply_temporal_filter_planewise: identical prototypes for the `_c`,
 * `_sse2` and `_avx2` variants, followed by the RTCD_EXTERN function pointer
 * of the same type. All inputs are const-qualified; the only writable
 * arguments are `accum` (uint32_t*) and `count` (uint16_t*).
 * NOTE(review): pointer assignment happens outside this view. */
130 void av1_apply_temporal_filter_planewise_c(
131 const struct yv12_buffer_config* ref_frame,
132 const struct macroblockd* mbd,
133 const BLOCK_SIZE block_size,
134 const int mb_row,
135 const int mb_col,
136 const int num_planes,
137 const double* noise_levels,
138 const uint8_t* pred,
139 uint32_t* accum,
140 uint16_t* count);
141 void av1_apply_temporal_filter_planewise_sse2(
142 const struct yv12_buffer_config* ref_frame,
143 const struct macroblockd* mbd,
144 const BLOCK_SIZE block_size,
145 const int mb_row,
146 const int mb_col,
147 const int num_planes,
148 const double* noise_levels,
149 const uint8_t* pred,
150 uint32_t* accum,
151 uint16_t* count);
152 void av1_apply_temporal_filter_planewise_avx2(
153 const struct yv12_buffer_config* ref_frame,
154 const struct macroblockd* mbd,
155 const BLOCK_SIZE block_size,
156 const int mb_row,
157 const int mb_col,
158 const int num_planes,
159 const double* noise_levels,
160 const uint8_t* pred,
161 uint32_t* accum,
162 uint16_t* count);
163 RTCD_EXTERN void (*av1_apply_temporal_filter_planewise)(
164 const struct yv12_buffer_config* ref_frame,
165 const struct macroblockd* mbd,
166 const BLOCK_SIZE block_size,
167 const int mb_row,
168 const int mb_col,
169 const int num_planes,
170 const double* noise_levels,
171 const uint8_t* pred,
172 uint32_t* accum,
173 uint16_t* count);
174
/* av1_apply_temporal_filter_yuv: prototypes for the `_c` and `_sse4_1`
 * variants (no other variant is declared), followed by the RTCD_EXTERN
 * function pointer of the same type. Compared with the planewise filter
 * above, `noise_levels` is replaced by `strength`, `use_subblock` and
 * `subblock_filter_weights`; `accum` and `count` remain the only writable
 * arguments. */
175 void av1_apply_temporal_filter_yuv_c(const struct yv12_buffer_config* ref_frame,
176 const struct macroblockd* mbd,
177 const BLOCK_SIZE block_size,
178 const int mb_row,
179 const int mb_col,
180 const int num_planes,
181 const int strength,
182 const int use_subblock,
183 const int* subblock_filter_weights,
184 const uint8_t* pred,
185 uint32_t* accum,
186 uint16_t* count);
187 void av1_apply_temporal_filter_yuv_sse4_1(
188 const struct yv12_buffer_config* ref_frame,
189 const struct macroblockd* mbd,
190 const BLOCK_SIZE block_size,
191 const int mb_row,
192 const int mb_col,
193 const int num_planes,
194 const int strength,
195 const int use_subblock,
196 const int* subblock_filter_weights,
197 const uint8_t* pred,
198 uint32_t* accum,
199 uint16_t* count);
200 RTCD_EXTERN void (*av1_apply_temporal_filter_yuv)(
201 const struct yv12_buffer_config* ref_frame,
202 const struct macroblockd* mbd,
203 const BLOCK_SIZE block_size,
204 const int mb_row,
205 const int mb_col,
206 const int num_planes,
207 const int strength,
208 const int use_subblock,
209 const int* subblock_filter_weights,
210 const uint8_t* pred,
211 uint32_t* accum,
212 uint16_t* count);
213
/* av1_block_error: prototypes for the `_c`, `_sse2` and `_avx2` variants and
 * the RTCD_EXTERN function pointer of the same type. Each returns int64_t,
 * reads two tran_low_t arrays, and has one writable int64_t out-parameter
 * `ssz`.
 * NOTE(review): what the return value and `ssz` hold is not shown here. */
214 int64_t av1_block_error_c(const tran_low_t* coeff,
215 const tran_low_t* dqcoeff,
216 intptr_t block_size,
217 int64_t* ssz);
218 int64_t av1_block_error_sse2(const tran_low_t* coeff,
219 const tran_low_t* dqcoeff,
220 intptr_t block_size,
221 int64_t* ssz);
222 int64_t av1_block_error_avx2(const tran_low_t* coeff,
223 const tran_low_t* dqcoeff,
224 intptr_t block_size,
225 int64_t* ssz);
226 RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t* coeff,
227 const tran_low_t* dqcoeff,
228 intptr_t block_size,
229 int64_t* ssz);
230
/* av1_block_error_lp: prototypes for the `_c` and `_avx2` variants and the
 * RTCD_EXTERN function pointer of the same type. Unlike av1_block_error the
 * inputs are int16_t arrays and there is no `ssz` out-parameter. */
231 int64_t av1_block_error_lp_c(const int16_t* coeff,
232 const int16_t* dqcoeff,
233 intptr_t block_size);
234 int64_t av1_block_error_lp_avx2(const int16_t* coeff,
235 const int16_t* dqcoeff,
236 intptr_t block_size);
237 RTCD_EXTERN int64_t (*av1_block_error_lp)(const int16_t* coeff,
238 const int16_t* dqcoeff,
239 intptr_t block_size);
240
/* av1_build_compound_diffwtd_mask: identical prototypes for the `_c`,
 * `_sse4_1` and `_avx2` variants and the RTCD_EXTERN function pointer of the
 * same type. `mask` is the only writable buffer; the two sources are
 * read-only uint8_t buffers with separate strides. Note the size arguments
 * are ordered `h` then `w`. */
241 void av1_build_compound_diffwtd_mask_c(uint8_t* mask,
242 DIFFWTD_MASK_TYPE mask_type,
243 const uint8_t* src0,
244 int src0_stride,
245 const uint8_t* src1,
246 int src1_stride,
247 int h,
248 int w);
249 void av1_build_compound_diffwtd_mask_sse4_1(uint8_t* mask,
250 DIFFWTD_MASK_TYPE mask_type,
251 const uint8_t* src0,
252 int src0_stride,
253 const uint8_t* src1,
254 int src1_stride,
255 int h,
256 int w);
257 void av1_build_compound_diffwtd_mask_avx2(uint8_t* mask,
258 DIFFWTD_MASK_TYPE mask_type,
259 const uint8_t* src0,
260 int src0_stride,
261 const uint8_t* src1,
262 int src1_stride,
263 int h,
264 int w);
265 RTCD_EXTERN void (*av1_build_compound_diffwtd_mask)(uint8_t* mask,
266 DIFFWTD_MASK_TYPE mask_type,
267 const uint8_t* src0,
268 int src0_stride,
269 const uint8_t* src1,
270 int src1_stride,
271 int h,
272 int w);
273
/* av1_build_compound_diffwtd_mask_d16: identical prototypes for the `_c`,
 * `_sse4_1` and `_avx2` variants and the RTCD_EXTERN function pointer of the
 * same type. Differs from av1_build_compound_diffwtd_mask in that the
 * sources are CONV_BUF_TYPE buffers and two trailing arguments are added:
 * a non-const `conv_params` pointer and `bd`. */
274 void av1_build_compound_diffwtd_mask_d16_c(uint8_t* mask,
275 DIFFWTD_MASK_TYPE mask_type,
276 const CONV_BUF_TYPE* src0,
277 int src0_stride,
278 const CONV_BUF_TYPE* src1,
279 int src1_stride,
280 int h,
281 int w,
282 ConvolveParams* conv_params,
283 int bd);
284 void av1_build_compound_diffwtd_mask_d16_sse4_1(uint8_t* mask,
285 DIFFWTD_MASK_TYPE mask_type,
286 const CONV_BUF_TYPE* src0,
287 int src0_stride,
288 const CONV_BUF_TYPE* src1,
289 int src1_stride,
290 int h,
291 int w,
292 ConvolveParams* conv_params,
293 int bd);
294 void av1_build_compound_diffwtd_mask_d16_avx2(uint8_t* mask,
295 DIFFWTD_MASK_TYPE mask_type,
296 const CONV_BUF_TYPE* src0,
297 int src0_stride,
298 const CONV_BUF_TYPE* src1,
299 int src1_stride,
300 int h,
301 int w,
302 ConvolveParams* conv_params,
303 int bd);
304 RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(
305 uint8_t* mask,
306 DIFFWTD_MASK_TYPE mask_type,
307 const CONV_BUF_TYPE* src0,
308 int src0_stride,
309 const CONV_BUF_TYPE* src1,
310 int src1_stride,
311 int h,
312 int w,
313 ConvolveParams* conv_params,
314 int bd);
315
/* av1_build_compound_diffwtd_mask_highbd: identical prototypes for the `_c`,
 * `_ssse3` and `_avx2` variants and the RTCD_EXTERN function pointer of the
 * same type. Same parameter list as av1_build_compound_diffwtd_mask plus a
 * trailing `bd`.
 * NOTE(review): the sources are typed uint8_t* even in this "highbd" entry;
 * how they are interpreted is up to the implementations - confirm there. */
316 void av1_build_compound_diffwtd_mask_highbd_c(uint8_t* mask,
317 DIFFWTD_MASK_TYPE mask_type,
318 const uint8_t* src0,
319 int src0_stride,
320 const uint8_t* src1,
321 int src1_stride,
322 int h,
323 int w,
324 int bd);
325 void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t* mask,
326 DIFFWTD_MASK_TYPE mask_type,
327 const uint8_t* src0,
328 int src0_stride,
329 const uint8_t* src1,
330 int src1_stride,
331 int h,
332 int w,
333 int bd);
334 void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t* mask,
335 DIFFWTD_MASK_TYPE mask_type,
336 const uint8_t* src0,
337 int src0_stride,
338 const uint8_t* src1,
339 int src1_stride,
340 int h,
341 int w,
342 int bd);
343 RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(
344 uint8_t* mask,
345 DIFFWTD_MASK_TYPE mask_type,
346 const uint8_t* src0,
347 int src0_stride,
348 const uint8_t* src1,
349 int src1_stride,
350 int h,
351 int w,
352 int bd);
353
/* av1_calc_frame_error: identical prototypes for the `_c`, `_sse2` and
 * `_avx2` variants and the RTCD_EXTERN function pointer of the same type.
 * Returns int64_t; both buffers (`ref`, `dst`) are read-only, so the
 * function has no output other than its return value. */
354 int64_t av1_calc_frame_error_c(const uint8_t* const ref,
355 int stride,
356 const uint8_t* const dst,
357 int p_width,
358 int p_height,
359 int p_stride);
360 int64_t av1_calc_frame_error_sse2(const uint8_t* const ref,
361 int stride,
362 const uint8_t* const dst,
363 int p_width,
364 int p_height,
365 int p_stride);
366 int64_t av1_calc_frame_error_avx2(const uint8_t* const ref,
367 int stride,
368 const uint8_t* const dst,
369 int p_width,
370 int p_height,
371 int p_stride);
372 RTCD_EXTERN int64_t (*av1_calc_frame_error)(const uint8_t* const ref,
373 int stride,
374 const uint8_t* const dst,
375 int p_width,
376 int p_height,
377 int p_stride);
378
/* av1_calc_proj_params: prototypes for the `_c` and `_avx2` variants and the
 * RTCD_EXTERN function pointer of the same type. `H` (declared int64_t[2][2])
 * and `C` (declared int64_t[2]) are non-const array parameters, i.e. they
 * decay to writable pointers; `flt0` and `flt1` are also non-const.
 * NOTE(review): which of these the implementations actually write is not
 * visible here. */
379 void av1_calc_proj_params_c(const uint8_t* src8,
380 int width,
381 int height,
382 int src_stride,
383 const uint8_t* dat8,
384 int dat_stride,
385 int32_t* flt0,
386 int flt0_stride,
387 int32_t* flt1,
388 int flt1_stride,
389 int64_t H[2][2],
390 int64_t C[2],
391 const sgr_params_type* params);
392 void av1_calc_proj_params_avx2(const uint8_t* src8,
393 int width,
394 int height,
395 int src_stride,
396 const uint8_t* dat8,
397 int dat_stride,
398 int32_t* flt0,
399 int flt0_stride,
400 int32_t* flt1,
401 int flt1_stride,
402 int64_t H[2][2],
403 int64_t C[2],
404 const sgr_params_type* params);
405 RTCD_EXTERN void (*av1_calc_proj_params)(const uint8_t* src8,
406 int width,
407 int height,
408 int src_stride,
409 const uint8_t* dat8,
410 int dat_stride,
411 int32_t* flt0,
412 int flt0_stride,
413 int32_t* flt1,
414 int flt1_stride,
415 int64_t H[2][2],
416 int64_t C[2],
417 const sgr_params_type* params);
418
/* av1_cnn_activate: only a `_c` prototype is declared, and the unsuffixed
 * name is a macro alias for it, so no function pointer is involved.
 * `input` is a non-const float** and `layer_activation` uses the ACTIVATION
 * enum type declared near the top of this header. */
419 void av1_cnn_activate_c(float** input,
420 int channels,
421 int width,
422 int height,
423 int stride,
424 ACTIVATION layer_activation);
425 #define av1_cnn_activate av1_cnn_activate_c
426
/* av1_cnn_add: only a `_c` prototype is declared; the unsuffixed name is a
 * macro alias for it. `input` is writable (float**), `add` is declared
 * const float**. */
427 void av1_cnn_add_c(float** input,
428 int channels,
429 int width,
430 int height,
431 int stride,
432 const float** add);
433 #define av1_cnn_add av1_cnn_add_c
434
/* av1_cnn_batchnorm: only a `_c` prototype is declared; the unsuffixed name
 * is a macro alias for it. `image` is the only non-const buffer; `gamma`,
 * `beta`, `mean` and `std` are read-only float arrays. */
435 void av1_cnn_batchnorm_c(float** image,
436 int channels,
437 int width,
438 int height,
439 int stride,
440 const float* gamma,
441 const float* beta,
442 const float* mean,
443 const float* std);
444 #define av1_cnn_batchnorm av1_cnn_batchnorm_c
445
/* av1_cnn_convolve: only a `_c` prototype is declared; the unsuffixed name
 * is a macro alias for it. Takes a const input, a CNN_LAYER_CONFIG pointer,
 * a writable `output`, and two trailing integers `start_idx` and `step`.
 * NOTE(review): the meaning of start_idx/step is not shown here. */
446 void av1_cnn_convolve_c(const float** input,
447 int in_width,
448 int in_height,
449 int in_stride,
450 const CNN_LAYER_CONFIG* layer_config,
451 float** output,
452 int out_stride,
453 int start_idx,
454 int step);
455 #define av1_cnn_convolve av1_cnn_convolve_c
456
/* av1_cnn_deconvolve: only a `_c` prototype is declared; the unsuffixed name
 * is a macro alias for it. Same leading parameters as av1_cnn_convolve_c but
 * without the trailing `start_idx` and `step`. */
457 void av1_cnn_deconvolve_c(const float** input,
458 int in_width,
459 int in_height,
460 int in_stride,
461 const CNN_LAYER_CONFIG* layer_config,
462 float** output,
463 int out_stride);
464 #define av1_cnn_deconvolve av1_cnn_deconvolve_c
465
/* av1_cnn_predict: only a `_c` prototype is declared; the unsuffixed name is
 * a macro alias for it. The configuration and thread data are passed as
 * const pointers; `output_struct` (CNN_MULTI_OUT*) is the only non-const
 * argument. */
466 void av1_cnn_predict_c(const float** input,
467 int in_width,
468 int in_height,
469 int in_stride,
470 const CNN_CONFIG* cnn_config,
471 const CNN_THREAD_DATA* thread_data,
472 CNN_MULTI_OUT* output_struct);
473 #define av1_cnn_predict av1_cnn_predict_c
474
/* av1_compute_cross_correlation: identical prototypes for the `_c`,
 * `_sse4_1` and `_avx2` variants and the RTCD_EXTERN function pointer of the
 * same type. Returns double. Each image is described by a pointer, a stride
 * and an (x, y) pair; the image pointers are not const-qualified.
 * NOTE(review): whether the images are modified is not visible here. */
475 double av1_compute_cross_correlation_c(unsigned char* im1,
476 int stride1,
477 int x1,
478 int y1,
479 unsigned char* im2,
480 int stride2,
481 int x2,
482 int y2);
483 double av1_compute_cross_correlation_sse4_1(unsigned char* im1,
484 int stride1,
485 int x1,
486 int y1,
487 unsigned char* im2,
488 int stride2,
489 int x2,
490 int y2);
491 double av1_compute_cross_correlation_avx2(unsigned char* im1,
492 int stride1,
493 int x1,
494 int y1,
495 unsigned char* im2,
496 int stride2,
497 int x2,
498 int y2);
499 RTCD_EXTERN double (*av1_compute_cross_correlation)(unsigned char* im1,
500 int stride1,
501 int x1,
502 int y1,
503 unsigned char* im2,
504 int stride2,
505 int x2,
506 int y2);
507
/* av1_compute_stats: identical prototypes for the `_c`, `_sse4_1` and
 * `_avx2` variants and the RTCD_EXTERN function pointer of the same type.
 * `dgd8` and `src8` are read-only; `M` and `H` (int64_t*) are the only
 * writable arguments. The region is given as h_start/h_end and
 * v_start/v_end.
 * NOTE(review): required sizes of M and H are not shown here. */
508 void av1_compute_stats_c(int wiener_win,
509 const uint8_t* dgd8,
510 const uint8_t* src8,
511 int h_start,
512 int h_end,
513 int v_start,
514 int v_end,
515 int dgd_stride,
516 int src_stride,
517 int64_t* M,
518 int64_t* H);
519 void av1_compute_stats_sse4_1(int wiener_win,
520 const uint8_t* dgd8,
521 const uint8_t* src8,
522 int h_start,
523 int h_end,
524 int v_start,
525 int v_end,
526 int dgd_stride,
527 int src_stride,
528 int64_t* M,
529 int64_t* H);
530 void av1_compute_stats_avx2(int wiener_win,
531 const uint8_t* dgd8,
532 const uint8_t* src8,
533 int h_start,
534 int h_end,
535 int v_start,
536 int v_end,
537 int dgd_stride,
538 int src_stride,
539 int64_t* M,
540 int64_t* H);
541 RTCD_EXTERN void (*av1_compute_stats)(int wiener_win,
542 const uint8_t* dgd8,
543 const uint8_t* src8,
544 int h_start,
545 int h_end,
546 int v_start,
547 int v_end,
548 int dgd_stride,
549 int src_stride,
550 int64_t* M,
551 int64_t* H);
552
/* av1_compute_stats_highbd: identical prototypes for the `_c`, `_sse4_1` and
 * `_avx2` variants and the RTCD_EXTERN function pointer of the same type.
 * The parameter list equals av1_compute_stats with one extra trailing
 * argument, `bit_depth` of type aom_bit_depth_t. */
553 void av1_compute_stats_highbd_c(int wiener_win,
554 const uint8_t* dgd8,
555 const uint8_t* src8,
556 int h_start,
557 int h_end,
558 int v_start,
559 int v_end,
560 int dgd_stride,
561 int src_stride,
562 int64_t* M,
563 int64_t* H,
564 aom_bit_depth_t bit_depth);
565 void av1_compute_stats_highbd_sse4_1(int wiener_win,
566 const uint8_t* dgd8,
567 const uint8_t* src8,
568 int h_start,
569 int h_end,
570 int v_start,
571 int v_end,
572 int dgd_stride,
573 int src_stride,
574 int64_t* M,
575 int64_t* H,
576 aom_bit_depth_t bit_depth);
577 void av1_compute_stats_highbd_avx2(int wiener_win,
578 const uint8_t* dgd8,
579 const uint8_t* src8,
580 int h_start,
581 int h_end,
582 int v_start,
583 int v_end,
584 int dgd_stride,
585 int src_stride,
586 int64_t* M,
587 int64_t* H,
588 aom_bit_depth_t bit_depth);
589 RTCD_EXTERN void (*av1_compute_stats_highbd)(int wiener_win,
590 const uint8_t* dgd8,
591 const uint8_t* src8,
592 int h_start,
593 int h_end,
594 int v_start,
595 int v_end,
596 int dgd_stride,
597 int src_stride,
598 int64_t* M,
599 int64_t* H,
600 aom_bit_depth_t bit_depth);
601
/* av1_convolve_2d_copy_sr: identical prototypes for the `_c`, `_sse2` and
 * `_avx2` variants and the RTCD_EXTERN function pointer of the same type.
 * Uses the 11-argument convolve signature shared by the av1_convolve_*_sr
 * and av1_dist_wtd_convolve_* entries in this header: src/stride, dst/stride,
 * w, h, two filter-parameter pointers, two sub-pixel offsets and a non-const
 * ConvolveParams pointer. */
602 void av1_convolve_2d_copy_sr_c(const uint8_t* src,
603 int src_stride,
604 uint8_t* dst,
605 int dst_stride,
606 int w,
607 int h,
608 const InterpFilterParams* filter_params_x,
609 const InterpFilterParams* filter_params_y,
610 const int subpel_x_qn,
611 const int subpel_y_qn,
612 ConvolveParams* conv_params);
613 void av1_convolve_2d_copy_sr_sse2(const uint8_t* src,
614 int src_stride,
615 uint8_t* dst,
616 int dst_stride,
617 int w,
618 int h,
619 const InterpFilterParams* filter_params_x,
620 const InterpFilterParams* filter_params_y,
621 const int subpel_x_qn,
622 const int subpel_y_qn,
623 ConvolveParams* conv_params);
624 void av1_convolve_2d_copy_sr_avx2(const uint8_t* src,
625 int src_stride,
626 uint8_t* dst,
627 int dst_stride,
628 int w,
629 int h,
630 const InterpFilterParams* filter_params_x,
631 const InterpFilterParams* filter_params_y,
632 const int subpel_x_qn,
633 const int subpel_y_qn,
634 ConvolveParams* conv_params);
635 RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(
636 const uint8_t* src,
637 int src_stride,
638 uint8_t* dst,
639 int dst_stride,
640 int w,
641 int h,
642 const InterpFilterParams* filter_params_x,
643 const InterpFilterParams* filter_params_y,
644 const int subpel_x_qn,
645 const int subpel_y_qn,
646 ConvolveParams* conv_params);
647
/* av1_convolve_2d_scale: prototypes for the `_c` and `_sse4_1` variants (no
 * other variant is declared) and the RTCD_EXTERN function pointer of the same
 * type. Extends the common convolve signature with `x_step_qn` and
 * `y_step_qn`, each placed right after the matching sub-pixel offset. */
648 void av1_convolve_2d_scale_c(const uint8_t* src,
649 int src_stride,
650 uint8_t* dst,
651 int dst_stride,
652 int w,
653 int h,
654 const InterpFilterParams* filter_params_x,
655 const InterpFilterParams* filter_params_y,
656 const int subpel_x_qn,
657 const int x_step_qn,
658 const int subpel_y_qn,
659 const int y_step_qn,
660 ConvolveParams* conv_params);
661 void av1_convolve_2d_scale_sse4_1(const uint8_t* src,
662 int src_stride,
663 uint8_t* dst,
664 int dst_stride,
665 int w,
666 int h,
667 const InterpFilterParams* filter_params_x,
668 const InterpFilterParams* filter_params_y,
669 const int subpel_x_qn,
670 const int x_step_qn,
671 const int subpel_y_qn,
672 const int y_step_qn,
673 ConvolveParams* conv_params);
674 RTCD_EXTERN void (*av1_convolve_2d_scale)(
675 const uint8_t* src,
676 int src_stride,
677 uint8_t* dst,
678 int dst_stride,
679 int w,
680 int h,
681 const InterpFilterParams* filter_params_x,
682 const InterpFilterParams* filter_params_y,
683 const int subpel_x_qn,
684 const int x_step_qn,
685 const int subpel_y_qn,
686 const int y_step_qn,
687 ConvolveParams* conv_params);
688
/* av1_convolve_2d_sr: identical prototypes for the `_c`, `_sse2` and `_avx2`
 * variants and the RTCD_EXTERN function pointer of the same type. The
 * parameter list matches av1_convolve_2d_copy_sr exactly. */
689 void av1_convolve_2d_sr_c(const uint8_t* src,
690 int src_stride,
691 uint8_t* dst,
692 int dst_stride,
693 int w,
694 int h,
695 const InterpFilterParams* filter_params_x,
696 const InterpFilterParams* filter_params_y,
697 const int subpel_x_qn,
698 const int subpel_y_qn,
699 ConvolveParams* conv_params);
700 void av1_convolve_2d_sr_sse2(const uint8_t* src,
701 int src_stride,
702 uint8_t* dst,
703 int dst_stride,
704 int w,
705 int h,
706 const InterpFilterParams* filter_params_x,
707 const InterpFilterParams* filter_params_y,
708 const int subpel_x_qn,
709 const int subpel_y_qn,
710 ConvolveParams* conv_params);
711 void av1_convolve_2d_sr_avx2(const uint8_t* src,
712 int src_stride,
713 uint8_t* dst,
714 int dst_stride,
715 int w,
716 int h,
717 const InterpFilterParams* filter_params_x,
718 const InterpFilterParams* filter_params_y,
719 const int subpel_x_qn,
720 const int subpel_y_qn,
721 ConvolveParams* conv_params);
722 RTCD_EXTERN void (*av1_convolve_2d_sr)(
723 const uint8_t* src,
724 int src_stride,
725 uint8_t* dst,
726 int dst_stride,
727 int w,
728 int h,
729 const InterpFilterParams* filter_params_x,
730 const InterpFilterParams* filter_params_y,
731 const int subpel_x_qn,
732 const int subpel_y_qn,
733 ConvolveParams* conv_params);
734
/* av1_convolve_horiz_rs: prototypes for the `_c` and `_sse4_1` variants and
 * the RTCD_EXTERN function pointer of the same type. Unlike the other
 * convolve entries it takes a raw int16_t filter table (`x_filters`) plus
 * `x0_qn` and `x_step_qn`, and has no ConvolveParams argument. */
735 void av1_convolve_horiz_rs_c(const uint8_t* src,
736 int src_stride,
737 uint8_t* dst,
738 int dst_stride,
739 int w,
740 int h,
741 const int16_t* x_filters,
742 int x0_qn,
743 int x_step_qn);
744 void av1_convolve_horiz_rs_sse4_1(const uint8_t* src,
745 int src_stride,
746 uint8_t* dst,
747 int dst_stride,
748 int w,
749 int h,
750 const int16_t* x_filters,
751 int x0_qn,
752 int x_step_qn);
753 RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t* src,
754 int src_stride,
755 uint8_t* dst,
756 int dst_stride,
757 int w,
758 int h,
759 const int16_t* x_filters,
760 int x0_qn,
761 int x_step_qn);
762
/* av1_convolve_x_sr: identical prototypes for the `_c`, `_sse2` and `_avx2`
 * variants and the RTCD_EXTERN function pointer of the same type. The
 * signature still carries both the x and y filter/sub-pixel arguments, the
 * same as av1_convolve_2d_sr.
 * NOTE(review): presumably the y arguments are unused by an x-only filter -
 * confirm in the implementations. */
763 void av1_convolve_x_sr_c(const uint8_t* src,
764 int src_stride,
765 uint8_t* dst,
766 int dst_stride,
767 int w,
768 int h,
769 const InterpFilterParams* filter_params_x,
770 const InterpFilterParams* filter_params_y,
771 const int subpel_x_qn,
772 const int subpel_y_qn,
773 ConvolveParams* conv_params);
774 void av1_convolve_x_sr_sse2(const uint8_t* src,
775 int src_stride,
776 uint8_t* dst,
777 int dst_stride,
778 int w,
779 int h,
780 const InterpFilterParams* filter_params_x,
781 const InterpFilterParams* filter_params_y,
782 const int subpel_x_qn,
783 const int subpel_y_qn,
784 ConvolveParams* conv_params);
785 void av1_convolve_x_sr_avx2(const uint8_t* src,
786 int src_stride,
787 uint8_t* dst,
788 int dst_stride,
789 int w,
790 int h,
791 const InterpFilterParams* filter_params_x,
792 const InterpFilterParams* filter_params_y,
793 const int subpel_x_qn,
794 const int subpel_y_qn,
795 ConvolveParams* conv_params);
796 RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t* src,
797 int src_stride,
798 uint8_t* dst,
799 int dst_stride,
800 int w,
801 int h,
802 const InterpFilterParams* filter_params_x,
803 const InterpFilterParams* filter_params_y,
804 const int subpel_x_qn,
805 const int subpel_y_qn,
806 ConvolveParams* conv_params);
807
/* av1_convolve_y_sr: identical prototypes for the `_c`, `_sse2` and `_avx2`
 * variants and the RTCD_EXTERN function pointer of the same type. The
 * parameter list is the same as av1_convolve_x_sr, including both the x and
 * y filter/sub-pixel arguments. */
808 void av1_convolve_y_sr_c(const uint8_t* src,
809 int src_stride,
810 uint8_t* dst,
811 int dst_stride,
812 int w,
813 int h,
814 const InterpFilterParams* filter_params_x,
815 const InterpFilterParams* filter_params_y,
816 const int subpel_x_qn,
817 const int subpel_y_qn,
818 ConvolveParams* conv_params);
819 void av1_convolve_y_sr_sse2(const uint8_t* src,
820 int src_stride,
821 uint8_t* dst,
822 int dst_stride,
823 int w,
824 int h,
825 const InterpFilterParams* filter_params_x,
826 const InterpFilterParams* filter_params_y,
827 const int subpel_x_qn,
828 const int subpel_y_qn,
829 ConvolveParams* conv_params);
830 void av1_convolve_y_sr_avx2(const uint8_t* src,
831 int src_stride,
832 uint8_t* dst,
833 int dst_stride,
834 int w,
835 int h,
836 const InterpFilterParams* filter_params_x,
837 const InterpFilterParams* filter_params_y,
838 const int subpel_x_qn,
839 const int subpel_y_qn,
840 ConvolveParams* conv_params);
841 RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t* src,
842 int src_stride,
843 uint8_t* dst,
844 int dst_stride,
845 int w,
846 int h,
847 const InterpFilterParams* filter_params_x,
848 const InterpFilterParams* filter_params_y,
849 const int subpel_x_qn,
850 const int subpel_y_qn,
851 ConvolveParams* conv_params);
852
/* av1_dist_wtd_convolve_2d: identical prototypes for four variants - `_c`,
 * `_sse2`, `_ssse3` and `_avx2` - and the RTCD_EXTERN function pointer of the
 * same type. The parameter list is the common 11-argument convolve signature
 * also used by av1_convolve_2d_sr. */
853 void av1_dist_wtd_convolve_2d_c(const uint8_t* src,
854 int src_stride,
855 uint8_t* dst,
856 int dst_stride,
857 int w,
858 int h,
859 const InterpFilterParams* filter_params_x,
860 const InterpFilterParams* filter_params_y,
861 const int subpel_x_qn,
862 const int subpel_y_qn,
863 ConvolveParams* conv_params);
864 void av1_dist_wtd_convolve_2d_sse2(const uint8_t* src,
865 int src_stride,
866 uint8_t* dst,
867 int dst_stride,
868 int w,
869 int h,
870 const InterpFilterParams* filter_params_x,
871 const InterpFilterParams* filter_params_y,
872 const int subpel_x_qn,
873 const int subpel_y_qn,
874 ConvolveParams* conv_params);
875 void av1_dist_wtd_convolve_2d_ssse3(const uint8_t* src,
876 int src_stride,
877 uint8_t* dst,
878 int dst_stride,
879 int w,
880 int h,
881 const InterpFilterParams* filter_params_x,
882 const InterpFilterParams* filter_params_y,
883 const int subpel_x_qn,
884 const int subpel_y_qn,
885 ConvolveParams* conv_params);
886 void av1_dist_wtd_convolve_2d_avx2(const uint8_t* src,
887 int src_stride,
888 uint8_t* dst,
889 int dst_stride,
890 int w,
891 int h,
892 const InterpFilterParams* filter_params_x,
893 const InterpFilterParams* filter_params_y,
894 const int subpel_x_qn,
895 const int subpel_y_qn,
896 ConvolveParams* conv_params);
897 RTCD_EXTERN void (*av1_dist_wtd_convolve_2d)(
898 const uint8_t* src,
899 int src_stride,
900 uint8_t* dst,
901 int dst_stride,
902 int w,
903 int h,
904 const InterpFilterParams* filter_params_x,
905 const InterpFilterParams* filter_params_y,
906 const int subpel_x_qn,
907 const int subpel_y_qn,
908 ConvolveParams* conv_params);
909
/* av1_dist_wtd_convolve_2d_copy: identical prototypes for the `_c`, `_sse2`
 * and `_avx2` variants and the RTCD_EXTERN function pointer of the same
 * type, using the common 11-argument convolve signature. */
910 void av1_dist_wtd_convolve_2d_copy_c(const uint8_t* src,
911 int src_stride,
912 uint8_t* dst,
913 int dst_stride,
914 int w,
915 int h,
916 const InterpFilterParams* filter_params_x,
917 const InterpFilterParams* filter_params_y,
918 const int subpel_x_qn,
919 const int subpel_y_qn,
920 ConvolveParams* conv_params);
921 void av1_dist_wtd_convolve_2d_copy_sse2(
922 const uint8_t* src,
923 int src_stride,
924 uint8_t* dst,
925 int dst_stride,
926 int w,
927 int h,
928 const InterpFilterParams* filter_params_x,
929 const InterpFilterParams* filter_params_y,
930 const int subpel_x_qn,
931 const int subpel_y_qn,
932 ConvolveParams* conv_params);
933 void av1_dist_wtd_convolve_2d_copy_avx2(
934 const uint8_t* src,
935 int src_stride,
936 uint8_t* dst,
937 int dst_stride,
938 int w,
939 int h,
940 const InterpFilterParams* filter_params_x,
941 const InterpFilterParams* filter_params_y,
942 const int subpel_x_qn,
943 const int subpel_y_qn,
944 ConvolveParams* conv_params);
945 RTCD_EXTERN void (*av1_dist_wtd_convolve_2d_copy)(
946 const uint8_t* src,
947 int src_stride,
948 uint8_t* dst,
949 int dst_stride,
950 int w,
951 int h,
952 const InterpFilterParams* filter_params_x,
953 const InterpFilterParams* filter_params_y,
954 const int subpel_x_qn,
955 const int subpel_y_qn,
956 ConvolveParams* conv_params);
957
/* av1_dist_wtd_convolve_x: identical prototypes for the `_c`, `_sse2` and
 * `_avx2` variants and the RTCD_EXTERN function pointer of the same type,
 * using the common 11-argument convolve signature (both x and y filter
 * arguments are present). */
958 void av1_dist_wtd_convolve_x_c(const uint8_t* src,
959 int src_stride,
960 uint8_t* dst,
961 int dst_stride,
962 int w,
963 int h,
964 const InterpFilterParams* filter_params_x,
965 const InterpFilterParams* filter_params_y,
966 const int subpel_x_qn,
967 const int subpel_y_qn,
968 ConvolveParams* conv_params);
969 void av1_dist_wtd_convolve_x_sse2(const uint8_t* src,
970 int src_stride,
971 uint8_t* dst,
972 int dst_stride,
973 int w,
974 int h,
975 const InterpFilterParams* filter_params_x,
976 const InterpFilterParams* filter_params_y,
977 const int subpel_x_qn,
978 const int subpel_y_qn,
979 ConvolveParams* conv_params);
980 void av1_dist_wtd_convolve_x_avx2(const uint8_t* src,
981 int src_stride,
982 uint8_t* dst,
983 int dst_stride,
984 int w,
985 int h,
986 const InterpFilterParams* filter_params_x,
987 const InterpFilterParams* filter_params_y,
988 const int subpel_x_qn,
989 const int subpel_y_qn,
990 ConvolveParams* conv_params);
991 RTCD_EXTERN void (*av1_dist_wtd_convolve_x)(
992 const uint8_t* src,
993 int src_stride,
994 uint8_t* dst,
995 int dst_stride,
996 int w,
997 int h,
998 const InterpFilterParams* filter_params_x,
999 const InterpFilterParams* filter_params_y,
1000 const int subpel_x_qn,
1001 const int subpel_y_qn,
1002 ConvolveParams* conv_params);
1003
/* av1_dist_wtd_convolve_y: identical prototypes for the `_c`, `_sse2` and
 * `_avx2` variants and the RTCD_EXTERN function pointer of the same type.
 * The parameter list is the same as av1_dist_wtd_convolve_x. */
1004 void av1_dist_wtd_convolve_y_c(const uint8_t* src,
1005 int src_stride,
1006 uint8_t* dst,
1007 int dst_stride,
1008 int w,
1009 int h,
1010 const InterpFilterParams* filter_params_x,
1011 const InterpFilterParams* filter_params_y,
1012 const int subpel_x_qn,
1013 const int subpel_y_qn,
1014 ConvolveParams* conv_params);
1015 void av1_dist_wtd_convolve_y_sse2(const uint8_t* src,
1016 int src_stride,
1017 uint8_t* dst,
1018 int dst_stride,
1019 int w,
1020 int h,
1021 const InterpFilterParams* filter_params_x,
1022 const InterpFilterParams* filter_params_y,
1023 const int subpel_x_qn,
1024 const int subpel_y_qn,
1025 ConvolveParams* conv_params);
1026 void av1_dist_wtd_convolve_y_avx2(const uint8_t* src,
1027 int src_stride,
1028 uint8_t* dst,
1029 int dst_stride,
1030 int w,
1031 int h,
1032 const InterpFilterParams* filter_params_x,
1033 const InterpFilterParams* filter_params_y,
1034 const int subpel_x_qn,
1035 const int subpel_y_qn,
1036 ConvolveParams* conv_params);
1037 RTCD_EXTERN void (*av1_dist_wtd_convolve_y)(
1038 const uint8_t* src,
1039 int src_stride,
1040 uint8_t* dst,
1041 int dst_stride,
1042 int w,
1043 int h,
1044 const InterpFilterParams* filter_params_x,
1045 const InterpFilterParams* filter_params_y,
1046 const int subpel_x_qn,
1047 const int subpel_y_qn,
1048 ConvolveParams* conv_params);
1049
/* av1_dr_prediction_z1: prototypes for the `_c` and `_avx2` variants and the
 * RTCD_EXTERN function pointer of the same type. `dst` is the only writable
 * buffer and its stride is a ptrdiff_t; `above` and `left` are read-only.
 * This entry takes `upsample_above` but no `upsample_left`. */
1050 void av1_dr_prediction_z1_c(uint8_t* dst,
1051 ptrdiff_t stride,
1052 int bw,
1053 int bh,
1054 const uint8_t* above,
1055 const uint8_t* left,
1056 int upsample_above,
1057 int dx,
1058 int dy);
1059 void av1_dr_prediction_z1_avx2(uint8_t* dst,
1060 ptrdiff_t stride,
1061 int bw,
1062 int bh,
1063 const uint8_t* above,
1064 const uint8_t* left,
1065 int upsample_above,
1066 int dx,
1067 int dy);
1068 RTCD_EXTERN void (*av1_dr_prediction_z1)(uint8_t* dst,
1069 ptrdiff_t stride,
1070 int bw,
1071 int bh,
1072 const uint8_t* above,
1073 const uint8_t* left,
1074 int upsample_above,
1075 int dx,
1076 int dy);
1077
/* av1_dr_prediction_z2: prototypes for the `_c` and `_avx2` variants and the
 * RTCD_EXTERN function pointer of the same type. Same layout as
 * av1_dr_prediction_z1 but with both `upsample_above` and `upsample_left`. */
1078 void av1_dr_prediction_z2_c(uint8_t* dst,
1079 ptrdiff_t stride,
1080 int bw,
1081 int bh,
1082 const uint8_t* above,
1083 const uint8_t* left,
1084 int upsample_above,
1085 int upsample_left,
1086 int dx,
1087 int dy);
1088 void av1_dr_prediction_z2_avx2(uint8_t* dst,
1089 ptrdiff_t stride,
1090 int bw,
1091 int bh,
1092 const uint8_t* above,
1093 const uint8_t* left,
1094 int upsample_above,
1095 int upsample_left,
1096 int dx,
1097 int dy);
1098 RTCD_EXTERN void (*av1_dr_prediction_z2)(uint8_t* dst,
1099 ptrdiff_t stride,
1100 int bw,
1101 int bh,
1102 const uint8_t* above,
1103 const uint8_t* left,
1104 int upsample_above,
1105 int upsample_left,
1106 int dx,
1107 int dy);
1108
/* av1_dr_prediction_z3: prototypes for the `_c` and `_avx2` variants and the
 * RTCD_EXTERN function pointer of the same type. Same layout as
 * av1_dr_prediction_z1 except the upsample flag is `upsample_left` instead
 * of `upsample_above`. */
1109 void av1_dr_prediction_z3_c(uint8_t* dst,
1110 ptrdiff_t stride,
1111 int bw,
1112 int bh,
1113 const uint8_t* above,
1114 const uint8_t* left,
1115 int upsample_left,
1116 int dx,
1117 int dy);
1118 void av1_dr_prediction_z3_avx2(uint8_t* dst,
1119 ptrdiff_t stride,
1120 int bw,
1121 int bh,
1122 const uint8_t* above,
1123 const uint8_t* left,
1124 int upsample_left,
1125 int dx,
1126 int dy);
1127 RTCD_EXTERN void (*av1_dr_prediction_z3)(uint8_t* dst,
1128 ptrdiff_t stride,
1129 int bw,
1130 int bh,
1131 const uint8_t* above,
1132 const uint8_t* left,
1133 int upsample_left,
1134 int dx,
1135 int dy);
1136
/* av1_filter_intra_edge: prototypes for the `_c` and `_sse4_1` variants and
 * the RTCD_EXTERN function pointer of the same type. `p` is a non-const
 * uint8_t buffer, accompanied by `sz` and `strength`. */
1137 void av1_filter_intra_edge_c(uint8_t* p, int sz, int strength);
1138 void av1_filter_intra_edge_sse4_1(uint8_t* p, int sz, int strength);
1139 RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t* p, int sz, int strength);
1140
/* av1_filter_intra_edge_high: prototypes for the `_c` and `_sse4_1` variants
 * and the RTCD_EXTERN function pointer of the same type. Identical to
 * av1_filter_intra_edge except that `p` points to uint16_t. */
1141 void av1_filter_intra_edge_high_c(uint16_t* p, int sz, int strength);
1142 void av1_filter_intra_edge_high_sse4_1(uint16_t* p, int sz, int strength);
1143 RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t* p,
1144 int sz,
1145 int strength);
1146
/* av1_filter_intra_predictor: prototypes for the `_c` and `_sse4_1` variants
 * and the RTCD_EXTERN function pointer of the same type. The block size is
 * passed as a TX_SIZE value rather than as width/height; `mode` is a plain
 * int. `dst` is the only writable buffer. */
1147 void av1_filter_intra_predictor_c(uint8_t* dst,
1148 ptrdiff_t stride,
1149 TX_SIZE tx_size,
1150 const uint8_t* above,
1151 const uint8_t* left,
1152 int mode);
1153 void av1_filter_intra_predictor_sse4_1(uint8_t* dst,
1154 ptrdiff_t stride,
1155 TX_SIZE tx_size,
1156 const uint8_t* above,
1157 const uint8_t* left,
1158 int mode);
1159 RTCD_EXTERN void (*av1_filter_intra_predictor)(uint8_t* dst,
1160 ptrdiff_t stride,
1161 TX_SIZE tx_size,
1162 const uint8_t* above,
1163 const uint8_t* left,
1164 int mode);
1165
/* av1_full_range_search: only a `_c` prototype is declared; the unsuffixed
 * name is a macro alias for it, so no function pointer is involved. Returns
 * int. `ref_mv`, `best_mv` and `num00` are non-const pointers; `x`, `cfg`,
 * `fn_ptr` and `center_mv` are const.
 * NOTE(review): the meaning of the return value is not shown here. */
1166 int av1_full_range_search_c(const struct macroblock* x,
1167 const struct search_site_config* cfg,
1168 MV* ref_mv,
1169 MV* best_mv,
1170 int search_param,
1171 int sad_per_bit,
1172 int* num00,
1173 const struct aom_variance_vtable* fn_ptr,
1174 const MV* center_mv);
1175 #define av1_full_range_search av1_full_range_search_c
1176
/* av1_fwd_txfm2d_16x16: identical prototypes for the `_c`, `_sse4_1` and
 * `_avx2` variants and the RTCD_EXTERN function pointer of the same type.
 * All av1_fwd_txfm2d_* entries in this header share this signature:
 * int16_t input, int32_t output, `stride`, a TX_TYPE and `bd`. */
1177 void av1_fwd_txfm2d_16x16_c(const int16_t* input,
1178 int32_t* output,
1179 int stride,
1180 TX_TYPE tx_type,
1181 int bd);
1182 void av1_fwd_txfm2d_16x16_sse4_1(const int16_t* input,
1183 int32_t* output,
1184 int stride,
1185 TX_TYPE tx_type,
1186 int bd);
1187 void av1_fwd_txfm2d_16x16_avx2(const int16_t* input,
1188 int32_t* output,
1189 int stride,
1190 TX_TYPE tx_type,
1191 int bd);
1192 RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t* input,
1193 int32_t* output,
1194 int stride,
1195 TX_TYPE tx_type,
1196 int bd);
1197
/* av1_fwd_txfm2d_16x32: prototypes for the `_c` and `_sse4_1` variants (no
 * `_avx2` is declared for this size) and the RTCD_EXTERN function pointer of
 * the same type. */
1198 void av1_fwd_txfm2d_16x32_c(const int16_t* input,
1199 int32_t* output,
1200 int stride,
1201 TX_TYPE tx_type,
1202 int bd);
1203 void av1_fwd_txfm2d_16x32_sse4_1(const int16_t* input,
1204 int32_t* output,
1205 int stride,
1206 TX_TYPE tx_type,
1207 int bd);
1208 RTCD_EXTERN void (*av1_fwd_txfm2d_16x32)(const int16_t* input,
1209 int32_t* output,
1210 int stride,
1211 TX_TYPE tx_type,
1212 int bd);
1213
/* av1_fwd_txfm2d_16x4: prototypes for the `_c` and `_sse4_1` variants (no
 * `_avx2` is declared for this size) and the RTCD_EXTERN function pointer of
 * the same type. */
1214 void av1_fwd_txfm2d_16x4_c(const int16_t* input,
1215 int32_t* output,
1216 int stride,
1217 TX_TYPE tx_type,
1218 int bd);
1219 void av1_fwd_txfm2d_16x4_sse4_1(const int16_t* input,
1220 int32_t* output,
1221 int stride,
1222 TX_TYPE tx_type,
1223 int bd);
1224 RTCD_EXTERN void (*av1_fwd_txfm2d_16x4)(const int16_t* input,
1225 int32_t* output,
1226 int stride,
1227 TX_TYPE tx_type,
1228 int bd);
1229
1230 void av1_fwd_txfm2d_16x64_c(const int16_t* input,
1231 int32_t* output,
1232 int stride,
1233 TX_TYPE tx_type,
1234 int bd);
1235 void av1_fwd_txfm2d_16x64_sse4_1(const int16_t* input,
1236 int32_t* output,
1237 int stride,
1238 TX_TYPE tx_type,
1239 int bd);
1240 RTCD_EXTERN void (*av1_fwd_txfm2d_16x64)(const int16_t* input,
1241 int32_t* output,
1242 int stride,
1243 TX_TYPE tx_type,
1244 int bd);
1245
1246 void av1_fwd_txfm2d_16x8_c(const int16_t* input,
1247 int32_t* output,
1248 int stride,
1249 TX_TYPE tx_type,
1250 int bd);
1251 void av1_fwd_txfm2d_16x8_sse4_1(const int16_t* input,
1252 int32_t* output,
1253 int stride,
1254 TX_TYPE tx_type,
1255 int bd);
1256 void av1_fwd_txfm2d_16x8_avx2(const int16_t* input,
1257 int32_t* output,
1258 int stride,
1259 TX_TYPE tx_type,
1260 int bd);
1261 RTCD_EXTERN void (*av1_fwd_txfm2d_16x8)(const int16_t* input,
1262 int32_t* output,
1263 int stride,
1264 TX_TYPE tx_type,
1265 int bd);
1266
1267 void av1_fwd_txfm2d_32x16_c(const int16_t* input,
1268 int32_t* output,
1269 int stride,
1270 TX_TYPE tx_type,
1271 int bd);
1272 void av1_fwd_txfm2d_32x16_sse4_1(const int16_t* input,
1273 int32_t* output,
1274 int stride,
1275 TX_TYPE tx_type,
1276 int bd);
1277 RTCD_EXTERN void (*av1_fwd_txfm2d_32x16)(const int16_t* input,
1278 int32_t* output,
1279 int stride,
1280 TX_TYPE tx_type,
1281 int bd);
1282
1283 void av1_fwd_txfm2d_32x32_c(const int16_t* input,
1284 int32_t* output,
1285 int stride,
1286 TX_TYPE tx_type,
1287 int bd);
1288 void av1_fwd_txfm2d_32x32_sse4_1(const int16_t* input,
1289 int32_t* output,
1290 int stride,
1291 TX_TYPE tx_type,
1292 int bd);
1293 void av1_fwd_txfm2d_32x32_avx2(const int16_t* input,
1294 int32_t* output,
1295 int stride,
1296 TX_TYPE tx_type,
1297 int bd);
1298 RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t* input,
1299 int32_t* output,
1300 int stride,
1301 TX_TYPE tx_type,
1302 int bd);
1303
1304 void av1_fwd_txfm2d_32x64_c(const int16_t* input,
1305 int32_t* output,
1306 int stride,
1307 TX_TYPE tx_type,
1308 int bd);
1309 void av1_fwd_txfm2d_32x64_sse4_1(const int16_t* input,
1310 int32_t* output,
1311 int stride,
1312 TX_TYPE tx_type,
1313 int bd);
1314 RTCD_EXTERN void (*av1_fwd_txfm2d_32x64)(const int16_t* input,
1315 int32_t* output,
1316 int stride,
1317 TX_TYPE tx_type,
1318 int bd);
1319
1320 void av1_fwd_txfm2d_32x8_c(const int16_t* input,
1321 int32_t* output,
1322 int stride,
1323 TX_TYPE tx_type,
1324 int bd);
1325 void av1_fwd_txfm2d_32x8_sse4_1(const int16_t* input,
1326 int32_t* output,
1327 int stride,
1328 TX_TYPE tx_type,
1329 int bd);
1330 RTCD_EXTERN void (*av1_fwd_txfm2d_32x8)(const int16_t* input,
1331 int32_t* output,
1332 int stride,
1333 TX_TYPE tx_type,
1334 int bd);
1335
1336 void av1_fwd_txfm2d_4x16_c(const int16_t* input,
1337 int32_t* output,
1338 int stride,
1339 TX_TYPE tx_type,
1340 int bd);
1341 void av1_fwd_txfm2d_4x16_sse4_1(const int16_t* input,
1342 int32_t* output,
1343 int stride,
1344 TX_TYPE tx_type,
1345 int bd);
1346 RTCD_EXTERN void (*av1_fwd_txfm2d_4x16)(const int16_t* input,
1347 int32_t* output,
1348 int stride,
1349 TX_TYPE tx_type,
1350 int bd);
1351
1352 void av1_fwd_txfm2d_4x4_c(const int16_t* input,
1353 int32_t* output,
1354 int stride,
1355 TX_TYPE tx_type,
1356 int bd);
1357 void av1_fwd_txfm2d_4x4_sse4_1(const int16_t* input,
1358 int32_t* output,
1359 int stride,
1360 TX_TYPE tx_type,
1361 int bd);
1362 RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t* input,
1363 int32_t* output,
1364 int stride,
1365 TX_TYPE tx_type,
1366 int bd);
1367
1368 void av1_fwd_txfm2d_4x8_c(const int16_t* input,
1369 int32_t* output,
1370 int stride,
1371 TX_TYPE tx_type,
1372 int bd);
1373 void av1_fwd_txfm2d_4x8_sse4_1(const int16_t* input,
1374 int32_t* output,
1375 int stride,
1376 TX_TYPE tx_type,
1377 int bd);
1378 RTCD_EXTERN void (*av1_fwd_txfm2d_4x8)(const int16_t* input,
1379 int32_t* output,
1380 int stride,
1381 TX_TYPE tx_type,
1382 int bd);
1383
1384 void av1_fwd_txfm2d_64x16_c(const int16_t* input,
1385 int32_t* output,
1386 int stride,
1387 TX_TYPE tx_type,
1388 int bd);
1389 void av1_fwd_txfm2d_64x16_sse4_1(const int16_t* input,
1390 int32_t* output,
1391 int stride,
1392 TX_TYPE tx_type,
1393 int bd);
1394 RTCD_EXTERN void (*av1_fwd_txfm2d_64x16)(const int16_t* input,
1395 int32_t* output,
1396 int stride,
1397 TX_TYPE tx_type,
1398 int bd);
1399
1400 void av1_fwd_txfm2d_64x32_c(const int16_t* input,
1401 int32_t* output,
1402 int stride,
1403 TX_TYPE tx_type,
1404 int bd);
1405 void av1_fwd_txfm2d_64x32_sse4_1(const int16_t* input,
1406 int32_t* output,
1407 int stride,
1408 TX_TYPE tx_type,
1409 int bd);
1410 RTCD_EXTERN void (*av1_fwd_txfm2d_64x32)(const int16_t* input,
1411 int32_t* output,
1412 int stride,
1413 TX_TYPE tx_type,
1414 int bd);
1415
1416 void av1_fwd_txfm2d_64x64_c(const int16_t* input,
1417 int32_t* output,
1418 int stride,
1419 TX_TYPE tx_type,
1420 int bd);
1421 void av1_fwd_txfm2d_64x64_sse4_1(const int16_t* input,
1422 int32_t* output,
1423 int stride,
1424 TX_TYPE tx_type,
1425 int bd);
1426 void av1_fwd_txfm2d_64x64_avx2(const int16_t* input,
1427 int32_t* output,
1428 int stride,
1429 TX_TYPE tx_type,
1430 int bd);
1431 RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t* input,
1432 int32_t* output,
1433 int stride,
1434 TX_TYPE tx_type,
1435 int bd);
1436
1437 void av1_fwd_txfm2d_8x16_c(const int16_t* input,
1438 int32_t* output,
1439 int stride,
1440 TX_TYPE tx_type,
1441 int bd);
1442 void av1_fwd_txfm2d_8x16_sse4_1(const int16_t* input,
1443 int32_t* output,
1444 int stride,
1445 TX_TYPE tx_type,
1446 int bd);
1447 void av1_fwd_txfm2d_8x16_avx2(const int16_t* input,
1448 int32_t* output,
1449 int stride,
1450 TX_TYPE tx_type,
1451 int bd);
1452 RTCD_EXTERN void (*av1_fwd_txfm2d_8x16)(const int16_t* input,
1453 int32_t* output,
1454 int stride,
1455 TX_TYPE tx_type,
1456 int bd);
1457
1458 void av1_fwd_txfm2d_8x32_c(const int16_t* input,
1459 int32_t* output,
1460 int stride,
1461 TX_TYPE tx_type,
1462 int bd);
1463 void av1_fwd_txfm2d_8x32_sse4_1(const int16_t* input,
1464 int32_t* output,
1465 int stride,
1466 TX_TYPE tx_type,
1467 int bd);
1468 RTCD_EXTERN void (*av1_fwd_txfm2d_8x32)(const int16_t* input,
1469 int32_t* output,
1470 int stride,
1471 TX_TYPE tx_type,
1472 int bd);
1473
1474 void av1_fwd_txfm2d_8x4_c(const int16_t* input,
1475 int32_t* output,
1476 int stride,
1477 TX_TYPE tx_type,
1478 int bd);
1479 void av1_fwd_txfm2d_8x4_sse4_1(const int16_t* input,
1480 int32_t* output,
1481 int stride,
1482 TX_TYPE tx_type,
1483 int bd);
1484 RTCD_EXTERN void (*av1_fwd_txfm2d_8x4)(const int16_t* input,
1485 int32_t* output,
1486 int stride,
1487 TX_TYPE tx_type,
1488 int bd);
1489
1490 void av1_fwd_txfm2d_8x8_c(const int16_t* input,
1491 int32_t* output,
1492 int stride,
1493 TX_TYPE tx_type,
1494 int bd);
1495 void av1_fwd_txfm2d_8x8_sse4_1(const int16_t* input,
1496 int32_t* output,
1497 int stride,
1498 TX_TYPE tx_type,
1499 int bd);
1500 void av1_fwd_txfm2d_8x8_avx2(const int16_t* input,
1501 int32_t* output,
1502 int stride,
1503 TX_TYPE tx_type,
1504 int bd);
1505 RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t* input,
1506 int32_t* output,
1507 int stride,
1508 TX_TYPE tx_type,
1509 int bd);
1510
1511 void av1_fwht4x4_c(const int16_t* input, tran_low_t* output, int stride);
1512 #define av1_fwht4x4 av1_fwht4x4_c
1513
1514 uint32_t av1_get_crc32c_value_c(void* crc_calculator,
1515 uint8_t* p,
1516 size_t length);
1517 uint32_t av1_get_crc32c_value_sse4_2(void* crc_calculator,
1518 uint8_t* p,
1519 size_t length);
1520 RTCD_EXTERN uint32_t (*av1_get_crc32c_value)(void* crc_calculator,
1521 uint8_t* p,
1522 size_t length);
1523
1524 void av1_get_horver_correlation_full_c(const int16_t* diff,
1525 int stride,
1526 int w,
1527 int h,
1528 float* hcorr,
1529 float* vcorr);
1530 void av1_get_horver_correlation_full_sse4_1(const int16_t* diff,
1531 int stride,
1532 int w,
1533 int h,
1534 float* hcorr,
1535 float* vcorr);
1536 void av1_get_horver_correlation_full_avx2(const int16_t* diff,
1537 int stride,
1538 int w,
1539 int h,
1540 float* hcorr,
1541 float* vcorr);
1542 RTCD_EXTERN void (*av1_get_horver_correlation_full)(const int16_t* diff,
1543 int stride,
1544 int w,
1545 int h,
1546 float* hcorr,
1547 float* vcorr);
1548
1549 void av1_get_nz_map_contexts_c(const uint8_t* const levels,
1550 const int16_t* const scan,
1551 const uint16_t eob,
1552 const TX_SIZE tx_size,
1553 const TX_CLASS tx_class,
1554 int8_t* const coeff_contexts);
1555 void av1_get_nz_map_contexts_sse2(const uint8_t* const levels,
1556 const int16_t* const scan,
1557 const uint16_t eob,
1558 const TX_SIZE tx_size,
1559 const TX_CLASS tx_class,
1560 int8_t* const coeff_contexts);
1561 #define av1_get_nz_map_contexts av1_get_nz_map_contexts_sse2
1562
1563 int64_t av1_highbd_block_error_c(const tran_low_t* coeff,
1564 const tran_low_t* dqcoeff,
1565 intptr_t block_size,
1566 int64_t* ssz,
1567 int bd);
1568 int64_t av1_highbd_block_error_sse2(const tran_low_t* coeff,
1569 const tran_low_t* dqcoeff,
1570 intptr_t block_size,
1571 int64_t* ssz,
1572 int bd);
1573 int64_t av1_highbd_block_error_avx2(const tran_low_t* coeff,
1574 const tran_low_t* dqcoeff,
1575 intptr_t block_size,
1576 int64_t* ssz,
1577 int bd);
1578 RTCD_EXTERN int64_t (*av1_highbd_block_error)(const tran_low_t* coeff,
1579 const tran_low_t* dqcoeff,
1580 intptr_t block_size,
1581 int64_t* ssz,
1582 int bd);
1583
// av1_highbd_convolve8: only a _c prototype is declared; the unsuffixed name
// is a macro alias for it, so no function pointer is involved.
void av1_highbd_convolve8_c(
    const uint8_t* src, ptrdiff_t src_stride, uint8_t* dst,
    ptrdiff_t dst_stride, const int16_t* filter_x, int x_step_q4,
    const int16_t* filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8 av1_highbd_convolve8_c
1596
// av1_highbd_convolve8_horiz: only a _c prototype is declared; the unsuffixed
// name is a macro alias for it, so no function pointer is involved.
void av1_highbd_convolve8_horiz_c(
    const uint8_t* src, ptrdiff_t src_stride, uint8_t* dst,
    ptrdiff_t dst_stride, const int16_t* filter_x, int x_step_q4,
    const int16_t* filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_horiz av1_highbd_convolve8_horiz_c
1609
// av1_highbd_convolve8_vert: only a _c prototype is declared; the unsuffixed
// name is a macro alias for it, so no function pointer is involved.
void av1_highbd_convolve8_vert_c(
    const uint8_t* src, ptrdiff_t src_stride, uint8_t* dst,
    ptrdiff_t dst_stride, const int16_t* filter_x, int x_step_q4,
    const int16_t* filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
1622
1623 void av1_highbd_convolve_2d_copy_sr_c(const uint16_t* src,
1624 int src_stride,
1625 uint16_t* dst,
1626 int dst_stride,
1627 int w,
1628 int h,
1629 const InterpFilterParams* filter_params_x,
1630 const InterpFilterParams* filter_params_y,
1631 const int subpel_x_qn,
1632 const int subpel_y_qn,
1633 ConvolveParams* conv_params,
1634 int bd);
1635 void av1_highbd_convolve_2d_copy_sr_sse2(
1636 const uint16_t* src,
1637 int src_stride,
1638 uint16_t* dst,
1639 int dst_stride,
1640 int w,
1641 int h,
1642 const InterpFilterParams* filter_params_x,
1643 const InterpFilterParams* filter_params_y,
1644 const int subpel_x_qn,
1645 const int subpel_y_qn,
1646 ConvolveParams* conv_params,
1647 int bd);
1648 void av1_highbd_convolve_2d_copy_sr_avx2(
1649 const uint16_t* src,
1650 int src_stride,
1651 uint16_t* dst,
1652 int dst_stride,
1653 int w,
1654 int h,
1655 const InterpFilterParams* filter_params_x,
1656 const InterpFilterParams* filter_params_y,
1657 const int subpel_x_qn,
1658 const int subpel_y_qn,
1659 ConvolveParams* conv_params,
1660 int bd);
1661 RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(
1662 const uint16_t* src,
1663 int src_stride,
1664 uint16_t* dst,
1665 int dst_stride,
1666 int w,
1667 int h,
1668 const InterpFilterParams* filter_params_x,
1669 const InterpFilterParams* filter_params_y,
1670 const int subpel_x_qn,
1671 const int subpel_y_qn,
1672 ConvolveParams* conv_params,
1673 int bd);
1674
1675 void av1_highbd_convolve_2d_scale_c(const uint16_t* src,
1676 int src_stride,
1677 uint16_t* dst,
1678 int dst_stride,
1679 int w,
1680 int h,
1681 const InterpFilterParams* filter_params_x,
1682 const InterpFilterParams* filter_params_y,
1683 const int subpel_x_qn,
1684 const int x_step_qn,
1685 const int subpel_y_qn,
1686 const int y_step_qn,
1687 ConvolveParams* conv_params,
1688 int bd);
1689 void av1_highbd_convolve_2d_scale_sse4_1(
1690 const uint16_t* src,
1691 int src_stride,
1692 uint16_t* dst,
1693 int dst_stride,
1694 int w,
1695 int h,
1696 const InterpFilterParams* filter_params_x,
1697 const InterpFilterParams* filter_params_y,
1698 const int subpel_x_qn,
1699 const int x_step_qn,
1700 const int subpel_y_qn,
1701 const int y_step_qn,
1702 ConvolveParams* conv_params,
1703 int bd);
1704 RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(
1705 const uint16_t* src,
1706 int src_stride,
1707 uint16_t* dst,
1708 int dst_stride,
1709 int w,
1710 int h,
1711 const InterpFilterParams* filter_params_x,
1712 const InterpFilterParams* filter_params_y,
1713 const int subpel_x_qn,
1714 const int x_step_qn,
1715 const int subpel_y_qn,
1716 const int y_step_qn,
1717 ConvolveParams* conv_params,
1718 int bd);
1719
1720 void av1_highbd_convolve_2d_sr_c(const uint16_t* src,
1721 int src_stride,
1722 uint16_t* dst,
1723 int dst_stride,
1724 int w,
1725 int h,
1726 const InterpFilterParams* filter_params_x,
1727 const InterpFilterParams* filter_params_y,
1728 const int subpel_x_qn,
1729 const int subpel_y_qn,
1730 ConvolveParams* conv_params,
1731 int bd);
1732 void av1_highbd_convolve_2d_sr_ssse3(const uint16_t* src,
1733 int src_stride,
1734 uint16_t* dst,
1735 int dst_stride,
1736 int w,
1737 int h,
1738 const InterpFilterParams* filter_params_x,
1739 const InterpFilterParams* filter_params_y,
1740 const int subpel_x_qn,
1741 const int subpel_y_qn,
1742 ConvolveParams* conv_params,
1743 int bd);
1744 void av1_highbd_convolve_2d_sr_avx2(const uint16_t* src,
1745 int src_stride,
1746 uint16_t* dst,
1747 int dst_stride,
1748 int w,
1749 int h,
1750 const InterpFilterParams* filter_params_x,
1751 const InterpFilterParams* filter_params_y,
1752 const int subpel_x_qn,
1753 const int subpel_y_qn,
1754 ConvolveParams* conv_params,
1755 int bd);
1756 RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(
1757 const uint16_t* src,
1758 int src_stride,
1759 uint16_t* dst,
1760 int dst_stride,
1761 int w,
1762 int h,
1763 const InterpFilterParams* filter_params_x,
1764 const InterpFilterParams* filter_params_y,
1765 const int subpel_x_qn,
1766 const int subpel_y_qn,
1767 ConvolveParams* conv_params,
1768 int bd);
1769
// av1_highbd_convolve_avg: only a _c prototype is declared; the unsuffixed
// name is a macro alias for it, so no function pointer is involved.
void av1_highbd_convolve_avg_c(
    const uint8_t* src, ptrdiff_t src_stride, uint8_t* dst,
    ptrdiff_t dst_stride, const int16_t* filter_x, int x_step_q4,
    const int16_t* filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
1782
// av1_highbd_convolve_copy: only a _c prototype is declared; the unsuffixed
// name is a macro alias for it, so no function pointer is involved.
void av1_highbd_convolve_copy_c(
    const uint8_t* src, ptrdiff_t src_stride, uint8_t* dst,
    ptrdiff_t dst_stride, const int16_t* filter_x, int x_step_q4,
    const int16_t* filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve_copy av1_highbd_convolve_copy_c
1795
1796 void av1_highbd_convolve_horiz_rs_c(const uint16_t* src,
1797 int src_stride,
1798 uint16_t* dst,
1799 int dst_stride,
1800 int w,
1801 int h,
1802 const int16_t* x_filters,
1803 int x0_qn,
1804 int x_step_qn,
1805 int bd);
1806 void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t* src,
1807 int src_stride,
1808 uint16_t* dst,
1809 int dst_stride,
1810 int w,
1811 int h,
1812 const int16_t* x_filters,
1813 int x0_qn,
1814 int x_step_qn,
1815 int bd);
1816 RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t* src,
1817 int src_stride,
1818 uint16_t* dst,
1819 int dst_stride,
1820 int w,
1821 int h,
1822 const int16_t* x_filters,
1823 int x0_qn,
1824 int x_step_qn,
1825 int bd);
1826
1827 void av1_highbd_convolve_x_sr_c(const uint16_t* src,
1828 int src_stride,
1829 uint16_t* dst,
1830 int dst_stride,
1831 int w,
1832 int h,
1833 const InterpFilterParams* filter_params_x,
1834 const InterpFilterParams* filter_params_y,
1835 const int subpel_x_qn,
1836 const int subpel_y_qn,
1837 ConvolveParams* conv_params,
1838 int bd);
1839 void av1_highbd_convolve_x_sr_ssse3(const uint16_t* src,
1840 int src_stride,
1841 uint16_t* dst,
1842 int dst_stride,
1843 int w,
1844 int h,
1845 const InterpFilterParams* filter_params_x,
1846 const InterpFilterParams* filter_params_y,
1847 const int subpel_x_qn,
1848 const int subpel_y_qn,
1849 ConvolveParams* conv_params,
1850 int bd);
1851 void av1_highbd_convolve_x_sr_avx2(const uint16_t* src,
1852 int src_stride,
1853 uint16_t* dst,
1854 int dst_stride,
1855 int w,
1856 int h,
1857 const InterpFilterParams* filter_params_x,
1858 const InterpFilterParams* filter_params_y,
1859 const int subpel_x_qn,
1860 const int subpel_y_qn,
1861 ConvolveParams* conv_params,
1862 int bd);
1863 RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(
1864 const uint16_t* src,
1865 int src_stride,
1866 uint16_t* dst,
1867 int dst_stride,
1868 int w,
1869 int h,
1870 const InterpFilterParams* filter_params_x,
1871 const InterpFilterParams* filter_params_y,
1872 const int subpel_x_qn,
1873 const int subpel_y_qn,
1874 ConvolveParams* conv_params,
1875 int bd);
1876
1877 void av1_highbd_convolve_y_sr_c(const uint16_t* src,
1878 int src_stride,
1879 uint16_t* dst,
1880 int dst_stride,
1881 int w,
1882 int h,
1883 const InterpFilterParams* filter_params_x,
1884 const InterpFilterParams* filter_params_y,
1885 const int subpel_x_qn,
1886 const int subpel_y_qn,
1887 ConvolveParams* conv_params,
1888 int bd);
1889 void av1_highbd_convolve_y_sr_ssse3(const uint16_t* src,
1890 int src_stride,
1891 uint16_t* dst,
1892 int dst_stride,
1893 int w,
1894 int h,
1895 const InterpFilterParams* filter_params_x,
1896 const InterpFilterParams* filter_params_y,
1897 const int subpel_x_qn,
1898 const int subpel_y_qn,
1899 ConvolveParams* conv_params,
1900 int bd);
1901 void av1_highbd_convolve_y_sr_avx2(const uint16_t* src,
1902 int src_stride,
1903 uint16_t* dst,
1904 int dst_stride,
1905 int w,
1906 int h,
1907 const InterpFilterParams* filter_params_x,
1908 const InterpFilterParams* filter_params_y,
1909 const int subpel_x_qn,
1910 const int subpel_y_qn,
1911 ConvolveParams* conv_params,
1912 int bd);
1913 RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(
1914 const uint16_t* src,
1915 int src_stride,
1916 uint16_t* dst,
1917 int dst_stride,
1918 int w,
1919 int h,
1920 const InterpFilterParams* filter_params_x,
1921 const InterpFilterParams* filter_params_y,
1922 const int subpel_x_qn,
1923 const int subpel_y_qn,
1924 ConvolveParams* conv_params,
1925 int bd);
1926
1927 void av1_highbd_dist_wtd_convolve_2d_c(
1928 const uint16_t* src,
1929 int src_stride,
1930 uint16_t* dst,
1931 int dst_stride,
1932 int w,
1933 int h,
1934 const InterpFilterParams* filter_params_x,
1935 const InterpFilterParams* filter_params_y,
1936 const int subpel_x_qn,
1937 const int subpel_y_qn,
1938 ConvolveParams* conv_params,
1939 int bd);
1940 void av1_highbd_dist_wtd_convolve_2d_sse4_1(
1941 const uint16_t* src,
1942 int src_stride,
1943 uint16_t* dst,
1944 int dst_stride,
1945 int w,
1946 int h,
1947 const InterpFilterParams* filter_params_x,
1948 const InterpFilterParams* filter_params_y,
1949 const int subpel_x_qn,
1950 const int subpel_y_qn,
1951 ConvolveParams* conv_params,
1952 int bd);
1953 void av1_highbd_dist_wtd_convolve_2d_avx2(
1954 const uint16_t* src,
1955 int src_stride,
1956 uint16_t* dst,
1957 int dst_stride,
1958 int w,
1959 int h,
1960 const InterpFilterParams* filter_params_x,
1961 const InterpFilterParams* filter_params_y,
1962 const int subpel_x_qn,
1963 const int subpel_y_qn,
1964 ConvolveParams* conv_params,
1965 int bd);
1966 RTCD_EXTERN void (*av1_highbd_dist_wtd_convolve_2d)(
1967 const uint16_t* src,
1968 int src_stride,
1969 uint16_t* dst,
1970 int dst_stride,
1971 int w,
1972 int h,
1973 const InterpFilterParams* filter_params_x,
1974 const InterpFilterParams* filter_params_y,
1975 const int subpel_x_qn,
1976 const int subpel_y_qn,
1977 ConvolveParams* conv_params,
1978 int bd);
1979
1980 void av1_highbd_dist_wtd_convolve_2d_copy_c(
1981 const uint16_t* src,
1982 int src_stride,
1983 uint16_t* dst,
1984 int dst_stride,
1985 int w,
1986 int h,
1987 const InterpFilterParams* filter_params_x,
1988 const InterpFilterParams* filter_params_y,
1989 const int subpel_x_qn,
1990 const int subpel_y_qn,
1991 ConvolveParams* conv_params,
1992 int bd);
1993 void av1_highbd_dist_wtd_convolve_2d_copy_sse4_1(
1994 const uint16_t* src,
1995 int src_stride,
1996 uint16_t* dst,
1997 int dst_stride,
1998 int w,
1999 int h,
2000 const InterpFilterParams* filter_params_x,
2001 const InterpFilterParams* filter_params_y,
2002 const int subpel_x_qn,
2003 const int subpel_y_qn,
2004 ConvolveParams* conv_params,
2005 int bd);
2006 void av1_highbd_dist_wtd_convolve_2d_copy_avx2(
2007 const uint16_t* src,
2008 int src_stride,
2009 uint16_t* dst,
2010 int dst_stride,
2011 int w,
2012 int h,
2013 const InterpFilterParams* filter_params_x,
2014 const InterpFilterParams* filter_params_y,
2015 const int subpel_x_qn,
2016 const int subpel_y_qn,
2017 ConvolveParams* conv_params,
2018 int bd);
2019 RTCD_EXTERN void (*av1_highbd_dist_wtd_convolve_2d_copy)(
2020 const uint16_t* src,
2021 int src_stride,
2022 uint16_t* dst,
2023 int dst_stride,
2024 int w,
2025 int h,
2026 const InterpFilterParams* filter_params_x,
2027 const InterpFilterParams* filter_params_y,
2028 const int subpel_x_qn,
2029 const int subpel_y_qn,
2030 ConvolveParams* conv_params,
2031 int bd);
2032
2033 void av1_highbd_dist_wtd_convolve_x_c(const uint16_t* src,
2034 int src_stride,
2035 uint16_t* dst,
2036 int dst_stride,
2037 int w,
2038 int h,
2039 const InterpFilterParams* filter_params_x,
2040 const InterpFilterParams* filter_params_y,
2041 const int subpel_x_qn,
2042 const int subpel_y_qn,
2043 ConvolveParams* conv_params,
2044 int bd);
2045 void av1_highbd_dist_wtd_convolve_x_sse4_1(
2046 const uint16_t* src,
2047 int src_stride,
2048 uint16_t* dst,
2049 int dst_stride,
2050 int w,
2051 int h,
2052 const InterpFilterParams* filter_params_x,
2053 const InterpFilterParams* filter_params_y,
2054 const int subpel_x_qn,
2055 const int subpel_y_qn,
2056 ConvolveParams* conv_params,
2057 int bd);
2058 void av1_highbd_dist_wtd_convolve_x_avx2(
2059 const uint16_t* src,
2060 int src_stride,
2061 uint16_t* dst,
2062 int dst_stride,
2063 int w,
2064 int h,
2065 const InterpFilterParams* filter_params_x,
2066 const InterpFilterParams* filter_params_y,
2067 const int subpel_x_qn,
2068 const int subpel_y_qn,
2069 ConvolveParams* conv_params,
2070 int bd);
2071 RTCD_EXTERN void (*av1_highbd_dist_wtd_convolve_x)(
2072 const uint16_t* src,
2073 int src_stride,
2074 uint16_t* dst,
2075 int dst_stride,
2076 int w,
2077 int h,
2078 const InterpFilterParams* filter_params_x,
2079 const InterpFilterParams* filter_params_y,
2080 const int subpel_x_qn,
2081 const int subpel_y_qn,
2082 ConvolveParams* conv_params,
2083 int bd);
2084
2085 void av1_highbd_dist_wtd_convolve_y_c(const uint16_t* src,
2086 int src_stride,
2087 uint16_t* dst,
2088 int dst_stride,
2089 int w,
2090 int h,
2091 const InterpFilterParams* filter_params_x,
2092 const InterpFilterParams* filter_params_y,
2093 const int subpel_x_qn,
2094 const int subpel_y_qn,
2095 ConvolveParams* conv_params,
2096 int bd);
2097 void av1_highbd_dist_wtd_convolve_y_sse4_1(
2098 const uint16_t* src,
2099 int src_stride,
2100 uint16_t* dst,
2101 int dst_stride,
2102 int w,
2103 int h,
2104 const InterpFilterParams* filter_params_x,
2105 const InterpFilterParams* filter_params_y,
2106 const int subpel_x_qn,
2107 const int subpel_y_qn,
2108 ConvolveParams* conv_params,
2109 int bd);
2110 void av1_highbd_dist_wtd_convolve_y_avx2(
2111 const uint16_t* src,
2112 int src_stride,
2113 uint16_t* dst,
2114 int dst_stride,
2115 int w,
2116 int h,
2117 const InterpFilterParams* filter_params_x,
2118 const InterpFilterParams* filter_params_y,
2119 const int subpel_x_qn,
2120 const int subpel_y_qn,
2121 ConvolveParams* conv_params,
2122 int bd);
2123 RTCD_EXTERN void (*av1_highbd_dist_wtd_convolve_y)(
2124 const uint16_t* src,
2125 int src_stride,
2126 uint16_t* dst,
2127 int dst_stride,
2128 int w,
2129 int h,
2130 const InterpFilterParams* filter_params_x,
2131 const InterpFilterParams* filter_params_y,
2132 const int subpel_x_qn,
2133 const int subpel_y_qn,
2134 ConvolveParams* conv_params,
2135 int bd);
2136
2137 void av1_highbd_dr_prediction_z1_c(uint16_t* dst,
2138 ptrdiff_t stride,
2139 int bw,
2140 int bh,
2141 const uint16_t* above,
2142 const uint16_t* left,
2143 int upsample_above,
2144 int dx,
2145 int dy,
2146 int bd);
2147 void av1_highbd_dr_prediction_z1_avx2(uint16_t* dst,
2148 ptrdiff_t stride,
2149 int bw,
2150 int bh,
2151 const uint16_t* above,
2152 const uint16_t* left,
2153 int upsample_above,
2154 int dx,
2155 int dy,
2156 int bd);
2157 RTCD_EXTERN void (*av1_highbd_dr_prediction_z1)(uint16_t* dst,
2158 ptrdiff_t stride,
2159 int bw,
2160 int bh,
2161 const uint16_t* above,
2162 const uint16_t* left,
2163 int upsample_above,
2164 int dx,
2165 int dy,
2166 int bd);
2167
2168 void av1_highbd_dr_prediction_z2_c(uint16_t* dst,
2169 ptrdiff_t stride,
2170 int bw,
2171 int bh,
2172 const uint16_t* above,
2173 const uint16_t* left,
2174 int upsample_above,
2175 int upsample_left,
2176 int dx,
2177 int dy,
2178 int bd);
2179 void av1_highbd_dr_prediction_z2_avx2(uint16_t* dst,
2180 ptrdiff_t stride,
2181 int bw,
2182 int bh,
2183 const uint16_t* above,
2184 const uint16_t* left,
2185 int upsample_above,
2186 int upsample_left,
2187 int dx,
2188 int dy,
2189 int bd);
2190 RTCD_EXTERN void (*av1_highbd_dr_prediction_z2)(uint16_t* dst,
2191 ptrdiff_t stride,
2192 int bw,
2193 int bh,
2194 const uint16_t* above,
2195 const uint16_t* left,
2196 int upsample_above,
2197 int upsample_left,
2198 int dx,
2199 int dy,
2200 int bd);
2201
2202 void av1_highbd_dr_prediction_z3_c(uint16_t* dst,
2203 ptrdiff_t stride,
2204 int bw,
2205 int bh,
2206 const uint16_t* above,
2207 const uint16_t* left,
2208 int upsample_left,
2209 int dx,
2210 int dy,
2211 int bd);
2212 void av1_highbd_dr_prediction_z3_avx2(uint16_t* dst,
2213 ptrdiff_t stride,
2214 int bw,
2215 int bh,
2216 const uint16_t* above,
2217 const uint16_t* left,
2218 int upsample_left,
2219 int dx,
2220 int dy,
2221 int bd);
2222 RTCD_EXTERN void (*av1_highbd_dr_prediction_z3)(uint16_t* dst,
2223 ptrdiff_t stride,
2224 int bw,
2225 int bh,
2226 const uint16_t* above,
2227 const uint16_t* left,
2228 int upsample_left,
2229 int dx,
2230 int dy,
2231 int bd);
2232
2233 void av1_highbd_fwht4x4_c(const int16_t* input, tran_low_t* output, int stride);
2234 #define av1_highbd_fwht4x4 av1_highbd_fwht4x4_c
2235
2236 void av1_highbd_inv_txfm_add_c(const tran_low_t* input,
2237 uint8_t* dest,
2238 int stride,
2239 const TxfmParam* txfm_param);
2240 void av1_highbd_inv_txfm_add_sse4_1(const tran_low_t* input,
2241 uint8_t* dest,
2242 int stride,
2243 const TxfmParam* txfm_param);
2244 void av1_highbd_inv_txfm_add_avx2(const tran_low_t* input,
2245 uint8_t* dest,
2246 int stride,
2247 const TxfmParam* txfm_param);
2248 RTCD_EXTERN void (*av1_highbd_inv_txfm_add)(const tran_low_t* input,
2249 uint8_t* dest,
2250 int stride,
2251 const TxfmParam* txfm_param);
2252
2253 void av1_highbd_inv_txfm_add_16x4_c(const tran_low_t* input,
2254 uint8_t* dest,
2255 int stride,
2256 const TxfmParam* txfm_param);
2257 void av1_highbd_inv_txfm_add_16x4_sse4_1(const tran_low_t* input,
2258 uint8_t* dest,
2259 int stride,
2260 const TxfmParam* txfm_param);
2261 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x4)(const tran_low_t* input,
2262 uint8_t* dest,
2263 int stride,
2264 const TxfmParam* txfm_param);
2265
2266 void av1_highbd_inv_txfm_add_4x16_c(const tran_low_t* input,
2267 uint8_t* dest,
2268 int stride,
2269 const TxfmParam* txfm_param);
2270 void av1_highbd_inv_txfm_add_4x16_sse4_1(const tran_low_t* input,
2271 uint8_t* dest,
2272 int stride,
2273 const TxfmParam* txfm_param);
2274 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x16)(const tran_low_t* input,
2275 uint8_t* dest,
2276 int stride,
2277 const TxfmParam* txfm_param);
2278
2279 void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t* input,
2280 uint8_t* dest,
2281 int stride,
2282 const TxfmParam* txfm_param);
2283 void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t* input,
2284 uint8_t* dest,
2285 int stride,
2286 const TxfmParam* txfm_param);
2287 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x4)(const tran_low_t* input,
2288 uint8_t* dest,
2289 int stride,
2290 const TxfmParam* txfm_param);
2291
2292 void av1_highbd_inv_txfm_add_4x8_c(const tran_low_t* input,
2293 uint8_t* dest,
2294 int stride,
2295 const TxfmParam* txfm_param);
2296 void av1_highbd_inv_txfm_add_4x8_sse4_1(const tran_low_t* input,
2297 uint8_t* dest,
2298 int stride,
2299 const TxfmParam* txfm_param);
2300 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x8)(const tran_low_t* input,
2301 uint8_t* dest,
2302 int stride,
2303 const TxfmParam* txfm_param);
2304
2305 void av1_highbd_inv_txfm_add_8x4_c(const tran_low_t* input,
2306 uint8_t* dest,
2307 int stride,
2308 const TxfmParam* txfm_param);
2309 void av1_highbd_inv_txfm_add_8x4_sse4_1(const tran_low_t* input,
2310 uint8_t* dest,
2311 int stride,
2312 const TxfmParam* txfm_param);
2313 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x4)(const tran_low_t* input,
2314 uint8_t* dest,
2315 int stride,
2316 const TxfmParam* txfm_param);
2317
2318 void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t* input,
2319 uint8_t* dest,
2320 int stride,
2321 const TxfmParam* txfm_param);
2322 void av1_highbd_inv_txfm_add_8x8_sse4_1(const tran_low_t* input,
2323 uint8_t* dest,
2324 int stride,
2325 const TxfmParam* txfm_param);
2326 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x8)(const tran_low_t* input,
2327 uint8_t* dest,
2328 int stride,
2329 const TxfmParam* txfm_param);
2330
2331 void av1_highbd_iwht4x4_16_add_c(const tran_low_t* input,
2332 uint8_t* dest,
2333 int dest_stride,
2334 int bd);
2335 #define av1_highbd_iwht4x4_16_add av1_highbd_iwht4x4_16_add_c
2336
2337 void av1_highbd_iwht4x4_1_add_c(const tran_low_t* input,
2338 uint8_t* dest,
2339 int dest_stride,
2340 int bd);
2341 #define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
2342
2343 int64_t av1_highbd_pixel_proj_error_c(const uint8_t* src8,
2344 int width,
2345 int height,
2346 int src_stride,
2347 const uint8_t* dat8,
2348 int dat_stride,
2349 int32_t* flt0,
2350 int flt0_stride,
2351 int32_t* flt1,
2352 int flt1_stride,
2353 int xq[2],
2354 const sgr_params_type* params);
2355 int64_t av1_highbd_pixel_proj_error_sse4_1(const uint8_t* src8,
2356 int width,
2357 int height,
2358 int src_stride,
2359 const uint8_t* dat8,
2360 int dat_stride,
2361 int32_t* flt0,
2362 int flt0_stride,
2363 int32_t* flt1,
2364 int flt1_stride,
2365 int xq[2],
2366 const sgr_params_type* params);
2367 int64_t av1_highbd_pixel_proj_error_avx2(const uint8_t* src8,
2368 int width,
2369 int height,
2370 int src_stride,
2371 const uint8_t* dat8,
2372 int dat_stride,
2373 int32_t* flt0,
2374 int flt0_stride,
2375 int32_t* flt1,
2376 int flt1_stride,
2377 int xq[2],
2378 const sgr_params_type* params);
2379 RTCD_EXTERN int64_t (*av1_highbd_pixel_proj_error)(
2380 const uint8_t* src8,
2381 int width,
2382 int height,
2383 int src_stride,
2384 const uint8_t* dat8,
2385 int dat_stride,
2386 int32_t* flt0,
2387 int flt0_stride,
2388 int32_t* flt1,
2389 int flt1_stride,
2390 int xq[2],
2391 const sgr_params_type* params);
2392
/* av1_highbd_quantize_fp: prototypes for the _c, _sse4_1 and _avx2 variants
 * plus the RTCD_EXTERN function pointer with the same signature.
 * The parameter list matches av1_quantize_fp with one extra trailing
 * argument, log_scale. qcoeff_ptr, dqcoeff_ptr and eob_ptr are the only
 * non-const pointers in the list. */
2393 void av1_highbd_quantize_fp_c(const tran_low_t* coeff_ptr,
2394 intptr_t n_coeffs,
2395 const int16_t* zbin_ptr,
2396 const int16_t* round_ptr,
2397 const int16_t* quant_ptr,
2398 const int16_t* quant_shift_ptr,
2399 tran_low_t* qcoeff_ptr,
2400 tran_low_t* dqcoeff_ptr,
2401 const int16_t* dequant_ptr,
2402 uint16_t* eob_ptr,
2403 const int16_t* scan,
2404 const int16_t* iscan,
2405 int log_scale);
2406 void av1_highbd_quantize_fp_sse4_1(const tran_low_t* coeff_ptr,
2407 intptr_t n_coeffs,
2408 const int16_t* zbin_ptr,
2409 const int16_t* round_ptr,
2410 const int16_t* quant_ptr,
2411 const int16_t* quant_shift_ptr,
2412 tran_low_t* qcoeff_ptr,
2413 tran_low_t* dqcoeff_ptr,
2414 const int16_t* dequant_ptr,
2415 uint16_t* eob_ptr,
2416 const int16_t* scan,
2417 const int16_t* iscan,
2418 int log_scale);
2419 void av1_highbd_quantize_fp_avx2(const tran_low_t* coeff_ptr,
2420 intptr_t n_coeffs,
2421 const int16_t* zbin_ptr,
2422 const int16_t* round_ptr,
2423 const int16_t* quant_ptr,
2424 const int16_t* quant_shift_ptr,
2425 tran_low_t* qcoeff_ptr,
2426 tran_low_t* dqcoeff_ptr,
2427 const int16_t* dequant_ptr,
2428 uint16_t* eob_ptr,
2429 const int16_t* scan,
2430 const int16_t* iscan,
2431 int log_scale);
2432 RTCD_EXTERN void (*av1_highbd_quantize_fp)(const tran_low_t* coeff_ptr,
2433 intptr_t n_coeffs,
2434 const int16_t* zbin_ptr,
2435 const int16_t* round_ptr,
2436 const int16_t* quant_ptr,
2437 const int16_t* quant_shift_ptr,
2438 tran_low_t* qcoeff_ptr,
2439 tran_low_t* dqcoeff_ptr,
2440 const int16_t* dequant_ptr,
2441 uint16_t* eob_ptr,
2442 const int16_t* scan,
2443 const int16_t* iscan,
2444 int log_scale);
2445
/* av1_highbd_warp_affine: prototypes for the _c and _sse4_1 variants plus the
 * RTCD_EXTERN function pointer with the same signature (no _avx2 variant is
 * declared for this one).
 * Compared with av1_warp_affine, ref/pred are uint16_t* instead of uint8_t*
 * and there is an extra `bd` argument before conv_params. */
2446 void av1_highbd_warp_affine_c(const int32_t* mat,
2447 const uint16_t* ref,
2448 int width,
2449 int height,
2450 int stride,
2451 uint16_t* pred,
2452 int p_col,
2453 int p_row,
2454 int p_width,
2455 int p_height,
2456 int p_stride,
2457 int subsampling_x,
2458 int subsampling_y,
2459 int bd,
2460 ConvolveParams* conv_params,
2461 int16_t alpha,
2462 int16_t beta,
2463 int16_t gamma,
2464 int16_t delta);
2465 void av1_highbd_warp_affine_sse4_1(const int32_t* mat,
2466 const uint16_t* ref,
2467 int width,
2468 int height,
2469 int stride,
2470 uint16_t* pred,
2471 int p_col,
2472 int p_row,
2473 int p_width,
2474 int p_height,
2475 int p_stride,
2476 int subsampling_x,
2477 int subsampling_y,
2478 int bd,
2479 ConvolveParams* conv_params,
2480 int16_t alpha,
2481 int16_t beta,
2482 int16_t gamma,
2483 int16_t delta);
2484 RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t* mat,
2485 const uint16_t* ref,
2486 int width,
2487 int height,
2488 int stride,
2489 uint16_t* pred,
2490 int p_col,
2491 int p_row,
2492 int p_width,
2493 int p_height,
2494 int p_stride,
2495 int subsampling_x,
2496 int subsampling_y,
2497 int bd,
2498 ConvolveParams* conv_params,
2499 int16_t alpha,
2500 int16_t beta,
2501 int16_t gamma,
2502 int16_t delta);
2503
/* av1_highbd_wiener_convolve_add_src: prototypes for the _c, _ssse3 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * The parameter list is that of av1_wiener_convolve_add_src with one extra
 * trailing argument, bd. Strides are ptrdiff_t here, not int.
 * NOTE(review): src/dst are typed uint8_t* despite the highbd name -- confirm
 * how callers pass high-bit-depth buffers. */
2504 void av1_highbd_wiener_convolve_add_src_c(const uint8_t* src,
2505 ptrdiff_t src_stride,
2506 uint8_t* dst,
2507 ptrdiff_t dst_stride,
2508 const int16_t* filter_x,
2509 int x_step_q4,
2510 const int16_t* filter_y,
2511 int y_step_q4,
2512 int w,
2513 int h,
2514 const ConvolveParams* conv_params,
2515 int bd);
2516 void av1_highbd_wiener_convolve_add_src_ssse3(const uint8_t* src,
2517 ptrdiff_t src_stride,
2518 uint8_t* dst,
2519 ptrdiff_t dst_stride,
2520 const int16_t* filter_x,
2521 int x_step_q4,
2522 const int16_t* filter_y,
2523 int y_step_q4,
2524 int w,
2525 int h,
2526 const ConvolveParams* conv_params,
2527 int bd);
2528 void av1_highbd_wiener_convolve_add_src_avx2(const uint8_t* src,
2529 ptrdiff_t src_stride,
2530 uint8_t* dst,
2531 ptrdiff_t dst_stride,
2532 const int16_t* filter_x,
2533 int x_step_q4,
2534 const int16_t* filter_y,
2535 int y_step_q4,
2536 int w,
2537 int h,
2538 const ConvolveParams* conv_params,
2539 int bd);
2540 RTCD_EXTERN void (*av1_highbd_wiener_convolve_add_src)(
2541 const uint8_t* src,
2542 ptrdiff_t src_stride,
2543 uint8_t* dst,
2544 ptrdiff_t dst_stride,
2545 const int16_t* filter_x,
2546 int x_step_q4,
2547 const int16_t* filter_y,
2548 int y_step_q4,
2549 int w,
2550 int h,
2551 const ConvolveParams* conv_params,
2552 int bd);
2553
/* av1_inv_txfm2d_add_16x16: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer).
 * Takes int32_t input and a uint16_t output buffer. */
2554 void av1_inv_txfm2d_add_16x16_c(const int32_t* input,
2555 uint16_t* output,
2556 int stride,
2557 TX_TYPE tx_type,
2558 int bd);
2559 #define av1_inv_txfm2d_add_16x16 av1_inv_txfm2d_add_16x16_c
2560
/* av1_inv_txfm2d_add_16x32: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2561 void av1_inv_txfm2d_add_16x32_c(const int32_t* input,
2562 uint16_t* output,
2563 int stride,
2564 TX_TYPE tx_type,
2565 int bd);
2566 #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c
2567
/* av1_inv_txfm2d_add_16x4: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2568 void av1_inv_txfm2d_add_16x4_c(const int32_t* input,
2569 uint16_t* output,
2570 int stride,
2571 TX_TYPE tx_type,
2572 int bd);
2573 #define av1_inv_txfm2d_add_16x4 av1_inv_txfm2d_add_16x4_c
2574
/* av1_inv_txfm2d_add_16x64: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2575 void av1_inv_txfm2d_add_16x64_c(const int32_t* input,
2576 uint16_t* output,
2577 int stride,
2578 TX_TYPE tx_type,
2579 int bd);
2580 #define av1_inv_txfm2d_add_16x64 av1_inv_txfm2d_add_16x64_c
2581
/* av1_inv_txfm2d_add_16x8: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2582 void av1_inv_txfm2d_add_16x8_c(const int32_t* input,
2583 uint16_t* output,
2584 int stride,
2585 TX_TYPE tx_type,
2586 int bd);
2587 #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c
2588
/* av1_inv_txfm2d_add_32x16: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2589 void av1_inv_txfm2d_add_32x16_c(const int32_t* input,
2590 uint16_t* output,
2591 int stride,
2592 TX_TYPE tx_type,
2593 int bd);
2594 #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c
2595
/* av1_inv_txfm2d_add_32x32: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2596 void av1_inv_txfm2d_add_32x32_c(const int32_t* input,
2597 uint16_t* output,
2598 int stride,
2599 TX_TYPE tx_type,
2600 int bd);
2601 #define av1_inv_txfm2d_add_32x32 av1_inv_txfm2d_add_32x32_c
2602
/* av1_inv_txfm2d_add_32x64: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2603 void av1_inv_txfm2d_add_32x64_c(const int32_t* input,
2604 uint16_t* output,
2605 int stride,
2606 TX_TYPE tx_type,
2607 int bd);
2608 #define av1_inv_txfm2d_add_32x64 av1_inv_txfm2d_add_32x64_c
2609
/* av1_inv_txfm2d_add_32x8: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2610 void av1_inv_txfm2d_add_32x8_c(const int32_t* input,
2611 uint16_t* output,
2612 int stride,
2613 TX_TYPE tx_type,
2614 int bd);
2615 #define av1_inv_txfm2d_add_32x8 av1_inv_txfm2d_add_32x8_c
2616
/* av1_inv_txfm2d_add_4x16: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2617 void av1_inv_txfm2d_add_4x16_c(const int32_t* input,
2618 uint16_t* output,
2619 int stride,
2620 TX_TYPE tx_type,
2621 int bd);
2622 #define av1_inv_txfm2d_add_4x16 av1_inv_txfm2d_add_4x16_c
2623
/* av1_inv_txfm2d_add_4x4: prototypes for the _c and _sse4_1 variants plus the
 * RTCD_EXTERN function pointer with the same signature. Unlike most of the
 * other av1_inv_txfm2d_add_* sizes in this header, this one goes through a
 * function pointer rather than a macro alias. */
2624 void av1_inv_txfm2d_add_4x4_c(const int32_t* input,
2625 uint16_t* output,
2626 int stride,
2627 TX_TYPE tx_type,
2628 int bd);
2629 void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t* input,
2630 uint16_t* output,
2631 int stride,
2632 TX_TYPE tx_type,
2633 int bd);
2634 RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t* input,
2635 uint16_t* output,
2636 int stride,
2637 TX_TYPE tx_type,
2638 int bd);
2639
/* av1_inv_txfm2d_add_4x8: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2640 void av1_inv_txfm2d_add_4x8_c(const int32_t* input,
2641 uint16_t* output,
2642 int stride,
2643 TX_TYPE tx_type,
2644 int bd);
2645 #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c
2646
/* av1_inv_txfm2d_add_64x16: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2647 void av1_inv_txfm2d_add_64x16_c(const int32_t* input,
2648 uint16_t* output,
2649 int stride,
2650 TX_TYPE tx_type,
2651 int bd);
2652 #define av1_inv_txfm2d_add_64x16 av1_inv_txfm2d_add_64x16_c
2653
/* av1_inv_txfm2d_add_64x32: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2654 void av1_inv_txfm2d_add_64x32_c(const int32_t* input,
2655 uint16_t* output,
2656 int stride,
2657 TX_TYPE tx_type,
2658 int bd);
2659 #define av1_inv_txfm2d_add_64x32 av1_inv_txfm2d_add_64x32_c
2660
/* av1_inv_txfm2d_add_64x64: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2661 void av1_inv_txfm2d_add_64x64_c(const int32_t* input,
2662 uint16_t* output,
2663 int stride,
2664 TX_TYPE tx_type,
2665 int bd);
2666 #define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c
2667
/* av1_inv_txfm2d_add_8x16: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2668 void av1_inv_txfm2d_add_8x16_c(const int32_t* input,
2669 uint16_t* output,
2670 int stride,
2671 TX_TYPE tx_type,
2672 int bd);
2673 #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c
2674
/* av1_inv_txfm2d_add_8x32: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2675 void av1_inv_txfm2d_add_8x32_c(const int32_t* input,
2676 uint16_t* output,
2677 int stride,
2678 TX_TYPE tx_type,
2679 int bd);
2680 #define av1_inv_txfm2d_add_8x32 av1_inv_txfm2d_add_8x32_c
2681
/* av1_inv_txfm2d_add_8x4: only the _c variant is declared; the unsuffixed
 * name is a macro alias for it (compile-time binding, no function pointer). */
2682 void av1_inv_txfm2d_add_8x4_c(const int32_t* input,
2683 uint16_t* output,
2684 int stride,
2685 TX_TYPE tx_type,
2686 int bd);
2687 #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c
2688
/* av1_inv_txfm2d_add_8x8: prototypes for the _c and _sse4_1 variants plus the
 * RTCD_EXTERN function pointer with the same signature. Like the 4x4 size,
 * this one goes through a function pointer rather than a macro alias. */
2689 void av1_inv_txfm2d_add_8x8_c(const int32_t* input,
2690 uint16_t* output,
2691 int stride,
2692 TX_TYPE tx_type,
2693 int bd);
2694 void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t* input,
2695 uint16_t* output,
2696 int stride,
2697 TX_TYPE tx_type,
2698 int bd);
2699 RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t* input,
2700 uint16_t* output,
2701 int stride,
2702 TX_TYPE tx_type,
2703 int bd);
2704
/* av1_inv_txfm_add: prototypes for the _c, _ssse3 and _avx2 variants plus the
 * RTCD_EXTERN function pointer with the same signature. The block size and
 * transform type are not separate arguments here; everything besides the
 * coefficient and destination buffers arrives through the const TxfmParam. */
2705 void av1_inv_txfm_add_c(const tran_low_t* dqcoeff,
2706 uint8_t* dst,
2707 int stride,
2708 const TxfmParam* txfm_param);
2709 void av1_inv_txfm_add_ssse3(const tran_low_t* dqcoeff,
2710 uint8_t* dst,
2711 int stride,
2712 const TxfmParam* txfm_param);
2713 void av1_inv_txfm_add_avx2(const tran_low_t* dqcoeff,
2714 uint8_t* dst,
2715 int stride,
2716 const TxfmParam* txfm_param);
2717 RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t* dqcoeff,
2718 uint8_t* dst,
2719 int stride,
2720 const TxfmParam* txfm_param);
2721
/* av1_lowbd_fwd_txfm: prototypes for the _c, _sse2, _sse4_1 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * Note that txfm_param is a non-const TxfmParam* here, whereas
 * av1_inv_txfm_add takes it as const. */
2722 void av1_lowbd_fwd_txfm_c(const int16_t* src_diff,
2723 tran_low_t* coeff,
2724 int diff_stride,
2725 TxfmParam* txfm_param);
2726 void av1_lowbd_fwd_txfm_sse2(const int16_t* src_diff,
2727 tran_low_t* coeff,
2728 int diff_stride,
2729 TxfmParam* txfm_param);
2730 void av1_lowbd_fwd_txfm_sse4_1(const int16_t* src_diff,
2731 tran_low_t* coeff,
2732 int diff_stride,
2733 TxfmParam* txfm_param);
2734 void av1_lowbd_fwd_txfm_avx2(const int16_t* src_diff,
2735 tran_low_t* coeff,
2736 int diff_stride,
2737 TxfmParam* txfm_param);
2738 RTCD_EXTERN void (*av1_lowbd_fwd_txfm)(const int16_t* src_diff,
2739 tran_low_t* coeff,
2740 int diff_stride,
2741 TxfmParam* txfm_param);
2742
/* av1_lowbd_pixel_proj_error: prototypes for the _c, _sse4_1 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * All return int64_t. `int xq[2]` in a parameter list is adjusted to `int*`
 * by the language, so the [2] is documentation only and is not enforced.
 * The parameter list is identical to av1_highbd_pixel_proj_error. */
2743 int64_t av1_lowbd_pixel_proj_error_c(const uint8_t* src8,
2744 int width,
2745 int height,
2746 int src_stride,
2747 const uint8_t* dat8,
2748 int dat_stride,
2749 int32_t* flt0,
2750 int flt0_stride,
2751 int32_t* flt1,
2752 int flt1_stride,
2753 int xq[2],
2754 const sgr_params_type* params);
2755 int64_t av1_lowbd_pixel_proj_error_sse4_1(const uint8_t* src8,
2756 int width,
2757 int height,
2758 int src_stride,
2759 const uint8_t* dat8,
2760 int dat_stride,
2761 int32_t* flt0,
2762 int flt0_stride,
2763 int32_t* flt1,
2764 int flt1_stride,
2765 int xq[2],
2766 const sgr_params_type* params);
2767 int64_t av1_lowbd_pixel_proj_error_avx2(const uint8_t* src8,
2768 int width,
2769 int height,
2770 int src_stride,
2771 const uint8_t* dat8,
2772 int dat_stride,
2773 int32_t* flt0,
2774 int flt0_stride,
2775 int32_t* flt1,
2776 int flt1_stride,
2777 int xq[2],
2778 const sgr_params_type* params);
2779 RTCD_EXTERN int64_t (*av1_lowbd_pixel_proj_error)(
2780 const uint8_t* src8,
2781 int width,
2782 int height,
2783 int src_stride,
2784 const uint8_t* dat8,
2785 int dat_stride,
2786 int32_t* flt0,
2787 int flt0_stride,
2788 int32_t* flt1,
2789 int flt1_stride,
2790 int xq[2],
2791 const sgr_params_type* params);
2792
/* av1_nn_predict: prototypes for the _c and _sse3 variants plus the
 * RTCD_EXTERN function pointer with the same signature.
 * NN_CONFIG is only forward-declared in this header, so its layout must come
 * from another header before a caller can build one. The trailing `const` on
 * nn_config and output qualifies the parameter itself and does not affect
 * the function's type as seen by callers. */
2793 void av1_nn_predict_c(const float* input_nodes,
2794 const NN_CONFIG* const nn_config,
2795 int reduce_prec,
2796 float* const output);
2797 void av1_nn_predict_sse3(const float* input_nodes,
2798 const NN_CONFIG* const nn_config,
2799 int reduce_prec,
2800 float* const output);
2801 RTCD_EXTERN void (*av1_nn_predict)(const float* input_nodes,
2802 const NN_CONFIG* const nn_config,
2803 int reduce_prec,
2804 float* const output);
2805
/* av1_quantize_b: only the _c variant is declared; the unsuffixed name is a
 * macro alias for it (compile-time binding, no function pointer).
 * Compared with av1_quantize_fp it takes three extra trailing arguments:
 * qm_ptr, iqm_ptr (both const qm_val_t*) and log_scale. */
2806 void av1_quantize_b_c(const tran_low_t* coeff_ptr,
2807 intptr_t n_coeffs,
2808 const int16_t* zbin_ptr,
2809 const int16_t* round_ptr,
2810 const int16_t* quant_ptr,
2811 const int16_t* quant_shift_ptr,
2812 tran_low_t* qcoeff_ptr,
2813 tran_low_t* dqcoeff_ptr,
2814 const int16_t* dequant_ptr,
2815 uint16_t* eob_ptr,
2816 const int16_t* scan,
2817 const int16_t* iscan,
2818 const qm_val_t* qm_ptr,
2819 const qm_val_t* iqm_ptr,
2820 int log_scale);
2821 #define av1_quantize_b av1_quantize_b_c
2822
/* av1_quantize_fp: prototypes for the _c, _sse2 and _avx2 variants plus the
 * RTCD_EXTERN function pointer with the same signature.
 * qcoeff_ptr, dqcoeff_ptr and eob_ptr are the only non-const pointers in the
 * list. av1_quantize_fp_32x32 and av1_quantize_fp_64x64 share this exact
 * parameter list. */
2823 void av1_quantize_fp_c(const tran_low_t* coeff_ptr,
2824 intptr_t n_coeffs,
2825 const int16_t* zbin_ptr,
2826 const int16_t* round_ptr,
2827 const int16_t* quant_ptr,
2828 const int16_t* quant_shift_ptr,
2829 tran_low_t* qcoeff_ptr,
2830 tran_low_t* dqcoeff_ptr,
2831 const int16_t* dequant_ptr,
2832 uint16_t* eob_ptr,
2833 const int16_t* scan,
2834 const int16_t* iscan);
2835 void av1_quantize_fp_sse2(const tran_low_t* coeff_ptr,
2836 intptr_t n_coeffs,
2837 const int16_t* zbin_ptr,
2838 const int16_t* round_ptr,
2839 const int16_t* quant_ptr,
2840 const int16_t* quant_shift_ptr,
2841 tran_low_t* qcoeff_ptr,
2842 tran_low_t* dqcoeff_ptr,
2843 const int16_t* dequant_ptr,
2844 uint16_t* eob_ptr,
2845 const int16_t* scan,
2846 const int16_t* iscan);
2847 void av1_quantize_fp_avx2(const tran_low_t* coeff_ptr,
2848 intptr_t n_coeffs,
2849 const int16_t* zbin_ptr,
2850 const int16_t* round_ptr,
2851 const int16_t* quant_ptr,
2852 const int16_t* quant_shift_ptr,
2853 tran_low_t* qcoeff_ptr,
2854 tran_low_t* dqcoeff_ptr,
2855 const int16_t* dequant_ptr,
2856 uint16_t* eob_ptr,
2857 const int16_t* scan,
2858 const int16_t* iscan);
2859 RTCD_EXTERN void (*av1_quantize_fp)(const tran_low_t* coeff_ptr,
2860 intptr_t n_coeffs,
2861 const int16_t* zbin_ptr,
2862 const int16_t* round_ptr,
2863 const int16_t* quant_ptr,
2864 const int16_t* quant_shift_ptr,
2865 tran_low_t* qcoeff_ptr,
2866 tran_low_t* dqcoeff_ptr,
2867 const int16_t* dequant_ptr,
2868 uint16_t* eob_ptr,
2869 const int16_t* scan,
2870 const int16_t* iscan);
2871
/* av1_quantize_fp_32x32: prototypes for the _c and _avx2 variants plus the
 * RTCD_EXTERN function pointer with the same signature (no _sse2 variant is
 * declared, unlike av1_quantize_fp). Parameter list is identical to
 * av1_quantize_fp. */
2872 void av1_quantize_fp_32x32_c(const tran_low_t* coeff_ptr,
2873 intptr_t n_coeffs,
2874 const int16_t* zbin_ptr,
2875 const int16_t* round_ptr,
2876 const int16_t* quant_ptr,
2877 const int16_t* quant_shift_ptr,
2878 tran_low_t* qcoeff_ptr,
2879 tran_low_t* dqcoeff_ptr,
2880 const int16_t* dequant_ptr,
2881 uint16_t* eob_ptr,
2882 const int16_t* scan,
2883 const int16_t* iscan);
2884 void av1_quantize_fp_32x32_avx2(const tran_low_t* coeff_ptr,
2885 intptr_t n_coeffs,
2886 const int16_t* zbin_ptr,
2887 const int16_t* round_ptr,
2888 const int16_t* quant_ptr,
2889 const int16_t* quant_shift_ptr,
2890 tran_low_t* qcoeff_ptr,
2891 tran_low_t* dqcoeff_ptr,
2892 const int16_t* dequant_ptr,
2893 uint16_t* eob_ptr,
2894 const int16_t* scan,
2895 const int16_t* iscan);
2896 RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t* coeff_ptr,
2897 intptr_t n_coeffs,
2898 const int16_t* zbin_ptr,
2899 const int16_t* round_ptr,
2900 const int16_t* quant_ptr,
2901 const int16_t* quant_shift_ptr,
2902 tran_low_t* qcoeff_ptr,
2903 tran_low_t* dqcoeff_ptr,
2904 const int16_t* dequant_ptr,
2905 uint16_t* eob_ptr,
2906 const int16_t* scan,
2907 const int16_t* iscan);
2908
/* av1_quantize_fp_64x64: prototypes for the _c and _avx2 variants plus the
 * RTCD_EXTERN function pointer with the same signature. Parameter list is
 * identical to av1_quantize_fp and av1_quantize_fp_32x32. */
2909 void av1_quantize_fp_64x64_c(const tran_low_t* coeff_ptr,
2910 intptr_t n_coeffs,
2911 const int16_t* zbin_ptr,
2912 const int16_t* round_ptr,
2913 const int16_t* quant_ptr,
2914 const int16_t* quant_shift_ptr,
2915 tran_low_t* qcoeff_ptr,
2916 tran_low_t* dqcoeff_ptr,
2917 const int16_t* dequant_ptr,
2918 uint16_t* eob_ptr,
2919 const int16_t* scan,
2920 const int16_t* iscan);
2921 void av1_quantize_fp_64x64_avx2(const tran_low_t* coeff_ptr,
2922 intptr_t n_coeffs,
2923 const int16_t* zbin_ptr,
2924 const int16_t* round_ptr,
2925 const int16_t* quant_ptr,
2926 const int16_t* quant_shift_ptr,
2927 tran_low_t* qcoeff_ptr,
2928 tran_low_t* dqcoeff_ptr,
2929 const int16_t* dequant_ptr,
2930 uint16_t* eob_ptr,
2931 const int16_t* scan,
2932 const int16_t* iscan);
2933 RTCD_EXTERN void (*av1_quantize_fp_64x64)(const tran_low_t* coeff_ptr,
2934 intptr_t n_coeffs,
2935 const int16_t* zbin_ptr,
2936 const int16_t* round_ptr,
2937 const int16_t* quant_ptr,
2938 const int16_t* quant_shift_ptr,
2939 tran_low_t* qcoeff_ptr,
2940 tran_low_t* dqcoeff_ptr,
2941 const int16_t* dequant_ptr,
2942 uint16_t* eob_ptr,
2943 const int16_t* scan,
2944 const int16_t* iscan);
2945
/* av1_quantize_lp: prototypes for the _c and _avx2 variants plus the
 * RTCD_EXTERN function pointer with the same signature.
 * Differs from av1_quantize_fp in that the coefficient buffers are int16_t
 * rather than tran_low_t, and there are no zbin_ptr, quant_shift_ptr or
 * iscan arguments. */
2946 void av1_quantize_lp_c(const int16_t* coeff_ptr,
2947 intptr_t n_coeffs,
2948 const int16_t* round_ptr,
2949 const int16_t* quant_ptr,
2950 int16_t* qcoeff_ptr,
2951 int16_t* dqcoeff_ptr,
2952 const int16_t* dequant_ptr,
2953 uint16_t* eob_ptr,
2954 const int16_t* scan);
2955 void av1_quantize_lp_avx2(const int16_t* coeff_ptr,
2956 intptr_t n_coeffs,
2957 const int16_t* round_ptr,
2958 const int16_t* quant_ptr,
2959 int16_t* qcoeff_ptr,
2960 int16_t* dqcoeff_ptr,
2961 const int16_t* dequant_ptr,
2962 uint16_t* eob_ptr,
2963 const int16_t* scan);
2964 RTCD_EXTERN void (*av1_quantize_lp)(const int16_t* coeff_ptr,
2965 intptr_t n_coeffs,
2966 const int16_t* round_ptr,
2967 const int16_t* quant_ptr,
2968 int16_t* qcoeff_ptr,
2969 int16_t* dqcoeff_ptr,
2970 const int16_t* dequant_ptr,
2971 uint16_t* eob_ptr,
2972 const int16_t* scan);
2973
/* av1_round_shift_array: prototypes for the _c and _sse4_1 variants plus the
 * RTCD_EXTERN function pointer with the same signature. arr is a non-const
 * int32_t*, so the array may be modified through it. */
2974 void av1_round_shift_array_c(int32_t* arr, int size, int bit);
2975 void av1_round_shift_array_sse4_1(int32_t* arr, int size, int bit);
2976 RTCD_EXTERN void (*av1_round_shift_array)(int32_t* arr, int size, int bit);
2977
/* av1_selfguided_restoration: prototypes for the _c, _sse4_1 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * Returns int. flt0 and flt1 are non-const and share a single flt_stride.
 * NOTE(review): the meaning of the int return value is not visible in this
 * header -- confirm against the implementation before relying on it. */
2978 int av1_selfguided_restoration_c(const uint8_t* dgd8,
2979 int width,
2980 int height,
2981 int dgd_stride,
2982 int32_t* flt0,
2983 int32_t* flt1,
2984 int flt_stride,
2985 int sgr_params_idx,
2986 int bit_depth,
2987 int highbd);
2988 int av1_selfguided_restoration_sse4_1(const uint8_t* dgd8,
2989 int width,
2990 int height,
2991 int dgd_stride,
2992 int32_t* flt0,
2993 int32_t* flt1,
2994 int flt_stride,
2995 int sgr_params_idx,
2996 int bit_depth,
2997 int highbd);
2998 int av1_selfguided_restoration_avx2(const uint8_t* dgd8,
2999 int width,
3000 int height,
3001 int dgd_stride,
3002 int32_t* flt0,
3003 int32_t* flt1,
3004 int flt_stride,
3005 int sgr_params_idx,
3006 int bit_depth,
3007 int highbd);
3008 RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t* dgd8,
3009 int width,
3010 int height,
3011 int dgd_stride,
3012 int32_t* flt0,
3013 int32_t* flt1,
3014 int flt_stride,
3015 int sgr_params_idx,
3016 int bit_depth,
3017 int highbd);
3018
/* av1_txb_init_levels: prototypes for the _c, _sse4_1 and _avx2 variants plus
 * the RTCD_EXTERN function pointer with the same signature.
 * coeff is read-only (pointer to const); levels is the only pointer whose
 * target is non-const. The top-level `const` on width, height and on the
 * pointer parameters themselves does not affect the function's type as seen
 * by callers. */
3019 void av1_txb_init_levels_c(const tran_low_t* const coeff,
3020 const int width,
3021 const int height,
3022 uint8_t* const levels);
3023 void av1_txb_init_levels_sse4_1(const tran_low_t* const coeff,
3024 const int width,
3025 const int height,
3026 uint8_t* const levels);
3027 void av1_txb_init_levels_avx2(const tran_low_t* const coeff,
3028 const int width,
3029 const int height,
3030 uint8_t* const levels);
3031 RTCD_EXTERN void (*av1_txb_init_levels)(const tran_low_t* const coeff,
3032 const int width,
3033 const int height,
3034 uint8_t* const levels);
3035
/* av1_upsample_intra_edge: prototypes for the _c and _sse4_1 variants plus
 * the RTCD_EXTERN function pointer with the same signature. p is a non-const
 * uint8_t*, so the buffer may be modified through it. */
3036 void av1_upsample_intra_edge_c(uint8_t* p, int sz);
3037 void av1_upsample_intra_edge_sse4_1(uint8_t* p, int sz);
3038 RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t* p, int sz);
3039
/* av1_upsample_intra_edge_high: prototypes for the _c and _sse4_1 variants
 * plus the RTCD_EXTERN function pointer with the same signature. Same shape
 * as av1_upsample_intra_edge but with a uint16_t* buffer and an extra bd
 * argument. */
3040 void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
3041 void av1_upsample_intra_edge_high_sse4_1(uint16_t* p, int sz, int bd);
3042 RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t* p, int sz, int bd);
3043
/* av1_warp_affine: prototypes for the _c, _sse4_1 and _avx2 variants plus the
 * RTCD_EXTERN function pointer with the same signature.
 * ref is read-only and pred is writable; both are uint8_t*. conv_params is a
 * non-const ConvolveParams*. The high-bit-depth counterpart,
 * av1_highbd_warp_affine, uses uint16_t buffers and adds a bd argument. */
3044 void av1_warp_affine_c(const int32_t* mat,
3045 const uint8_t* ref,
3046 int width,
3047 int height,
3048 int stride,
3049 uint8_t* pred,
3050 int p_col,
3051 int p_row,
3052 int p_width,
3053 int p_height,
3054 int p_stride,
3055 int subsampling_x,
3056 int subsampling_y,
3057 ConvolveParams* conv_params,
3058 int16_t alpha,
3059 int16_t beta,
3060 int16_t gamma,
3061 int16_t delta);
3062 void av1_warp_affine_sse4_1(const int32_t* mat,
3063 const uint8_t* ref,
3064 int width,
3065 int height,
3066 int stride,
3067 uint8_t* pred,
3068 int p_col,
3069 int p_row,
3070 int p_width,
3071 int p_height,
3072 int p_stride,
3073 int subsampling_x,
3074 int subsampling_y,
3075 ConvolveParams* conv_params,
3076 int16_t alpha,
3077 int16_t beta,
3078 int16_t gamma,
3079 int16_t delta);
3080 void av1_warp_affine_avx2(const int32_t* mat,
3081 const uint8_t* ref,
3082 int width,
3083 int height,
3084 int stride,
3085 uint8_t* pred,
3086 int p_col,
3087 int p_row,
3088 int p_width,
3089 int p_height,
3090 int p_stride,
3091 int subsampling_x,
3092 int subsampling_y,
3093 ConvolveParams* conv_params,
3094 int16_t alpha,
3095 int16_t beta,
3096 int16_t gamma,
3097 int16_t delta);
3098 RTCD_EXTERN void (*av1_warp_affine)(const int32_t* mat,
3099 const uint8_t* ref,
3100 int width,
3101 int height,
3102 int stride,
3103 uint8_t* pred,
3104 int p_col,
3105 int p_row,
3106 int p_width,
3107 int p_height,
3108 int p_stride,
3109 int subsampling_x,
3110 int subsampling_y,
3111 ConvolveParams* conv_params,
3112 int16_t alpha,
3113 int16_t beta,
3114 int16_t gamma,
3115 int16_t delta);
3116
/* av1_wedge_compute_delta_squares: prototypes for the _c, _sse2 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * d is the only writable buffer; a and b are read-only. N is passed as int. */
3117 void av1_wedge_compute_delta_squares_c(int16_t* d,
3118 const int16_t* a,
3119 const int16_t* b,
3120 int N);
3121 void av1_wedge_compute_delta_squares_sse2(int16_t* d,
3122 const int16_t* a,
3123 const int16_t* b,
3124 int N);
3125 void av1_wedge_compute_delta_squares_avx2(int16_t* d,
3126 const int16_t* a,
3127 const int16_t* b,
3128 int N);
3129 RTCD_EXTERN void (*av1_wedge_compute_delta_squares)(int16_t* d,
3130 const int16_t* a,
3131 const int16_t* b,
3132 int N);
3133
/* av1_wedge_sign_from_residuals: prototypes for the _c, _sse2 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * Returns int8_t; both input buffers are read-only and limit is a 64-bit
 * signed value. */
3134 int8_t av1_wedge_sign_from_residuals_c(const int16_t* ds,
3135 const uint8_t* m,
3136 int N,
3137 int64_t limit);
3138 int8_t av1_wedge_sign_from_residuals_sse2(const int16_t* ds,
3139 const uint8_t* m,
3140 int N,
3141 int64_t limit);
3142 int8_t av1_wedge_sign_from_residuals_avx2(const int16_t* ds,
3143 const uint8_t* m,
3144 int N,
3145 int64_t limit);
3146 RTCD_EXTERN int8_t (*av1_wedge_sign_from_residuals)(const int16_t* ds,
3147 const uint8_t* m,
3148 int N,
3149 int64_t limit);
3150
/* av1_wedge_sse_from_residuals: prototypes for the _c, _sse2 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * Returns uint64_t; all three input buffers are read-only. */
3151 uint64_t av1_wedge_sse_from_residuals_c(const int16_t* r1,
3152 const int16_t* d,
3153 const uint8_t* m,
3154 int N);
3155 uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t* r1,
3156 const int16_t* d,
3157 const uint8_t* m,
3158 int N);
3159 uint64_t av1_wedge_sse_from_residuals_avx2(const int16_t* r1,
3160 const int16_t* d,
3161 const uint8_t* m,
3162 int N);
3163 RTCD_EXTERN uint64_t (*av1_wedge_sse_from_residuals)(const int16_t* r1,
3164 const int16_t* d,
3165 const uint8_t* m,
3166 int N);
3167
/* av1_wiener_convolve_add_src: prototypes for the _c, _sse2 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * Strides are ptrdiff_t. src, both filters and conv_params are read-only;
 * dst is the only writable buffer. The highbd counterpart takes the same
 * list plus a trailing bd argument. */
3168 void av1_wiener_convolve_add_src_c(const uint8_t* src,
3169 ptrdiff_t src_stride,
3170 uint8_t* dst,
3171 ptrdiff_t dst_stride,
3172 const int16_t* filter_x,
3173 int x_step_q4,
3174 const int16_t* filter_y,
3175 int y_step_q4,
3176 int w,
3177 int h,
3178 const ConvolveParams* conv_params);
3179 void av1_wiener_convolve_add_src_sse2(const uint8_t* src,
3180 ptrdiff_t src_stride,
3181 uint8_t* dst,
3182 ptrdiff_t dst_stride,
3183 const int16_t* filter_x,
3184 int x_step_q4,
3185 const int16_t* filter_y,
3186 int y_step_q4,
3187 int w,
3188 int h,
3189 const ConvolveParams* conv_params);
3190 void av1_wiener_convolve_add_src_avx2(const uint8_t* src,
3191 ptrdiff_t src_stride,
3192 uint8_t* dst,
3193 ptrdiff_t dst_stride,
3194 const int16_t* filter_x,
3195 int x_step_q4,
3196 const int16_t* filter_y,
3197 int y_step_q4,
3198 int w,
3199 int h,
3200 const ConvolveParams* conv_params);
3201 RTCD_EXTERN void (*av1_wiener_convolve_add_src)(
3202 const uint8_t* src,
3203 ptrdiff_t src_stride,
3204 uint8_t* dst,
3205 ptrdiff_t dst_stride,
3206 const int16_t* filter_x,
3207 int x_step_q4,
3208 const int16_t* filter_y,
3209 int y_step_q4,
3210 int w,
3211 int h,
3212 const ConvolveParams* conv_params);
3213
/* cdef_copy_rect8_16bit_to_16bit: prototypes for the _c, _sse2, _ssse3,
 * _sse4_1 and _avx2 variants plus the RTCD_EXTERN function pointer with the
 * same signature. Source and destination are both uint16_t buffers, each
 * with its own stride; src is read-only. */
3214 void cdef_copy_rect8_16bit_to_16bit_c(uint16_t* dst,
3215 int dstride,
3216 const uint16_t* src,
3217 int sstride,
3218 int v,
3219 int h);
3220 void cdef_copy_rect8_16bit_to_16bit_sse2(uint16_t* dst,
3221 int dstride,
3222 const uint16_t* src,
3223 int sstride,
3224 int v,
3225 int h);
3226 void cdef_copy_rect8_16bit_to_16bit_ssse3(uint16_t* dst,
3227 int dstride,
3228 const uint16_t* src,
3229 int sstride,
3230 int v,
3231 int h);
3232 void cdef_copy_rect8_16bit_to_16bit_sse4_1(uint16_t* dst,
3233 int dstride,
3234 const uint16_t* src,
3235 int sstride,
3236 int v,
3237 int h);
3238 void cdef_copy_rect8_16bit_to_16bit_avx2(uint16_t* dst,
3239 int dstride,
3240 const uint16_t* src,
3241 int sstride,
3242 int v,
3243 int h);
3244 RTCD_EXTERN void (*cdef_copy_rect8_16bit_to_16bit)(uint16_t* dst,
3245 int dstride,
3246 const uint16_t* src,
3247 int sstride,
3248 int v,
3249 int h);
3250
/* cdef_copy_rect8_8bit_to_16bit: prototypes for the _c, _sse2, _ssse3,
 * _sse4_1 and _avx2 variants plus the RTCD_EXTERN function pointer with the
 * same signature. Same shape as cdef_copy_rect8_16bit_to_16bit except that
 * the read-only source is uint8_t while the destination stays uint16_t. */
3251 void cdef_copy_rect8_8bit_to_16bit_c(uint16_t* dst,
3252 int dstride,
3253 const uint8_t* src,
3254 int sstride,
3255 int v,
3256 int h);
3257 void cdef_copy_rect8_8bit_to_16bit_sse2(uint16_t* dst,
3258 int dstride,
3259 const uint8_t* src,
3260 int sstride,
3261 int v,
3262 int h);
3263 void cdef_copy_rect8_8bit_to_16bit_ssse3(uint16_t* dst,
3264 int dstride,
3265 const uint8_t* src,
3266 int sstride,
3267 int v,
3268 int h);
3269 void cdef_copy_rect8_8bit_to_16bit_sse4_1(uint16_t* dst,
3270 int dstride,
3271 const uint8_t* src,
3272 int sstride,
3273 int v,
3274 int h);
3275 void cdef_copy_rect8_8bit_to_16bit_avx2(uint16_t* dst,
3276 int dstride,
3277 const uint8_t* src,
3278 int sstride,
3279 int v,
3280 int h);
3281 RTCD_EXTERN void (*cdef_copy_rect8_8bit_to_16bit)(uint16_t* dst,
3282 int dstride,
3283 const uint8_t* src,
3284 int sstride,
3285 int v,
3286 int h);
3287
/* cdef_filter_block: prototypes for the _c, _sse2, _ssse3, _sse4_1 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * There are two writable destination pointers, dst8 (uint8_t*) and dst16
 * (uint16_t*), sharing a single dstride; `in` is read-only.
 * NOTE(review): how the callee chooses between dst8 and dst16 is not visible
 * in this header -- confirm against the implementation. */
3288 void cdef_filter_block_c(uint8_t* dst8,
3289 uint16_t* dst16,
3290 int dstride,
3291 const uint16_t* in,
3292 int pri_strength,
3293 int sec_strength,
3294 int dir,
3295 int pri_damping,
3296 int sec_damping,
3297 int bsize,
3298 int coeff_shift);
3299 void cdef_filter_block_sse2(uint8_t* dst8,
3300 uint16_t* dst16,
3301 int dstride,
3302 const uint16_t* in,
3303 int pri_strength,
3304 int sec_strength,
3305 int dir,
3306 int pri_damping,
3307 int sec_damping,
3308 int bsize,
3309 int coeff_shift);
3310 void cdef_filter_block_ssse3(uint8_t* dst8,
3311 uint16_t* dst16,
3312 int dstride,
3313 const uint16_t* in,
3314 int pri_strength,
3315 int sec_strength,
3316 int dir,
3317 int pri_damping,
3318 int sec_damping,
3319 int bsize,
3320 int coeff_shift);
3321 void cdef_filter_block_sse4_1(uint8_t* dst8,
3322 uint16_t* dst16,
3323 int dstride,
3324 const uint16_t* in,
3325 int pri_strength,
3326 int sec_strength,
3327 int dir,
3328 int pri_damping,
3329 int sec_damping,
3330 int bsize,
3331 int coeff_shift);
3332 void cdef_filter_block_avx2(uint8_t* dst8,
3333 uint16_t* dst16,
3334 int dstride,
3335 const uint16_t* in,
3336 int pri_strength,
3337 int sec_strength,
3338 int dir,
3339 int pri_damping,
3340 int sec_damping,
3341 int bsize,
3342 int coeff_shift);
3343 RTCD_EXTERN void (*cdef_filter_block)(uint8_t* dst8,
3344 uint16_t* dst16,
3345 int dstride,
3346 const uint16_t* in,
3347 int pri_strength,
3348 int sec_strength,
3349 int dir,
3350 int pri_damping,
3351 int sec_damping,
3352 int bsize,
3353 int coeff_shift);
3354
/* cdef_find_dir: prototypes for the _c, _sse2, _ssse3, _sse4_1 and _avx2
 * variants plus the RTCD_EXTERN function pointer with the same signature.
 * Returns int; img is read-only, while var is a non-const int32_t* and so
 * can carry a second result back to the caller. */
3355 int cdef_find_dir_c(const uint16_t* img,
3356 int stride,
3357 int32_t* var,
3358 int coeff_shift);
3359 int cdef_find_dir_sse2(const uint16_t* img,
3360 int stride,
3361 int32_t* var,
3362 int coeff_shift);
3363 int cdef_find_dir_ssse3(const uint16_t* img,
3364 int stride,
3365 int32_t* var,
3366 int coeff_shift);
3367 int cdef_find_dir_sse4_1(const uint16_t* img,
3368 int stride,
3369 int32_t* var,
3370 int coeff_shift);
3371 int cdef_find_dir_avx2(const uint16_t* img,
3372 int stride,
3373 int32_t* var,
3374 int coeff_shift);
3375 RTCD_EXTERN int (*cdef_find_dir)(const uint16_t* img,
3376 int stride,
3377 int32_t* var,
3378 int coeff_shift);
3379
/* cfl_get_luma_subsampling_420_hbd: getter taking a TX_SIZE and returning a
 * cfl_subsample_hbd_fn function pointer. Declared for the _c, _ssse3 and
 * _avx2 variants, plus the RTCD_EXTERN pointer to the getter itself.
 * NOTE(review): cfl_subsample_hbd_fn is typedef'd under
 * #if CONFIG_AV1_HIGHBITDEPTH near the top of this header, but no matching
 * guard is visible around these declarations -- confirm the header still
 * compiles with that option off. */
3380 cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_c(TX_SIZE tx_size);
3381 cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_ssse3(TX_SIZE tx_size);
3382 cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_avx2(TX_SIZE tx_size);
3383 RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_420_hbd)(
3384 TX_SIZE tx_size);
3385
/* cfl_get_luma_subsampling_420_lbd: getter taking a TX_SIZE and returning a
 * cfl_subsample_lbd_fn function pointer (uint8_t input, uint16_t output_q3
 * per the typedef near the top of this header). Declared for the _c, _ssse3
 * and _avx2 variants, plus the RTCD_EXTERN pointer to the getter itself. */
3386 cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
3387 cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_ssse3(TX_SIZE tx_size);
3388 cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_avx2(TX_SIZE tx_size);
3389 RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_420_lbd)(
3390 TX_SIZE tx_size);
3391
/* cfl_get_luma_subsampling_422_hbd: getter taking a TX_SIZE and returning a
 * cfl_subsample_hbd_fn function pointer. Declared for the _c, _ssse3 and
 * _avx2 variants, plus the RTCD_EXTERN pointer to the getter itself.
 * NOTE(review): the return typedef is conditional on CONFIG_AV1_HIGHBITDEPTH
 * but no guard is visible around these declarations -- confirm. */
3392 cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_c(TX_SIZE tx_size);
3393 cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_ssse3(TX_SIZE tx_size);
3394 cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_avx2(TX_SIZE tx_size);
3395 RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_422_hbd)(
3396 TX_SIZE tx_size);
3397
/* cfl_get_luma_subsampling_422_lbd: getter taking a TX_SIZE and returning a
 * cfl_subsample_lbd_fn function pointer. Declared for the _c, _ssse3 and
 * _avx2 variants, plus the RTCD_EXTERN pointer to the getter itself. */
3398 cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_c(TX_SIZE tx_size);
3399 cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_ssse3(TX_SIZE tx_size);
3400 cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_avx2(TX_SIZE tx_size);
3401 RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_422_lbd)(
3402 TX_SIZE tx_size);
3403
/* cfl_get_luma_subsampling_444_hbd: getter taking a TX_SIZE and returning a
 * cfl_subsample_hbd_fn function pointer. Declared for the _c, _ssse3 and
 * _avx2 variants, plus the RTCD_EXTERN pointer to the getter itself.
 * NOTE(review): the return typedef is conditional on CONFIG_AV1_HIGHBITDEPTH
 * but no guard is visible around these declarations -- confirm. */
3404 cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_c(TX_SIZE tx_size);
3405 cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_ssse3(TX_SIZE tx_size);
3406 cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_avx2(TX_SIZE tx_size);
3407 RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_444_hbd)(
3408 TX_SIZE tx_size);
3409
/* cfl_get_luma_subsampling_444_lbd: getter taking a TX_SIZE and returning a
 * cfl_subsample_lbd_fn function pointer. Declared for the _c, _ssse3 and
 * _avx2 variants, plus the RTCD_EXTERN pointer to the getter itself. */
3410 cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_c(TX_SIZE tx_size);
3411 cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_ssse3(TX_SIZE tx_size);
3412 cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_avx2(TX_SIZE tx_size);
3413 RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_444_lbd)(
3414 TX_SIZE tx_size);
3415
/* cfl_get_predict_hbd_fn: getter taking a TX_SIZE and returning a
 * cfl_predict_hbd_fn function pointer. Declared for the _c, _ssse3 and _avx2
 * variants, plus the RTCD_EXTERN pointer to the getter itself.
 * NOTE(review): cfl_predict_hbd_fn is typedef'd under
 * #if CONFIG_AV1_HIGHBITDEPTH near the top of this header, but no matching
 * guard is visible around these declarations -- confirm. */
3416 cfl_predict_hbd_fn cfl_get_predict_hbd_fn_c(TX_SIZE tx_size);
3417 cfl_predict_hbd_fn cfl_get_predict_hbd_fn_ssse3(TX_SIZE tx_size);
3418 cfl_predict_hbd_fn cfl_get_predict_hbd_fn_avx2(TX_SIZE tx_size);
3419 RTCD_EXTERN cfl_predict_hbd_fn (*cfl_get_predict_hbd_fn)(TX_SIZE tx_size);
3420
/* cfl_get_predict_lbd_fn: getter taking a TX_SIZE and returning a
 * cfl_predict_lbd_fn function pointer. Declared for the _c, _ssse3 and _avx2
 * variants, plus the RTCD_EXTERN pointer to the getter itself. */
3421 cfl_predict_lbd_fn cfl_get_predict_lbd_fn_c(TX_SIZE tx_size);
3422 cfl_predict_lbd_fn cfl_get_predict_lbd_fn_ssse3(TX_SIZE tx_size);
3423 cfl_predict_lbd_fn cfl_get_predict_lbd_fn_avx2(TX_SIZE tx_size);
3424 RTCD_EXTERN cfl_predict_lbd_fn (*cfl_get_predict_lbd_fn)(TX_SIZE tx_size);
3425
/* cfl_get_subtract_average_fn: getter taking a TX_SIZE and returning a
 * cfl_subtract_average_fn function pointer (const uint16_t* src, int16_t*
 * dst per the typedef near the top of this header). Declared for the _c,
 * _sse2 and _avx2 variants -- note _sse2 here, where the other cfl getters
 * use _ssse3 -- plus the RTCD_EXTERN pointer to the getter itself. */
3426 cfl_subtract_average_fn cfl_get_subtract_average_fn_c(TX_SIZE tx_size);
3427 cfl_subtract_average_fn cfl_get_subtract_average_fn_sse2(TX_SIZE tx_size);
3428 cfl_subtract_average_fn cfl_get_subtract_average_fn_avx2(TX_SIZE tx_size);
3429 RTCD_EXTERN cfl_subtract_average_fn (*cfl_get_subtract_average_fn)(
3430 TX_SIZE tx_size);
3431
/* av1_rtcd: takes no arguments and returns nothing. Its definition is not in
 * this header.
 * NOTE(review): presumably the public entry point that ends up running
 * setup_rtcd_internal() to fill in the function pointers declared above --
 * confirm against the file that defines RTCD_C. */
3432 void av1_rtcd(void);
3433
3434 #ifdef RTCD_C
3435 #include "aom_ports/x86.h"
setup_rtcd_internal(void)3436 static void setup_rtcd_internal(void) {
3437 int flags = x86_simd_caps();
3438
3439 (void)flags;
3440
3441 av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_c;
3442 if (flags & HAS_SSE4_1)
3443 av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_sse4_1;
3444 if (flags & HAS_AVX2)
3445 av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_avx2;
3446 av1_apply_temporal_filter_planewise =
3447 av1_apply_temporal_filter_planewise_sse2;
3448 if (flags & HAS_AVX2)
3449 av1_apply_temporal_filter_planewise =
3450 av1_apply_temporal_filter_planewise_avx2;
3451 av1_apply_temporal_filter_yuv = av1_apply_temporal_filter_yuv_c;
3452 if (flags & HAS_SSE4_1)
3453 av1_apply_temporal_filter_yuv = av1_apply_temporal_filter_yuv_sse4_1;
3454 av1_block_error = av1_block_error_sse2;
3455 if (flags & HAS_AVX2)
3456 av1_block_error = av1_block_error_avx2;
3457 av1_block_error_lp = av1_block_error_lp_c;
3458 if (flags & HAS_AVX2)
3459 av1_block_error_lp = av1_block_error_lp_avx2;
3460 av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_c;
3461 if (flags & HAS_SSE4_1)
3462 av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_sse4_1;
3463 if (flags & HAS_AVX2)
3464 av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_avx2;
3465 av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
3466 if (flags & HAS_SSE4_1)
3467 av1_build_compound_diffwtd_mask_d16 =
3468 av1_build_compound_diffwtd_mask_d16_sse4_1;
3469 if (flags & HAS_AVX2)
3470 av1_build_compound_diffwtd_mask_d16 =
3471 av1_build_compound_diffwtd_mask_d16_avx2;
3472 av1_build_compound_diffwtd_mask_highbd =
3473 av1_build_compound_diffwtd_mask_highbd_c;
3474 if (flags & HAS_SSSE3)
3475 av1_build_compound_diffwtd_mask_highbd =
3476 av1_build_compound_diffwtd_mask_highbd_ssse3;
3477 if (flags & HAS_AVX2)
3478 av1_build_compound_diffwtd_mask_highbd =
3479 av1_build_compound_diffwtd_mask_highbd_avx2;
3480 av1_calc_frame_error = av1_calc_frame_error_sse2;
3481 if (flags & HAS_AVX2)
3482 av1_calc_frame_error = av1_calc_frame_error_avx2;
3483 av1_calc_proj_params = av1_calc_proj_params_c;
3484 if (flags & HAS_AVX2)
3485 av1_calc_proj_params = av1_calc_proj_params_avx2;
3486 av1_compute_cross_correlation = av1_compute_cross_correlation_c;
3487 if (flags & HAS_SSE4_1)
3488 av1_compute_cross_correlation = av1_compute_cross_correlation_sse4_1;
3489 if (flags & HAS_AVX2)
3490 av1_compute_cross_correlation = av1_compute_cross_correlation_avx2;
3491 av1_compute_stats = av1_compute_stats_c;
3492 if (flags & HAS_SSE4_1)
3493 av1_compute_stats = av1_compute_stats_sse4_1;
3494 if (flags & HAS_AVX2)
3495 av1_compute_stats = av1_compute_stats_avx2;
3496 av1_compute_stats_highbd = av1_compute_stats_highbd_c;
3497 if (flags & HAS_SSE4_1)
3498 av1_compute_stats_highbd = av1_compute_stats_highbd_sse4_1;
3499 if (flags & HAS_AVX2)
3500 av1_compute_stats_highbd = av1_compute_stats_highbd_avx2;
3501 av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_sse2;
3502 if (flags & HAS_AVX2)
3503 av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_avx2;
3504 av1_convolve_2d_scale = av1_convolve_2d_scale_c;
3505 if (flags & HAS_SSE4_1)
3506 av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1;
3507 av1_convolve_2d_sr = av1_convolve_2d_sr_sse2;
3508 if (flags & HAS_AVX2)
3509 av1_convolve_2d_sr = av1_convolve_2d_sr_avx2;
3510 av1_convolve_horiz_rs = av1_convolve_horiz_rs_c;
3511 if (flags & HAS_SSE4_1)
3512 av1_convolve_horiz_rs = av1_convolve_horiz_rs_sse4_1;
3513 av1_convolve_x_sr = av1_convolve_x_sr_sse2;
3514 if (flags & HAS_AVX2)
3515 av1_convolve_x_sr = av1_convolve_x_sr_avx2;
3516 av1_convolve_y_sr = av1_convolve_y_sr_sse2;
3517 if (flags & HAS_AVX2)
3518 av1_convolve_y_sr = av1_convolve_y_sr_avx2;
3519 av1_dist_wtd_convolve_2d = av1_dist_wtd_convolve_2d_sse2;
3520 if (flags & HAS_SSSE3)
3521 av1_dist_wtd_convolve_2d = av1_dist_wtd_convolve_2d_ssse3;
3522 if (flags & HAS_AVX2)
3523 av1_dist_wtd_convolve_2d = av1_dist_wtd_convolve_2d_avx2;
3524 av1_dist_wtd_convolve_2d_copy = av1_dist_wtd_convolve_2d_copy_sse2;
3525 if (flags & HAS_AVX2)
3526 av1_dist_wtd_convolve_2d_copy = av1_dist_wtd_convolve_2d_copy_avx2;
3527 av1_dist_wtd_convolve_x = av1_dist_wtd_convolve_x_sse2;
3528 if (flags & HAS_AVX2)
3529 av1_dist_wtd_convolve_x = av1_dist_wtd_convolve_x_avx2;
3530 av1_dist_wtd_convolve_y = av1_dist_wtd_convolve_y_sse2;
3531 if (flags & HAS_AVX2)
3532 av1_dist_wtd_convolve_y = av1_dist_wtd_convolve_y_avx2;
3533 av1_dr_prediction_z1 = av1_dr_prediction_z1_c;
3534 if (flags & HAS_AVX2)
3535 av1_dr_prediction_z1 = av1_dr_prediction_z1_avx2;
3536 av1_dr_prediction_z2 = av1_dr_prediction_z2_c;
3537 if (flags & HAS_AVX2)
3538 av1_dr_prediction_z2 = av1_dr_prediction_z2_avx2;
3539 av1_dr_prediction_z3 = av1_dr_prediction_z3_c;
3540 if (flags & HAS_AVX2)
3541 av1_dr_prediction_z3 = av1_dr_prediction_z3_avx2;
3542 av1_filter_intra_edge = av1_filter_intra_edge_c;
3543 if (flags & HAS_SSE4_1)
3544 av1_filter_intra_edge = av1_filter_intra_edge_sse4_1;
3545 av1_filter_intra_edge_high = av1_filter_intra_edge_high_c;
3546 if (flags & HAS_SSE4_1)
3547 av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1;
3548 av1_filter_intra_predictor = av1_filter_intra_predictor_c;
3549 if (flags & HAS_SSE4_1)
3550 av1_filter_intra_predictor = av1_filter_intra_predictor_sse4_1;
3551 av1_fwd_txfm2d_16x16 = av1_fwd_txfm2d_16x16_c;
3552 if (flags & HAS_SSE4_1)
3553 av1_fwd_txfm2d_16x16 = av1_fwd_txfm2d_16x16_sse4_1;
3554 if (flags & HAS_AVX2)
3555 av1_fwd_txfm2d_16x16 = av1_fwd_txfm2d_16x16_avx2;
3556 av1_fwd_txfm2d_16x32 = av1_fwd_txfm2d_16x32_c;
3557 if (flags & HAS_SSE4_1)
3558 av1_fwd_txfm2d_16x32 = av1_fwd_txfm2d_16x32_sse4_1;
3559 av1_fwd_txfm2d_16x4 = av1_fwd_txfm2d_16x4_c;
3560 if (flags & HAS_SSE4_1)
3561 av1_fwd_txfm2d_16x4 = av1_fwd_txfm2d_16x4_sse4_1;
3562 av1_fwd_txfm2d_16x64 = av1_fwd_txfm2d_16x64_c;
3563 if (flags & HAS_SSE4_1)
3564 av1_fwd_txfm2d_16x64 = av1_fwd_txfm2d_16x64_sse4_1;
3565 av1_fwd_txfm2d_16x8 = av1_fwd_txfm2d_16x8_c;
3566 if (flags & HAS_SSE4_1)
3567 av1_fwd_txfm2d_16x8 = av1_fwd_txfm2d_16x8_sse4_1;
3568 if (flags & HAS_AVX2)
3569 av1_fwd_txfm2d_16x8 = av1_fwd_txfm2d_16x8_avx2;
3570 av1_fwd_txfm2d_32x16 = av1_fwd_txfm2d_32x16_c;
3571 if (flags & HAS_SSE4_1)
3572 av1_fwd_txfm2d_32x16 = av1_fwd_txfm2d_32x16_sse4_1;
3573 av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_c;
3574 if (flags & HAS_SSE4_1)
3575 av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_sse4_1;
3576 if (flags & HAS_AVX2)
3577 av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_avx2;
3578 av1_fwd_txfm2d_32x64 = av1_fwd_txfm2d_32x64_c;
3579 if (flags & HAS_SSE4_1)
3580 av1_fwd_txfm2d_32x64 = av1_fwd_txfm2d_32x64_sse4_1;
3581 av1_fwd_txfm2d_32x8 = av1_fwd_txfm2d_32x8_c;
3582 if (flags & HAS_SSE4_1)
3583 av1_fwd_txfm2d_32x8 = av1_fwd_txfm2d_32x8_sse4_1;
3584 av1_fwd_txfm2d_4x16 = av1_fwd_txfm2d_4x16_c;
3585 if (flags & HAS_SSE4_1)
3586 av1_fwd_txfm2d_4x16 = av1_fwd_txfm2d_4x16_sse4_1;
3587 av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_c;
3588 if (flags & HAS_SSE4_1)
3589 av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_sse4_1;
3590 av1_fwd_txfm2d_4x8 = av1_fwd_txfm2d_4x8_c;
3591 if (flags & HAS_SSE4_1)
3592 av1_fwd_txfm2d_4x8 = av1_fwd_txfm2d_4x8_sse4_1;
3593 av1_fwd_txfm2d_64x16 = av1_fwd_txfm2d_64x16_c;
3594 if (flags & HAS_SSE4_1)
3595 av1_fwd_txfm2d_64x16 = av1_fwd_txfm2d_64x16_sse4_1;
3596 av1_fwd_txfm2d_64x32 = av1_fwd_txfm2d_64x32_c;
3597 if (flags & HAS_SSE4_1)
3598 av1_fwd_txfm2d_64x32 = av1_fwd_txfm2d_64x32_sse4_1;
3599 av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_c;
3600 if (flags & HAS_SSE4_1)
3601 av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_sse4_1;
3602 if (flags & HAS_AVX2)
3603 av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_avx2;
3604 av1_fwd_txfm2d_8x16 = av1_fwd_txfm2d_8x16_c;
3605 if (flags & HAS_SSE4_1)
3606 av1_fwd_txfm2d_8x16 = av1_fwd_txfm2d_8x16_sse4_1;
3607 if (flags & HAS_AVX2)
3608 av1_fwd_txfm2d_8x16 = av1_fwd_txfm2d_8x16_avx2;
3609 av1_fwd_txfm2d_8x32 = av1_fwd_txfm2d_8x32_c;
3610 if (flags & HAS_SSE4_1)
3611 av1_fwd_txfm2d_8x32 = av1_fwd_txfm2d_8x32_sse4_1;
3612 av1_fwd_txfm2d_8x4 = av1_fwd_txfm2d_8x4_c;
3613 if (flags & HAS_SSE4_1)
3614 av1_fwd_txfm2d_8x4 = av1_fwd_txfm2d_8x4_sse4_1;
3615 av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_c;
3616 if (flags & HAS_SSE4_1)
3617 av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_sse4_1;
3618 if (flags & HAS_AVX2)
3619 av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_avx2;
3620 av1_get_crc32c_value = av1_get_crc32c_value_c;
3621 if (flags & HAS_SSE4_2)
3622 av1_get_crc32c_value = av1_get_crc32c_value_sse4_2;
3623 av1_get_horver_correlation_full = av1_get_horver_correlation_full_c;
3624 if (flags & HAS_SSE4_1)
3625 av1_get_horver_correlation_full = av1_get_horver_correlation_full_sse4_1;
3626 if (flags & HAS_AVX2)
3627 av1_get_horver_correlation_full = av1_get_horver_correlation_full_avx2;
3628 av1_highbd_block_error = av1_highbd_block_error_sse2;
3629 if (flags & HAS_AVX2)
3630 av1_highbd_block_error = av1_highbd_block_error_avx2;
3631 av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_sse2;
3632 if (flags & HAS_AVX2)
3633 av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_avx2;
3634 av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c;
3635 if (flags & HAS_SSE4_1)
3636 av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1;
3637 av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_c;
3638 if (flags & HAS_SSSE3)
3639 av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_ssse3;
3640 if (flags & HAS_AVX2)
3641 av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_avx2;
3642 av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_c;
3643 if (flags & HAS_SSE4_1)
3644 av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_sse4_1;
3645 av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_c;
3646 if (flags & HAS_SSSE3)
3647 av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_ssse3;
3648 if (flags & HAS_AVX2)
3649 av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_avx2;
3650 av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_c;
3651 if (flags & HAS_SSSE3)
3652 av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_ssse3;
3653 if (flags & HAS_AVX2)
3654 av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_avx2;
3655 av1_highbd_dist_wtd_convolve_2d = av1_highbd_dist_wtd_convolve_2d_c;
3656 if (flags & HAS_SSE4_1)
3657 av1_highbd_dist_wtd_convolve_2d = av1_highbd_dist_wtd_convolve_2d_sse4_1;
3658 if (flags & HAS_AVX2)
3659 av1_highbd_dist_wtd_convolve_2d = av1_highbd_dist_wtd_convolve_2d_avx2;
3660 av1_highbd_dist_wtd_convolve_2d_copy = av1_highbd_dist_wtd_convolve_2d_copy_c;
3661 if (flags & HAS_SSE4_1)
3662 av1_highbd_dist_wtd_convolve_2d_copy =
3663 av1_highbd_dist_wtd_convolve_2d_copy_sse4_1;
3664 if (flags & HAS_AVX2)
3665 av1_highbd_dist_wtd_convolve_2d_copy =
3666 av1_highbd_dist_wtd_convolve_2d_copy_avx2;
3667 av1_highbd_dist_wtd_convolve_x = av1_highbd_dist_wtd_convolve_x_c;
3668 if (flags & HAS_SSE4_1)
3669 av1_highbd_dist_wtd_convolve_x = av1_highbd_dist_wtd_convolve_x_sse4_1;
3670 if (flags & HAS_AVX2)
3671 av1_highbd_dist_wtd_convolve_x = av1_highbd_dist_wtd_convolve_x_avx2;
3672 av1_highbd_dist_wtd_convolve_y = av1_highbd_dist_wtd_convolve_y_c;
3673 if (flags & HAS_SSE4_1)
3674 av1_highbd_dist_wtd_convolve_y = av1_highbd_dist_wtd_convolve_y_sse4_1;
3675 if (flags & HAS_AVX2)
3676 av1_highbd_dist_wtd_convolve_y = av1_highbd_dist_wtd_convolve_y_avx2;
3677 av1_highbd_dr_prediction_z1 = av1_highbd_dr_prediction_z1_c;
3678 if (flags & HAS_AVX2)
3679 av1_highbd_dr_prediction_z1 = av1_highbd_dr_prediction_z1_avx2;
3680 av1_highbd_dr_prediction_z2 = av1_highbd_dr_prediction_z2_c;
3681 if (flags & HAS_AVX2)
3682 av1_highbd_dr_prediction_z2 = av1_highbd_dr_prediction_z2_avx2;
3683 av1_highbd_dr_prediction_z3 = av1_highbd_dr_prediction_z3_c;
3684 if (flags & HAS_AVX2)
3685 av1_highbd_dr_prediction_z3 = av1_highbd_dr_prediction_z3_avx2;
3686 av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_c;
3687 if (flags & HAS_SSE4_1)
3688 av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_sse4_1;
3689 if (flags & HAS_AVX2)
3690 av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_avx2;
3691 av1_highbd_inv_txfm_add_16x4 = av1_highbd_inv_txfm_add_16x4_c;
3692 if (flags & HAS_SSE4_1)
3693 av1_highbd_inv_txfm_add_16x4 = av1_highbd_inv_txfm_add_16x4_sse4_1;
3694 av1_highbd_inv_txfm_add_4x16 = av1_highbd_inv_txfm_add_4x16_c;
3695 if (flags & HAS_SSE4_1)
3696 av1_highbd_inv_txfm_add_4x16 = av1_highbd_inv_txfm_add_4x16_sse4_1;
3697 av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_c;
3698 if (flags & HAS_SSE4_1)
3699 av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_sse4_1;
3700 av1_highbd_inv_txfm_add_4x8 = av1_highbd_inv_txfm_add_4x8_c;
3701 if (flags & HAS_SSE4_1)
3702 av1_highbd_inv_txfm_add_4x8 = av1_highbd_inv_txfm_add_4x8_sse4_1;
3703 av1_highbd_inv_txfm_add_8x4 = av1_highbd_inv_txfm_add_8x4_c;
3704 if (flags & HAS_SSE4_1)
3705 av1_highbd_inv_txfm_add_8x4 = av1_highbd_inv_txfm_add_8x4_sse4_1;
3706 av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_c;
3707 if (flags & HAS_SSE4_1)
3708 av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_sse4_1;
3709 av1_highbd_pixel_proj_error = av1_highbd_pixel_proj_error_c;
3710 if (flags & HAS_SSE4_1)
3711 av1_highbd_pixel_proj_error = av1_highbd_pixel_proj_error_sse4_1;
3712 if (flags & HAS_AVX2)
3713 av1_highbd_pixel_proj_error = av1_highbd_pixel_proj_error_avx2;
3714 av1_highbd_quantize_fp = av1_highbd_quantize_fp_c;
3715 if (flags & HAS_SSE4_1)
3716 av1_highbd_quantize_fp = av1_highbd_quantize_fp_sse4_1;
3717 if (flags & HAS_AVX2)
3718 av1_highbd_quantize_fp = av1_highbd_quantize_fp_avx2;
3719 av1_highbd_warp_affine = av1_highbd_warp_affine_c;
3720 if (flags & HAS_SSE4_1)
3721 av1_highbd_warp_affine = av1_highbd_warp_affine_sse4_1;
3722 av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_c;
3723 if (flags & HAS_SSSE3)
3724 av1_highbd_wiener_convolve_add_src =
3725 av1_highbd_wiener_convolve_add_src_ssse3;
3726 if (flags & HAS_AVX2)
3727 av1_highbd_wiener_convolve_add_src =
3728 av1_highbd_wiener_convolve_add_src_avx2;
3729 av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_c;
3730 if (flags & HAS_SSE4_1)
3731 av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_sse4_1;
3732 av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_c;
3733 if (flags & HAS_SSE4_1)
3734 av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_sse4_1;
3735 av1_inv_txfm_add = av1_inv_txfm_add_c;
3736 if (flags & HAS_SSSE3)
3737 av1_inv_txfm_add = av1_inv_txfm_add_ssse3;
3738 if (flags & HAS_AVX2)
3739 av1_inv_txfm_add = av1_inv_txfm_add_avx2;
3740 av1_lowbd_fwd_txfm = av1_lowbd_fwd_txfm_sse2;
3741 if (flags & HAS_SSE4_1)
3742 av1_lowbd_fwd_txfm = av1_lowbd_fwd_txfm_sse4_1;
3743 if (flags & HAS_AVX2)
3744 av1_lowbd_fwd_txfm = av1_lowbd_fwd_txfm_avx2;
3745 av1_lowbd_pixel_proj_error = av1_lowbd_pixel_proj_error_c;
3746 if (flags & HAS_SSE4_1)
3747 av1_lowbd_pixel_proj_error = av1_lowbd_pixel_proj_error_sse4_1;
3748 if (flags & HAS_AVX2)
3749 av1_lowbd_pixel_proj_error = av1_lowbd_pixel_proj_error_avx2;
3750 av1_nn_predict = av1_nn_predict_c;
3751 if (flags & HAS_SSE3)
3752 av1_nn_predict = av1_nn_predict_sse3;
3753 av1_quantize_fp = av1_quantize_fp_sse2;
3754 if (flags & HAS_AVX2)
3755 av1_quantize_fp = av1_quantize_fp_avx2;
3756 av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c;
3757 if (flags & HAS_AVX2)
3758 av1_quantize_fp_32x32 = av1_quantize_fp_32x32_avx2;
3759 av1_quantize_fp_64x64 = av1_quantize_fp_64x64_c;
3760 if (flags & HAS_AVX2)
3761 av1_quantize_fp_64x64 = av1_quantize_fp_64x64_avx2;
3762 av1_quantize_lp = av1_quantize_lp_c;
3763 if (flags & HAS_AVX2)
3764 av1_quantize_lp = av1_quantize_lp_avx2;
3765 av1_round_shift_array = av1_round_shift_array_c;
3766 if (flags & HAS_SSE4_1)
3767 av1_round_shift_array = av1_round_shift_array_sse4_1;
3768 av1_selfguided_restoration = av1_selfguided_restoration_c;
3769 if (flags & HAS_SSE4_1)
3770 av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
3771 if (flags & HAS_AVX2)
3772 av1_selfguided_restoration = av1_selfguided_restoration_avx2;
3773 av1_txb_init_levels = av1_txb_init_levels_c;
3774 if (flags & HAS_SSE4_1)
3775 av1_txb_init_levels = av1_txb_init_levels_sse4_1;
3776 if (flags & HAS_AVX2)
3777 av1_txb_init_levels = av1_txb_init_levels_avx2;
3778 av1_upsample_intra_edge = av1_upsample_intra_edge_c;
3779 if (flags & HAS_SSE4_1)
3780 av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1;
3781 av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
3782 if (flags & HAS_SSE4_1)
3783 av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
3784 av1_warp_affine = av1_warp_affine_c;
3785 if (flags & HAS_SSE4_1)
3786 av1_warp_affine = av1_warp_affine_sse4_1;
3787 if (flags & HAS_AVX2)
3788 av1_warp_affine = av1_warp_affine_avx2;
3789 av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_sse2;
3790 if (flags & HAS_AVX2)
3791 av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_avx2;
3792 av1_wedge_sign_from_residuals = av1_wedge_sign_from_residuals_sse2;
3793 if (flags & HAS_AVX2)
3794 av1_wedge_sign_from_residuals = av1_wedge_sign_from_residuals_avx2;
3795 av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_sse2;
3796 if (flags & HAS_AVX2)
3797 av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_avx2;
3798 av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_sse2;
3799 if (flags & HAS_AVX2)
3800 av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_avx2;
3801 cdef_copy_rect8_16bit_to_16bit = cdef_copy_rect8_16bit_to_16bit_sse2;
3802 if (flags & HAS_SSSE3)
3803 cdef_copy_rect8_16bit_to_16bit = cdef_copy_rect8_16bit_to_16bit_ssse3;
3804 if (flags & HAS_SSE4_1)
3805 cdef_copy_rect8_16bit_to_16bit = cdef_copy_rect8_16bit_to_16bit_sse4_1;
3806 if (flags & HAS_AVX2)
3807 cdef_copy_rect8_16bit_to_16bit = cdef_copy_rect8_16bit_to_16bit_avx2;
3808 cdef_copy_rect8_8bit_to_16bit = cdef_copy_rect8_8bit_to_16bit_sse2;
3809 if (flags & HAS_SSSE3)
3810 cdef_copy_rect8_8bit_to_16bit = cdef_copy_rect8_8bit_to_16bit_ssse3;
3811 if (flags & HAS_SSE4_1)
3812 cdef_copy_rect8_8bit_to_16bit = cdef_copy_rect8_8bit_to_16bit_sse4_1;
3813 if (flags & HAS_AVX2)
3814 cdef_copy_rect8_8bit_to_16bit = cdef_copy_rect8_8bit_to_16bit_avx2;
3815 cdef_filter_block = cdef_filter_block_sse2;
3816 if (flags & HAS_SSSE3)
3817 cdef_filter_block = cdef_filter_block_ssse3;
3818 if (flags & HAS_SSE4_1)
3819 cdef_filter_block = cdef_filter_block_sse4_1;
3820 if (flags & HAS_AVX2)
3821 cdef_filter_block = cdef_filter_block_avx2;
3822 cdef_find_dir = cdef_find_dir_sse2;
3823 if (flags & HAS_SSSE3)
3824 cdef_find_dir = cdef_find_dir_ssse3;
3825 if (flags & HAS_SSE4_1)
3826 cdef_find_dir = cdef_find_dir_sse4_1;
3827 if (flags & HAS_AVX2)
3828 cdef_find_dir = cdef_find_dir_avx2;
3829 cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_c;
3830 if (flags & HAS_SSSE3)
3831 cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_ssse3;
3832 if (flags & HAS_AVX2)
3833 cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_avx2;
3834 cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_c;
3835 if (flags & HAS_SSSE3)
3836 cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_ssse3;
3837 if (flags & HAS_AVX2)
3838 cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_avx2;
3839 cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_c;
3840 if (flags & HAS_SSSE3)
3841 cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_ssse3;
3842 if (flags & HAS_AVX2)
3843 cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_avx2;
3844 cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_c;
3845 if (flags & HAS_SSSE3)
3846 cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_ssse3;
3847 if (flags & HAS_AVX2)
3848 cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_avx2;
3849 cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_c;
3850 if (flags & HAS_SSSE3)
3851 cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_ssse3;
3852 if (flags & HAS_AVX2)
3853 cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_avx2;
3854 cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_c;
3855 if (flags & HAS_SSSE3)
3856 cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_ssse3;
3857 if (flags & HAS_AVX2)
3858 cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_avx2;
3859 cfl_get_predict_hbd_fn = cfl_get_predict_hbd_fn_c;
3860 if (flags & HAS_SSSE3)
3861 cfl_get_predict_hbd_fn = cfl_get_predict_hbd_fn_ssse3;
3862 if (flags & HAS_AVX2)
3863 cfl_get_predict_hbd_fn = cfl_get_predict_hbd_fn_avx2;
3864 cfl_get_predict_lbd_fn = cfl_get_predict_lbd_fn_c;
3865 if (flags & HAS_SSSE3)
3866 cfl_get_predict_lbd_fn = cfl_get_predict_lbd_fn_ssse3;
3867 if (flags & HAS_AVX2)
3868 cfl_get_predict_lbd_fn = cfl_get_predict_lbd_fn_avx2;
3869 cfl_get_subtract_average_fn = cfl_get_subtract_average_fn_sse2;
3870 if (flags & HAS_AVX2)
3871 cfl_get_subtract_average_fn = cfl_get_subtract_average_fn_avx2;
3872 }
3873 #endif
3874
3875 #ifdef __cplusplus
3876 } // extern "C"
3877 #endif
3878
3879 #endif
3880