1 // This file is generated. Do not edit.
2 #ifndef AV1_RTCD_H_
3 #define AV1_RTCD_H_
4 
/*
 * RTCD_EXTERN is the storage-class prefix used on the function-pointer
 * declarations in this header. It expands to nothing when RTCD_C is defined
 * before inclusion and to `extern` otherwise, so only a translation unit
 * that defines RTCD_C gets the pointers without `extern`.
 */
5 #ifdef RTCD_C
6 #define RTCD_EXTERN
7 #else
8 #define RTCD_EXTERN extern
9 #endif
10 
11 /*
12  * AV1
13  */
14 
15 #include "aom/aom_integer.h"
16 #include "aom_dsp/txfm_common.h"
17 #include "av1/common/av1_txfm.h"
18 #include "av1/common/common.h"
19 #include "av1/common/convolve.h"
20 #include "av1/common/enums.h"
21 #include "av1/common/filter.h"
22 #include "av1/common/odintrin.h"
23 #include "av1/common/quant_common.h"
24 #include "av1/common/restoration.h"
25 
/* Forward declaration only: the type stays incomplete in this header, which
 * is enough for prototypes that take `struct macroblockd*` parameters. */
26 struct macroblockd;
27 
28 /* Encoder forward decls */
/* These declarations leave the types incomplete here, so they can only be
 * used through pointers until the full definition is included. NN_CONFIG
 * also gets a typedef so it can be named without the `struct` keyword. */
29 struct macroblock;
30 struct txfm_param;
31 struct aom_variance_vtable;
32 struct search_site_config;
33 struct yv12_buffer_config;
34 struct NN_CONFIG;
35 typedef struct NN_CONFIG NN_CONFIG;
36 
/*
 * Activation kinds. The enum is wrapped by the UENUM1BYTE macro, which is
 * defined outside this header chunk (presumably it produces a one-byte
 * typedef named ACTIVATION -- confirm). ACTIVATION is used as a parameter
 * type by av1_cnn_activate_c().
 *
 * The LOSS enum and the NN_CONFIG_V2 / FC_LAYER forward declarations are
 * only visible when CONFIG_NN_V2 evaluates to non-zero.
 */
37 enum { NONE, RELU, SOFTSIGN, SIGMOID } UENUM1BYTE(ACTIVATION);
38 #if CONFIG_NN_V2
39 enum { SOFTMAX_CROSS_ENTROPY } UENUM1BYTE(LOSS);
40 struct NN_CONFIG_V2;
41 typedef struct NN_CONFIG_V2 NN_CONFIG_V2;
42 struct FC_LAYER;
43 typedef struct FC_LAYER FC_LAYER;
44 #endif  // CONFIG_NN_V2
45 
/* Forward declarations plus typedefs for the CNN types. They stay incomplete
 * in this header; the av1_cnn_* prototypes that use them take pointers only. */
46 struct CNN_CONFIG;
47 typedef struct CNN_CONFIG CNN_CONFIG;
48 struct CNN_LAYER_CONFIG;
49 typedef struct CNN_LAYER_CONFIG CNN_LAYER_CONFIG;
50 struct CNN_THREAD_DATA;
51 typedef struct CNN_THREAD_DATA CNN_THREAD_DATA;
52 struct CNN_BRANCH_CONFIG;
53 typedef struct CNN_BRANCH_CONFIG CNN_BRANCH_CONFIG;
54 struct CNN_MULTI_OUT;
55 typedef struct CNN_MULTI_OUT CNN_MULTI_OUT;
56 
57 /* Function pointers returned by CfL functions */
/* Pointer to a CfL subsampling routine that reads 8-bit samples
 * (const uint8_t* input, with input_stride) and writes uint16_t values to
 * output_q3. ("lbd" presumably stands for low bit depth -- confirm.) */
58 typedef void (*cfl_subsample_lbd_fn)(const uint8_t* input,
59                                      int input_stride,
60                                      uint16_t* output_q3);
61 
/*
 * Function-pointer types that exist only when CONFIG_AV1_HIGHBITDEPTH is
 * non-zero:
 *  - cfl_subsample_hbd_fn: like cfl_subsample_lbd_fn but reads const
 *    uint16_t* input.
 *  - cfl_predict_hbd_fn: takes const int16_t* src, writes to uint16_t* dst
 *    with dst_stride, and receives alpha_q3 and a bit depth argument bd.
 */
62 #if CONFIG_AV1_HIGHBITDEPTH
63 typedef void (*cfl_subsample_hbd_fn)(const uint16_t* input,
64                                      int input_stride,
65                                      uint16_t* output_q3);
66 
67 typedef void (*cfl_predict_hbd_fn)(const int16_t* src,
68                                    uint16_t* dst,
69                                    int dst_stride,
70                                    int alpha_q3,
71                                    int bd);
72 #endif
73 
/* Pointer to a CfL routine that reads const uint16_t* src and writes int16_t
 * values to dst; no size or stride is passed in the signature. */
74 typedef void (*cfl_subtract_average_fn)(const uint16_t* src, int16_t* dst);
75 
/* Pointer to a CfL prediction routine for 8-bit destinations: takes const
 * int16_t* src, writes to uint8_t* dst with dst_stride, and receives
 * alpha_q3. Unlike cfl_predict_hbd_fn it has no bit depth argument. */
76 typedef void (*cfl_predict_lbd_fn)(const int16_t* src,
77                                    uint8_t* dst,
78                                    int dst_stride,
79                                    int alpha_q3);
80 
81 #ifdef __cplusplus
82 extern "C" {
83 #endif
84 
/*
 * av1_apply_selfguided_restoration: prototypes for three implementations
 * with identical signatures (_c, _sse4_1, _avx2), followed by the
 * RTCD_EXTERN function pointer that carries the unsuffixed name.
 *
 * dat and xqd are pointers to const; dst and tmpbuf are non-const pointers.
 * The pointer is only declared here; where it is assigned is not visible in
 * this chunk (presumably RTCD setup code selects a variant from the detected
 * CPU features -- confirm).
 */
85 void av1_apply_selfguided_restoration_c(const uint8_t* dat,
86                                         int width,
87                                         int height,
88                                         int stride,
89                                         int eps,
90                                         const int* xqd,
91                                         uint8_t* dst,
92                                         int dst_stride,
93                                         int32_t* tmpbuf,
94                                         int bit_depth,
95                                         int highbd);
96 void av1_apply_selfguided_restoration_sse4_1(const uint8_t* dat,
97                                              int width,
98                                              int height,
99                                              int stride,
100                                              int eps,
101                                              const int* xqd,
102                                              uint8_t* dst,
103                                              int dst_stride,
104                                              int32_t* tmpbuf,
105                                              int bit_depth,
106                                              int highbd);
107 void av1_apply_selfguided_restoration_avx2(const uint8_t* dat,
108                                            int width,
109                                            int height,
110                                            int stride,
111                                            int eps,
112                                            const int* xqd,
113                                            uint8_t* dst,
114                                            int dst_stride,
115                                            int32_t* tmpbuf,
116                                            int bit_depth,
117                                            int highbd);
118 RTCD_EXTERN void (*av1_apply_selfguided_restoration)(const uint8_t* dat,
119                                                      int width,
120                                                      int height,
121                                                      int stride,
122                                                      int eps,
123                                                      const int* xqd,
124                                                      uint8_t* dst,
125                                                      int dst_stride,
126                                                      int32_t* tmpbuf,
127                                                      int bit_depth,
128                                                      int highbd);
129 
/*
 * av1_apply_temporal_filter_planewise: prototypes for the _c, _sse2 and
 * _avx2 implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * ref_frame, mbd, noise_levels and pred are pointers to const; accum
 * (uint32_t*) and count (uint16_t*) are non-const. The `const` on the
 * by-value parameters does not change the function type seen by callers.
 */
130 void av1_apply_temporal_filter_planewise_c(
131     const struct yv12_buffer_config* ref_frame,
132     const struct macroblockd* mbd,
133     const BLOCK_SIZE block_size,
134     const int mb_row,
135     const int mb_col,
136     const int num_planes,
137     const double* noise_levels,
138     const uint8_t* pred,
139     uint32_t* accum,
140     uint16_t* count);
141 void av1_apply_temporal_filter_planewise_sse2(
142     const struct yv12_buffer_config* ref_frame,
143     const struct macroblockd* mbd,
144     const BLOCK_SIZE block_size,
145     const int mb_row,
146     const int mb_col,
147     const int num_planes,
148     const double* noise_levels,
149     const uint8_t* pred,
150     uint32_t* accum,
151     uint16_t* count);
152 void av1_apply_temporal_filter_planewise_avx2(
153     const struct yv12_buffer_config* ref_frame,
154     const struct macroblockd* mbd,
155     const BLOCK_SIZE block_size,
156     const int mb_row,
157     const int mb_col,
158     const int num_planes,
159     const double* noise_levels,
160     const uint8_t* pred,
161     uint32_t* accum,
162     uint16_t* count);
163 RTCD_EXTERN void (*av1_apply_temporal_filter_planewise)(
164     const struct yv12_buffer_config* ref_frame,
165     const struct macroblockd* mbd,
166     const BLOCK_SIZE block_size,
167     const int mb_row,
168     const int mb_col,
169     const int num_planes,
170     const double* noise_levels,
171     const uint8_t* pred,
172     uint32_t* accum,
173     uint16_t* count);
174 
/*
 * av1_apply_temporal_filter_yuv: prototypes for the _c and _sse4_1
 * implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * Compared with the planewise variant, this signature takes strength,
 * use_subblock and subblock_filter_weights instead of noise_levels.
 * accum (uint32_t*) and count (uint16_t*) are the only non-const pointers.
 */
175 void av1_apply_temporal_filter_yuv_c(const struct yv12_buffer_config* ref_frame,
176                                      const struct macroblockd* mbd,
177                                      const BLOCK_SIZE block_size,
178                                      const int mb_row,
179                                      const int mb_col,
180                                      const int num_planes,
181                                      const int strength,
182                                      const int use_subblock,
183                                      const int* subblock_filter_weights,
184                                      const uint8_t* pred,
185                                      uint32_t* accum,
186                                      uint16_t* count);
187 void av1_apply_temporal_filter_yuv_sse4_1(
188     const struct yv12_buffer_config* ref_frame,
189     const struct macroblockd* mbd,
190     const BLOCK_SIZE block_size,
191     const int mb_row,
192     const int mb_col,
193     const int num_planes,
194     const int strength,
195     const int use_subblock,
196     const int* subblock_filter_weights,
197     const uint8_t* pred,
198     uint32_t* accum,
199     uint16_t* count);
200 RTCD_EXTERN void (*av1_apply_temporal_filter_yuv)(
201     const struct yv12_buffer_config* ref_frame,
202     const struct macroblockd* mbd,
203     const BLOCK_SIZE block_size,
204     const int mb_row,
205     const int mb_col,
206     const int num_planes,
207     const int strength,
208     const int use_subblock,
209     const int* subblock_filter_weights,
210     const uint8_t* pred,
211     uint32_t* accum,
212     uint16_t* count);
213 
/*
 * av1_block_error: prototypes for the _c, _sse2 and _avx2 implementations
 * plus the RTCD_EXTERN function pointer with the same signature.
 *
 * Each takes two const tran_low_t arrays (coeff, dqcoeff) and a block_size,
 * and returns an int64_t. ssz is a non-const int64_t* (presumably a second
 * output value -- confirm against the implementation).
 */
214 int64_t av1_block_error_c(const tran_low_t* coeff,
215                           const tran_low_t* dqcoeff,
216                           intptr_t block_size,
217                           int64_t* ssz);
218 int64_t av1_block_error_sse2(const tran_low_t* coeff,
219                              const tran_low_t* dqcoeff,
220                              intptr_t block_size,
221                              int64_t* ssz);
222 int64_t av1_block_error_avx2(const tran_low_t* coeff,
223                              const tran_low_t* dqcoeff,
224                              intptr_t block_size,
225                              int64_t* ssz);
226 RTCD_EXTERN int64_t (*av1_block_error)(const tran_low_t* coeff,
227                                        const tran_low_t* dqcoeff,
228                                        intptr_t block_size,
229                                        int64_t* ssz);
230 
/*
 * av1_block_error_lp: prototypes for the _c and _avx2 implementations plus
 * the RTCD_EXTERN function pointer with the same signature.
 *
 * Differs from av1_block_error in that the inputs are const int16_t arrays
 * and there is no ssz parameter.
 */
231 int64_t av1_block_error_lp_c(const int16_t* coeff,
232                              const int16_t* dqcoeff,
233                              intptr_t block_size);
234 int64_t av1_block_error_lp_avx2(const int16_t* coeff,
235                                 const int16_t* dqcoeff,
236                                 intptr_t block_size);
237 RTCD_EXTERN int64_t (*av1_block_error_lp)(const int16_t* coeff,
238                                           const int16_t* dqcoeff,
239                                           intptr_t block_size);
240 
/*
 * av1_build_compound_diffwtd_mask: prototypes for the _c, _sse4_1 and _avx2
 * implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * mask is the only non-const pointer; src0 and src1 are const uint8_t
 * buffers, each with its own stride. Note the parameter order: h comes
 * before w.
 */
241 void av1_build_compound_diffwtd_mask_c(uint8_t* mask,
242                                        DIFFWTD_MASK_TYPE mask_type,
243                                        const uint8_t* src0,
244                                        int src0_stride,
245                                        const uint8_t* src1,
246                                        int src1_stride,
247                                        int h,
248                                        int w);
249 void av1_build_compound_diffwtd_mask_sse4_1(uint8_t* mask,
250                                             DIFFWTD_MASK_TYPE mask_type,
251                                             const uint8_t* src0,
252                                             int src0_stride,
253                                             const uint8_t* src1,
254                                             int src1_stride,
255                                             int h,
256                                             int w);
257 void av1_build_compound_diffwtd_mask_avx2(uint8_t* mask,
258                                           DIFFWTD_MASK_TYPE mask_type,
259                                           const uint8_t* src0,
260                                           int src0_stride,
261                                           const uint8_t* src1,
262                                           int src1_stride,
263                                           int h,
264                                           int w);
265 RTCD_EXTERN void (*av1_build_compound_diffwtd_mask)(uint8_t* mask,
266                                                     DIFFWTD_MASK_TYPE mask_type,
267                                                     const uint8_t* src0,
268                                                     int src0_stride,
269                                                     const uint8_t* src1,
270                                                     int src1_stride,
271                                                     int h,
272                                                     int w);
273 
/*
 * av1_build_compound_diffwtd_mask_d16: prototypes for the _c, _sse4_1 and
 * _avx2 implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * Same shape as av1_build_compound_diffwtd_mask, except that src0/src1 are
 * const CONV_BUF_TYPE buffers and two extra arguments are passed: a
 * non-const ConvolveParams* and a bit depth bd.
 */
274 void av1_build_compound_diffwtd_mask_d16_c(uint8_t* mask,
275                                            DIFFWTD_MASK_TYPE mask_type,
276                                            const CONV_BUF_TYPE* src0,
277                                            int src0_stride,
278                                            const CONV_BUF_TYPE* src1,
279                                            int src1_stride,
280                                            int h,
281                                            int w,
282                                            ConvolveParams* conv_params,
283                                            int bd);
284 void av1_build_compound_diffwtd_mask_d16_sse4_1(uint8_t* mask,
285                                                 DIFFWTD_MASK_TYPE mask_type,
286                                                 const CONV_BUF_TYPE* src0,
287                                                 int src0_stride,
288                                                 const CONV_BUF_TYPE* src1,
289                                                 int src1_stride,
290                                                 int h,
291                                                 int w,
292                                                 ConvolveParams* conv_params,
293                                                 int bd);
294 void av1_build_compound_diffwtd_mask_d16_avx2(uint8_t* mask,
295                                               DIFFWTD_MASK_TYPE mask_type,
296                                               const CONV_BUF_TYPE* src0,
297                                               int src0_stride,
298                                               const CONV_BUF_TYPE* src1,
299                                               int src1_stride,
300                                               int h,
301                                               int w,
302                                               ConvolveParams* conv_params,
303                                               int bd);
304 RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(
305     uint8_t* mask,
306     DIFFWTD_MASK_TYPE mask_type,
307     const CONV_BUF_TYPE* src0,
308     int src0_stride,
309     const CONV_BUF_TYPE* src1,
310     int src1_stride,
311     int h,
312     int w,
313     ConvolveParams* conv_params,
314     int bd);
315 
/*
 * av1_build_compound_diffwtd_mask_highbd: prototypes for the _c, _ssse3 and
 * _avx2 implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * Same parameters as av1_build_compound_diffwtd_mask plus a trailing bit
 * depth bd. NOTE(review): src0/src1 are typed const uint8_t* even in this
 * high-bit-depth variant; whether they actually refer to 16-bit samples
 * cannot be told from this header -- confirm against the implementation.
 */
316 void av1_build_compound_diffwtd_mask_highbd_c(uint8_t* mask,
317                                               DIFFWTD_MASK_TYPE mask_type,
318                                               const uint8_t* src0,
319                                               int src0_stride,
320                                               const uint8_t* src1,
321                                               int src1_stride,
322                                               int h,
323                                               int w,
324                                               int bd);
325 void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t* mask,
326                                                   DIFFWTD_MASK_TYPE mask_type,
327                                                   const uint8_t* src0,
328                                                   int src0_stride,
329                                                   const uint8_t* src1,
330                                                   int src1_stride,
331                                                   int h,
332                                                   int w,
333                                                   int bd);
334 void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t* mask,
335                                                  DIFFWTD_MASK_TYPE mask_type,
336                                                  const uint8_t* src0,
337                                                  int src0_stride,
338                                                  const uint8_t* src1,
339                                                  int src1_stride,
340                                                  int h,
341                                                  int w,
342                                                  int bd);
343 RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(
344     uint8_t* mask,
345     DIFFWTD_MASK_TYPE mask_type,
346     const uint8_t* src0,
347     int src0_stride,
348     const uint8_t* src1,
349     int src1_stride,
350     int h,
351     int w,
352     int bd);
353 
/*
 * av1_calc_frame_error: prototypes for the _c, _sse2 and _avx2
 * implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * Takes two read-only uint8_t buffers (ref with stride, dst with p_stride)
 * and the dimensions p_width x p_height; returns an int64_t. Both buffer
 * pointers are declared `const uint8_t* const`; the second const applies to
 * the pointer parameter itself and does not affect callers.
 */
354 int64_t av1_calc_frame_error_c(const uint8_t* const ref,
355                                int stride,
356                                const uint8_t* const dst,
357                                int p_width,
358                                int p_height,
359                                int p_stride);
360 int64_t av1_calc_frame_error_sse2(const uint8_t* const ref,
361                                   int stride,
362                                   const uint8_t* const dst,
363                                   int p_width,
364                                   int p_height,
365                                   int p_stride);
366 int64_t av1_calc_frame_error_avx2(const uint8_t* const ref,
367                                   int stride,
368                                   const uint8_t* const dst,
369                                   int p_width,
370                                   int p_height,
371                                   int p_stride);
372 RTCD_EXTERN int64_t (*av1_calc_frame_error)(const uint8_t* const ref,
373                                             int stride,
374                                             const uint8_t* const dst,
375                                             int p_width,
376                                             int p_height,
377                                             int p_stride);
378 
/*
 * av1_calc_proj_params: prototypes for the _c and _avx2 implementations plus
 * the RTCD_EXTERN function pointer with the same signature.
 *
 * src8, dat8 and params are pointers to const. flt0 and flt1 are non-const
 * int32_t* buffers, each with its own stride. H and C are written with array
 * syntax, but as parameters they adjust to `int64_t (*)[2]` and `int64_t*`,
 * so the outer dimensions are documentation only and are not checked by the
 * compiler.
 */
379 void av1_calc_proj_params_c(const uint8_t* src8,
380                             int width,
381                             int height,
382                             int src_stride,
383                             const uint8_t* dat8,
384                             int dat_stride,
385                             int32_t* flt0,
386                             int flt0_stride,
387                             int32_t* flt1,
388                             int flt1_stride,
389                             int64_t H[2][2],
390                             int64_t C[2],
391                             const sgr_params_type* params);
392 void av1_calc_proj_params_avx2(const uint8_t* src8,
393                                int width,
394                                int height,
395                                int src_stride,
396                                const uint8_t* dat8,
397                                int dat_stride,
398                                int32_t* flt0,
399                                int flt0_stride,
400                                int32_t* flt1,
401                                int flt1_stride,
402                                int64_t H[2][2],
403                                int64_t C[2],
404                                const sgr_params_type* params);
405 RTCD_EXTERN void (*av1_calc_proj_params)(const uint8_t* src8,
406                                          int width,
407                                          int height,
408                                          int src_stride,
409                                          const uint8_t* dat8,
410                                          int dat_stride,
411                                          int32_t* flt0,
412                                          int flt0_stride,
413                                          int32_t* flt1,
414                                          int flt1_stride,
415                                          int64_t H[2][2],
416                                          int64_t C[2],
417                                          const sgr_params_type* params);
418 
/*
 * av1_cnn_activate: only a _c implementation is declared, and the unsuffixed
 * name is a macro alias for it, so calls bind directly to av1_cnn_activate_c
 * with no function pointer involved.
 *
 * input is a non-const float** described by channels, width, height and
 * stride; layer_activation is an ACTIVATION value.
 */
419 void av1_cnn_activate_c(float** input,
420                         int channels,
421                         int width,
422                         int height,
423                         int stride,
424                         ACTIVATION layer_activation);
425 #define av1_cnn_activate av1_cnn_activate_c
426 
/*
 * av1_cnn_add: only a _c implementation is declared; the unsuffixed name is
 * a macro alias for it.
 *
 * input is a non-const float**, while add is a `const float**` operand;
 * both are described by the same channels/width/height/stride arguments.
 */
427 void av1_cnn_add_c(float** input,
428                    int channels,
429                    int width,
430                    int height,
431                    int stride,
432                    const float** add);
433 #define av1_cnn_add av1_cnn_add_c
434 
/*
 * av1_cnn_batchnorm: only a _c implementation is declared; the unsuffixed
 * name is a macro alias for it.
 *
 * image is a non-const float**; gamma, beta, mean and std are read-only
 * float arrays (presumably one entry per channel -- confirm).
 */
435 void av1_cnn_batchnorm_c(float** image,
436                          int channels,
437                          int width,
438                          int height,
439                          int stride,
440                          const float* gamma,
441                          const float* beta,
442                          const float* mean,
443                          const float* std);
444 #define av1_cnn_batchnorm av1_cnn_batchnorm_c
445 
/*
 * av1_cnn_convolve: only a _c implementation is declared; the unsuffixed
 * name is a macro alias for it.
 *
 * Takes a `const float**` input with in_width/in_height/in_stride, a
 * read-only CNN_LAYER_CONFIG, and a non-const float** output with
 * out_stride. start_idx and step are plain ints (presumably used to
 * partition the work between callers -- confirm).
 */
446 void av1_cnn_convolve_c(const float** input,
447                         int in_width,
448                         int in_height,
449                         int in_stride,
450                         const CNN_LAYER_CONFIG* layer_config,
451                         float** output,
452                         int out_stride,
453                         int start_idx,
454                         int step);
455 #define av1_cnn_convolve av1_cnn_convolve_c
456 
/*
 * av1_cnn_deconvolve: only a _c implementation is declared; the unsuffixed
 * name is a macro alias for it.
 *
 * Same parameters as av1_cnn_convolve_c, minus start_idx and step.
 */
457 void av1_cnn_deconvolve_c(const float** input,
458                           int in_width,
459                           int in_height,
460                           int in_stride,
461                           const CNN_LAYER_CONFIG* layer_config,
462                           float** output,
463                           int out_stride);
464 #define av1_cnn_deconvolve av1_cnn_deconvolve_c
465 
/*
 * av1_cnn_predict: only a _c implementation is declared; the unsuffixed
 * name is a macro alias for it.
 *
 * Takes a `const float**` input with in_width/in_height/in_stride, a
 * read-only CNN_CONFIG and CNN_THREAD_DATA, and a non-const CNN_MULTI_OUT*
 * output_struct.
 */
466 void av1_cnn_predict_c(const float** input,
467                        int in_width,
468                        int in_height,
469                        int in_stride,
470                        const CNN_CONFIG* cnn_config,
471                        const CNN_THREAD_DATA* thread_data,
472                        CNN_MULTI_OUT* output_struct);
473 #define av1_cnn_predict av1_cnn_predict_c
474 
/*
 * av1_compute_cross_correlation: prototypes for the _c, _sse4_1 and _avx2
 * implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * Takes two images (im1, im2), each with a stride and an (x, y) position,
 * and returns a double. NOTE(review): both image pointers are non-const
 * `unsigned char*`; whether the implementations write through them cannot
 * be told from this header.
 */
475 double av1_compute_cross_correlation_c(unsigned char* im1,
476                                        int stride1,
477                                        int x1,
478                                        int y1,
479                                        unsigned char* im2,
480                                        int stride2,
481                                        int x2,
482                                        int y2);
483 double av1_compute_cross_correlation_sse4_1(unsigned char* im1,
484                                             int stride1,
485                                             int x1,
486                                             int y1,
487                                             unsigned char* im2,
488                                             int stride2,
489                                             int x2,
490                                             int y2);
491 double av1_compute_cross_correlation_avx2(unsigned char* im1,
492                                           int stride1,
493                                           int x1,
494                                           int y1,
495                                           unsigned char* im2,
496                                           int stride2,
497                                           int x2,
498                                           int y2);
499 RTCD_EXTERN double (*av1_compute_cross_correlation)(unsigned char* im1,
500                                                     int stride1,
501                                                     int x1,
502                                                     int y1,
503                                                     unsigned char* im2,
504                                                     int stride2,
505                                                     int x2,
506                                                     int y2);
507 
/*
 * av1_compute_stats: prototypes for the _c, _sse4_1 and _avx2
 * implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * dgd8 and src8 are read-only uint8_t buffers with separate strides; the
 * region is given by h_start/h_end and v_start/v_end. M and H are non-const
 * int64_t* buffers (presumably the outputs, sized from wiener_win --
 * confirm against the implementation).
 */
508 void av1_compute_stats_c(int wiener_win,
509                          const uint8_t* dgd8,
510                          const uint8_t* src8,
511                          int h_start,
512                          int h_end,
513                          int v_start,
514                          int v_end,
515                          int dgd_stride,
516                          int src_stride,
517                          int64_t* M,
518                          int64_t* H);
519 void av1_compute_stats_sse4_1(int wiener_win,
520                               const uint8_t* dgd8,
521                               const uint8_t* src8,
522                               int h_start,
523                               int h_end,
524                               int v_start,
525                               int v_end,
526                               int dgd_stride,
527                               int src_stride,
528                               int64_t* M,
529                               int64_t* H);
530 void av1_compute_stats_avx2(int wiener_win,
531                             const uint8_t* dgd8,
532                             const uint8_t* src8,
533                             int h_start,
534                             int h_end,
535                             int v_start,
536                             int v_end,
537                             int dgd_stride,
538                             int src_stride,
539                             int64_t* M,
540                             int64_t* H);
541 RTCD_EXTERN void (*av1_compute_stats)(int wiener_win,
542                                       const uint8_t* dgd8,
543                                       const uint8_t* src8,
544                                       int h_start,
545                                       int h_end,
546                                       int v_start,
547                                       int v_end,
548                                       int dgd_stride,
549                                       int src_stride,
550                                       int64_t* M,
551                                       int64_t* H);
552 
/*
 * av1_compute_stats_highbd: prototypes for the _c, _sse4_1 and _avx2
 * implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * Same parameters as av1_compute_stats plus a trailing aom_bit_depth_t
 * bit_depth. dgd8 and src8 are still typed const uint8_t* here.
 */
553 void av1_compute_stats_highbd_c(int wiener_win,
554                                 const uint8_t* dgd8,
555                                 const uint8_t* src8,
556                                 int h_start,
557                                 int h_end,
558                                 int v_start,
559                                 int v_end,
560                                 int dgd_stride,
561                                 int src_stride,
562                                 int64_t* M,
563                                 int64_t* H,
564                                 aom_bit_depth_t bit_depth);
565 void av1_compute_stats_highbd_sse4_1(int wiener_win,
566                                      const uint8_t* dgd8,
567                                      const uint8_t* src8,
568                                      int h_start,
569                                      int h_end,
570                                      int v_start,
571                                      int v_end,
572                                      int dgd_stride,
573                                      int src_stride,
574                                      int64_t* M,
575                                      int64_t* H,
576                                      aom_bit_depth_t bit_depth);
577 void av1_compute_stats_highbd_avx2(int wiener_win,
578                                    const uint8_t* dgd8,
579                                    const uint8_t* src8,
580                                    int h_start,
581                                    int h_end,
582                                    int v_start,
583                                    int v_end,
584                                    int dgd_stride,
585                                    int src_stride,
586                                    int64_t* M,
587                                    int64_t* H,
588                                    aom_bit_depth_t bit_depth);
589 RTCD_EXTERN void (*av1_compute_stats_highbd)(int wiener_win,
590                                              const uint8_t* dgd8,
591                                              const uint8_t* src8,
592                                              int h_start,
593                                              int h_end,
594                                              int v_start,
595                                              int v_end,
596                                              int dgd_stride,
597                                              int src_stride,
598                                              int64_t* M,
599                                              int64_t* H,
600                                              aom_bit_depth_t bit_depth);
601 
/*
 * av1_convolve_2d_copy_sr: prototypes for the _c, _sse2 and _avx2
 * implementations plus the RTCD_EXTERN function pointer with the same
 * signature.
 *
 * src is a read-only uint8_t buffer and dst a writable one, each with its
 * own stride, covering a w x h block. The filter parameter structs are
 * pointers to const; conv_params is a non-const ConvolveParams*.
 */
602 void av1_convolve_2d_copy_sr_c(const uint8_t* src,
603                                int src_stride,
604                                uint8_t* dst,
605                                int dst_stride,
606                                int w,
607                                int h,
608                                const InterpFilterParams* filter_params_x,
609                                const InterpFilterParams* filter_params_y,
610                                const int subpel_x_qn,
611                                const int subpel_y_qn,
612                                ConvolveParams* conv_params);
613 void av1_convolve_2d_copy_sr_sse2(const uint8_t* src,
614                                   int src_stride,
615                                   uint8_t* dst,
616                                   int dst_stride,
617                                   int w,
618                                   int h,
619                                   const InterpFilterParams* filter_params_x,
620                                   const InterpFilterParams* filter_params_y,
621                                   const int subpel_x_qn,
622                                   const int subpel_y_qn,
623                                   ConvolveParams* conv_params);
624 void av1_convolve_2d_copy_sr_avx2(const uint8_t* src,
625                                   int src_stride,
626                                   uint8_t* dst,
627                                   int dst_stride,
628                                   int w,
629                                   int h,
630                                   const InterpFilterParams* filter_params_x,
631                                   const InterpFilterParams* filter_params_y,
632                                   const int subpel_x_qn,
633                                   const int subpel_y_qn,
634                                   ConvolveParams* conv_params);
635 RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(
636     const uint8_t* src,
637     int src_stride,
638     uint8_t* dst,
639     int dst_stride,
640     int w,
641     int h,
642     const InterpFilterParams* filter_params_x,
643     const InterpFilterParams* filter_params_y,
644     const int subpel_x_qn,
645     const int subpel_y_qn,
646     ConvolveParams* conv_params);
647 
/*
 * av1_convolve_2d_scale: prototypes for the _c and _sse4_1 implementations
 * plus the RTCD_EXTERN function pointer with the same signature.
 *
 * Same parameters as av1_convolve_2d_copy_sr with two additions: x_step_qn
 * follows subpel_x_qn and y_step_qn follows subpel_y_qn.
 */
648 void av1_convolve_2d_scale_c(const uint8_t* src,
649                              int src_stride,
650                              uint8_t* dst,
651                              int dst_stride,
652                              int w,
653                              int h,
654                              const InterpFilterParams* filter_params_x,
655                              const InterpFilterParams* filter_params_y,
656                              const int subpel_x_qn,
657                              const int x_step_qn,
658                              const int subpel_y_qn,
659                              const int y_step_qn,
660                              ConvolveParams* conv_params);
661 void av1_convolve_2d_scale_sse4_1(const uint8_t* src,
662                                   int src_stride,
663                                   uint8_t* dst,
664                                   int dst_stride,
665                                   int w,
666                                   int h,
667                                   const InterpFilterParams* filter_params_x,
668                                   const InterpFilterParams* filter_params_y,
669                                   const int subpel_x_qn,
670                                   const int x_step_qn,
671                                   const int subpel_y_qn,
672                                   const int y_step_qn,
673                                   ConvolveParams* conv_params);
674 RTCD_EXTERN void (*av1_convolve_2d_scale)(
675     const uint8_t* src,
676     int src_stride,
677     uint8_t* dst,
678     int dst_stride,
679     int w,
680     int h,
681     const InterpFilterParams* filter_params_x,
682     const InterpFilterParams* filter_params_y,
683     const int subpel_x_qn,
684     const int x_step_qn,
685     const int subpel_y_qn,
686     const int y_step_qn,
687     ConvolveParams* conv_params);
688 
// av1_convolve_2d_sr: prototypes for the `_c`, `_sse2` and `_avx2` variants,
// all with one shared signature, followed by the function pointer of that
// type. RTCD_EXTERN is empty when RTCD_C is defined (pointer is defined in
// that translation unit) and `extern` everywhere else.
// NOTE(review): which variant the pointer ends up holding is decided outside
// this section -- presumably by CPU-feature detection; confirm.
689 void av1_convolve_2d_sr_c(const uint8_t* src,
690                           int src_stride,
691                           uint8_t* dst,
692                           int dst_stride,
693                           int w,
694                           int h,
695                           const InterpFilterParams* filter_params_x,
696                           const InterpFilterParams* filter_params_y,
697                           const int subpel_x_qn,
698                           const int subpel_y_qn,
699                           ConvolveParams* conv_params);
700 void av1_convolve_2d_sr_sse2(const uint8_t* src,
701                              int src_stride,
702                              uint8_t* dst,
703                              int dst_stride,
704                              int w,
705                              int h,
706                              const InterpFilterParams* filter_params_x,
707                              const InterpFilterParams* filter_params_y,
708                              const int subpel_x_qn,
709                              const int subpel_y_qn,
710                              ConvolveParams* conv_params);
711 void av1_convolve_2d_sr_avx2(const uint8_t* src,
712                              int src_stride,
713                              uint8_t* dst,
714                              int dst_stride,
715                              int w,
716                              int h,
717                              const InterpFilterParams* filter_params_x,
718                              const InterpFilterParams* filter_params_y,
719                              const int subpel_x_qn,
720                              const int subpel_y_qn,
721                              ConvolveParams* conv_params);
// Dispatch pointer for av1_convolve_2d_sr.
722 RTCD_EXTERN void (*av1_convolve_2d_sr)(
723     const uint8_t* src,
724     int src_stride,
725     uint8_t* dst,
726     int dst_stride,
727     int w,
728     int h,
729     const InterpFilterParams* filter_params_x,
730     const InterpFilterParams* filter_params_y,
731     const int subpel_x_qn,
732     const int subpel_y_qn,
733     ConvolveParams* conv_params);
734 
// av1_convolve_horiz_rs: `_c` and `_sse4_1` prototypes and the matching
// function pointer. The signature takes a raw int16_t filter table
// (x_filters) plus x0_qn and x_step_qn instead of the InterpFilterParams /
// ConvolveParams arguments used by the other convolve declarations here.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): x0_qn / x_step_qn look like a fixed-point start position and
// step -- confirm the precision against the definition.
735 void av1_convolve_horiz_rs_c(const uint8_t* src,
736                              int src_stride,
737                              uint8_t* dst,
738                              int dst_stride,
739                              int w,
740                              int h,
741                              const int16_t* x_filters,
742                              int x0_qn,
743                              int x_step_qn);
744 void av1_convolve_horiz_rs_sse4_1(const uint8_t* src,
745                                   int src_stride,
746                                   uint8_t* dst,
747                                   int dst_stride,
748                                   int w,
749                                   int h,
750                                   const int16_t* x_filters,
751                                   int x0_qn,
752                                   int x_step_qn);
// Dispatch pointer for av1_convolve_horiz_rs.
753 RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t* src,
754                                           int src_stride,
755                                           uint8_t* dst,
756                                           int dst_stride,
757                                           int w,
758                                           int h,
759                                           const int16_t* x_filters,
760                                           int x0_qn,
761                                           int x_step_qn);
762 
// av1_convolve_x_sr: prototypes for the `_c`, `_sse2` and `_avx2` variants and
// the function pointer sharing their signature. The parameter list is the
// same as av1_convolve_2d_sr, including both filter_params_x/_y and both
// subpel_x_qn/_y_qn.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): presumably only the x-direction arguments are used by the
// implementations -- confirm.
763 void av1_convolve_x_sr_c(const uint8_t* src,
764                          int src_stride,
765                          uint8_t* dst,
766                          int dst_stride,
767                          int w,
768                          int h,
769                          const InterpFilterParams* filter_params_x,
770                          const InterpFilterParams* filter_params_y,
771                          const int subpel_x_qn,
772                          const int subpel_y_qn,
773                          ConvolveParams* conv_params);
774 void av1_convolve_x_sr_sse2(const uint8_t* src,
775                             int src_stride,
776                             uint8_t* dst,
777                             int dst_stride,
778                             int w,
779                             int h,
780                             const InterpFilterParams* filter_params_x,
781                             const InterpFilterParams* filter_params_y,
782                             const int subpel_x_qn,
783                             const int subpel_y_qn,
784                             ConvolveParams* conv_params);
785 void av1_convolve_x_sr_avx2(const uint8_t* src,
786                             int src_stride,
787                             uint8_t* dst,
788                             int dst_stride,
789                             int w,
790                             int h,
791                             const InterpFilterParams* filter_params_x,
792                             const InterpFilterParams* filter_params_y,
793                             const int subpel_x_qn,
794                             const int subpel_y_qn,
795                             ConvolveParams* conv_params);
// Dispatch pointer for av1_convolve_x_sr.
796 RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t* src,
797                                       int src_stride,
798                                       uint8_t* dst,
799                                       int dst_stride,
800                                       int w,
801                                       int h,
802                                       const InterpFilterParams* filter_params_x,
803                                       const InterpFilterParams* filter_params_y,
804                                       const int subpel_x_qn,
805                                       const int subpel_y_qn,
806                                       ConvolveParams* conv_params);
807 
// av1_convolve_y_sr: prototypes for the `_c`, `_sse2` and `_avx2` variants and
// the function pointer sharing their signature. The parameter list matches
// av1_convolve_x_sr and av1_convolve_2d_sr exactly.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): presumably only the y-direction arguments are used by the
// implementations -- confirm.
808 void av1_convolve_y_sr_c(const uint8_t* src,
809                          int src_stride,
810                          uint8_t* dst,
811                          int dst_stride,
812                          int w,
813                          int h,
814                          const InterpFilterParams* filter_params_x,
815                          const InterpFilterParams* filter_params_y,
816                          const int subpel_x_qn,
817                          const int subpel_y_qn,
818                          ConvolveParams* conv_params);
819 void av1_convolve_y_sr_sse2(const uint8_t* src,
820                             int src_stride,
821                             uint8_t* dst,
822                             int dst_stride,
823                             int w,
824                             int h,
825                             const InterpFilterParams* filter_params_x,
826                             const InterpFilterParams* filter_params_y,
827                             const int subpel_x_qn,
828                             const int subpel_y_qn,
829                             ConvolveParams* conv_params);
830 void av1_convolve_y_sr_avx2(const uint8_t* src,
831                             int src_stride,
832                             uint8_t* dst,
833                             int dst_stride,
834                             int w,
835                             int h,
836                             const InterpFilterParams* filter_params_x,
837                             const InterpFilterParams* filter_params_y,
838                             const int subpel_x_qn,
839                             const int subpel_y_qn,
840                             ConvolveParams* conv_params);
// Dispatch pointer for av1_convolve_y_sr.
841 RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t* src,
842                                       int src_stride,
843                                       uint8_t* dst,
844                                       int dst_stride,
845                                       int w,
846                                       int h,
847                                       const InterpFilterParams* filter_params_x,
848                                       const InterpFilterParams* filter_params_y,
849                                       const int subpel_x_qn,
850                                       const int subpel_y_qn,
851                                       ConvolveParams* conv_params);
852 
// av1_dist_wtd_convolve_2d: prototypes for the `_c`, `_sse2`, `_ssse3` and
// `_avx2` variants (four in total) followed by the function pointer of the
// same type. The signature is the same as the av1_convolve_2d_sr one.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): "dist_wtd" presumably stands for distance-weighted compound
// prediction, configured via conv_params -- confirm.
853 void av1_dist_wtd_convolve_2d_c(const uint8_t* src,
854                                 int src_stride,
855                                 uint8_t* dst,
856                                 int dst_stride,
857                                 int w,
858                                 int h,
859                                 const InterpFilterParams* filter_params_x,
860                                 const InterpFilterParams* filter_params_y,
861                                 const int subpel_x_qn,
862                                 const int subpel_y_qn,
863                                 ConvolveParams* conv_params);
864 void av1_dist_wtd_convolve_2d_sse2(const uint8_t* src,
865                                    int src_stride,
866                                    uint8_t* dst,
867                                    int dst_stride,
868                                    int w,
869                                    int h,
870                                    const InterpFilterParams* filter_params_x,
871                                    const InterpFilterParams* filter_params_y,
872                                    const int subpel_x_qn,
873                                    const int subpel_y_qn,
874                                    ConvolveParams* conv_params);
875 void av1_dist_wtd_convolve_2d_ssse3(const uint8_t* src,
876                                     int src_stride,
877                                     uint8_t* dst,
878                                     int dst_stride,
879                                     int w,
880                                     int h,
881                                     const InterpFilterParams* filter_params_x,
882                                     const InterpFilterParams* filter_params_y,
883                                     const int subpel_x_qn,
884                                     const int subpel_y_qn,
885                                     ConvolveParams* conv_params);
886 void av1_dist_wtd_convolve_2d_avx2(const uint8_t* src,
887                                    int src_stride,
888                                    uint8_t* dst,
889                                    int dst_stride,
890                                    int w,
891                                    int h,
892                                    const InterpFilterParams* filter_params_x,
893                                    const InterpFilterParams* filter_params_y,
894                                    const int subpel_x_qn,
895                                    const int subpel_y_qn,
896                                    ConvolveParams* conv_params);
// Dispatch pointer for av1_dist_wtd_convolve_2d.
897 RTCD_EXTERN void (*av1_dist_wtd_convolve_2d)(
898     const uint8_t* src,
899     int src_stride,
900     uint8_t* dst,
901     int dst_stride,
902     int w,
903     int h,
904     const InterpFilterParams* filter_params_x,
905     const InterpFilterParams* filter_params_y,
906     const int subpel_x_qn,
907     const int subpel_y_qn,
908     ConvolveParams* conv_params);
909 
// av1_dist_wtd_convolve_2d_copy: `_c`, `_sse2` and `_avx2` prototypes and the
// function pointer of the same type. The signature is identical to
// av1_dist_wtd_convolve_2d, filter and subpel arguments included.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): the "copy" variant presumably ignores the filter/subpel
// arguments, which are kept for signature uniformity -- confirm.
910 void av1_dist_wtd_convolve_2d_copy_c(const uint8_t* src,
911                                      int src_stride,
912                                      uint8_t* dst,
913                                      int dst_stride,
914                                      int w,
915                                      int h,
916                                      const InterpFilterParams* filter_params_x,
917                                      const InterpFilterParams* filter_params_y,
918                                      const int subpel_x_qn,
919                                      const int subpel_y_qn,
920                                      ConvolveParams* conv_params);
921 void av1_dist_wtd_convolve_2d_copy_sse2(
922     const uint8_t* src,
923     int src_stride,
924     uint8_t* dst,
925     int dst_stride,
926     int w,
927     int h,
928     const InterpFilterParams* filter_params_x,
929     const InterpFilterParams* filter_params_y,
930     const int subpel_x_qn,
931     const int subpel_y_qn,
932     ConvolveParams* conv_params);
933 void av1_dist_wtd_convolve_2d_copy_avx2(
934     const uint8_t* src,
935     int src_stride,
936     uint8_t* dst,
937     int dst_stride,
938     int w,
939     int h,
940     const InterpFilterParams* filter_params_x,
941     const InterpFilterParams* filter_params_y,
942     const int subpel_x_qn,
943     const int subpel_y_qn,
944     ConvolveParams* conv_params);
// Dispatch pointer for av1_dist_wtd_convolve_2d_copy.
945 RTCD_EXTERN void (*av1_dist_wtd_convolve_2d_copy)(
946     const uint8_t* src,
947     int src_stride,
948     uint8_t* dst,
949     int dst_stride,
950     int w,
951     int h,
952     const InterpFilterParams* filter_params_x,
953     const InterpFilterParams* filter_params_y,
954     const int subpel_x_qn,
955     const int subpel_y_qn,
956     ConvolveParams* conv_params);
957 
// av1_dist_wtd_convolve_x: `_c`, `_sse2` and `_avx2` prototypes and the
// function pointer of the same type. Same parameter list as
// av1_dist_wtd_convolve_2d.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): presumably the horizontal-only counterpart of the 2D
// function -- confirm against the definition.
958 void av1_dist_wtd_convolve_x_c(const uint8_t* src,
959                                int src_stride,
960                                uint8_t* dst,
961                                int dst_stride,
962                                int w,
963                                int h,
964                                const InterpFilterParams* filter_params_x,
965                                const InterpFilterParams* filter_params_y,
966                                const int subpel_x_qn,
967                                const int subpel_y_qn,
968                                ConvolveParams* conv_params);
969 void av1_dist_wtd_convolve_x_sse2(const uint8_t* src,
970                                   int src_stride,
971                                   uint8_t* dst,
972                                   int dst_stride,
973                                   int w,
974                                   int h,
975                                   const InterpFilterParams* filter_params_x,
976                                   const InterpFilterParams* filter_params_y,
977                                   const int subpel_x_qn,
978                                   const int subpel_y_qn,
979                                   ConvolveParams* conv_params);
980 void av1_dist_wtd_convolve_x_avx2(const uint8_t* src,
981                                   int src_stride,
982                                   uint8_t* dst,
983                                   int dst_stride,
984                                   int w,
985                                   int h,
986                                   const InterpFilterParams* filter_params_x,
987                                   const InterpFilterParams* filter_params_y,
988                                   const int subpel_x_qn,
989                                   const int subpel_y_qn,
990                                   ConvolveParams* conv_params);
// Dispatch pointer for av1_dist_wtd_convolve_x.
991 RTCD_EXTERN void (*av1_dist_wtd_convolve_x)(
992     const uint8_t* src,
993     int src_stride,
994     uint8_t* dst,
995     int dst_stride,
996     int w,
997     int h,
998     const InterpFilterParams* filter_params_x,
999     const InterpFilterParams* filter_params_y,
1000     const int subpel_x_qn,
1001     const int subpel_y_qn,
1002     ConvolveParams* conv_params);
1003 
// av1_dist_wtd_convolve_y: `_c`, `_sse2` and `_avx2` prototypes and the
// function pointer of the same type. Same parameter list as
// av1_dist_wtd_convolve_x and av1_dist_wtd_convolve_2d.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): presumably the vertical-only counterpart of the 2D
// function -- confirm against the definition.
1004 void av1_dist_wtd_convolve_y_c(const uint8_t* src,
1005                                int src_stride,
1006                                uint8_t* dst,
1007                                int dst_stride,
1008                                int w,
1009                                int h,
1010                                const InterpFilterParams* filter_params_x,
1011                                const InterpFilterParams* filter_params_y,
1012                                const int subpel_x_qn,
1013                                const int subpel_y_qn,
1014                                ConvolveParams* conv_params);
1015 void av1_dist_wtd_convolve_y_sse2(const uint8_t* src,
1016                                   int src_stride,
1017                                   uint8_t* dst,
1018                                   int dst_stride,
1019                                   int w,
1020                                   int h,
1021                                   const InterpFilterParams* filter_params_x,
1022                                   const InterpFilterParams* filter_params_y,
1023                                   const int subpel_x_qn,
1024                                   const int subpel_y_qn,
1025                                   ConvolveParams* conv_params);
1026 void av1_dist_wtd_convolve_y_avx2(const uint8_t* src,
1027                                   int src_stride,
1028                                   uint8_t* dst,
1029                                   int dst_stride,
1030                                   int w,
1031                                   int h,
1032                                   const InterpFilterParams* filter_params_x,
1033                                   const InterpFilterParams* filter_params_y,
1034                                   const int subpel_x_qn,
1035                                   const int subpel_y_qn,
1036                                   ConvolveParams* conv_params);
// Dispatch pointer for av1_dist_wtd_convolve_y.
1037 RTCD_EXTERN void (*av1_dist_wtd_convolve_y)(
1038     const uint8_t* src,
1039     int src_stride,
1040     uint8_t* dst,
1041     int dst_stride,
1042     int w,
1043     int h,
1044     const InterpFilterParams* filter_params_x,
1045     const InterpFilterParams* filter_params_y,
1046     const int subpel_x_qn,
1047     const int subpel_y_qn,
1048     ConvolveParams* conv_params);
1049 
// av1_dr_prediction_z1: `_c` and `_avx2` prototypes and the function pointer
// of the same type. Writes through `dst` (non-const) and reads the `above`
// and `left` arrays (const); of the two upsample flags used in this family,
// this signature carries only upsample_above.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): presumably the directional intra predictor for the "zone 1"
// angle range -- confirm against the definition.
1050 void av1_dr_prediction_z1_c(uint8_t* dst,
1051                             ptrdiff_t stride,
1052                             int bw,
1053                             int bh,
1054                             const uint8_t* above,
1055                             const uint8_t* left,
1056                             int upsample_above,
1057                             int dx,
1058                             int dy);
1059 void av1_dr_prediction_z1_avx2(uint8_t* dst,
1060                                ptrdiff_t stride,
1061                                int bw,
1062                                int bh,
1063                                const uint8_t* above,
1064                                const uint8_t* left,
1065                                int upsample_above,
1066                                int dx,
1067                                int dy);
// Dispatch pointer for av1_dr_prediction_z1.
1068 RTCD_EXTERN void (*av1_dr_prediction_z1)(uint8_t* dst,
1069                                          ptrdiff_t stride,
1070                                          int bw,
1071                                          int bh,
1072                                          const uint8_t* above,
1073                                          const uint8_t* left,
1074                                          int upsample_above,
1075                                          int dx,
1076                                          int dy);
1077 
// av1_dr_prediction_z2: `_c` and `_avx2` prototypes and the function pointer
// of the same type. Unlike the z1 and z3 signatures, this one takes both
// upsample_above and upsample_left.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): presumably the directional intra predictor for the "zone 2"
// angle range, which would explain needing both edges -- confirm.
1078 void av1_dr_prediction_z2_c(uint8_t* dst,
1079                             ptrdiff_t stride,
1080                             int bw,
1081                             int bh,
1082                             const uint8_t* above,
1083                             const uint8_t* left,
1084                             int upsample_above,
1085                             int upsample_left,
1086                             int dx,
1087                             int dy);
1088 void av1_dr_prediction_z2_avx2(uint8_t* dst,
1089                                ptrdiff_t stride,
1090                                int bw,
1091                                int bh,
1092                                const uint8_t* above,
1093                                const uint8_t* left,
1094                                int upsample_above,
1095                                int upsample_left,
1096                                int dx,
1097                                int dy);
// Dispatch pointer for av1_dr_prediction_z2.
1098 RTCD_EXTERN void (*av1_dr_prediction_z2)(uint8_t* dst,
1099                                          ptrdiff_t stride,
1100                                          int bw,
1101                                          int bh,
1102                                          const uint8_t* above,
1103                                          const uint8_t* left,
1104                                          int upsample_above,
1105                                          int upsample_left,
1106                                          int dx,
1107                                          int dy);
1108 
// av1_dr_prediction_z3: `_c` and `_avx2` prototypes and the function pointer
// of the same type. Mirrors the z1 signature but carries upsample_left in
// place of upsample_above.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): presumably the directional intra predictor for the "zone 3"
// angle range -- confirm against the definition.
1109 void av1_dr_prediction_z3_c(uint8_t* dst,
1110                             ptrdiff_t stride,
1111                             int bw,
1112                             int bh,
1113                             const uint8_t* above,
1114                             const uint8_t* left,
1115                             int upsample_left,
1116                             int dx,
1117                             int dy);
1118 void av1_dr_prediction_z3_avx2(uint8_t* dst,
1119                                ptrdiff_t stride,
1120                                int bw,
1121                                int bh,
1122                                const uint8_t* above,
1123                                const uint8_t* left,
1124                                int upsample_left,
1125                                int dx,
1126                                int dy);
// Dispatch pointer for av1_dr_prediction_z3.
1127 RTCD_EXTERN void (*av1_dr_prediction_z3)(uint8_t* dst,
1128                                          ptrdiff_t stride,
1129                                          int bw,
1130                                          int bh,
1131                                          const uint8_t* above,
1132                                          const uint8_t* left,
1133                                          int upsample_left,
1134                                          int dx,
1135                                          int dy);
1136 
// av1_filter_intra_edge: `_c` and `_sse4_1` prototypes and the function
// pointer of the same type. `p` is a non-const uint8_t pointer, so the
// buffer may be modified through it; `sz` and `strength` are plain ints.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): presumably filters `sz` edge samples in place -- confirm.
1137 void av1_filter_intra_edge_c(uint8_t* p, int sz, int strength);
1138 void av1_filter_intra_edge_sse4_1(uint8_t* p, int sz, int strength);
1139 RTCD_EXTERN void (*av1_filter_intra_edge)(uint8_t* p, int sz, int strength);
1140 
// av1_filter_intra_edge_high: same shape as av1_filter_intra_edge, but the
// buffer pointer is uint16_t* instead of uint8_t*. `_c` and `_sse4_1`
// prototypes plus the function pointer of the same type.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): presumably the high-bit-depth counterpart -- confirm.
1141 void av1_filter_intra_edge_high_c(uint16_t* p, int sz, int strength);
1142 void av1_filter_intra_edge_high_sse4_1(uint16_t* p, int sz, int strength);
1143 RTCD_EXTERN void (*av1_filter_intra_edge_high)(uint16_t* p,
1144                                                int sz,
1145                                                int strength);
1146 
// av1_filter_intra_predictor: `_c` and `_sse4_1` prototypes and the function
// pointer of the same type. Takes a writable `dst` with its stride, a
// TX_SIZE, read-only `above` / `left` arrays and an int `mode`.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): `mode` presumably selects the filter-intra mode; its valid
// range is not visible in this header -- confirm.
1147 void av1_filter_intra_predictor_c(uint8_t* dst,
1148                                   ptrdiff_t stride,
1149                                   TX_SIZE tx_size,
1150                                   const uint8_t* above,
1151                                   const uint8_t* left,
1152                                   int mode);
1153 void av1_filter_intra_predictor_sse4_1(uint8_t* dst,
1154                                        ptrdiff_t stride,
1155                                        TX_SIZE tx_size,
1156                                        const uint8_t* above,
1157                                        const uint8_t* left,
1158                                        int mode);
// Dispatch pointer for av1_filter_intra_predictor.
1159 RTCD_EXTERN void (*av1_filter_intra_predictor)(uint8_t* dst,
1160                                                ptrdiff_t stride,
1161                                                TX_SIZE tx_size,
1162                                                const uint8_t* above,
1163                                                const uint8_t* left,
1164                                                int mode);
1165 
// av1_full_range_search: only a `_c` prototype is declared, and the
// unsuffixed name is a macro alias for it rather than a function pointer,
// so calls resolve to av1_full_range_search_c at compile time.
// Returns int; `best_mv` and `num00` are non-const pointers, so they can be
// written through.
// NOTE(review): the meaning of the return value is not visible here --
// check the definition.
1166 int av1_full_range_search_c(const struct macroblock* x,
1167                             const struct search_site_config* cfg,
1168                             MV* ref_mv,
1169                             MV* best_mv,
1170                             int search_param,
1171                             int sad_per_bit,
1172                             int* num00,
1173                             const struct aom_variance_vtable* fn_ptr,
1174                             const MV* center_mv);
1175 #define av1_full_range_search av1_full_range_search_c
1176 
// av1_fwd_txfm2d_16x16: `_c`, `_sse4_1` and `_avx2` prototypes and the
// function pointer of the same type. Reads int16_t `input` (const) and
// writes int32_t `output`; `stride`, `tx_type` and `bd` are passed by value.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
// NOTE(review): `stride` presumably applies to `input` and `bd` is
// presumably the bit depth -- confirm against the definition.
1177 void av1_fwd_txfm2d_16x16_c(const int16_t* input,
1178                             int32_t* output,
1179                             int stride,
1180                             TX_TYPE tx_type,
1181                             int bd);
1182 void av1_fwd_txfm2d_16x16_sse4_1(const int16_t* input,
1183                                  int32_t* output,
1184                                  int stride,
1185                                  TX_TYPE tx_type,
1186                                  int bd);
1187 void av1_fwd_txfm2d_16x16_avx2(const int16_t* input,
1188                                int32_t* output,
1189                                int stride,
1190                                TX_TYPE tx_type,
1191                                int bd);
// Dispatch pointer for av1_fwd_txfm2d_16x16.
1192 RTCD_EXTERN void (*av1_fwd_txfm2d_16x16)(const int16_t* input,
1193                                          int32_t* output,
1194                                          int stride,
1195                                          TX_TYPE tx_type,
1196                                          int bd);
1197 
// av1_fwd_txfm2d_16x32: `_c` and `_sse4_1` prototypes (no `_avx2` variant is
// declared for this size) and the function pointer of the same type.
// Signature is the same as the other av1_fwd_txfm2d_* declarations:
// const int16_t input, int32_t output, stride, tx_type, bd.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
1198 void av1_fwd_txfm2d_16x32_c(const int16_t* input,
1199                             int32_t* output,
1200                             int stride,
1201                             TX_TYPE tx_type,
1202                             int bd);
1203 void av1_fwd_txfm2d_16x32_sse4_1(const int16_t* input,
1204                                  int32_t* output,
1205                                  int stride,
1206                                  TX_TYPE tx_type,
1207                                  int bd);
// Dispatch pointer for av1_fwd_txfm2d_16x32.
1208 RTCD_EXTERN void (*av1_fwd_txfm2d_16x32)(const int16_t* input,
1209                                          int32_t* output,
1210                                          int stride,
1211                                          TX_TYPE tx_type,
1212                                          int bd);
1213 
// av1_fwd_txfm2d_16x4: `_c` and `_sse4_1` prototypes (no `_avx2` variant is
// declared for this size) and the function pointer of the same type.
// Signature is the same as the other av1_fwd_txfm2d_* declarations:
// const int16_t input, int32_t output, stride, tx_type, bd.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
1214 void av1_fwd_txfm2d_16x4_c(const int16_t* input,
1215                            int32_t* output,
1216                            int stride,
1217                            TX_TYPE tx_type,
1218                            int bd);
1219 void av1_fwd_txfm2d_16x4_sse4_1(const int16_t* input,
1220                                 int32_t* output,
1221                                 int stride,
1222                                 TX_TYPE tx_type,
1223                                 int bd);
// Dispatch pointer for av1_fwd_txfm2d_16x4.
1224 RTCD_EXTERN void (*av1_fwd_txfm2d_16x4)(const int16_t* input,
1225                                         int32_t* output,
1226                                         int stride,
1227                                         TX_TYPE tx_type,
1228                                         int bd);
1229 
// av1_fwd_txfm2d_16x64: `_c` and `_sse4_1` prototypes (no `_avx2` variant is
// declared for this size) and the function pointer of the same type.
// Signature is the same as the other av1_fwd_txfm2d_* declarations:
// const int16_t input, int32_t output, stride, tx_type, bd.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
1230 void av1_fwd_txfm2d_16x64_c(const int16_t* input,
1231                             int32_t* output,
1232                             int stride,
1233                             TX_TYPE tx_type,
1234                             int bd);
1235 void av1_fwd_txfm2d_16x64_sse4_1(const int16_t* input,
1236                                  int32_t* output,
1237                                  int stride,
1238                                  TX_TYPE tx_type,
1239                                  int bd);
// Dispatch pointer for av1_fwd_txfm2d_16x64.
1240 RTCD_EXTERN void (*av1_fwd_txfm2d_16x64)(const int16_t* input,
1241                                          int32_t* output,
1242                                          int stride,
1243                                          TX_TYPE tx_type,
1244                                          int bd);
1245 
// av1_fwd_txfm2d_16x8: `_c`, `_sse4_1` and `_avx2` prototypes and the
// function pointer of the same type.
// Signature is the same as the other av1_fwd_txfm2d_* declarations:
// const int16_t input, int32_t output, stride, tx_type, bd.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
1246 void av1_fwd_txfm2d_16x8_c(const int16_t* input,
1247                            int32_t* output,
1248                            int stride,
1249                            TX_TYPE tx_type,
1250                            int bd);
1251 void av1_fwd_txfm2d_16x8_sse4_1(const int16_t* input,
1252                                 int32_t* output,
1253                                 int stride,
1254                                 TX_TYPE tx_type,
1255                                 int bd);
1256 void av1_fwd_txfm2d_16x8_avx2(const int16_t* input,
1257                               int32_t* output,
1258                               int stride,
1259                               TX_TYPE tx_type,
1260                               int bd);
// Dispatch pointer for av1_fwd_txfm2d_16x8.
1261 RTCD_EXTERN void (*av1_fwd_txfm2d_16x8)(const int16_t* input,
1262                                         int32_t* output,
1263                                         int stride,
1264                                         TX_TYPE tx_type,
1265                                         int bd);
1266 
// av1_fwd_txfm2d_32x16: `_c` and `_sse4_1` prototypes (no `_avx2` variant is
// declared for this size) and the function pointer of the same type.
// Signature is the same as the other av1_fwd_txfm2d_* declarations:
// const int16_t input, int32_t output, stride, tx_type, bd.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
1267 void av1_fwd_txfm2d_32x16_c(const int16_t* input,
1268                             int32_t* output,
1269                             int stride,
1270                             TX_TYPE tx_type,
1271                             int bd);
1272 void av1_fwd_txfm2d_32x16_sse4_1(const int16_t* input,
1273                                  int32_t* output,
1274                                  int stride,
1275                                  TX_TYPE tx_type,
1276                                  int bd);
// Dispatch pointer for av1_fwd_txfm2d_32x16.
1277 RTCD_EXTERN void (*av1_fwd_txfm2d_32x16)(const int16_t* input,
1278                                          int32_t* output,
1279                                          int stride,
1280                                          TX_TYPE tx_type,
1281                                          int bd);
1282 
// av1_fwd_txfm2d_32x32: `_c`, `_sse4_1` and `_avx2` prototypes and the
// function pointer of the same type.
// Signature is the same as the other av1_fwd_txfm2d_* declarations:
// const int16_t input, int32_t output, stride, tx_type, bd.
// RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise.
1283 void av1_fwd_txfm2d_32x32_c(const int16_t* input,
1284                             int32_t* output,
1285                             int stride,
1286                             TX_TYPE tx_type,
1287                             int bd);
1288 void av1_fwd_txfm2d_32x32_sse4_1(const int16_t* input,
1289                                  int32_t* output,
1290                                  int stride,
1291                                  TX_TYPE tx_type,
1292                                  int bd);
1293 void av1_fwd_txfm2d_32x32_avx2(const int16_t* input,
1294                                int32_t* output,
1295                                int stride,
1296                                TX_TYPE tx_type,
1297                                int bd);
// Dispatch pointer for av1_fwd_txfm2d_32x32.
1298 RTCD_EXTERN void (*av1_fwd_txfm2d_32x32)(const int16_t* input,
1299                                          int32_t* output,
1300                                          int stride,
1301                                          TX_TYPE tx_type,
1302                                          int bd);
1303 
/*
 * av1_fwd_txfm2d_32x64: _c and _sse4_1 prototypes plus the
 * av1_fwd_txfm2d_32x64 function pointer, all with the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1304 void av1_fwd_txfm2d_32x64_c(const int16_t* input,
1305                             int32_t* output,
1306                             int stride,
1307                             TX_TYPE tx_type,
1308                             int bd);
1309 void av1_fwd_txfm2d_32x64_sse4_1(const int16_t* input,
1310                                  int32_t* output,
1311                                  int stride,
1312                                  TX_TYPE tx_type,
1313                                  int bd);
1314 RTCD_EXTERN void (*av1_fwd_txfm2d_32x64)(const int16_t* input,
1315                                          int32_t* output,
1316                                          int stride,
1317                                          TX_TYPE tx_type,
1318                                          int bd);
1319 
/*
 * av1_fwd_txfm2d_32x8: _c and _sse4_1 prototypes plus the
 * av1_fwd_txfm2d_32x8 function pointer, all with the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1320 void av1_fwd_txfm2d_32x8_c(const int16_t* input,
1321                            int32_t* output,
1322                            int stride,
1323                            TX_TYPE tx_type,
1324                            int bd);
1325 void av1_fwd_txfm2d_32x8_sse4_1(const int16_t* input,
1326                                 int32_t* output,
1327                                 int stride,
1328                                 TX_TYPE tx_type,
1329                                 int bd);
1330 RTCD_EXTERN void (*av1_fwd_txfm2d_32x8)(const int16_t* input,
1331                                         int32_t* output,
1332                                         int stride,
1333                                         TX_TYPE tx_type,
1334                                         int bd);
1335 
/*
 * av1_fwd_txfm2d_4x16: _c and _sse4_1 prototypes plus the
 * av1_fwd_txfm2d_4x16 function pointer, all with the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1336 void av1_fwd_txfm2d_4x16_c(const int16_t* input,
1337                            int32_t* output,
1338                            int stride,
1339                            TX_TYPE tx_type,
1340                            int bd);
1341 void av1_fwd_txfm2d_4x16_sse4_1(const int16_t* input,
1342                                 int32_t* output,
1343                                 int stride,
1344                                 TX_TYPE tx_type,
1345                                 int bd);
1346 RTCD_EXTERN void (*av1_fwd_txfm2d_4x16)(const int16_t* input,
1347                                         int32_t* output,
1348                                         int stride,
1349                                         TX_TYPE tx_type,
1350                                         int bd);
1351 
/*
 * av1_fwd_txfm2d_4x4: _c and _sse4_1 prototypes plus the
 * av1_fwd_txfm2d_4x4 function pointer, all with the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1352 void av1_fwd_txfm2d_4x4_c(const int16_t* input,
1353                           int32_t* output,
1354                           int stride,
1355                           TX_TYPE tx_type,
1356                           int bd);
1357 void av1_fwd_txfm2d_4x4_sse4_1(const int16_t* input,
1358                                int32_t* output,
1359                                int stride,
1360                                TX_TYPE tx_type,
1361                                int bd);
1362 RTCD_EXTERN void (*av1_fwd_txfm2d_4x4)(const int16_t* input,
1363                                        int32_t* output,
1364                                        int stride,
1365                                        TX_TYPE tx_type,
1366                                        int bd);
1367 
/*
 * av1_fwd_txfm2d_4x8: _c and _sse4_1 prototypes plus the
 * av1_fwd_txfm2d_4x8 function pointer, all with the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1368 void av1_fwd_txfm2d_4x8_c(const int16_t* input,
1369                           int32_t* output,
1370                           int stride,
1371                           TX_TYPE tx_type,
1372                           int bd);
1373 void av1_fwd_txfm2d_4x8_sse4_1(const int16_t* input,
1374                                int32_t* output,
1375                                int stride,
1376                                TX_TYPE tx_type,
1377                                int bd);
1378 RTCD_EXTERN void (*av1_fwd_txfm2d_4x8)(const int16_t* input,
1379                                        int32_t* output,
1380                                        int stride,
1381                                        TX_TYPE tx_type,
1382                                        int bd);
1383 
/*
 * av1_fwd_txfm2d_64x16: _c and _sse4_1 prototypes plus the
 * av1_fwd_txfm2d_64x16 function pointer, all with the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1384 void av1_fwd_txfm2d_64x16_c(const int16_t* input,
1385                             int32_t* output,
1386                             int stride,
1387                             TX_TYPE tx_type,
1388                             int bd);
1389 void av1_fwd_txfm2d_64x16_sse4_1(const int16_t* input,
1390                                  int32_t* output,
1391                                  int stride,
1392                                  TX_TYPE tx_type,
1393                                  int bd);
1394 RTCD_EXTERN void (*av1_fwd_txfm2d_64x16)(const int16_t* input,
1395                                          int32_t* output,
1396                                          int stride,
1397                                          TX_TYPE tx_type,
1398                                          int bd);
1399 
/*
 * av1_fwd_txfm2d_64x32: _c and _sse4_1 prototypes plus the
 * av1_fwd_txfm2d_64x32 function pointer, all with the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1400 void av1_fwd_txfm2d_64x32_c(const int16_t* input,
1401                             int32_t* output,
1402                             int stride,
1403                             TX_TYPE tx_type,
1404                             int bd);
1405 void av1_fwd_txfm2d_64x32_sse4_1(const int16_t* input,
1406                                  int32_t* output,
1407                                  int stride,
1408                                  TX_TYPE tx_type,
1409                                  int bd);
1410 RTCD_EXTERN void (*av1_fwd_txfm2d_64x32)(const int16_t* input,
1411                                          int32_t* output,
1412                                          int stride,
1413                                          TX_TYPE tx_type,
1414                                          int bd);
1415 
/*
 * av1_fwd_txfm2d_64x64: _c, _sse4_1 and _avx2 prototypes plus the
 * av1_fwd_txfm2d_64x64 function pointer; all four share the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1416 void av1_fwd_txfm2d_64x64_c(const int16_t* input,
1417                             int32_t* output,
1418                             int stride,
1419                             TX_TYPE tx_type,
1420                             int bd);
1421 void av1_fwd_txfm2d_64x64_sse4_1(const int16_t* input,
1422                                  int32_t* output,
1423                                  int stride,
1424                                  TX_TYPE tx_type,
1425                                  int bd);
1426 void av1_fwd_txfm2d_64x64_avx2(const int16_t* input,
1427                                int32_t* output,
1428                                int stride,
1429                                TX_TYPE tx_type,
1430                                int bd);
1431 RTCD_EXTERN void (*av1_fwd_txfm2d_64x64)(const int16_t* input,
1432                                          int32_t* output,
1433                                          int stride,
1434                                          TX_TYPE tx_type,
1435                                          int bd);
1436 
/*
 * av1_fwd_txfm2d_8x16: _c, _sse4_1 and _avx2 prototypes plus the
 * av1_fwd_txfm2d_8x16 function pointer; all four share the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1437 void av1_fwd_txfm2d_8x16_c(const int16_t* input,
1438                            int32_t* output,
1439                            int stride,
1440                            TX_TYPE tx_type,
1441                            int bd);
1442 void av1_fwd_txfm2d_8x16_sse4_1(const int16_t* input,
1443                                 int32_t* output,
1444                                 int stride,
1445                                 TX_TYPE tx_type,
1446                                 int bd);
1447 void av1_fwd_txfm2d_8x16_avx2(const int16_t* input,
1448                               int32_t* output,
1449                               int stride,
1450                               TX_TYPE tx_type,
1451                               int bd);
1452 RTCD_EXTERN void (*av1_fwd_txfm2d_8x16)(const int16_t* input,
1453                                         int32_t* output,
1454                                         int stride,
1455                                         TX_TYPE tx_type,
1456                                         int bd);
1457 
/*
 * av1_fwd_txfm2d_8x32: _c and _sse4_1 prototypes plus the
 * av1_fwd_txfm2d_8x32 function pointer, all with the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1458 void av1_fwd_txfm2d_8x32_c(const int16_t* input,
1459                            int32_t* output,
1460                            int stride,
1461                            TX_TYPE tx_type,
1462                            int bd);
1463 void av1_fwd_txfm2d_8x32_sse4_1(const int16_t* input,
1464                                 int32_t* output,
1465                                 int stride,
1466                                 TX_TYPE tx_type,
1467                                 int bd);
1468 RTCD_EXTERN void (*av1_fwd_txfm2d_8x32)(const int16_t* input,
1469                                         int32_t* output,
1470                                         int stride,
1471                                         TX_TYPE tx_type,
1472                                         int bd);
1473 
/*
 * av1_fwd_txfm2d_8x4: _c and _sse4_1 prototypes plus the
 * av1_fwd_txfm2d_8x4 function pointer, all with the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1474 void av1_fwd_txfm2d_8x4_c(const int16_t* input,
1475                           int32_t* output,
1476                           int stride,
1477                           TX_TYPE tx_type,
1478                           int bd);
1479 void av1_fwd_txfm2d_8x4_sse4_1(const int16_t* input,
1480                                int32_t* output,
1481                                int stride,
1482                                TX_TYPE tx_type,
1483                                int bd);
1484 RTCD_EXTERN void (*av1_fwd_txfm2d_8x4)(const int16_t* input,
1485                                        int32_t* output,
1486                                        int stride,
1487                                        TX_TYPE tx_type,
1488                                        int bd);
1489 
/*
 * av1_fwd_txfm2d_8x8: _c, _sse4_1 and _avx2 prototypes plus the
 * av1_fwd_txfm2d_8x8 function pointer; all four share the signature
 * (input, output, stride, tx_type, bd) -> void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1490 void av1_fwd_txfm2d_8x8_c(const int16_t* input,
1491                           int32_t* output,
1492                           int stride,
1493                           TX_TYPE tx_type,
1494                           int bd);
1495 void av1_fwd_txfm2d_8x8_sse4_1(const int16_t* input,
1496                                int32_t* output,
1497                                int stride,
1498                                TX_TYPE tx_type,
1499                                int bd);
1500 void av1_fwd_txfm2d_8x8_avx2(const int16_t* input,
1501                              int32_t* output,
1502                              int stride,
1503                              TX_TYPE tx_type,
1504                              int bd);
1505 RTCD_EXTERN void (*av1_fwd_txfm2d_8x8)(const int16_t* input,
1506                                        int32_t* output,
1507                                        int stride,
1508                                        TX_TYPE tx_type,
1509                                        int bd);
1510 
/*
 * av1_fwht4x4: only a _c prototype is declared, and the public name is a
 * macro alias for it, so calls bind to av1_fwht4x4_c at compile time (no
 * function pointer is involved).
 */
1511 void av1_fwht4x4_c(const int16_t* input, tran_low_t* output, int stride);
1512 #define av1_fwht4x4 av1_fwht4x4_c
1513 
/*
 * av1_get_crc32c_value: _c and _sse4_2 prototypes plus the
 * av1_get_crc32c_value function pointer; each takes
 * (crc_calculator, p, length) and returns uint32_t.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 * NOTE(review): crc_calculator is an opaque void*; its concrete type is
 * not visible from this header -- check the implementation.
 */
1514 uint32_t av1_get_crc32c_value_c(void* crc_calculator,
1515                                 uint8_t* p,
1516                                 size_t length);
1517 uint32_t av1_get_crc32c_value_sse4_2(void* crc_calculator,
1518                                      uint8_t* p,
1519                                      size_t length);
1520 RTCD_EXTERN uint32_t (*av1_get_crc32c_value)(void* crc_calculator,
1521                                              uint8_t* p,
1522                                              size_t length);
1523 
/*
 * av1_get_horver_correlation_full: _c, _sse4_1 and _avx2 prototypes plus
 * the av1_get_horver_correlation_full function pointer; each takes
 * (diff, stride, w, h, hcorr, vcorr) and returns void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 * NOTE(review): hcorr / vcorr are non-const float pointers, presumably
 * the output slots for the two results -- confirm.
 */
1524 void av1_get_horver_correlation_full_c(const int16_t* diff,
1525                                        int stride,
1526                                        int w,
1527                                        int h,
1528                                        float* hcorr,
1529                                        float* vcorr);
1530 void av1_get_horver_correlation_full_sse4_1(const int16_t* diff,
1531                                             int stride,
1532                                             int w,
1533                                             int h,
1534                                             float* hcorr,
1535                                             float* vcorr);
1536 void av1_get_horver_correlation_full_avx2(const int16_t* diff,
1537                                           int stride,
1538                                           int w,
1539                                           int h,
1540                                           float* hcorr,
1541                                           float* vcorr);
1542 RTCD_EXTERN void (*av1_get_horver_correlation_full)(const int16_t* diff,
1543                                                     int stride,
1544                                                     int w,
1545                                                     int h,
1546                                                     float* hcorr,
1547                                                     float* vcorr);
1548 
/*
 * av1_get_nz_map_contexts: _c and _sse2 prototypes with identical
 * signatures (levels, scan, eob, tx_size, tx_class, coeff_contexts).
 * No function pointer is declared; the public name is a macro alias for
 * the _sse2 variant, so the choice is fixed at compile time.
 * NOTE(review): coeff_contexts is the only pointer to non-const data,
 * presumably the output buffer -- confirm.
 */
1549 void av1_get_nz_map_contexts_c(const uint8_t* const levels,
1550                                const int16_t* const scan,
1551                                const uint16_t eob,
1552                                const TX_SIZE tx_size,
1553                                const TX_CLASS tx_class,
1554                                int8_t* const coeff_contexts);
1555 void av1_get_nz_map_contexts_sse2(const uint8_t* const levels,
1556                                   const int16_t* const scan,
1557                                   const uint16_t eob,
1558                                   const TX_SIZE tx_size,
1559                                   const TX_CLASS tx_class,
1560                                   int8_t* const coeff_contexts);
1561 #define av1_get_nz_map_contexts av1_get_nz_map_contexts_sse2
1562 
/*
 * av1_highbd_block_error: _c, _sse2 and _avx2 prototypes plus the
 * av1_highbd_block_error function pointer; each takes
 * (coeff, dqcoeff, block_size, ssz, bd) and returns int64_t.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 * NOTE(review): ssz is a non-const int64_t pointer, presumably a second
 * output alongside the return value -- confirm.
 */
1563 int64_t av1_highbd_block_error_c(const tran_low_t* coeff,
1564                                  const tran_low_t* dqcoeff,
1565                                  intptr_t block_size,
1566                                  int64_t* ssz,
1567                                  int bd);
1568 int64_t av1_highbd_block_error_sse2(const tran_low_t* coeff,
1569                                     const tran_low_t* dqcoeff,
1570                                     intptr_t block_size,
1571                                     int64_t* ssz,
1572                                     int bd);
1573 int64_t av1_highbd_block_error_avx2(const tran_low_t* coeff,
1574                                     const tran_low_t* dqcoeff,
1575                                     intptr_t block_size,
1576                                     int64_t* ssz,
1577                                     int bd);
1578 RTCD_EXTERN int64_t (*av1_highbd_block_error)(const tran_low_t* coeff,
1579                                               const tran_low_t* dqcoeff,
1580                                               intptr_t block_size,
1581                                               int64_t* ssz,
1582                                               int bd);
1583 
/*
 * av1_highbd_convolve8: only a _c prototype is declared, and the public
 * name is a macro alias for it, so calls bind to av1_highbd_convolve8_c
 * at compile time.
 * NOTE(review): src / dst are typed uint8_t* even though the name says
 * "highbd"; how the implementation interprets them is not visible here.
 */
1584 void av1_highbd_convolve8_c(const uint8_t* src,
1585                             ptrdiff_t src_stride,
1586                             uint8_t* dst,
1587                             ptrdiff_t dst_stride,
1588                             const int16_t* filter_x,
1589                             int x_step_q4,
1590                             const int16_t* filter_y,
1591                             int y_step_q4,
1592                             int w,
1593                             int h,
1594                             int bps);
1595 #define av1_highbd_convolve8 av1_highbd_convolve8_c
1596 
/*
 * av1_highbd_convolve8_horiz: only a _c prototype is declared; the public
 * name is a macro alias for av1_highbd_convolve8_horiz_c (compile-time
 * binding, no function pointer). The parameter list matches
 * av1_highbd_convolve8_c.
 */
1597 void av1_highbd_convolve8_horiz_c(const uint8_t* src,
1598                                   ptrdiff_t src_stride,
1599                                   uint8_t* dst,
1600                                   ptrdiff_t dst_stride,
1601                                   const int16_t* filter_x,
1602                                   int x_step_q4,
1603                                   const int16_t* filter_y,
1604                                   int y_step_q4,
1605                                   int w,
1606                                   int h,
1607                                   int bps);
1608 #define av1_highbd_convolve8_horiz av1_highbd_convolve8_horiz_c
1609 
/*
 * av1_highbd_convolve8_vert: only a _c prototype is declared; the public
 * name is a macro alias for av1_highbd_convolve8_vert_c (compile-time
 * binding, no function pointer). The parameter list matches
 * av1_highbd_convolve8_c.
 */
1610 void av1_highbd_convolve8_vert_c(const uint8_t* src,
1611                                  ptrdiff_t src_stride,
1612                                  uint8_t* dst,
1613                                  ptrdiff_t dst_stride,
1614                                  const int16_t* filter_x,
1615                                  int x_step_q4,
1616                                  const int16_t* filter_y,
1617                                  int y_step_q4,
1618                                  int w,
1619                                  int h,
1620                                  int bps);
1621 #define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
1622 
/*
 * av1_highbd_convolve_2d_copy_sr: _c, _sse2 and _avx2 prototypes plus the
 * av1_highbd_convolve_2d_copy_sr function pointer. All four take
 * (src, src_stride, dst, dst_stride, w, h, filter_params_x,
 *  filter_params_y, subpel_x_qn, subpel_y_qn, conv_params, bd)
 * and return void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1623 void av1_highbd_convolve_2d_copy_sr_c(const uint16_t* src,
1624                                       int src_stride,
1625                                       uint16_t* dst,
1626                                       int dst_stride,
1627                                       int w,
1628                                       int h,
1629                                       const InterpFilterParams* filter_params_x,
1630                                       const InterpFilterParams* filter_params_y,
1631                                       const int subpel_x_qn,
1632                                       const int subpel_y_qn,
1633                                       ConvolveParams* conv_params,
1634                                       int bd);
1635 void av1_highbd_convolve_2d_copy_sr_sse2(
1636     const uint16_t* src,
1637     int src_stride,
1638     uint16_t* dst,
1639     int dst_stride,
1640     int w,
1641     int h,
1642     const InterpFilterParams* filter_params_x,
1643     const InterpFilterParams* filter_params_y,
1644     const int subpel_x_qn,
1645     const int subpel_y_qn,
1646     ConvolveParams* conv_params,
1647     int bd);
1648 void av1_highbd_convolve_2d_copy_sr_avx2(
1649     const uint16_t* src,
1650     int src_stride,
1651     uint16_t* dst,
1652     int dst_stride,
1653     int w,
1654     int h,
1655     const InterpFilterParams* filter_params_x,
1656     const InterpFilterParams* filter_params_y,
1657     const int subpel_x_qn,
1658     const int subpel_y_qn,
1659     ConvolveParams* conv_params,
1660     int bd);
1661 RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(
1662     const uint16_t* src,
1663     int src_stride,
1664     uint16_t* dst,
1665     int dst_stride,
1666     int w,
1667     int h,
1668     const InterpFilterParams* filter_params_x,
1669     const InterpFilterParams* filter_params_y,
1670     const int subpel_x_qn,
1671     const int subpel_y_qn,
1672     ConvolveParams* conv_params,
1673     int bd);
1674 
/*
 * av1_highbd_convolve_2d_scale: _c and _sse4_1 prototypes plus the
 * av1_highbd_convolve_2d_scale function pointer. Compared with the *_sr
 * convolve signatures in this header, these additionally take x_step_qn
 * and y_step_qn after the respective subpel arguments.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1675 void av1_highbd_convolve_2d_scale_c(const uint16_t* src,
1676                                     int src_stride,
1677                                     uint16_t* dst,
1678                                     int dst_stride,
1679                                     int w,
1680                                     int h,
1681                                     const InterpFilterParams* filter_params_x,
1682                                     const InterpFilterParams* filter_params_y,
1683                                     const int subpel_x_qn,
1684                                     const int x_step_qn,
1685                                     const int subpel_y_qn,
1686                                     const int y_step_qn,
1687                                     ConvolveParams* conv_params,
1688                                     int bd);
1689 void av1_highbd_convolve_2d_scale_sse4_1(
1690     const uint16_t* src,
1691     int src_stride,
1692     uint16_t* dst,
1693     int dst_stride,
1694     int w,
1695     int h,
1696     const InterpFilterParams* filter_params_x,
1697     const InterpFilterParams* filter_params_y,
1698     const int subpel_x_qn,
1699     const int x_step_qn,
1700     const int subpel_y_qn,
1701     const int y_step_qn,
1702     ConvolveParams* conv_params,
1703     int bd);
1704 RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(
1705     const uint16_t* src,
1706     int src_stride,
1707     uint16_t* dst,
1708     int dst_stride,
1709     int w,
1710     int h,
1711     const InterpFilterParams* filter_params_x,
1712     const InterpFilterParams* filter_params_y,
1713     const int subpel_x_qn,
1714     const int x_step_qn,
1715     const int subpel_y_qn,
1716     const int y_step_qn,
1717     ConvolveParams* conv_params,
1718     int bd);
1719 
/*
 * av1_highbd_convolve_2d_sr: _c, _ssse3 and _avx2 prototypes plus the
 * av1_highbd_convolve_2d_sr function pointer. All four take
 * (src, src_stride, dst, dst_stride, w, h, filter_params_x,
 *  filter_params_y, subpel_x_qn, subpel_y_qn, conv_params, bd)
 * and return void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1720 void av1_highbd_convolve_2d_sr_c(const uint16_t* src,
1721                                  int src_stride,
1722                                  uint16_t* dst,
1723                                  int dst_stride,
1724                                  int w,
1725                                  int h,
1726                                  const InterpFilterParams* filter_params_x,
1727                                  const InterpFilterParams* filter_params_y,
1728                                  const int subpel_x_qn,
1729                                  const int subpel_y_qn,
1730                                  ConvolveParams* conv_params,
1731                                  int bd);
1732 void av1_highbd_convolve_2d_sr_ssse3(const uint16_t* src,
1733                                      int src_stride,
1734                                      uint16_t* dst,
1735                                      int dst_stride,
1736                                      int w,
1737                                      int h,
1738                                      const InterpFilterParams* filter_params_x,
1739                                      const InterpFilterParams* filter_params_y,
1740                                      const int subpel_x_qn,
1741                                      const int subpel_y_qn,
1742                                      ConvolveParams* conv_params,
1743                                      int bd);
1744 void av1_highbd_convolve_2d_sr_avx2(const uint16_t* src,
1745                                     int src_stride,
1746                                     uint16_t* dst,
1747                                     int dst_stride,
1748                                     int w,
1749                                     int h,
1750                                     const InterpFilterParams* filter_params_x,
1751                                     const InterpFilterParams* filter_params_y,
1752                                     const int subpel_x_qn,
1753                                     const int subpel_y_qn,
1754                                     ConvolveParams* conv_params,
1755                                     int bd);
1756 RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(
1757     const uint16_t* src,
1758     int src_stride,
1759     uint16_t* dst,
1760     int dst_stride,
1761     int w,
1762     int h,
1763     const InterpFilterParams* filter_params_x,
1764     const InterpFilterParams* filter_params_y,
1765     const int subpel_x_qn,
1766     const int subpel_y_qn,
1767     ConvolveParams* conv_params,
1768     int bd);
1769 
/*
 * av1_highbd_convolve_avg: only a _c prototype is declared; the public
 * name is a macro alias for av1_highbd_convolve_avg_c (compile-time
 * binding, no function pointer).
 * NOTE(review): the filter and step parameters are part of the signature;
 * whether the implementation uses them is not visible here.
 */
1770 void av1_highbd_convolve_avg_c(const uint8_t* src,
1771                                ptrdiff_t src_stride,
1772                                uint8_t* dst,
1773                                ptrdiff_t dst_stride,
1774                                const int16_t* filter_x,
1775                                int x_step_q4,
1776                                const int16_t* filter_y,
1777                                int y_step_q4,
1778                                int w,
1779                                int h,
1780                                int bps);
1781 #define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
1782 
/*
 * av1_highbd_convolve_copy: only a _c prototype is declared; the public
 * name is a macro alias for av1_highbd_convolve_copy_c (compile-time
 * binding, no function pointer).
 * NOTE(review): the filter and step parameters are part of the signature;
 * whether the implementation uses them is not visible here.
 */
1783 void av1_highbd_convolve_copy_c(const uint8_t* src,
1784                                 ptrdiff_t src_stride,
1785                                 uint8_t* dst,
1786                                 ptrdiff_t dst_stride,
1787                                 const int16_t* filter_x,
1788                                 int x_step_q4,
1789                                 const int16_t* filter_y,
1790                                 int y_step_q4,
1791                                 int w,
1792                                 int h,
1793                                 int bps);
1794 #define av1_highbd_convolve_copy av1_highbd_convolve_copy_c
1795 
/*
 * av1_highbd_convolve_horiz_rs: _c and _sse4_1 prototypes plus the
 * av1_highbd_convolve_horiz_rs function pointer; each takes
 * (src, src_stride, dst, dst_stride, w, h, x_filters, x0_qn, x_step_qn, bd)
 * and returns void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1796 void av1_highbd_convolve_horiz_rs_c(const uint16_t* src,
1797                                     int src_stride,
1798                                     uint16_t* dst,
1799                                     int dst_stride,
1800                                     int w,
1801                                     int h,
1802                                     const int16_t* x_filters,
1803                                     int x0_qn,
1804                                     int x_step_qn,
1805                                     int bd);
1806 void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t* src,
1807                                          int src_stride,
1808                                          uint16_t* dst,
1809                                          int dst_stride,
1810                                          int w,
1811                                          int h,
1812                                          const int16_t* x_filters,
1813                                          int x0_qn,
1814                                          int x_step_qn,
1815                                          int bd);
1816 RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t* src,
1817                                                  int src_stride,
1818                                                  uint16_t* dst,
1819                                                  int dst_stride,
1820                                                  int w,
1821                                                  int h,
1822                                                  const int16_t* x_filters,
1823                                                  int x0_qn,
1824                                                  int x_step_qn,
1825                                                  int bd);
1826 
/*
 * av1_highbd_convolve_x_sr: _c, _ssse3 and _avx2 prototypes plus the
 * av1_highbd_convolve_x_sr function pointer. All four take
 * (src, src_stride, dst, dst_stride, w, h, filter_params_x,
 *  filter_params_y, subpel_x_qn, subpel_y_qn, conv_params, bd)
 * and return void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1827 void av1_highbd_convolve_x_sr_c(const uint16_t* src,
1828                                 int src_stride,
1829                                 uint16_t* dst,
1830                                 int dst_stride,
1831                                 int w,
1832                                 int h,
1833                                 const InterpFilterParams* filter_params_x,
1834                                 const InterpFilterParams* filter_params_y,
1835                                 const int subpel_x_qn,
1836                                 const int subpel_y_qn,
1837                                 ConvolveParams* conv_params,
1838                                 int bd);
1839 void av1_highbd_convolve_x_sr_ssse3(const uint16_t* src,
1840                                     int src_stride,
1841                                     uint16_t* dst,
1842                                     int dst_stride,
1843                                     int w,
1844                                     int h,
1845                                     const InterpFilterParams* filter_params_x,
1846                                     const InterpFilterParams* filter_params_y,
1847                                     const int subpel_x_qn,
1848                                     const int subpel_y_qn,
1849                                     ConvolveParams* conv_params,
1850                                     int bd);
1851 void av1_highbd_convolve_x_sr_avx2(const uint16_t* src,
1852                                    int src_stride,
1853                                    uint16_t* dst,
1854                                    int dst_stride,
1855                                    int w,
1856                                    int h,
1857                                    const InterpFilterParams* filter_params_x,
1858                                    const InterpFilterParams* filter_params_y,
1859                                    const int subpel_x_qn,
1860                                    const int subpel_y_qn,
1861                                    ConvolveParams* conv_params,
1862                                    int bd);
1863 RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(
1864     const uint16_t* src,
1865     int src_stride,
1866     uint16_t* dst,
1867     int dst_stride,
1868     int w,
1869     int h,
1870     const InterpFilterParams* filter_params_x,
1871     const InterpFilterParams* filter_params_y,
1872     const int subpel_x_qn,
1873     const int subpel_y_qn,
1874     ConvolveParams* conv_params,
1875     int bd);
1876 
/*
 * av1_highbd_convolve_y_sr: _c, _ssse3 and _avx2 prototypes plus the
 * av1_highbd_convolve_y_sr function pointer. All four take
 * (src, src_stride, dst, dst_stride, w, h, filter_params_x,
 *  filter_params_y, subpel_x_qn, subpel_y_qn, conv_params, bd)
 * and return void.
 * The pointer is an `extern` declaration unless RTCD_C is defined.
 */
1877 void av1_highbd_convolve_y_sr_c(const uint16_t* src,
1878                                 int src_stride,
1879                                 uint16_t* dst,
1880                                 int dst_stride,
1881                                 int w,
1882                                 int h,
1883                                 const InterpFilterParams* filter_params_x,
1884                                 const InterpFilterParams* filter_params_y,
1885                                 const int subpel_x_qn,
1886                                 const int subpel_y_qn,
1887                                 ConvolveParams* conv_params,
1888                                 int bd);
1889 void av1_highbd_convolve_y_sr_ssse3(const uint16_t* src,
1890                                     int src_stride,
1891                                     uint16_t* dst,
1892                                     int dst_stride,
1893                                     int w,
1894                                     int h,
1895                                     const InterpFilterParams* filter_params_x,
1896                                     const InterpFilterParams* filter_params_y,
1897                                     const int subpel_x_qn,
1898                                     const int subpel_y_qn,
1899                                     ConvolveParams* conv_params,
1900                                     int bd);
1901 void av1_highbd_convolve_y_sr_avx2(const uint16_t* src,
1902                                    int src_stride,
1903                                    uint16_t* dst,
1904                                    int dst_stride,
1905                                    int w,
1906                                    int h,
1907                                    const InterpFilterParams* filter_params_x,
1908                                    const InterpFilterParams* filter_params_y,
1909                                    const int subpel_x_qn,
1910                                    const int subpel_y_qn,
1911                                    ConvolveParams* conv_params,
1912                                    int bd);
1913 RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(
1914     const uint16_t* src,
1915     int src_stride,
1916     uint16_t* dst,
1917     int dst_stride,
1918     int w,
1919     int h,
1920     const InterpFilterParams* filter_params_x,
1921     const InterpFilterParams* filter_params_y,
1922     const int subpel_x_qn,
1923     const int subpel_y_qn,
1924     ConvolveParams* conv_params,
1925     int bd);
1926 
// av1_highbd_dist_wtd_convolve_2d: `_c`, `_sse4_1` and `_avx2` prototypes plus
// the function pointer of the same base name, all with an identical signature
// (uint16_t src/dst with strides, w x h, x/y filter params, sub-pixel offsets,
// ConvolveParams, bd).
// NOTE(review): "dist_wtd" presumably stands for distance-weighted compound
// prediction; the weights are not parameters here, so they presumably travel
// in `conv_params` -- confirm against av1/common/convolve.h.
1927 void av1_highbd_dist_wtd_convolve_2d_c(
1928     const uint16_t* src,
1929     int src_stride,
1930     uint16_t* dst,
1931     int dst_stride,
1932     int w,
1933     int h,
1934     const InterpFilterParams* filter_params_x,
1935     const InterpFilterParams* filter_params_y,
1936     const int subpel_x_qn,
1937     const int subpel_y_qn,
1938     ConvolveParams* conv_params,
1939     int bd);
1940 void av1_highbd_dist_wtd_convolve_2d_sse4_1(
1941     const uint16_t* src,
1942     int src_stride,
1943     uint16_t* dst,
1944     int dst_stride,
1945     int w,
1946     int h,
1947     const InterpFilterParams* filter_params_x,
1948     const InterpFilterParams* filter_params_y,
1949     const int subpel_x_qn,
1950     const int subpel_y_qn,
1951     ConvolveParams* conv_params,
1952     int bd);
1953 void av1_highbd_dist_wtd_convolve_2d_avx2(
1954     const uint16_t* src,
1955     int src_stride,
1956     uint16_t* dst,
1957     int dst_stride,
1958     int w,
1959     int h,
1960     const InterpFilterParams* filter_params_x,
1961     const InterpFilterParams* filter_params_y,
1962     const int subpel_x_qn,
1963     const int subpel_y_qn,
1964     ConvolveParams* conv_params,
1965     int bd);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
1966 RTCD_EXTERN void (*av1_highbd_dist_wtd_convolve_2d)(
1967     const uint16_t* src,
1968     int src_stride,
1969     uint16_t* dst,
1970     int dst_stride,
1971     int w,
1972     int h,
1973     const InterpFilterParams* filter_params_x,
1974     const InterpFilterParams* filter_params_y,
1975     const int subpel_x_qn,
1976     const int subpel_y_qn,
1977     ConvolveParams* conv_params,
1978     int bd);
1979 
// av1_highbd_dist_wtd_convolve_2d_copy: `_c`, `_sse4_1` and `_avx2` prototypes
// plus the function pointer of the same base name. The signature matches the
// other highbd convolve entry points, including the filter and sub-pixel
// arguments.
// NOTE(review): the "_copy" suffix suggests a no-filter path in which the
// filter/subpel arguments are unused -- confirm in the implementation.
1980 void av1_highbd_dist_wtd_convolve_2d_copy_c(
1981     const uint16_t* src,
1982     int src_stride,
1983     uint16_t* dst,
1984     int dst_stride,
1985     int w,
1986     int h,
1987     const InterpFilterParams* filter_params_x,
1988     const InterpFilterParams* filter_params_y,
1989     const int subpel_x_qn,
1990     const int subpel_y_qn,
1991     ConvolveParams* conv_params,
1992     int bd);
1993 void av1_highbd_dist_wtd_convolve_2d_copy_sse4_1(
1994     const uint16_t* src,
1995     int src_stride,
1996     uint16_t* dst,
1997     int dst_stride,
1998     int w,
1999     int h,
2000     const InterpFilterParams* filter_params_x,
2001     const InterpFilterParams* filter_params_y,
2002     const int subpel_x_qn,
2003     const int subpel_y_qn,
2004     ConvolveParams* conv_params,
2005     int bd);
2006 void av1_highbd_dist_wtd_convolve_2d_copy_avx2(
2007     const uint16_t* src,
2008     int src_stride,
2009     uint16_t* dst,
2010     int dst_stride,
2011     int w,
2012     int h,
2013     const InterpFilterParams* filter_params_x,
2014     const InterpFilterParams* filter_params_y,
2015     const int subpel_x_qn,
2016     const int subpel_y_qn,
2017     ConvolveParams* conv_params,
2018     int bd);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2019 RTCD_EXTERN void (*av1_highbd_dist_wtd_convolve_2d_copy)(
2020     const uint16_t* src,
2021     int src_stride,
2022     uint16_t* dst,
2023     int dst_stride,
2024     int w,
2025     int h,
2026     const InterpFilterParams* filter_params_x,
2027     const InterpFilterParams* filter_params_y,
2028     const int subpel_x_qn,
2029     const int subpel_y_qn,
2030     ConvolveParams* conv_params,
2031     int bd);
2032 
// av1_highbd_dist_wtd_convolve_x: `_c`, `_sse4_1` and `_avx2` prototypes plus
// the function pointer of the same base name, all with an identical signature.
// Both x and y filter/sub-pixel arguments are present in the signature.
// NOTE(review): "_x" presumably means only the horizontal filter is applied
// and the y arguments are ignored -- confirm in the implementation.
2033 void av1_highbd_dist_wtd_convolve_x_c(const uint16_t* src,
2034                                       int src_stride,
2035                                       uint16_t* dst,
2036                                       int dst_stride,
2037                                       int w,
2038                                       int h,
2039                                       const InterpFilterParams* filter_params_x,
2040                                       const InterpFilterParams* filter_params_y,
2041                                       const int subpel_x_qn,
2042                                       const int subpel_y_qn,
2043                                       ConvolveParams* conv_params,
2044                                       int bd);
2045 void av1_highbd_dist_wtd_convolve_x_sse4_1(
2046     const uint16_t* src,
2047     int src_stride,
2048     uint16_t* dst,
2049     int dst_stride,
2050     int w,
2051     int h,
2052     const InterpFilterParams* filter_params_x,
2053     const InterpFilterParams* filter_params_y,
2054     const int subpel_x_qn,
2055     const int subpel_y_qn,
2056     ConvolveParams* conv_params,
2057     int bd);
2058 void av1_highbd_dist_wtd_convolve_x_avx2(
2059     const uint16_t* src,
2060     int src_stride,
2061     uint16_t* dst,
2062     int dst_stride,
2063     int w,
2064     int h,
2065     const InterpFilterParams* filter_params_x,
2066     const InterpFilterParams* filter_params_y,
2067     const int subpel_x_qn,
2068     const int subpel_y_qn,
2069     ConvolveParams* conv_params,
2070     int bd);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2071 RTCD_EXTERN void (*av1_highbd_dist_wtd_convolve_x)(
2072     const uint16_t* src,
2073     int src_stride,
2074     uint16_t* dst,
2075     int dst_stride,
2076     int w,
2077     int h,
2078     const InterpFilterParams* filter_params_x,
2079     const InterpFilterParams* filter_params_y,
2080     const int subpel_x_qn,
2081     const int subpel_y_qn,
2082     ConvolveParams* conv_params,
2083     int bd);
2084 
// av1_highbd_dist_wtd_convolve_y: `_c`, `_sse4_1` and `_avx2` prototypes plus
// the function pointer of the same base name, all with an identical signature.
// Both x and y filter/sub-pixel arguments are present in the signature.
// NOTE(review): "_y" presumably means only the vertical filter is applied
// and the x arguments are ignored -- confirm in the implementation.
2085 void av1_highbd_dist_wtd_convolve_y_c(const uint16_t* src,
2086                                       int src_stride,
2087                                       uint16_t* dst,
2088                                       int dst_stride,
2089                                       int w,
2090                                       int h,
2091                                       const InterpFilterParams* filter_params_x,
2092                                       const InterpFilterParams* filter_params_y,
2093                                       const int subpel_x_qn,
2094                                       const int subpel_y_qn,
2095                                       ConvolveParams* conv_params,
2096                                       int bd);
2097 void av1_highbd_dist_wtd_convolve_y_sse4_1(
2098     const uint16_t* src,
2099     int src_stride,
2100     uint16_t* dst,
2101     int dst_stride,
2102     int w,
2103     int h,
2104     const InterpFilterParams* filter_params_x,
2105     const InterpFilterParams* filter_params_y,
2106     const int subpel_x_qn,
2107     const int subpel_y_qn,
2108     ConvolveParams* conv_params,
2109     int bd);
2110 void av1_highbd_dist_wtd_convolve_y_avx2(
2111     const uint16_t* src,
2112     int src_stride,
2113     uint16_t* dst,
2114     int dst_stride,
2115     int w,
2116     int h,
2117     const InterpFilterParams* filter_params_x,
2118     const InterpFilterParams* filter_params_y,
2119     const int subpel_x_qn,
2120     const int subpel_y_qn,
2121     ConvolveParams* conv_params,
2122     int bd);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2123 RTCD_EXTERN void (*av1_highbd_dist_wtd_convolve_y)(
2124     const uint16_t* src,
2125     int src_stride,
2126     uint16_t* dst,
2127     int dst_stride,
2128     int w,
2129     int h,
2130     const InterpFilterParams* filter_params_x,
2131     const InterpFilterParams* filter_params_y,
2132     const int subpel_x_qn,
2133     const int subpel_y_qn,
2134     ConvolveParams* conv_params,
2135     int bd);
2136 
// av1_highbd_dr_prediction_z1: `_c` and `_avx2` prototypes plus the function
// pointer of the same base name. Writes into `dst` (uint16_t, with `stride`)
// for a bw x bh block and takes `above` and `left` neighbour arrays; this
// variant has an `upsample_above` flag but no `upsample_left`.
// NOTE(review): "dr" presumably means directional intra prediction and "z1"
// the first angle zone, with dx/dy as the per-row/column position steps --
// confirm against the intra-prediction implementation.
2137 void av1_highbd_dr_prediction_z1_c(uint16_t* dst,
2138                                    ptrdiff_t stride,
2139                                    int bw,
2140                                    int bh,
2141                                    const uint16_t* above,
2142                                    const uint16_t* left,
2143                                    int upsample_above,
2144                                    int dx,
2145                                    int dy,
2146                                    int bd);
2147 void av1_highbd_dr_prediction_z1_avx2(uint16_t* dst,
2148                                       ptrdiff_t stride,
2149                                       int bw,
2150                                       int bh,
2151                                       const uint16_t* above,
2152                                       const uint16_t* left,
2153                                       int upsample_above,
2154                                       int dx,
2155                                       int dy,
2156                                       int bd);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2157 RTCD_EXTERN void (*av1_highbd_dr_prediction_z1)(uint16_t* dst,
2158                                                 ptrdiff_t stride,
2159                                                 int bw,
2160                                                 int bh,
2161                                                 const uint16_t* above,
2162                                                 const uint16_t* left,
2163                                                 int upsample_above,
2164                                                 int dx,
2165                                                 int dy,
2166                                                 int bd);
2167 
// av1_highbd_dr_prediction_z2: `_c` and `_avx2` prototypes plus the function
// pointer of the same base name. Same shape as the z1/z3 declarations, except
// this variant takes BOTH `upsample_above` and `upsample_left` flags.
// NOTE(review): presumably the directional intra predictor for the angle zone
// that reads both the above and the left edge -- confirm in the implementation.
2168 void av1_highbd_dr_prediction_z2_c(uint16_t* dst,
2169                                    ptrdiff_t stride,
2170                                    int bw,
2171                                    int bh,
2172                                    const uint16_t* above,
2173                                    const uint16_t* left,
2174                                    int upsample_above,
2175                                    int upsample_left,
2176                                    int dx,
2177                                    int dy,
2178                                    int bd);
2179 void av1_highbd_dr_prediction_z2_avx2(uint16_t* dst,
2180                                       ptrdiff_t stride,
2181                                       int bw,
2182                                       int bh,
2183                                       const uint16_t* above,
2184                                       const uint16_t* left,
2185                                       int upsample_above,
2186                                       int upsample_left,
2187                                       int dx,
2188                                       int dy,
2189                                       int bd);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2190 RTCD_EXTERN void (*av1_highbd_dr_prediction_z2)(uint16_t* dst,
2191                                                 ptrdiff_t stride,
2192                                                 int bw,
2193                                                 int bh,
2194                                                 const uint16_t* above,
2195                                                 const uint16_t* left,
2196                                                 int upsample_above,
2197                                                 int upsample_left,
2198                                                 int dx,
2199                                                 int dy,
2200                                                 int bd);
2201 
// av1_highbd_dr_prediction_z3: `_c` and `_avx2` prototypes plus the function
// pointer of the same base name. Writes into `dst` (uint16_t, with `stride`)
// for a bw x bh block; this variant has an `upsample_left` flag but no
// `upsample_above`.
// NOTE(review): presumably the directional intra predictor for the angle zone
// that reads only the left edge -- confirm in the implementation.
2202 void av1_highbd_dr_prediction_z3_c(uint16_t* dst,
2203                                    ptrdiff_t stride,
2204                                    int bw,
2205                                    int bh,
2206                                    const uint16_t* above,
2207                                    const uint16_t* left,
2208                                    int upsample_left,
2209                                    int dx,
2210                                    int dy,
2211                                    int bd);
2212 void av1_highbd_dr_prediction_z3_avx2(uint16_t* dst,
2213                                       ptrdiff_t stride,
2214                                       int bw,
2215                                       int bh,
2216                                       const uint16_t* above,
2217                                       const uint16_t* left,
2218                                       int upsample_left,
2219                                       int dx,
2220                                       int dy,
2221                                       int bd);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2222 RTCD_EXTERN void (*av1_highbd_dr_prediction_z3)(uint16_t* dst,
2223                                                 ptrdiff_t stride,
2224                                                 int bw,
2225                                                 int bh,
2226                                                 const uint16_t* above,
2227                                                 const uint16_t* left,
2228                                                 int upsample_left,
2229                                                 int dx,
2230                                                 int dy,
2231                                                 int bd);
2232 
// av1_highbd_fwht4x4: only a `_c` prototype is declared, and the base name is
// a macro alias for it, so calls bind at compile time with no function pointer.
// NOTE(review): presumably the forward 4x4 Walsh-Hadamard transform -- confirm.
2233 void av1_highbd_fwht4x4_c(const int16_t* input, tran_low_t* output, int stride);
2234 #define av1_highbd_fwht4x4 av1_highbd_fwht4x4_c
2235 
// av1_highbd_inv_txfm_add: `_c`, `_sse4_1` and `_avx2` prototypes plus the
// function pointer of the same base name. Takes tran_low_t coefficients, a
// destination buffer with `stride`, and a TxfmParam describing the transform.
// NOTE(review): `dest` is typed uint8_t* even though this is a "highbd" entry
// point; presumably it is a byte-pointer alias of a 16-bit buffer -- verify
// against the callers before dereferencing it as bytes.
2236 void av1_highbd_inv_txfm_add_c(const tran_low_t* input,
2237                                uint8_t* dest,
2238                                int stride,
2239                                const TxfmParam* txfm_param);
2240 void av1_highbd_inv_txfm_add_sse4_1(const tran_low_t* input,
2241                                     uint8_t* dest,
2242                                     int stride,
2243                                     const TxfmParam* txfm_param);
2244 void av1_highbd_inv_txfm_add_avx2(const tran_low_t* input,
2245                                   uint8_t* dest,
2246                                   int stride,
2247                                   const TxfmParam* txfm_param);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2248 RTCD_EXTERN void (*av1_highbd_inv_txfm_add)(const tran_low_t* input,
2249                                             uint8_t* dest,
2250                                             int stride,
2251                                             const TxfmParam* txfm_param);
2252 
// av1_highbd_inv_txfm_add_16x4: `_c` and `_sse4_1` prototypes plus the
// function pointer of the same base name (no AVX2 variant is declared).
// NOTE(review): presumably the inverse transform + add for 16x4 blocks, with
// `dest` a byte-pointer alias of a 16-bit buffer -- confirm.
2253 void av1_highbd_inv_txfm_add_16x4_c(const tran_low_t* input,
2254                                     uint8_t* dest,
2255                                     int stride,
2256                                     const TxfmParam* txfm_param);
2257 void av1_highbd_inv_txfm_add_16x4_sse4_1(const tran_low_t* input,
2258                                          uint8_t* dest,
2259                                          int stride,
2260                                          const TxfmParam* txfm_param);
2261 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_16x4)(const tran_low_t* input,
2262                                                  uint8_t* dest,
2263                                                  int stride,
2264                                                  const TxfmParam* txfm_param);
2265 
// av1_highbd_inv_txfm_add_4x16: `_c` and `_sse4_1` prototypes plus the
// function pointer of the same base name (no AVX2 variant is declared).
// NOTE(review): presumably the inverse transform + add for 4x16 blocks, with
// `dest` a byte-pointer alias of a 16-bit buffer -- confirm.
2266 void av1_highbd_inv_txfm_add_4x16_c(const tran_low_t* input,
2267                                     uint8_t* dest,
2268                                     int stride,
2269                                     const TxfmParam* txfm_param);
2270 void av1_highbd_inv_txfm_add_4x16_sse4_1(const tran_low_t* input,
2271                                          uint8_t* dest,
2272                                          int stride,
2273                                          const TxfmParam* txfm_param);
2274 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x16)(const tran_low_t* input,
2275                                                  uint8_t* dest,
2276                                                  int stride,
2277                                                  const TxfmParam* txfm_param);
2278 
// av1_highbd_inv_txfm_add_4x4: `_c` and `_sse4_1` prototypes plus the
// function pointer of the same base name (no AVX2 variant is declared).
// NOTE(review): presumably the inverse transform + add for 4x4 blocks, with
// `dest` a byte-pointer alias of a 16-bit buffer -- confirm.
2279 void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t* input,
2280                                    uint8_t* dest,
2281                                    int stride,
2282                                    const TxfmParam* txfm_param);
2283 void av1_highbd_inv_txfm_add_4x4_sse4_1(const tran_low_t* input,
2284                                         uint8_t* dest,
2285                                         int stride,
2286                                         const TxfmParam* txfm_param);
2287 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x4)(const tran_low_t* input,
2288                                                 uint8_t* dest,
2289                                                 int stride,
2290                                                 const TxfmParam* txfm_param);
2291 
// av1_highbd_inv_txfm_add_4x8: `_c` and `_sse4_1` prototypes plus the
// function pointer of the same base name (no AVX2 variant is declared).
// NOTE(review): presumably the inverse transform + add for 4x8 blocks, with
// `dest` a byte-pointer alias of a 16-bit buffer -- confirm.
2292 void av1_highbd_inv_txfm_add_4x8_c(const tran_low_t* input,
2293                                    uint8_t* dest,
2294                                    int stride,
2295                                    const TxfmParam* txfm_param);
2296 void av1_highbd_inv_txfm_add_4x8_sse4_1(const tran_low_t* input,
2297                                         uint8_t* dest,
2298                                         int stride,
2299                                         const TxfmParam* txfm_param);
2300 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_4x8)(const tran_low_t* input,
2301                                                 uint8_t* dest,
2302                                                 int stride,
2303                                                 const TxfmParam* txfm_param);
2304 
// av1_highbd_inv_txfm_add_8x4: `_c` and `_sse4_1` prototypes plus the
// function pointer of the same base name (no AVX2 variant is declared).
// NOTE(review): presumably the inverse transform + add for 8x4 blocks, with
// `dest` a byte-pointer alias of a 16-bit buffer -- confirm.
2305 void av1_highbd_inv_txfm_add_8x4_c(const tran_low_t* input,
2306                                    uint8_t* dest,
2307                                    int stride,
2308                                    const TxfmParam* txfm_param);
2309 void av1_highbd_inv_txfm_add_8x4_sse4_1(const tran_low_t* input,
2310                                         uint8_t* dest,
2311                                         int stride,
2312                                         const TxfmParam* txfm_param);
2313 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x4)(const tran_low_t* input,
2314                                                 uint8_t* dest,
2315                                                 int stride,
2316                                                 const TxfmParam* txfm_param);
2317 
// av1_highbd_inv_txfm_add_8x8: `_c` and `_sse4_1` prototypes plus the
// function pointer of the same base name (no AVX2 variant is declared).
// NOTE(review): presumably the inverse transform + add for 8x8 blocks, with
// `dest` a byte-pointer alias of a 16-bit buffer -- confirm.
2318 void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t* input,
2319                                    uint8_t* dest,
2320                                    int stride,
2321                                    const TxfmParam* txfm_param);
2322 void av1_highbd_inv_txfm_add_8x8_sse4_1(const tran_low_t* input,
2323                                         uint8_t* dest,
2324                                         int stride,
2325                                         const TxfmParam* txfm_param);
2326 RTCD_EXTERN void (*av1_highbd_inv_txfm_add_8x8)(const tran_low_t* input,
2327                                                 uint8_t* dest,
2328                                                 int stride,
2329                                                 const TxfmParam* txfm_param);
2330 
// av1_highbd_iwht4x4_16_add: only a `_c` prototype is declared, and the base
// name is a macro alias for it (compile-time binding, no function pointer).
// NOTE(review): presumably the inverse 4x4 Walsh-Hadamard transform + add for
// blocks with up to 16 coefficients -- confirm.
2331 void av1_highbd_iwht4x4_16_add_c(const tran_low_t* input,
2332                                  uint8_t* dest,
2333                                  int dest_stride,
2334                                  int bd);
2335 #define av1_highbd_iwht4x4_16_add av1_highbd_iwht4x4_16_add_c
2336 
// av1_highbd_iwht4x4_1_add: only a `_c` prototype is declared, and the base
// name is a macro alias for it (compile-time binding, no function pointer).
// NOTE(review): presumably the DC-only (single coefficient) inverse 4x4
// Walsh-Hadamard transform + add -- confirm.
2337 void av1_highbd_iwht4x4_1_add_c(const tran_low_t* input,
2338                                 uint8_t* dest,
2339                                 int dest_stride,
2340                                 int bd);
2341 #define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
2342 
// av1_highbd_pixel_proj_error: `_c`, `_sse4_1` and `_avx2` prototypes plus the
// function pointer of the same base name. Returns an int64_t and takes two
// image buffers (`src8`, `dat8`) with strides, two int32_t buffers
// (`flt0`, `flt1`) with strides, a two-element `xq` array and sgr_params_type.
// NOTE(review): presumably computes the self-guided restoration projection
// error, with src8/dat8 being byte-pointer aliases of 16-bit buffers --
// confirm against the restoration search code.
2343 int64_t av1_highbd_pixel_proj_error_c(const uint8_t* src8,
2344                                       int width,
2345                                       int height,
2346                                       int src_stride,
2347                                       const uint8_t* dat8,
2348                                       int dat_stride,
2349                                       int32_t* flt0,
2350                                       int flt0_stride,
2351                                       int32_t* flt1,
2352                                       int flt1_stride,
2353                                       int xq[2],
2354                                       const sgr_params_type* params);
2355 int64_t av1_highbd_pixel_proj_error_sse4_1(const uint8_t* src8,
2356                                            int width,
2357                                            int height,
2358                                            int src_stride,
2359                                            const uint8_t* dat8,
2360                                            int dat_stride,
2361                                            int32_t* flt0,
2362                                            int flt0_stride,
2363                                            int32_t* flt1,
2364                                            int flt1_stride,
2365                                            int xq[2],
2366                                            const sgr_params_type* params);
2367 int64_t av1_highbd_pixel_proj_error_avx2(const uint8_t* src8,
2368                                          int width,
2369                                          int height,
2370                                          int src_stride,
2371                                          const uint8_t* dat8,
2372                                          int dat_stride,
2373                                          int32_t* flt0,
2374                                          int flt0_stride,
2375                                          int32_t* flt1,
2376                                          int flt1_stride,
2377                                          int xq[2],
2378                                          const sgr_params_type* params);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2379 RTCD_EXTERN int64_t (*av1_highbd_pixel_proj_error)(
2380     const uint8_t* src8,
2381     int width,
2382     int height,
2383     int src_stride,
2384     const uint8_t* dat8,
2385     int dat_stride,
2386     int32_t* flt0,
2387     int flt0_stride,
2388     int32_t* flt1,
2389     int flt1_stride,
2390     int xq[2],
2391     const sgr_params_type* params);
2392 
// av1_highbd_quantize_fp: `_c`, `_sse4_1` and `_avx2` prototypes plus the
// function pointer of the same base name. Inputs are `coeff_ptr` (n_coeffs
// entries) and the const int16_t tables (zbin, round, quant, quant_shift,
// dequant, scan, iscan); the non-const pointers `qcoeff_ptr`, `dqcoeff_ptr`
// and `eob_ptr` are the outputs.
// NOTE(review): "fp" presumably denotes the fast-path quantizer and
// `log_scale` a transform-size dependent scaling shift -- confirm.
2393 void av1_highbd_quantize_fp_c(const tran_low_t* coeff_ptr,
2394                               intptr_t n_coeffs,
2395                               const int16_t* zbin_ptr,
2396                               const int16_t* round_ptr,
2397                               const int16_t* quant_ptr,
2398                               const int16_t* quant_shift_ptr,
2399                               tran_low_t* qcoeff_ptr,
2400                               tran_low_t* dqcoeff_ptr,
2401                               const int16_t* dequant_ptr,
2402                               uint16_t* eob_ptr,
2403                               const int16_t* scan,
2404                               const int16_t* iscan,
2405                               int log_scale);
2406 void av1_highbd_quantize_fp_sse4_1(const tran_low_t* coeff_ptr,
2407                                    intptr_t n_coeffs,
2408                                    const int16_t* zbin_ptr,
2409                                    const int16_t* round_ptr,
2410                                    const int16_t* quant_ptr,
2411                                    const int16_t* quant_shift_ptr,
2412                                    tran_low_t* qcoeff_ptr,
2413                                    tran_low_t* dqcoeff_ptr,
2414                                    const int16_t* dequant_ptr,
2415                                    uint16_t* eob_ptr,
2416                                    const int16_t* scan,
2417                                    const int16_t* iscan,
2418                                    int log_scale);
2419 void av1_highbd_quantize_fp_avx2(const tran_low_t* coeff_ptr,
2420                                  intptr_t n_coeffs,
2421                                  const int16_t* zbin_ptr,
2422                                  const int16_t* round_ptr,
2423                                  const int16_t* quant_ptr,
2424                                  const int16_t* quant_shift_ptr,
2425                                  tran_low_t* qcoeff_ptr,
2426                                  tran_low_t* dqcoeff_ptr,
2427                                  const int16_t* dequant_ptr,
2428                                  uint16_t* eob_ptr,
2429                                  const int16_t* scan,
2430                                  const int16_t* iscan,
2431                                  int log_scale);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2432 RTCD_EXTERN void (*av1_highbd_quantize_fp)(const tran_low_t* coeff_ptr,
2433                                            intptr_t n_coeffs,
2434                                            const int16_t* zbin_ptr,
2435                                            const int16_t* round_ptr,
2436                                            const int16_t* quant_ptr,
2437                                            const int16_t* quant_shift_ptr,
2438                                            tran_low_t* qcoeff_ptr,
2439                                            tran_low_t* dqcoeff_ptr,
2440                                            const int16_t* dequant_ptr,
2441                                            uint16_t* eob_ptr,
2442                                            const int16_t* scan,
2443                                            const int16_t* iscan,
2444                                            int log_scale);
2445 
// av1_highbd_warp_affine: `_c` and `_sse4_1` prototypes plus the function
// pointer of the same base name (no AVX2 variant is declared). Reads a
// uint16_t reference frame (`ref`, width x height, `stride`) and writes a
// p_width x p_height block into `pred` (`p_stride`) positioned at
// (p_col, p_row), using the int32_t matrix `mat`.
// NOTE(review): alpha/beta/gamma/delta are presumably the shear parameters
// derived from `mat`; the length and fixed-point format of `mat` are not
// visible here -- confirm against the warped-motion code.
2446 void av1_highbd_warp_affine_c(const int32_t* mat,
2447                               const uint16_t* ref,
2448                               int width,
2449                               int height,
2450                               int stride,
2451                               uint16_t* pred,
2452                               int p_col,
2453                               int p_row,
2454                               int p_width,
2455                               int p_height,
2456                               int p_stride,
2457                               int subsampling_x,
2458                               int subsampling_y,
2459                               int bd,
2460                               ConvolveParams* conv_params,
2461                               int16_t alpha,
2462                               int16_t beta,
2463                               int16_t gamma,
2464                               int16_t delta);
2465 void av1_highbd_warp_affine_sse4_1(const int32_t* mat,
2466                                    const uint16_t* ref,
2467                                    int width,
2468                                    int height,
2469                                    int stride,
2470                                    uint16_t* pred,
2471                                    int p_col,
2472                                    int p_row,
2473                                    int p_width,
2474                                    int p_height,
2475                                    int p_stride,
2476                                    int subsampling_x,
2477                                    int subsampling_y,
2478                                    int bd,
2479                                    ConvolveParams* conv_params,
2480                                    int16_t alpha,
2481                                    int16_t beta,
2482                                    int16_t gamma,
2483                                    int16_t delta);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2484 RTCD_EXTERN void (*av1_highbd_warp_affine)(const int32_t* mat,
2485                                            const uint16_t* ref,
2486                                            int width,
2487                                            int height,
2488                                            int stride,
2489                                            uint16_t* pred,
2490                                            int p_col,
2491                                            int p_row,
2492                                            int p_width,
2493                                            int p_height,
2494                                            int p_stride,
2495                                            int subsampling_x,
2496                                            int subsampling_y,
2497                                            int bd,
2498                                            ConvolveParams* conv_params,
2499                                            int16_t alpha,
2500                                            int16_t beta,
2501                                            int16_t gamma,
2502                                            int16_t delta);
2503 
// av1_highbd_wiener_convolve_add_src: `_c`, `_ssse3` and `_avx2` prototypes
// plus the function pointer of the same base name. Takes src/dst buffers with
// ptrdiff_t strides, separate x and y int16_t filter taps with q4 step sizes,
// block size w x h, a const ConvolveParams and `bd`.
// NOTE(review): src/dst are typed uint8_t* on a "highbd" entry point and are
// presumably byte-pointer aliases of 16-bit buffers; "add_src" presumably
// means the source sample is added back to the filtered value -- confirm.
2504 void av1_highbd_wiener_convolve_add_src_c(const uint8_t* src,
2505                                           ptrdiff_t src_stride,
2506                                           uint8_t* dst,
2507                                           ptrdiff_t dst_stride,
2508                                           const int16_t* filter_x,
2509                                           int x_step_q4,
2510                                           const int16_t* filter_y,
2511                                           int y_step_q4,
2512                                           int w,
2513                                           int h,
2514                                           const ConvolveParams* conv_params,
2515                                           int bd);
2516 void av1_highbd_wiener_convolve_add_src_ssse3(const uint8_t* src,
2517                                               ptrdiff_t src_stride,
2518                                               uint8_t* dst,
2519                                               ptrdiff_t dst_stride,
2520                                               const int16_t* filter_x,
2521                                               int x_step_q4,
2522                                               const int16_t* filter_y,
2523                                               int y_step_q4,
2524                                               int w,
2525                                               int h,
2526                                               const ConvolveParams* conv_params,
2527                                               int bd);
2528 void av1_highbd_wiener_convolve_add_src_avx2(const uint8_t* src,
2529                                              ptrdiff_t src_stride,
2530                                              uint8_t* dst,
2531                                              ptrdiff_t dst_stride,
2532                                              const int16_t* filter_x,
2533                                              int x_step_q4,
2534                                              const int16_t* filter_y,
2535                                              int y_step_q4,
2536                                              int w,
2537                                              int h,
2538                                              const ConvolveParams* conv_params,
2539                                              int bd);
// Dispatch pointer; a definition where RTCD_C is defined, `extern` elsewhere.
2540 RTCD_EXTERN void (*av1_highbd_wiener_convolve_add_src)(
2541     const uint8_t* src,
2542     ptrdiff_t src_stride,
2543     uint8_t* dst,
2544     ptrdiff_t dst_stride,
2545     const int16_t* filter_x,
2546     int x_step_q4,
2547     const int16_t* filter_y,
2548     int y_step_q4,
2549     int w,
2550     int h,
2551     const ConvolveParams* conv_params,
2552     int bd);
2553 
// av1_inv_txfm2d_add_16x16: only a `_c` prototype is declared, and the base
// name is a macro alias for it (compile-time binding, no function pointer).
// Takes int32_t coefficients, a uint16_t output buffer with `stride`, the
// transform type and `bd`.
// NOTE(review): presumably adds the inverse 2-D transform to `output` in place.
2554 void av1_inv_txfm2d_add_16x16_c(const int32_t* input,
2555                                 uint16_t* output,
2556                                 int stride,
2557                                 TX_TYPE tx_type,
2558                                 int bd);
2559 #define av1_inv_txfm2d_add_16x16 av1_inv_txfm2d_add_16x16_c
2560 
// av1_inv_txfm2d_add_16x32: only a `_c` prototype is declared, and the base
// name is a macro alias for it (compile-time binding, no function pointer).
// NOTE(review): presumably adds the inverse 2-D transform to `output` in place.
2561 void av1_inv_txfm2d_add_16x32_c(const int32_t* input,
2562                                 uint16_t* output,
2563                                 int stride,
2564                                 TX_TYPE tx_type,
2565                                 int bd);
2566 #define av1_inv_txfm2d_add_16x32 av1_inv_txfm2d_add_16x32_c
2567 
/*
 * av1_inv_txfm2d_add_16x4: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "16x4" presumably names the transform block size -- confirm.
 */
2568 void av1_inv_txfm2d_add_16x4_c(const int32_t* input,
2569                                uint16_t* output,
2570                                int stride,
2571                                TX_TYPE tx_type,
2572                                int bd);
2573 #define av1_inv_txfm2d_add_16x4 av1_inv_txfm2d_add_16x4_c
2574 
/*
 * av1_inv_txfm2d_add_16x64: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "16x64" presumably names the transform block size -- confirm.
 */
2575 void av1_inv_txfm2d_add_16x64_c(const int32_t* input,
2576                                 uint16_t* output,
2577                                 int stride,
2578                                 TX_TYPE tx_type,
2579                                 int bd);
2580 #define av1_inv_txfm2d_add_16x64 av1_inv_txfm2d_add_16x64_c
2581 
/*
 * av1_inv_txfm2d_add_16x8: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "16x8" presumably names the transform block size -- confirm.
 */
2582 void av1_inv_txfm2d_add_16x8_c(const int32_t* input,
2583                                uint16_t* output,
2584                                int stride,
2585                                TX_TYPE tx_type,
2586                                int bd);
2587 #define av1_inv_txfm2d_add_16x8 av1_inv_txfm2d_add_16x8_c
2588 
/*
 * av1_inv_txfm2d_add_32x16: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "32x16" presumably names the transform block size -- confirm.
 */
2589 void av1_inv_txfm2d_add_32x16_c(const int32_t* input,
2590                                 uint16_t* output,
2591                                 int stride,
2592                                 TX_TYPE tx_type,
2593                                 int bd);
2594 #define av1_inv_txfm2d_add_32x16 av1_inv_txfm2d_add_32x16_c
2595 
/*
 * av1_inv_txfm2d_add_32x32: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "32x32" presumably names the transform block size -- confirm.
 */
2596 void av1_inv_txfm2d_add_32x32_c(const int32_t* input,
2597                                 uint16_t* output,
2598                                 int stride,
2599                                 TX_TYPE tx_type,
2600                                 int bd);
2601 #define av1_inv_txfm2d_add_32x32 av1_inv_txfm2d_add_32x32_c
2602 
/*
 * av1_inv_txfm2d_add_32x64: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "32x64" presumably names the transform block size -- confirm.
 */
2603 void av1_inv_txfm2d_add_32x64_c(const int32_t* input,
2604                                 uint16_t* output,
2605                                 int stride,
2606                                 TX_TYPE tx_type,
2607                                 int bd);
2608 #define av1_inv_txfm2d_add_32x64 av1_inv_txfm2d_add_32x64_c
2609 
/*
 * av1_inv_txfm2d_add_32x8: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "32x8" presumably names the transform block size -- confirm.
 */
2610 void av1_inv_txfm2d_add_32x8_c(const int32_t* input,
2611                                uint16_t* output,
2612                                int stride,
2613                                TX_TYPE tx_type,
2614                                int bd);
2615 #define av1_inv_txfm2d_add_32x8 av1_inv_txfm2d_add_32x8_c
2616 
/*
 * av1_inv_txfm2d_add_4x16: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "4x16" presumably names the transform block size -- confirm.
 */
2617 void av1_inv_txfm2d_add_4x16_c(const int32_t* input,
2618                                uint16_t* output,
2619                                int stride,
2620                                TX_TYPE tx_type,
2621                                int bd);
2622 #define av1_inv_txfm2d_add_4x16 av1_inv_txfm2d_add_4x16_c
2623 
/*
 * av1_inv_txfm2d_add_4x4: `_c` and `_sse4_1` variants share one signature,
 * and the unsuffixed name is a function pointer of that same type rather
 * than a preprocessor alias.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined, in which case
 * the pointer declaration becomes its definition.
 * NOTE(review): the pointer is presumably assigned to one of the variants
 * by an RTCD setup routine not visible in this chunk -- confirm.
 */
2624 void av1_inv_txfm2d_add_4x4_c(const int32_t* input,
2625                               uint16_t* output,
2626                               int stride,
2627                               TX_TYPE tx_type,
2628                               int bd);
2629 void av1_inv_txfm2d_add_4x4_sse4_1(const int32_t* input,
2630                                    uint16_t* output,
2631                                    int stride,
2632                                    TX_TYPE tx_type,
2633                                    int bd);
2634 RTCD_EXTERN void (*av1_inv_txfm2d_add_4x4)(const int32_t* input,
2635                                            uint16_t* output,
2636                                            int stride,
2637                                            TX_TYPE tx_type,
2638                                            int bd);
2639 
/*
 * av1_inv_txfm2d_add_4x8: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "4x8" presumably names the transform block size -- confirm.
 */
2640 void av1_inv_txfm2d_add_4x8_c(const int32_t* input,
2641                               uint16_t* output,
2642                               int stride,
2643                               TX_TYPE tx_type,
2644                               int bd);
2645 #define av1_inv_txfm2d_add_4x8 av1_inv_txfm2d_add_4x8_c
2646 
/*
 * av1_inv_txfm2d_add_64x16: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "64x16" presumably names the transform block size -- confirm.
 */
2647 void av1_inv_txfm2d_add_64x16_c(const int32_t* input,
2648                                 uint16_t* output,
2649                                 int stride,
2650                                 TX_TYPE tx_type,
2651                                 int bd);
2652 #define av1_inv_txfm2d_add_64x16 av1_inv_txfm2d_add_64x16_c
2653 
/*
 * av1_inv_txfm2d_add_64x32: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "64x32" presumably names the transform block size -- confirm.
 */
2654 void av1_inv_txfm2d_add_64x32_c(const int32_t* input,
2655                                 uint16_t* output,
2656                                 int stride,
2657                                 TX_TYPE tx_type,
2658                                 int bd);
2659 #define av1_inv_txfm2d_add_64x32 av1_inv_txfm2d_add_64x32_c
2660 
/*
 * av1_inv_txfm2d_add_64x64: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "64x64" presumably names the transform block size -- confirm.
 */
2661 void av1_inv_txfm2d_add_64x64_c(const int32_t* input,
2662                                 uint16_t* output,
2663                                 int stride,
2664                                 TX_TYPE tx_type,
2665                                 int bd);
2666 #define av1_inv_txfm2d_add_64x64 av1_inv_txfm2d_add_64x64_c
2667 
/*
 * av1_inv_txfm2d_add_8x16: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "8x16" presumably names the transform block size -- confirm.
 */
2668 void av1_inv_txfm2d_add_8x16_c(const int32_t* input,
2669                                uint16_t* output,
2670                                int stride,
2671                                TX_TYPE tx_type,
2672                                int bd);
2673 #define av1_inv_txfm2d_add_8x16 av1_inv_txfm2d_add_8x16_c
2674 
/*
 * av1_inv_txfm2d_add_8x32: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "8x32" presumably names the transform block size -- confirm.
 */
2675 void av1_inv_txfm2d_add_8x32_c(const int32_t* input,
2676                                uint16_t* output,
2677                                int stride,
2678                                TX_TYPE tx_type,
2679                                int bd);
2680 #define av1_inv_txfm2d_add_8x32 av1_inv_txfm2d_add_8x32_c
2681 
/*
 * av1_inv_txfm2d_add_8x4: only a `_c` variant is declared, and the
 * unsuffixed name is a preprocessor alias for it (no function pointer).
 * NOTE(review): "8x4" presumably names the transform block size -- confirm.
 */
2682 void av1_inv_txfm2d_add_8x4_c(const int32_t* input,
2683                               uint16_t* output,
2684                               int stride,
2685                               TX_TYPE tx_type,
2686                               int bd);
2687 #define av1_inv_txfm2d_add_8x4 av1_inv_txfm2d_add_8x4_c
2688 
/*
 * av1_inv_txfm2d_add_8x8: `_c` and `_sse4_1` variants share one signature,
 * and the unsuffixed name is a function pointer of that same type rather
 * than a preprocessor alias.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined, in which case
 * the pointer declaration becomes its definition.
 * NOTE(review): the pointer is presumably assigned to one of the variants
 * by an RTCD setup routine not visible in this chunk -- confirm.
 */
2689 void av1_inv_txfm2d_add_8x8_c(const int32_t* input,
2690                               uint16_t* output,
2691                               int stride,
2692                               TX_TYPE tx_type,
2693                               int bd);
2694 void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t* input,
2695                                    uint16_t* output,
2696                                    int stride,
2697                                    TX_TYPE tx_type,
2698                                    int bd);
2699 RTCD_EXTERN void (*av1_inv_txfm2d_add_8x8)(const int32_t* input,
2700                                            uint16_t* output,
2701                                            int stride,
2702                                            TX_TYPE tx_type,
2703                                            int bd);
2704 
/*
 * av1_inv_txfm_add: `_c`, `_ssse3` and `_avx2` variants share one signature,
 * and the unsuffixed name is a function pointer of that same type.
 * Unlike the av1_inv_txfm2d_add_* entries, the destination is uint8_t* and
 * the remaining settings arrive through a const TxfmParam*.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): which TxfmParam fields are read is not visible here --
 * confirm against the implementation.
 */
2705 void av1_inv_txfm_add_c(const tran_low_t* dqcoeff,
2706                         uint8_t* dst,
2707                         int stride,
2708                         const TxfmParam* txfm_param);
2709 void av1_inv_txfm_add_ssse3(const tran_low_t* dqcoeff,
2710                             uint8_t* dst,
2711                             int stride,
2712                             const TxfmParam* txfm_param);
2713 void av1_inv_txfm_add_avx2(const tran_low_t* dqcoeff,
2714                            uint8_t* dst,
2715                            int stride,
2716                            const TxfmParam* txfm_param);
2717 RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t* dqcoeff,
2718                                      uint8_t* dst,
2719                                      int stride,
2720                                      const TxfmParam* txfm_param);
2721 
/*
 * av1_lowbd_fwd_txfm: `_c`, `_sse2`, `_sse4_1` and `_avx2` variants share
 * one signature, and the unsuffixed name is a function pointer of that type.
 * Input is a const int16_t* with `diff_stride`; output is a tran_low_t*.
 * Note that txfm_param is a non-const pointer in this signature.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): whether any variant actually writes through txfm_param is
 * not visible here -- confirm before relying on it being unchanged.
 */
2722 void av1_lowbd_fwd_txfm_c(const int16_t* src_diff,
2723                           tran_low_t* coeff,
2724                           int diff_stride,
2725                           TxfmParam* txfm_param);
2726 void av1_lowbd_fwd_txfm_sse2(const int16_t* src_diff,
2727                              tran_low_t* coeff,
2728                              int diff_stride,
2729                              TxfmParam* txfm_param);
2730 void av1_lowbd_fwd_txfm_sse4_1(const int16_t* src_diff,
2731                                tran_low_t* coeff,
2732                                int diff_stride,
2733                                TxfmParam* txfm_param);
2734 void av1_lowbd_fwd_txfm_avx2(const int16_t* src_diff,
2735                              tran_low_t* coeff,
2736                              int diff_stride,
2737                              TxfmParam* txfm_param);
2738 RTCD_EXTERN void (*av1_lowbd_fwd_txfm)(const int16_t* src_diff,
2739                                        tran_low_t* coeff,
2740                                        int diff_stride,
2741                                        TxfmParam* txfm_param);
2742 
/*
 * av1_lowbd_pixel_proj_error: `_c`, `_sse4_1` and `_avx2` variants share one
 * signature and return int64_t; the unsuffixed name is a function pointer
 * of that same type.
 * `int xq[2]` is an array-typed parameter, so it is adjusted to `int*` and
 * the length 2 is not enforced by the compiler.
 * flt0/flt1 are non-const int32_t pointers, each with its own stride.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): the meaning of the returned value and whether flt0/flt1
 * may be NULL are not visible here -- confirm against the implementation.
 */
2743 int64_t av1_lowbd_pixel_proj_error_c(const uint8_t* src8,
2744                                      int width,
2745                                      int height,
2746                                      int src_stride,
2747                                      const uint8_t* dat8,
2748                                      int dat_stride,
2749                                      int32_t* flt0,
2750                                      int flt0_stride,
2751                                      int32_t* flt1,
2752                                      int flt1_stride,
2753                                      int xq[2],
2754                                      const sgr_params_type* params);
2755 int64_t av1_lowbd_pixel_proj_error_sse4_1(const uint8_t* src8,
2756                                           int width,
2757                                           int height,
2758                                           int src_stride,
2759                                           const uint8_t* dat8,
2760                                           int dat_stride,
2761                                           int32_t* flt0,
2762                                           int flt0_stride,
2763                                           int32_t* flt1,
2764                                           int flt1_stride,
2765                                           int xq[2],
2766                                           const sgr_params_type* params);
2767 int64_t av1_lowbd_pixel_proj_error_avx2(const uint8_t* src8,
2768                                         int width,
2769                                         int height,
2770                                         int src_stride,
2771                                         const uint8_t* dat8,
2772                                         int dat_stride,
2773                                         int32_t* flt0,
2774                                         int flt0_stride,
2775                                         int32_t* flt1,
2776                                         int flt1_stride,
2777                                         int xq[2],
2778                                         const sgr_params_type* params);
2779 RTCD_EXTERN int64_t (*av1_lowbd_pixel_proj_error)(
2780     const uint8_t* src8,
2781     int width,
2782     int height,
2783     int src_stride,
2784     const uint8_t* dat8,
2785     int dat_stride,
2786     int32_t* flt0,
2787     int flt0_stride,
2788     int32_t* flt1,
2789     int flt1_stride,
2790     int xq[2],
2791     const sgr_params_type* params);
2792 
/*
 * av1_nn_predict: `_c` and `_sse3` variants share one signature, and the
 * unsuffixed name is a function pointer of that same type.
 * NN_CONFIG is only forward-declared in this header, so the structure
 * layout must come from another header.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): the required lengths of input_nodes/output and the effect
 * of reduce_prec are not visible here -- confirm against the implementation.
 */
2793 void av1_nn_predict_c(const float* input_nodes,
2794                       const NN_CONFIG* const nn_config,
2795                       int reduce_prec,
2796                       float* const output);
2797 void av1_nn_predict_sse3(const float* input_nodes,
2798                          const NN_CONFIG* const nn_config,
2799                          int reduce_prec,
2800                          float* const output);
2801 RTCD_EXTERN void (*av1_nn_predict)(const float* input_nodes,
2802                                    const NN_CONFIG* const nn_config,
2803                                    int reduce_prec,
2804                                    float* const output);
2805 
/*
 * av1_quantize_b: only a `_c` variant is declared, and the unsuffixed name
 * is a preprocessor alias for it (no function pointer).
 * Compared with the av1_quantize_fp* signatures in this header, this one
 * takes three extra trailing arguments: qm_ptr, iqm_ptr and log_scale.
 * NOTE(review): whether qm_ptr/iqm_ptr may be NULL is not visible here --
 * confirm against the implementation.
 */
2806 void av1_quantize_b_c(const tran_low_t* coeff_ptr,
2807                       intptr_t n_coeffs,
2808                       const int16_t* zbin_ptr,
2809                       const int16_t* round_ptr,
2810                       const int16_t* quant_ptr,
2811                       const int16_t* quant_shift_ptr,
2812                       tran_low_t* qcoeff_ptr,
2813                       tran_low_t* dqcoeff_ptr,
2814                       const int16_t* dequant_ptr,
2815                       uint16_t* eob_ptr,
2816                       const int16_t* scan,
2817                       const int16_t* iscan,
2818                       const qm_val_t* qm_ptr,
2819                       const qm_val_t* iqm_ptr,
2820                       int log_scale);
2821 #define av1_quantize_b av1_quantize_b_c
2822 
/*
 * av1_quantize_fp: `_c`, `_sse2` and `_avx2` variants share one
 * 12-parameter signature, and the unsuffixed name is a function pointer of
 * that same type.
 * Non-const outputs are qcoeff_ptr, dqcoeff_ptr and eob_ptr; every other
 * pointer argument is const.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): the pointer is presumably assigned to one of the variants
 * by an RTCD setup routine not visible in this chunk -- confirm.
 */
2823 void av1_quantize_fp_c(const tran_low_t* coeff_ptr,
2824                        intptr_t n_coeffs,
2825                        const int16_t* zbin_ptr,
2826                        const int16_t* round_ptr,
2827                        const int16_t* quant_ptr,
2828                        const int16_t* quant_shift_ptr,
2829                        tran_low_t* qcoeff_ptr,
2830                        tran_low_t* dqcoeff_ptr,
2831                        const int16_t* dequant_ptr,
2832                        uint16_t* eob_ptr,
2833                        const int16_t* scan,
2834                        const int16_t* iscan);
2835 void av1_quantize_fp_sse2(const tran_low_t* coeff_ptr,
2836                           intptr_t n_coeffs,
2837                           const int16_t* zbin_ptr,
2838                           const int16_t* round_ptr,
2839                           const int16_t* quant_ptr,
2840                           const int16_t* quant_shift_ptr,
2841                           tran_low_t* qcoeff_ptr,
2842                           tran_low_t* dqcoeff_ptr,
2843                           const int16_t* dequant_ptr,
2844                           uint16_t* eob_ptr,
2845                           const int16_t* scan,
2846                           const int16_t* iscan);
2847 void av1_quantize_fp_avx2(const tran_low_t* coeff_ptr,
2848                           intptr_t n_coeffs,
2849                           const int16_t* zbin_ptr,
2850                           const int16_t* round_ptr,
2851                           const int16_t* quant_ptr,
2852                           const int16_t* quant_shift_ptr,
2853                           tran_low_t* qcoeff_ptr,
2854                           tran_low_t* dqcoeff_ptr,
2855                           const int16_t* dequant_ptr,
2856                           uint16_t* eob_ptr,
2857                           const int16_t* scan,
2858                           const int16_t* iscan);
2859 RTCD_EXTERN void (*av1_quantize_fp)(const tran_low_t* coeff_ptr,
2860                                     intptr_t n_coeffs,
2861                                     const int16_t* zbin_ptr,
2862                                     const int16_t* round_ptr,
2863                                     const int16_t* quant_ptr,
2864                                     const int16_t* quant_shift_ptr,
2865                                     tran_low_t* qcoeff_ptr,
2866                                     tran_low_t* dqcoeff_ptr,
2867                                     const int16_t* dequant_ptr,
2868                                     uint16_t* eob_ptr,
2869                                     const int16_t* scan,
2870                                     const int16_t* iscan);
2871 
/*
 * av1_quantize_fp_32x32: `_c` and `_avx2` variants share the same
 * 12-parameter signature as av1_quantize_fp, and the unsuffixed name is a
 * function pointer of that type.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): "32x32" presumably names the transform size this entry is
 * meant for -- confirm against the implementation.
 */
2872 void av1_quantize_fp_32x32_c(const tran_low_t* coeff_ptr,
2873                              intptr_t n_coeffs,
2874                              const int16_t* zbin_ptr,
2875                              const int16_t* round_ptr,
2876                              const int16_t* quant_ptr,
2877                              const int16_t* quant_shift_ptr,
2878                              tran_low_t* qcoeff_ptr,
2879                              tran_low_t* dqcoeff_ptr,
2880                              const int16_t* dequant_ptr,
2881                              uint16_t* eob_ptr,
2882                              const int16_t* scan,
2883                              const int16_t* iscan);
2884 void av1_quantize_fp_32x32_avx2(const tran_low_t* coeff_ptr,
2885                                 intptr_t n_coeffs,
2886                                 const int16_t* zbin_ptr,
2887                                 const int16_t* round_ptr,
2888                                 const int16_t* quant_ptr,
2889                                 const int16_t* quant_shift_ptr,
2890                                 tran_low_t* qcoeff_ptr,
2891                                 tran_low_t* dqcoeff_ptr,
2892                                 const int16_t* dequant_ptr,
2893                                 uint16_t* eob_ptr,
2894                                 const int16_t* scan,
2895                                 const int16_t* iscan);
2896 RTCD_EXTERN void (*av1_quantize_fp_32x32)(const tran_low_t* coeff_ptr,
2897                                           intptr_t n_coeffs,
2898                                           const int16_t* zbin_ptr,
2899                                           const int16_t* round_ptr,
2900                                           const int16_t* quant_ptr,
2901                                           const int16_t* quant_shift_ptr,
2902                                           tran_low_t* qcoeff_ptr,
2903                                           tran_low_t* dqcoeff_ptr,
2904                                           const int16_t* dequant_ptr,
2905                                           uint16_t* eob_ptr,
2906                                           const int16_t* scan,
2907                                           const int16_t* iscan);
2908 
/*
 * av1_quantize_fp_64x64: `_c` and `_avx2` variants share the same
 * 12-parameter signature as av1_quantize_fp, and the unsuffixed name is a
 * function pointer of that type.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): "64x64" presumably names the transform size this entry is
 * meant for -- confirm against the implementation.
 */
2909 void av1_quantize_fp_64x64_c(const tran_low_t* coeff_ptr,
2910                              intptr_t n_coeffs,
2911                              const int16_t* zbin_ptr,
2912                              const int16_t* round_ptr,
2913                              const int16_t* quant_ptr,
2914                              const int16_t* quant_shift_ptr,
2915                              tran_low_t* qcoeff_ptr,
2916                              tran_low_t* dqcoeff_ptr,
2917                              const int16_t* dequant_ptr,
2918                              uint16_t* eob_ptr,
2919                              const int16_t* scan,
2920                              const int16_t* iscan);
2921 void av1_quantize_fp_64x64_avx2(const tran_low_t* coeff_ptr,
2922                                 intptr_t n_coeffs,
2923                                 const int16_t* zbin_ptr,
2924                                 const int16_t* round_ptr,
2925                                 const int16_t* quant_ptr,
2926                                 const int16_t* quant_shift_ptr,
2927                                 tran_low_t* qcoeff_ptr,
2928                                 tran_low_t* dqcoeff_ptr,
2929                                 const int16_t* dequant_ptr,
2930                                 uint16_t* eob_ptr,
2931                                 const int16_t* scan,
2932                                 const int16_t* iscan);
2933 RTCD_EXTERN void (*av1_quantize_fp_64x64)(const tran_low_t* coeff_ptr,
2934                                           intptr_t n_coeffs,
2935                                           const int16_t* zbin_ptr,
2936                                           const int16_t* round_ptr,
2937                                           const int16_t* quant_ptr,
2938                                           const int16_t* quant_shift_ptr,
2939                                           tran_low_t* qcoeff_ptr,
2940                                           tran_low_t* dqcoeff_ptr,
2941                                           const int16_t* dequant_ptr,
2942                                           uint16_t* eob_ptr,
2943                                           const int16_t* scan,
2944                                           const int16_t* iscan);
2945 
/*
 * av1_quantize_lp: `_c` and `_avx2` variants share one 9-parameter
 * signature, and the unsuffixed name is a function pointer of that type.
 * Coefficient buffers here are int16_t (not tran_low_t), and the signature
 * has no zbin_ptr, quant_shift_ptr or iscan arguments.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): "lp" presumably stands for low precision -- confirm.
 */
2946 void av1_quantize_lp_c(const int16_t* coeff_ptr,
2947                        intptr_t n_coeffs,
2948                        const int16_t* round_ptr,
2949                        const int16_t* quant_ptr,
2950                        int16_t* qcoeff_ptr,
2951                        int16_t* dqcoeff_ptr,
2952                        const int16_t* dequant_ptr,
2953                        uint16_t* eob_ptr,
2954                        const int16_t* scan);
2955 void av1_quantize_lp_avx2(const int16_t* coeff_ptr,
2956                           intptr_t n_coeffs,
2957                           const int16_t* round_ptr,
2958                           const int16_t* quant_ptr,
2959                           int16_t* qcoeff_ptr,
2960                           int16_t* dqcoeff_ptr,
2961                           const int16_t* dequant_ptr,
2962                           uint16_t* eob_ptr,
2963                           const int16_t* scan);
2964 RTCD_EXTERN void (*av1_quantize_lp)(const int16_t* coeff_ptr,
2965                                     intptr_t n_coeffs,
2966                                     const int16_t* round_ptr,
2967                                     const int16_t* quant_ptr,
2968                                     int16_t* qcoeff_ptr,
2969                                     int16_t* dqcoeff_ptr,
2970                                     const int16_t* dequant_ptr,
2971                                     uint16_t* eob_ptr,
2972                                     const int16_t* scan);
2973 
/*
 * av1_round_shift_array: `_c` and `_sse4_1` variants share one signature,
 * and the unsuffixed name is a function pointer of that same type.
 * `arr` is a non-const int32_t*, accompanied by `size` and `bit`.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): whether `bit` may be zero or negative is not visible here
 * -- confirm against the implementation.
 */
2974 void av1_round_shift_array_c(int32_t* arr, int size, int bit);
2975 void av1_round_shift_array_sse4_1(int32_t* arr, int size, int bit);
2976 RTCD_EXTERN void (*av1_round_shift_array)(int32_t* arr, int size, int bit);
2977 
/*
 * av1_selfguided_restoration: `_c`, `_sse4_1` and `_avx2` variants share
 * one signature and return int; the unsuffixed name is a function pointer
 * of that same type.
 * flt0 and flt1 are non-const int32_t pointers that share one flt_stride.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): the meaning of the int return value (presumably a status
 * code) and how dgd8 is interpreted when `highbd` is non-zero are not
 * visible here -- confirm against the implementation.
 */
2978 int av1_selfguided_restoration_c(const uint8_t* dgd8,
2979                                  int width,
2980                                  int height,
2981                                  int dgd_stride,
2982                                  int32_t* flt0,
2983                                  int32_t* flt1,
2984                                  int flt_stride,
2985                                  int sgr_params_idx,
2986                                  int bit_depth,
2987                                  int highbd);
2988 int av1_selfguided_restoration_sse4_1(const uint8_t* dgd8,
2989                                       int width,
2990                                       int height,
2991                                       int dgd_stride,
2992                                       int32_t* flt0,
2993                                       int32_t* flt1,
2994                                       int flt_stride,
2995                                       int sgr_params_idx,
2996                                       int bit_depth,
2997                                       int highbd);
2998 int av1_selfguided_restoration_avx2(const uint8_t* dgd8,
2999                                     int width,
3000                                     int height,
3001                                     int dgd_stride,
3002                                     int32_t* flt0,
3003                                     int32_t* flt1,
3004                                     int flt_stride,
3005                                     int sgr_params_idx,
3006                                     int bit_depth,
3007                                     int highbd);
3008 RTCD_EXTERN int (*av1_selfguided_restoration)(const uint8_t* dgd8,
3009                                               int width,
3010                                               int height,
3011                                               int dgd_stride,
3012                                               int32_t* flt0,
3013                                               int32_t* flt1,
3014                                               int flt_stride,
3015                                               int sgr_params_idx,
3016                                               int bit_depth,
3017                                               int highbd);
3018 
/*
 * av1_txb_init_levels: `_c`, `_sse4_1` and `_avx2` variants share one
 * signature, and the unsuffixed name is a function pointer of that type.
 * Reads from a const tran_low_t* `coeff` with `width`/`height`; `levels`
 * is the only pointer through which data can be written.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): the required size of `levels` is not visible here --
 * confirm against the implementation.
 */
3019 void av1_txb_init_levels_c(const tran_low_t* const coeff,
3020                            const int width,
3021                            const int height,
3022                            uint8_t* const levels);
3023 void av1_txb_init_levels_sse4_1(const tran_low_t* const coeff,
3024                                 const int width,
3025                                 const int height,
3026                                 uint8_t* const levels);
3027 void av1_txb_init_levels_avx2(const tran_low_t* const coeff,
3028                               const int width,
3029                               const int height,
3030                               uint8_t* const levels);
3031 RTCD_EXTERN void (*av1_txb_init_levels)(const tran_low_t* const coeff,
3032                                         const int width,
3033                                         const int height,
3034                                         uint8_t* const levels);
3035 
/*
 * av1_upsample_intra_edge: `_c` and `_sse4_1` variants share one signature,
 * and the unsuffixed name is a function pointer of that same type.
 * The only buffer argument is the non-const `p`, so any result must be
 * produced through it.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): how much storage around `p` must be valid for a given `sz`
 * is not visible here -- confirm against the implementation.
 */
3036 void av1_upsample_intra_edge_c(uint8_t* p, int sz);
3037 void av1_upsample_intra_edge_sse4_1(uint8_t* p, int sz);
3038 RTCD_EXTERN void (*av1_upsample_intra_edge)(uint8_t* p, int sz);
3039 
/*
 * av1_upsample_intra_edge_high: uint16_t counterpart of
 * av1_upsample_intra_edge with an extra `bd` argument. `_c` and `_sse4_1`
 * variants share one signature, and the unsuffixed name is a function
 * pointer of that same type.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): `bd` is presumably the sample bit depth -- confirm.
 */
3040 void av1_upsample_intra_edge_high_c(uint16_t* p, int sz, int bd);
3041 void av1_upsample_intra_edge_high_sse4_1(uint16_t* p, int sz, int bd);
3042 RTCD_EXTERN void (*av1_upsample_intra_edge_high)(uint16_t* p, int sz, int bd);
3043 
/*
 * av1_warp_affine: `_c`, `_sse4_1` and `_avx2` variants share one
 * 18-parameter signature, and the unsuffixed name is a function pointer of
 * that same type.
 * `mat` and `ref` are const inputs; `pred` and `conv_params` are non-const
 * pointers. alpha/beta/gamma/delta are passed as int16_t values.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): the number of entries expected in `mat`, and whether
 * conv_params is modified, are not visible here -- confirm against the
 * implementation.
 */
3044 void av1_warp_affine_c(const int32_t* mat,
3045                        const uint8_t* ref,
3046                        int width,
3047                        int height,
3048                        int stride,
3049                        uint8_t* pred,
3050                        int p_col,
3051                        int p_row,
3052                        int p_width,
3053                        int p_height,
3054                        int p_stride,
3055                        int subsampling_x,
3056                        int subsampling_y,
3057                        ConvolveParams* conv_params,
3058                        int16_t alpha,
3059                        int16_t beta,
3060                        int16_t gamma,
3061                        int16_t delta);
3062 void av1_warp_affine_sse4_1(const int32_t* mat,
3063                             const uint8_t* ref,
3064                             int width,
3065                             int height,
3066                             int stride,
3067                             uint8_t* pred,
3068                             int p_col,
3069                             int p_row,
3070                             int p_width,
3071                             int p_height,
3072                             int p_stride,
3073                             int subsampling_x,
3074                             int subsampling_y,
3075                             ConvolveParams* conv_params,
3076                             int16_t alpha,
3077                             int16_t beta,
3078                             int16_t gamma,
3079                             int16_t delta);
3080 void av1_warp_affine_avx2(const int32_t* mat,
3081                           const uint8_t* ref,
3082                           int width,
3083                           int height,
3084                           int stride,
3085                           uint8_t* pred,
3086                           int p_col,
3087                           int p_row,
3088                           int p_width,
3089                           int p_height,
3090                           int p_stride,
3091                           int subsampling_x,
3092                           int subsampling_y,
3093                           ConvolveParams* conv_params,
3094                           int16_t alpha,
3095                           int16_t beta,
3096                           int16_t gamma,
3097                           int16_t delta);
3098 RTCD_EXTERN void (*av1_warp_affine)(const int32_t* mat,
3099                                     const uint8_t* ref,
3100                                     int width,
3101                                     int height,
3102                                     int stride,
3103                                     uint8_t* pred,
3104                                     int p_col,
3105                                     int p_row,
3106                                     int p_width,
3107                                     int p_height,
3108                                     int p_stride,
3109                                     int subsampling_x,
3110                                     int subsampling_y,
3111                                     ConvolveParams* conv_params,
3112                                     int16_t alpha,
3113                                     int16_t beta,
3114                                     int16_t gamma,
3115                                     int16_t delta);
3116 
/*
 * av1_wedge_compute_delta_squares: `_c`, `_sse2` and `_avx2` variants share
 * one signature, and the unsuffixed name is a function pointer of that type.
 * `d` is the only non-const pointer; `a` and `b` are const int16_t inputs
 * and `N` is passed alongside them.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): `N` is presumably the element count of all three arrays,
 * and the vector variants may require it to be a multiple of some width --
 * confirm against the implementation.
 */
3117 void av1_wedge_compute_delta_squares_c(int16_t* d,
3118                                        const int16_t* a,
3119                                        const int16_t* b,
3120                                        int N);
3121 void av1_wedge_compute_delta_squares_sse2(int16_t* d,
3122                                           const int16_t* a,
3123                                           const int16_t* b,
3124                                           int N);
3125 void av1_wedge_compute_delta_squares_avx2(int16_t* d,
3126                                           const int16_t* a,
3127                                           const int16_t* b,
3128                                           int N);
3129 RTCD_EXTERN void (*av1_wedge_compute_delta_squares)(int16_t* d,
3130                                                     const int16_t* a,
3131                                                     const int16_t* b,
3132                                                     int N);
3133 
/*
 * av1_wedge_sign_from_residuals: `_c`, `_sse2` and `_avx2` variants share
 * one signature and return int8_t; the unsuffixed name is a function
 * pointer of that same type.
 * All pointer arguments are const, so the result is delivered only through
 * the return value.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): the set of possible return values and how `limit` is
 * compared are not visible here -- confirm against the implementation.
 */
3134 int8_t av1_wedge_sign_from_residuals_c(const int16_t* ds,
3135                                        const uint8_t* m,
3136                                        int N,
3137                                        int64_t limit);
3138 int8_t av1_wedge_sign_from_residuals_sse2(const int16_t* ds,
3139                                           const uint8_t* m,
3140                                           int N,
3141                                           int64_t limit);
3142 int8_t av1_wedge_sign_from_residuals_avx2(const int16_t* ds,
3143                                           const uint8_t* m,
3144                                           int N,
3145                                           int64_t limit);
3146 RTCD_EXTERN int8_t (*av1_wedge_sign_from_residuals)(const int16_t* ds,
3147                                                     const uint8_t* m,
3148                                                     int N,
3149                                                     int64_t limit);
3150 
/*
 * av1_wedge_sse_from_residuals: `_c`, `_sse2` and `_avx2` variants share
 * one signature and return uint64_t; the unsuffixed name is a function
 * pointer of that same type.
 * All pointer arguments are const, so the result is delivered only through
 * the return value.
 * RTCD_EXTERN expands to `extern` unless RTCD_C is defined.
 * NOTE(review): the scaling of the returned value and any alignment or
 * multiple-of requirements on `N` are not visible here -- confirm against
 * the implementation.
 */
3151 uint64_t av1_wedge_sse_from_residuals_c(const int16_t* r1,
3152                                         const int16_t* d,
3153                                         const uint8_t* m,
3154                                         int N);
3155 uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t* r1,
3156                                            const int16_t* d,
3157                                            const uint8_t* m,
3158                                            int N);
3159 uint64_t av1_wedge_sse_from_residuals_avx2(const int16_t* r1,
3160                                            const int16_t* d,
3161                                            const uint8_t* m,
3162                                            int N);
3163 RTCD_EXTERN uint64_t (*av1_wedge_sse_from_residuals)(const int16_t* r1,
3164                                                      const int16_t* d,
3165                                                      const uint8_t* m,
3166                                                      int N);
3167 
/*
 * av1_wiener_convolve_add_src: C, SSE2 and AVX2 variants with one shared
 * signature, followed by the function pointer that setup_rtcd_internal()
 * assigns. The pointer starts at the _sse2 variant and is replaced by _avx2
 * when the CPU flags include HAS_AVX2; the _c variant is declared but is not
 * selected there.
 * All variants read from an 8-bit `src` and write to an 8-bit `dst`, each
 * with its own ptrdiff_t stride, and take separate x and y filter pointers
 * and step values plus a ConvolveParams pointer.
 */
3168 void av1_wiener_convolve_add_src_c(const uint8_t* src,
3169                                    ptrdiff_t src_stride,
3170                                    uint8_t* dst,
3171                                    ptrdiff_t dst_stride,
3172                                    const int16_t* filter_x,
3173                                    int x_step_q4,
3174                                    const int16_t* filter_y,
3175                                    int y_step_q4,
3176                                    int w,
3177                                    int h,
3178                                    const ConvolveParams* conv_params);
3179 void av1_wiener_convolve_add_src_sse2(const uint8_t* src,
3180                                       ptrdiff_t src_stride,
3181                                       uint8_t* dst,
3182                                       ptrdiff_t dst_stride,
3183                                       const int16_t* filter_x,
3184                                       int x_step_q4,
3185                                       const int16_t* filter_y,
3186                                       int y_step_q4,
3187                                       int w,
3188                                       int h,
3189                                       const ConvolveParams* conv_params);
3190 void av1_wiener_convolve_add_src_avx2(const uint8_t* src,
3191                                       ptrdiff_t src_stride,
3192                                       uint8_t* dst,
3193                                       ptrdiff_t dst_stride,
3194                                       const int16_t* filter_x,
3195                                       int x_step_q4,
3196                                       const int16_t* filter_y,
3197                                       int y_step_q4,
3198                                       int w,
3199                                       int h,
3200                                       const ConvolveParams* conv_params);
3201 RTCD_EXTERN void (*av1_wiener_convolve_add_src)(
3202     const uint8_t* src,
3203     ptrdiff_t src_stride,
3204     uint8_t* dst,
3205     ptrdiff_t dst_stride,
3206     const int16_t* filter_x,
3207     int x_step_q4,
3208     const int16_t* filter_y,
3209     int y_step_q4,
3210     int w,
3211     int h,
3212     const ConvolveParams* conv_params);
3213 
/*
 * cdef_copy_rect8_16bit_to_16bit: C, SSE2, SSSE3, SSE4.1 and AVX2 variants
 * with one shared signature (16-bit source, 16-bit destination), followed by
 * the function pointer that setup_rtcd_internal() assigns.
 * The pointer starts at the _sse2 variant; HAS_SSSE3, HAS_SSE4_1 and HAS_AVX2
 * are then tested in that order and each match overwrites the pointer, so the
 * last matching flag wins. The _c variant is declared but is not selected
 * there.
 */
3214 void cdef_copy_rect8_16bit_to_16bit_c(uint16_t* dst,
3215                                       int dstride,
3216                                       const uint16_t* src,
3217                                       int sstride,
3218                                       int v,
3219                                       int h);
3220 void cdef_copy_rect8_16bit_to_16bit_sse2(uint16_t* dst,
3221                                          int dstride,
3222                                          const uint16_t* src,
3223                                          int sstride,
3224                                          int v,
3225                                          int h);
3226 void cdef_copy_rect8_16bit_to_16bit_ssse3(uint16_t* dst,
3227                                           int dstride,
3228                                           const uint16_t* src,
3229                                           int sstride,
3230                                           int v,
3231                                           int h);
3232 void cdef_copy_rect8_16bit_to_16bit_sse4_1(uint16_t* dst,
3233                                            int dstride,
3234                                            const uint16_t* src,
3235                                            int sstride,
3236                                            int v,
3237                                            int h);
3238 void cdef_copy_rect8_16bit_to_16bit_avx2(uint16_t* dst,
3239                                          int dstride,
3240                                          const uint16_t* src,
3241                                          int sstride,
3242                                          int v,
3243                                          int h);
3244 RTCD_EXTERN void (*cdef_copy_rect8_16bit_to_16bit)(uint16_t* dst,
3245                                                    int dstride,
3246                                                    const uint16_t* src,
3247                                                    int sstride,
3248                                                    int v,
3249                                                    int h);
3250 
/*
 * cdef_copy_rect8_8bit_to_16bit: C, SSE2, SSSE3, SSE4.1 and AVX2 variants
 * with one shared signature (8-bit source, 16-bit destination), followed by
 * the function pointer that setup_rtcd_internal() assigns.
 * The pointer starts at the _sse2 variant and is overwritten by _ssse3 and
 * then _sse4_1 when HAS_SSSE3 / HAS_SSE4_1 are set.
 * NOTE(review): an _avx2 override presumably follows, as it does for
 * cdef_copy_rect8_16bit_to_16bit -- confirm in setup_rtcd_internal().
 */
3251 void cdef_copy_rect8_8bit_to_16bit_c(uint16_t* dst,
3252                                      int dstride,
3253                                      const uint8_t* src,
3254                                      int sstride,
3255                                      int v,
3256                                      int h);
3257 void cdef_copy_rect8_8bit_to_16bit_sse2(uint16_t* dst,
3258                                         int dstride,
3259                                         const uint8_t* src,
3260                                         int sstride,
3261                                         int v,
3262                                         int h);
3263 void cdef_copy_rect8_8bit_to_16bit_ssse3(uint16_t* dst,
3264                                          int dstride,
3265                                          const uint8_t* src,
3266                                          int sstride,
3267                                          int v,
3268                                          int h);
3269 void cdef_copy_rect8_8bit_to_16bit_sse4_1(uint16_t* dst,
3270                                           int dstride,
3271                                           const uint8_t* src,
3272                                           int sstride,
3273                                           int v,
3274                                           int h);
3275 void cdef_copy_rect8_8bit_to_16bit_avx2(uint16_t* dst,
3276                                         int dstride,
3277                                         const uint8_t* src,
3278                                         int sstride,
3279                                         int v,
3280                                         int h);
3281 RTCD_EXTERN void (*cdef_copy_rect8_8bit_to_16bit)(uint16_t* dst,
3282                                                   int dstride,
3283                                                   const uint8_t* src,
3284                                                   int sstride,
3285                                                   int v,
3286                                                   int h);
3287 
/*
 * cdef_filter_block: C, SSE2, SSSE3, SSE4.1 and AVX2 variants with one shared
 * signature, followed by the matching function pointer.
 * Every variant takes both an 8-bit (`dst8`) and a 16-bit (`dst16`)
 * destination pointer, a 16-bit input pointer, and separate primary and
 * secondary strength and damping values.
 * NOTE(review): which destination is written, and under what condition, is
 * not visible from the prototypes -- confirm against the implementation.
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * like the other RTCD_EXTERN pointers in this header -- confirm.
 */
3288 void cdef_filter_block_c(uint8_t* dst8,
3289                          uint16_t* dst16,
3290                          int dstride,
3291                          const uint16_t* in,
3292                          int pri_strength,
3293                          int sec_strength,
3294                          int dir,
3295                          int pri_damping,
3296                          int sec_damping,
3297                          int bsize,
3298                          int coeff_shift);
3299 void cdef_filter_block_sse2(uint8_t* dst8,
3300                             uint16_t* dst16,
3301                             int dstride,
3302                             const uint16_t* in,
3303                             int pri_strength,
3304                             int sec_strength,
3305                             int dir,
3306                             int pri_damping,
3307                             int sec_damping,
3308                             int bsize,
3309                             int coeff_shift);
3310 void cdef_filter_block_ssse3(uint8_t* dst8,
3311                              uint16_t* dst16,
3312                              int dstride,
3313                              const uint16_t* in,
3314                              int pri_strength,
3315                              int sec_strength,
3316                              int dir,
3317                              int pri_damping,
3318                              int sec_damping,
3319                              int bsize,
3320                              int coeff_shift);
3321 void cdef_filter_block_sse4_1(uint8_t* dst8,
3322                               uint16_t* dst16,
3323                               int dstride,
3324                               const uint16_t* in,
3325                               int pri_strength,
3326                               int sec_strength,
3327                               int dir,
3328                               int pri_damping,
3329                               int sec_damping,
3330                               int bsize,
3331                               int coeff_shift);
3332 void cdef_filter_block_avx2(uint8_t* dst8,
3333                             uint16_t* dst16,
3334                             int dstride,
3335                             const uint16_t* in,
3336                             int pri_strength,
3337                             int sec_strength,
3338                             int dir,
3339                             int pri_damping,
3340                             int sec_damping,
3341                             int bsize,
3342                             int coeff_shift);
3343 RTCD_EXTERN void (*cdef_filter_block)(uint8_t* dst8,
3344                                       uint16_t* dst16,
3345                                       int dstride,
3346                                       const uint16_t* in,
3347                                       int pri_strength,
3348                                       int sec_strength,
3349                                       int dir,
3350                                       int pri_damping,
3351                                       int sec_damping,
3352                                       int bsize,
3353                                       int coeff_shift);
3354 
/*
 * cdef_find_dir: C, SSE2, SSSE3, SSE4.1 and AVX2 variants with one shared
 * signature, followed by the matching function pointer.
 * Every variant returns an int and additionally takes a non-const
 * `int32_t* var`.
 * NOTE(review): `var` looks like an output parameter and the return value
 * like a direction index -- confirm against the implementation.
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * like the other RTCD_EXTERN pointers in this header -- confirm.
 */
3355 int cdef_find_dir_c(const uint16_t* img,
3356                     int stride,
3357                     int32_t* var,
3358                     int coeff_shift);
3359 int cdef_find_dir_sse2(const uint16_t* img,
3360                        int stride,
3361                        int32_t* var,
3362                        int coeff_shift);
3363 int cdef_find_dir_ssse3(const uint16_t* img,
3364                         int stride,
3365                         int32_t* var,
3366                         int coeff_shift);
3367 int cdef_find_dir_sse4_1(const uint16_t* img,
3368                          int stride,
3369                          int32_t* var,
3370                          int coeff_shift);
3371 int cdef_find_dir_avx2(const uint16_t* img,
3372                        int stride,
3373                        int32_t* var,
3374                        int coeff_shift);
3375 RTCD_EXTERN int (*cdef_find_dir)(const uint16_t* img,
3376                                  int stride,
3377                                  int32_t* var,
3378                                  int coeff_shift);
3379 
/*
 * cfl_get_luma_subsampling_420_hbd: C, SSSE3 and AVX2 variants plus the
 * matching function pointer. Each variant takes a TX_SIZE and returns a
 * cfl_subsample_hbd_fn, i.e. a pointer to a function taking a uint16_t input,
 * its stride and a uint16_t output (see the typedef near the top of this
 * header).
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * -- confirm.
 */
3380 cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_c(TX_SIZE tx_size);
3381 cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_ssse3(TX_SIZE tx_size);
3382 cfl_subsample_hbd_fn cfl_get_luma_subsampling_420_hbd_avx2(TX_SIZE tx_size);
3383 RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_420_hbd)(
3384     TX_SIZE tx_size);
3385 
/*
 * cfl_get_luma_subsampling_420_lbd: C, SSSE3 and AVX2 variants plus the
 * matching function pointer. Each variant takes a TX_SIZE and returns a
 * cfl_subsample_lbd_fn, i.e. a pointer to a function taking a uint8_t input,
 * its stride and a uint16_t output (see the typedef near the top of this
 * header).
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * -- confirm.
 */
3386 cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_c(TX_SIZE tx_size);
3387 cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_ssse3(TX_SIZE tx_size);
3388 cfl_subsample_lbd_fn cfl_get_luma_subsampling_420_lbd_avx2(TX_SIZE tx_size);
3389 RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_420_lbd)(
3390     TX_SIZE tx_size);
3391 
/*
 * cfl_get_luma_subsampling_422_hbd: C, SSSE3 and AVX2 variants plus the
 * matching function pointer. Each variant takes a TX_SIZE and returns a
 * cfl_subsample_hbd_fn (function pointer type declared near the top of this
 * header, operating on uint16_t input).
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * -- confirm.
 */
3392 cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_c(TX_SIZE tx_size);
3393 cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_ssse3(TX_SIZE tx_size);
3394 cfl_subsample_hbd_fn cfl_get_luma_subsampling_422_hbd_avx2(TX_SIZE tx_size);
3395 RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_422_hbd)(
3396     TX_SIZE tx_size);
3397 
/*
 * cfl_get_luma_subsampling_422_lbd: C, SSSE3 and AVX2 variants plus the
 * matching function pointer. Each variant takes a TX_SIZE and returns a
 * cfl_subsample_lbd_fn (function pointer type declared near the top of this
 * header, operating on uint8_t input).
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * -- confirm.
 */
3398 cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_c(TX_SIZE tx_size);
3399 cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_ssse3(TX_SIZE tx_size);
3400 cfl_subsample_lbd_fn cfl_get_luma_subsampling_422_lbd_avx2(TX_SIZE tx_size);
3401 RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_422_lbd)(
3402     TX_SIZE tx_size);
3403 
/*
 * cfl_get_luma_subsampling_444_hbd: C, SSSE3 and AVX2 variants plus the
 * matching function pointer. Each variant takes a TX_SIZE and returns a
 * cfl_subsample_hbd_fn (function pointer type declared near the top of this
 * header, operating on uint16_t input).
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * -- confirm.
 */
3404 cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_c(TX_SIZE tx_size);
3405 cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_ssse3(TX_SIZE tx_size);
3406 cfl_subsample_hbd_fn cfl_get_luma_subsampling_444_hbd_avx2(TX_SIZE tx_size);
3407 RTCD_EXTERN cfl_subsample_hbd_fn (*cfl_get_luma_subsampling_444_hbd)(
3408     TX_SIZE tx_size);
3409 
/*
 * cfl_get_luma_subsampling_444_lbd: C, SSSE3 and AVX2 variants plus the
 * matching function pointer. Each variant takes a TX_SIZE and returns a
 * cfl_subsample_lbd_fn (function pointer type declared near the top of this
 * header, operating on uint8_t input).
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * -- confirm.
 */
3410 cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_c(TX_SIZE tx_size);
3411 cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_ssse3(TX_SIZE tx_size);
3412 cfl_subsample_lbd_fn cfl_get_luma_subsampling_444_lbd_avx2(TX_SIZE tx_size);
3413 RTCD_EXTERN cfl_subsample_lbd_fn (*cfl_get_luma_subsampling_444_lbd)(
3414     TX_SIZE tx_size);
3415 
/*
 * cfl_get_predict_hbd_fn: C, SSSE3 and AVX2 variants plus the matching
 * function pointer. Each variant takes a TX_SIZE and returns a
 * cfl_predict_hbd_fn function pointer.
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * -- confirm.
 */
3416 cfl_predict_hbd_fn cfl_get_predict_hbd_fn_c(TX_SIZE tx_size);
3417 cfl_predict_hbd_fn cfl_get_predict_hbd_fn_ssse3(TX_SIZE tx_size);
3418 cfl_predict_hbd_fn cfl_get_predict_hbd_fn_avx2(TX_SIZE tx_size);
3419 RTCD_EXTERN cfl_predict_hbd_fn (*cfl_get_predict_hbd_fn)(TX_SIZE tx_size);
3420 
/*
 * cfl_get_predict_lbd_fn: C, SSSE3 and AVX2 variants plus the matching
 * function pointer. Each variant takes a TX_SIZE and returns a
 * cfl_predict_lbd_fn.
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * -- confirm.
 */
3421 cfl_predict_lbd_fn cfl_get_predict_lbd_fn_c(TX_SIZE tx_size);
3422 cfl_predict_lbd_fn cfl_get_predict_lbd_fn_ssse3(TX_SIZE tx_size);
3423 cfl_predict_lbd_fn cfl_get_predict_lbd_fn_avx2(TX_SIZE tx_size);
3424 RTCD_EXTERN cfl_predict_lbd_fn (*cfl_get_predict_lbd_fn)(TX_SIZE tx_size);
3425 
/*
 * cfl_get_subtract_average_fn: C, SSE2 and AVX2 variants plus the matching
 * function pointer. Each variant takes a TX_SIZE and returns a
 * cfl_subtract_average_fn. Unlike the other cfl_get_* groups in this header,
 * the baseline SIMD variant here is SSE2 rather than SSSE3.
 * NOTE(review): the pointer is presumably assigned in setup_rtcd_internal()
 * -- confirm.
 */
3426 cfl_subtract_average_fn cfl_get_subtract_average_fn_c(TX_SIZE tx_size);
3427 cfl_subtract_average_fn cfl_get_subtract_average_fn_sse2(TX_SIZE tx_size);
3428 cfl_subtract_average_fn cfl_get_subtract_average_fn_avx2(TX_SIZE tx_size);
3429 RTCD_EXTERN cfl_subtract_average_fn (*cfl_get_subtract_average_fn)(
3430     TX_SIZE tx_size);
3431 
/*
 * Declared here; the definition lives outside this header.
 * NOTE(review): presumably the one-time initialiser that runs
 * setup_rtcd_internal() to fill in the RTCD_EXTERN function pointers above --
 * confirm against the translation unit that defines RTCD_C.
 */
3432 void av1_rtcd(void);
3433 
3434 #ifdef RTCD_C
3435 #include "aom_ports/x86.h"
setup_rtcd_internal(void)3436 static void setup_rtcd_internal(void) {
3437   int flags = x86_simd_caps();
3438 
3439   (void)flags;
3440 
3441   av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_c;
3442   if (flags & HAS_SSE4_1)
3443     av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_sse4_1;
3444   if (flags & HAS_AVX2)
3445     av1_apply_selfguided_restoration = av1_apply_selfguided_restoration_avx2;
3446   av1_apply_temporal_filter_planewise =
3447       av1_apply_temporal_filter_planewise_sse2;
3448   if (flags & HAS_AVX2)
3449     av1_apply_temporal_filter_planewise =
3450         av1_apply_temporal_filter_planewise_avx2;
3451   av1_apply_temporal_filter_yuv = av1_apply_temporal_filter_yuv_c;
3452   if (flags & HAS_SSE4_1)
3453     av1_apply_temporal_filter_yuv = av1_apply_temporal_filter_yuv_sse4_1;
3454   av1_block_error = av1_block_error_sse2;
3455   if (flags & HAS_AVX2)
3456     av1_block_error = av1_block_error_avx2;
3457   av1_block_error_lp = av1_block_error_lp_c;
3458   if (flags & HAS_AVX2)
3459     av1_block_error_lp = av1_block_error_lp_avx2;
3460   av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_c;
3461   if (flags & HAS_SSE4_1)
3462     av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_sse4_1;
3463   if (flags & HAS_AVX2)
3464     av1_build_compound_diffwtd_mask = av1_build_compound_diffwtd_mask_avx2;
3465   av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
3466   if (flags & HAS_SSE4_1)
3467     av1_build_compound_diffwtd_mask_d16 =
3468         av1_build_compound_diffwtd_mask_d16_sse4_1;
3469   if (flags & HAS_AVX2)
3470     av1_build_compound_diffwtd_mask_d16 =
3471         av1_build_compound_diffwtd_mask_d16_avx2;
3472   av1_build_compound_diffwtd_mask_highbd =
3473       av1_build_compound_diffwtd_mask_highbd_c;
3474   if (flags & HAS_SSSE3)
3475     av1_build_compound_diffwtd_mask_highbd =
3476         av1_build_compound_diffwtd_mask_highbd_ssse3;
3477   if (flags & HAS_AVX2)
3478     av1_build_compound_diffwtd_mask_highbd =
3479         av1_build_compound_diffwtd_mask_highbd_avx2;
3480   av1_calc_frame_error = av1_calc_frame_error_sse2;
3481   if (flags & HAS_AVX2)
3482     av1_calc_frame_error = av1_calc_frame_error_avx2;
3483   av1_calc_proj_params = av1_calc_proj_params_c;
3484   if (flags & HAS_AVX2)
3485     av1_calc_proj_params = av1_calc_proj_params_avx2;
3486   av1_compute_cross_correlation = av1_compute_cross_correlation_c;
3487   if (flags & HAS_SSE4_1)
3488     av1_compute_cross_correlation = av1_compute_cross_correlation_sse4_1;
3489   if (flags & HAS_AVX2)
3490     av1_compute_cross_correlation = av1_compute_cross_correlation_avx2;
3491   av1_compute_stats = av1_compute_stats_c;
3492   if (flags & HAS_SSE4_1)
3493     av1_compute_stats = av1_compute_stats_sse4_1;
3494   if (flags & HAS_AVX2)
3495     av1_compute_stats = av1_compute_stats_avx2;
3496   av1_compute_stats_highbd = av1_compute_stats_highbd_c;
3497   if (flags & HAS_SSE4_1)
3498     av1_compute_stats_highbd = av1_compute_stats_highbd_sse4_1;
3499   if (flags & HAS_AVX2)
3500     av1_compute_stats_highbd = av1_compute_stats_highbd_avx2;
3501   av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_sse2;
3502   if (flags & HAS_AVX2)
3503     av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_avx2;
3504   av1_convolve_2d_scale = av1_convolve_2d_scale_c;
3505   if (flags & HAS_SSE4_1)
3506     av1_convolve_2d_scale = av1_convolve_2d_scale_sse4_1;
3507   av1_convolve_2d_sr = av1_convolve_2d_sr_sse2;
3508   if (flags & HAS_AVX2)
3509     av1_convolve_2d_sr = av1_convolve_2d_sr_avx2;
3510   av1_convolve_horiz_rs = av1_convolve_horiz_rs_c;
3511   if (flags & HAS_SSE4_1)
3512     av1_convolve_horiz_rs = av1_convolve_horiz_rs_sse4_1;
3513   av1_convolve_x_sr = av1_convolve_x_sr_sse2;
3514   if (flags & HAS_AVX2)
3515     av1_convolve_x_sr = av1_convolve_x_sr_avx2;
3516   av1_convolve_y_sr = av1_convolve_y_sr_sse2;
3517   if (flags & HAS_AVX2)
3518     av1_convolve_y_sr = av1_convolve_y_sr_avx2;
3519   av1_dist_wtd_convolve_2d = av1_dist_wtd_convolve_2d_sse2;
3520   if (flags & HAS_SSSE3)
3521     av1_dist_wtd_convolve_2d = av1_dist_wtd_convolve_2d_ssse3;
3522   if (flags & HAS_AVX2)
3523     av1_dist_wtd_convolve_2d = av1_dist_wtd_convolve_2d_avx2;
3524   av1_dist_wtd_convolve_2d_copy = av1_dist_wtd_convolve_2d_copy_sse2;
3525   if (flags & HAS_AVX2)
3526     av1_dist_wtd_convolve_2d_copy = av1_dist_wtd_convolve_2d_copy_avx2;
3527   av1_dist_wtd_convolve_x = av1_dist_wtd_convolve_x_sse2;
3528   if (flags & HAS_AVX2)
3529     av1_dist_wtd_convolve_x = av1_dist_wtd_convolve_x_avx2;
3530   av1_dist_wtd_convolve_y = av1_dist_wtd_convolve_y_sse2;
3531   if (flags & HAS_AVX2)
3532     av1_dist_wtd_convolve_y = av1_dist_wtd_convolve_y_avx2;
3533   av1_dr_prediction_z1 = av1_dr_prediction_z1_c;
3534   if (flags & HAS_AVX2)
3535     av1_dr_prediction_z1 = av1_dr_prediction_z1_avx2;
3536   av1_dr_prediction_z2 = av1_dr_prediction_z2_c;
3537   if (flags & HAS_AVX2)
3538     av1_dr_prediction_z2 = av1_dr_prediction_z2_avx2;
3539   av1_dr_prediction_z3 = av1_dr_prediction_z3_c;
3540   if (flags & HAS_AVX2)
3541     av1_dr_prediction_z3 = av1_dr_prediction_z3_avx2;
3542   av1_filter_intra_edge = av1_filter_intra_edge_c;
3543   if (flags & HAS_SSE4_1)
3544     av1_filter_intra_edge = av1_filter_intra_edge_sse4_1;
3545   av1_filter_intra_edge_high = av1_filter_intra_edge_high_c;
3546   if (flags & HAS_SSE4_1)
3547     av1_filter_intra_edge_high = av1_filter_intra_edge_high_sse4_1;
3548   av1_filter_intra_predictor = av1_filter_intra_predictor_c;
3549   if (flags & HAS_SSE4_1)
3550     av1_filter_intra_predictor = av1_filter_intra_predictor_sse4_1;
3551   av1_fwd_txfm2d_16x16 = av1_fwd_txfm2d_16x16_c;
3552   if (flags & HAS_SSE4_1)
3553     av1_fwd_txfm2d_16x16 = av1_fwd_txfm2d_16x16_sse4_1;
3554   if (flags & HAS_AVX2)
3555     av1_fwd_txfm2d_16x16 = av1_fwd_txfm2d_16x16_avx2;
3556   av1_fwd_txfm2d_16x32 = av1_fwd_txfm2d_16x32_c;
3557   if (flags & HAS_SSE4_1)
3558     av1_fwd_txfm2d_16x32 = av1_fwd_txfm2d_16x32_sse4_1;
3559   av1_fwd_txfm2d_16x4 = av1_fwd_txfm2d_16x4_c;
3560   if (flags & HAS_SSE4_1)
3561     av1_fwd_txfm2d_16x4 = av1_fwd_txfm2d_16x4_sse4_1;
3562   av1_fwd_txfm2d_16x64 = av1_fwd_txfm2d_16x64_c;
3563   if (flags & HAS_SSE4_1)
3564     av1_fwd_txfm2d_16x64 = av1_fwd_txfm2d_16x64_sse4_1;
3565   av1_fwd_txfm2d_16x8 = av1_fwd_txfm2d_16x8_c;
3566   if (flags & HAS_SSE4_1)
3567     av1_fwd_txfm2d_16x8 = av1_fwd_txfm2d_16x8_sse4_1;
3568   if (flags & HAS_AVX2)
3569     av1_fwd_txfm2d_16x8 = av1_fwd_txfm2d_16x8_avx2;
3570   av1_fwd_txfm2d_32x16 = av1_fwd_txfm2d_32x16_c;
3571   if (flags & HAS_SSE4_1)
3572     av1_fwd_txfm2d_32x16 = av1_fwd_txfm2d_32x16_sse4_1;
3573   av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_c;
3574   if (flags & HAS_SSE4_1)
3575     av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_sse4_1;
3576   if (flags & HAS_AVX2)
3577     av1_fwd_txfm2d_32x32 = av1_fwd_txfm2d_32x32_avx2;
3578   av1_fwd_txfm2d_32x64 = av1_fwd_txfm2d_32x64_c;
3579   if (flags & HAS_SSE4_1)
3580     av1_fwd_txfm2d_32x64 = av1_fwd_txfm2d_32x64_sse4_1;
3581   av1_fwd_txfm2d_32x8 = av1_fwd_txfm2d_32x8_c;
3582   if (flags & HAS_SSE4_1)
3583     av1_fwd_txfm2d_32x8 = av1_fwd_txfm2d_32x8_sse4_1;
3584   av1_fwd_txfm2d_4x16 = av1_fwd_txfm2d_4x16_c;
3585   if (flags & HAS_SSE4_1)
3586     av1_fwd_txfm2d_4x16 = av1_fwd_txfm2d_4x16_sse4_1;
3587   av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_c;
3588   if (flags & HAS_SSE4_1)
3589     av1_fwd_txfm2d_4x4 = av1_fwd_txfm2d_4x4_sse4_1;
3590   av1_fwd_txfm2d_4x8 = av1_fwd_txfm2d_4x8_c;
3591   if (flags & HAS_SSE4_1)
3592     av1_fwd_txfm2d_4x8 = av1_fwd_txfm2d_4x8_sse4_1;
3593   av1_fwd_txfm2d_64x16 = av1_fwd_txfm2d_64x16_c;
3594   if (flags & HAS_SSE4_1)
3595     av1_fwd_txfm2d_64x16 = av1_fwd_txfm2d_64x16_sse4_1;
3596   av1_fwd_txfm2d_64x32 = av1_fwd_txfm2d_64x32_c;
3597   if (flags & HAS_SSE4_1)
3598     av1_fwd_txfm2d_64x32 = av1_fwd_txfm2d_64x32_sse4_1;
3599   av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_c;
3600   if (flags & HAS_SSE4_1)
3601     av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_sse4_1;
3602   if (flags & HAS_AVX2)
3603     av1_fwd_txfm2d_64x64 = av1_fwd_txfm2d_64x64_avx2;
3604   av1_fwd_txfm2d_8x16 = av1_fwd_txfm2d_8x16_c;
3605   if (flags & HAS_SSE4_1)
3606     av1_fwd_txfm2d_8x16 = av1_fwd_txfm2d_8x16_sse4_1;
3607   if (flags & HAS_AVX2)
3608     av1_fwd_txfm2d_8x16 = av1_fwd_txfm2d_8x16_avx2;
3609   av1_fwd_txfm2d_8x32 = av1_fwd_txfm2d_8x32_c;
3610   if (flags & HAS_SSE4_1)
3611     av1_fwd_txfm2d_8x32 = av1_fwd_txfm2d_8x32_sse4_1;
3612   av1_fwd_txfm2d_8x4 = av1_fwd_txfm2d_8x4_c;
3613   if (flags & HAS_SSE4_1)
3614     av1_fwd_txfm2d_8x4 = av1_fwd_txfm2d_8x4_sse4_1;
3615   av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_c;
3616   if (flags & HAS_SSE4_1)
3617     av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_sse4_1;
3618   if (flags & HAS_AVX2)
3619     av1_fwd_txfm2d_8x8 = av1_fwd_txfm2d_8x8_avx2;
3620   av1_get_crc32c_value = av1_get_crc32c_value_c;
3621   if (flags & HAS_SSE4_2)
3622     av1_get_crc32c_value = av1_get_crc32c_value_sse4_2;
3623   av1_get_horver_correlation_full = av1_get_horver_correlation_full_c;
3624   if (flags & HAS_SSE4_1)
3625     av1_get_horver_correlation_full = av1_get_horver_correlation_full_sse4_1;
3626   if (flags & HAS_AVX2)
3627     av1_get_horver_correlation_full = av1_get_horver_correlation_full_avx2;
3628   av1_highbd_block_error = av1_highbd_block_error_sse2;
3629   if (flags & HAS_AVX2)
3630     av1_highbd_block_error = av1_highbd_block_error_avx2;
3631   av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_sse2;
3632   if (flags & HAS_AVX2)
3633     av1_highbd_convolve_2d_copy_sr = av1_highbd_convolve_2d_copy_sr_avx2;
3634   av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_c;
3635   if (flags & HAS_SSE4_1)
3636     av1_highbd_convolve_2d_scale = av1_highbd_convolve_2d_scale_sse4_1;
3637   av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_c;
3638   if (flags & HAS_SSSE3)
3639     av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_ssse3;
3640   if (flags & HAS_AVX2)
3641     av1_highbd_convolve_2d_sr = av1_highbd_convolve_2d_sr_avx2;
3642   av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_c;
3643   if (flags & HAS_SSE4_1)
3644     av1_highbd_convolve_horiz_rs = av1_highbd_convolve_horiz_rs_sse4_1;
3645   av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_c;
3646   if (flags & HAS_SSSE3)
3647     av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_ssse3;
3648   if (flags & HAS_AVX2)
3649     av1_highbd_convolve_x_sr = av1_highbd_convolve_x_sr_avx2;
3650   av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_c;
3651   if (flags & HAS_SSSE3)
3652     av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_ssse3;
3653   if (flags & HAS_AVX2)
3654     av1_highbd_convolve_y_sr = av1_highbd_convolve_y_sr_avx2;
3655   av1_highbd_dist_wtd_convolve_2d = av1_highbd_dist_wtd_convolve_2d_c;
3656   if (flags & HAS_SSE4_1)
3657     av1_highbd_dist_wtd_convolve_2d = av1_highbd_dist_wtd_convolve_2d_sse4_1;
3658   if (flags & HAS_AVX2)
3659     av1_highbd_dist_wtd_convolve_2d = av1_highbd_dist_wtd_convolve_2d_avx2;
3660   av1_highbd_dist_wtd_convolve_2d_copy = av1_highbd_dist_wtd_convolve_2d_copy_c;
3661   if (flags & HAS_SSE4_1)
3662     av1_highbd_dist_wtd_convolve_2d_copy =
3663         av1_highbd_dist_wtd_convolve_2d_copy_sse4_1;
3664   if (flags & HAS_AVX2)
3665     av1_highbd_dist_wtd_convolve_2d_copy =
3666         av1_highbd_dist_wtd_convolve_2d_copy_avx2;
3667   av1_highbd_dist_wtd_convolve_x = av1_highbd_dist_wtd_convolve_x_c;
3668   if (flags & HAS_SSE4_1)
3669     av1_highbd_dist_wtd_convolve_x = av1_highbd_dist_wtd_convolve_x_sse4_1;
3670   if (flags & HAS_AVX2)
3671     av1_highbd_dist_wtd_convolve_x = av1_highbd_dist_wtd_convolve_x_avx2;
3672   av1_highbd_dist_wtd_convolve_y = av1_highbd_dist_wtd_convolve_y_c;
3673   if (flags & HAS_SSE4_1)
3674     av1_highbd_dist_wtd_convolve_y = av1_highbd_dist_wtd_convolve_y_sse4_1;
3675   if (flags & HAS_AVX2)
3676     av1_highbd_dist_wtd_convolve_y = av1_highbd_dist_wtd_convolve_y_avx2;
3677   av1_highbd_dr_prediction_z1 = av1_highbd_dr_prediction_z1_c;
3678   if (flags & HAS_AVX2)
3679     av1_highbd_dr_prediction_z1 = av1_highbd_dr_prediction_z1_avx2;
3680   av1_highbd_dr_prediction_z2 = av1_highbd_dr_prediction_z2_c;
3681   if (flags & HAS_AVX2)
3682     av1_highbd_dr_prediction_z2 = av1_highbd_dr_prediction_z2_avx2;
3683   av1_highbd_dr_prediction_z3 = av1_highbd_dr_prediction_z3_c;
3684   if (flags & HAS_AVX2)
3685     av1_highbd_dr_prediction_z3 = av1_highbd_dr_prediction_z3_avx2;
3686   av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_c;
3687   if (flags & HAS_SSE4_1)
3688     av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_sse4_1;
3689   if (flags & HAS_AVX2)
3690     av1_highbd_inv_txfm_add = av1_highbd_inv_txfm_add_avx2;
3691   av1_highbd_inv_txfm_add_16x4 = av1_highbd_inv_txfm_add_16x4_c;
3692   if (flags & HAS_SSE4_1)
3693     av1_highbd_inv_txfm_add_16x4 = av1_highbd_inv_txfm_add_16x4_sse4_1;
3694   av1_highbd_inv_txfm_add_4x16 = av1_highbd_inv_txfm_add_4x16_c;
3695   if (flags & HAS_SSE4_1)
3696     av1_highbd_inv_txfm_add_4x16 = av1_highbd_inv_txfm_add_4x16_sse4_1;
3697   av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_c;
3698   if (flags & HAS_SSE4_1)
3699     av1_highbd_inv_txfm_add_4x4 = av1_highbd_inv_txfm_add_4x4_sse4_1;
3700   av1_highbd_inv_txfm_add_4x8 = av1_highbd_inv_txfm_add_4x8_c;
3701   if (flags & HAS_SSE4_1)
3702     av1_highbd_inv_txfm_add_4x8 = av1_highbd_inv_txfm_add_4x8_sse4_1;
3703   av1_highbd_inv_txfm_add_8x4 = av1_highbd_inv_txfm_add_8x4_c;
3704   if (flags & HAS_SSE4_1)
3705     av1_highbd_inv_txfm_add_8x4 = av1_highbd_inv_txfm_add_8x4_sse4_1;
3706   av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_c;
3707   if (flags & HAS_SSE4_1)
3708     av1_highbd_inv_txfm_add_8x8 = av1_highbd_inv_txfm_add_8x8_sse4_1;
3709   av1_highbd_pixel_proj_error = av1_highbd_pixel_proj_error_c;
3710   if (flags & HAS_SSE4_1)
3711     av1_highbd_pixel_proj_error = av1_highbd_pixel_proj_error_sse4_1;
3712   if (flags & HAS_AVX2)
3713     av1_highbd_pixel_proj_error = av1_highbd_pixel_proj_error_avx2;
3714   av1_highbd_quantize_fp = av1_highbd_quantize_fp_c;
3715   if (flags & HAS_SSE4_1)
3716     av1_highbd_quantize_fp = av1_highbd_quantize_fp_sse4_1;
3717   if (flags & HAS_AVX2)
3718     av1_highbd_quantize_fp = av1_highbd_quantize_fp_avx2;
3719   av1_highbd_warp_affine = av1_highbd_warp_affine_c;
3720   if (flags & HAS_SSE4_1)
3721     av1_highbd_warp_affine = av1_highbd_warp_affine_sse4_1;
3722   av1_highbd_wiener_convolve_add_src = av1_highbd_wiener_convolve_add_src_c;
3723   if (flags & HAS_SSSE3)
3724     av1_highbd_wiener_convolve_add_src =
3725         av1_highbd_wiener_convolve_add_src_ssse3;
3726   if (flags & HAS_AVX2)
3727     av1_highbd_wiener_convolve_add_src =
3728         av1_highbd_wiener_convolve_add_src_avx2;
3729   av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_c;
3730   if (flags & HAS_SSE4_1)
3731     av1_inv_txfm2d_add_4x4 = av1_inv_txfm2d_add_4x4_sse4_1;
3732   av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_c;
3733   if (flags & HAS_SSE4_1)
3734     av1_inv_txfm2d_add_8x8 = av1_inv_txfm2d_add_8x8_sse4_1;
3735   av1_inv_txfm_add = av1_inv_txfm_add_c;
3736   if (flags & HAS_SSSE3)
3737     av1_inv_txfm_add = av1_inv_txfm_add_ssse3;
3738   if (flags & HAS_AVX2)
3739     av1_inv_txfm_add = av1_inv_txfm_add_avx2;
3740   av1_lowbd_fwd_txfm = av1_lowbd_fwd_txfm_sse2;
3741   if (flags & HAS_SSE4_1)
3742     av1_lowbd_fwd_txfm = av1_lowbd_fwd_txfm_sse4_1;
3743   if (flags & HAS_AVX2)
3744     av1_lowbd_fwd_txfm = av1_lowbd_fwd_txfm_avx2;
3745   av1_lowbd_pixel_proj_error = av1_lowbd_pixel_proj_error_c;
3746   if (flags & HAS_SSE4_1)
3747     av1_lowbd_pixel_proj_error = av1_lowbd_pixel_proj_error_sse4_1;
3748   if (flags & HAS_AVX2)
3749     av1_lowbd_pixel_proj_error = av1_lowbd_pixel_proj_error_avx2;
3750   av1_nn_predict = av1_nn_predict_c;
3751   if (flags & HAS_SSE3)
3752     av1_nn_predict = av1_nn_predict_sse3;
3753   av1_quantize_fp = av1_quantize_fp_sse2;
3754   if (flags & HAS_AVX2)
3755     av1_quantize_fp = av1_quantize_fp_avx2;
3756   av1_quantize_fp_32x32 = av1_quantize_fp_32x32_c;
3757   if (flags & HAS_AVX2)
3758     av1_quantize_fp_32x32 = av1_quantize_fp_32x32_avx2;
3759   av1_quantize_fp_64x64 = av1_quantize_fp_64x64_c;
3760   if (flags & HAS_AVX2)
3761     av1_quantize_fp_64x64 = av1_quantize_fp_64x64_avx2;
3762   av1_quantize_lp = av1_quantize_lp_c;
3763   if (flags & HAS_AVX2)
3764     av1_quantize_lp = av1_quantize_lp_avx2;
3765   av1_round_shift_array = av1_round_shift_array_c;
3766   if (flags & HAS_SSE4_1)
3767     av1_round_shift_array = av1_round_shift_array_sse4_1;
3768   av1_selfguided_restoration = av1_selfguided_restoration_c;
3769   if (flags & HAS_SSE4_1)
3770     av1_selfguided_restoration = av1_selfguided_restoration_sse4_1;
3771   if (flags & HAS_AVX2)
3772     av1_selfguided_restoration = av1_selfguided_restoration_avx2;
3773   av1_txb_init_levels = av1_txb_init_levels_c;
3774   if (flags & HAS_SSE4_1)
3775     av1_txb_init_levels = av1_txb_init_levels_sse4_1;
3776   if (flags & HAS_AVX2)
3777     av1_txb_init_levels = av1_txb_init_levels_avx2;
3778   av1_upsample_intra_edge = av1_upsample_intra_edge_c;
3779   if (flags & HAS_SSE4_1)
3780     av1_upsample_intra_edge = av1_upsample_intra_edge_sse4_1;
3781   av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_c;
3782   if (flags & HAS_SSE4_1)
3783     av1_upsample_intra_edge_high = av1_upsample_intra_edge_high_sse4_1;
3784   av1_warp_affine = av1_warp_affine_c;
3785   if (flags & HAS_SSE4_1)
3786     av1_warp_affine = av1_warp_affine_sse4_1;
3787   if (flags & HAS_AVX2)
3788     av1_warp_affine = av1_warp_affine_avx2;
3789   av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_sse2;
3790   if (flags & HAS_AVX2)
3791     av1_wedge_compute_delta_squares = av1_wedge_compute_delta_squares_avx2;
3792   av1_wedge_sign_from_residuals = av1_wedge_sign_from_residuals_sse2;
3793   if (flags & HAS_AVX2)
3794     av1_wedge_sign_from_residuals = av1_wedge_sign_from_residuals_avx2;
3795   av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_sse2;
3796   if (flags & HAS_AVX2)
3797     av1_wedge_sse_from_residuals = av1_wedge_sse_from_residuals_avx2;
3798   av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_sse2;
3799   if (flags & HAS_AVX2)
3800     av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_avx2;
3801   cdef_copy_rect8_16bit_to_16bit = cdef_copy_rect8_16bit_to_16bit_sse2;
3802   if (flags & HAS_SSSE3)
3803     cdef_copy_rect8_16bit_to_16bit = cdef_copy_rect8_16bit_to_16bit_ssse3;
3804   if (flags & HAS_SSE4_1)
3805     cdef_copy_rect8_16bit_to_16bit = cdef_copy_rect8_16bit_to_16bit_sse4_1;
3806   if (flags & HAS_AVX2)
3807     cdef_copy_rect8_16bit_to_16bit = cdef_copy_rect8_16bit_to_16bit_avx2;
3808   cdef_copy_rect8_8bit_to_16bit = cdef_copy_rect8_8bit_to_16bit_sse2;
3809   if (flags & HAS_SSSE3)
3810     cdef_copy_rect8_8bit_to_16bit = cdef_copy_rect8_8bit_to_16bit_ssse3;
3811   if (flags & HAS_SSE4_1)
3812     cdef_copy_rect8_8bit_to_16bit = cdef_copy_rect8_8bit_to_16bit_sse4_1;
3813   if (flags & HAS_AVX2)
3814     cdef_copy_rect8_8bit_to_16bit = cdef_copy_rect8_8bit_to_16bit_avx2;
3815   cdef_filter_block = cdef_filter_block_sse2;
3816   if (flags & HAS_SSSE3)
3817     cdef_filter_block = cdef_filter_block_ssse3;
3818   if (flags & HAS_SSE4_1)
3819     cdef_filter_block = cdef_filter_block_sse4_1;
3820   if (flags & HAS_AVX2)
3821     cdef_filter_block = cdef_filter_block_avx2;
3822   cdef_find_dir = cdef_find_dir_sse2;
3823   if (flags & HAS_SSSE3)
3824     cdef_find_dir = cdef_find_dir_ssse3;
3825   if (flags & HAS_SSE4_1)
3826     cdef_find_dir = cdef_find_dir_sse4_1;
3827   if (flags & HAS_AVX2)
3828     cdef_find_dir = cdef_find_dir_avx2;
3829   cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_c;
3830   if (flags & HAS_SSSE3)
3831     cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_ssse3;
3832   if (flags & HAS_AVX2)
3833     cfl_get_luma_subsampling_420_hbd = cfl_get_luma_subsampling_420_hbd_avx2;
3834   cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_c;
3835   if (flags & HAS_SSSE3)
3836     cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_ssse3;
3837   if (flags & HAS_AVX2)
3838     cfl_get_luma_subsampling_420_lbd = cfl_get_luma_subsampling_420_lbd_avx2;
3839   cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_c;
3840   if (flags & HAS_SSSE3)
3841     cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_ssse3;
3842   if (flags & HAS_AVX2)
3843     cfl_get_luma_subsampling_422_hbd = cfl_get_luma_subsampling_422_hbd_avx2;
3844   cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_c;
3845   if (flags & HAS_SSSE3)
3846     cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_ssse3;
3847   if (flags & HAS_AVX2)
3848     cfl_get_luma_subsampling_422_lbd = cfl_get_luma_subsampling_422_lbd_avx2;
3849   cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_c;
3850   if (flags & HAS_SSSE3)
3851     cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_ssse3;
3852   if (flags & HAS_AVX2)
3853     cfl_get_luma_subsampling_444_hbd = cfl_get_luma_subsampling_444_hbd_avx2;
3854   cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_c;
3855   if (flags & HAS_SSSE3)
3856     cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_ssse3;
3857   if (flags & HAS_AVX2)
3858     cfl_get_luma_subsampling_444_lbd = cfl_get_luma_subsampling_444_lbd_avx2;
3859   cfl_get_predict_hbd_fn = cfl_get_predict_hbd_fn_c;
3860   if (flags & HAS_SSSE3)
3861     cfl_get_predict_hbd_fn = cfl_get_predict_hbd_fn_ssse3;
3862   if (flags & HAS_AVX2)
3863     cfl_get_predict_hbd_fn = cfl_get_predict_hbd_fn_avx2;
3864   cfl_get_predict_lbd_fn = cfl_get_predict_lbd_fn_c;
3865   if (flags & HAS_SSSE3)
3866     cfl_get_predict_lbd_fn = cfl_get_predict_lbd_fn_ssse3;
3867   if (flags & HAS_AVX2)
3868     cfl_get_predict_lbd_fn = cfl_get_predict_lbd_fn_avx2;
3869   cfl_get_subtract_average_fn = cfl_get_subtract_average_fn_sse2;
3870   if (flags & HAS_AVX2)
3871     cfl_get_subtract_average_fn = cfl_get_subtract_average_fn_avx2;
3872 }
3873 #endif
3874 
3875 #ifdef __cplusplus
3876 }  // extern "C"
3877 #endif
3878 
3879 #endif
3880