1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "src/cpu.h"
29 #include "src/ipred.h"
30 
/*
 * SSSE3 routines. These are the ones installed first by the init function
 * below, so they are listed first here as well.
 */
decl_angular_ipred_fn(dav1d_ipred_dc_ssse3);
decl_angular_ipred_fn(dav1d_ipred_dc_128_ssse3);
decl_angular_ipred_fn(dav1d_ipred_dc_top_ssse3);
decl_angular_ipred_fn(dav1d_ipred_dc_left_ssse3);
decl_angular_ipred_fn(dav1d_ipred_h_ssse3);
decl_angular_ipred_fn(dav1d_ipred_v_ssse3);
decl_angular_ipred_fn(dav1d_ipred_paeth_ssse3);
decl_angular_ipred_fn(dav1d_ipred_smooth_ssse3);
decl_angular_ipred_fn(dav1d_ipred_smooth_v_ssse3);
decl_angular_ipred_fn(dav1d_ipred_smooth_h_ssse3);
decl_angular_ipred_fn(dav1d_ipred_filter_ssse3);

decl_cfl_pred_fn(dav1d_ipred_cfl_ssse3);
decl_cfl_pred_fn(dav1d_ipred_cfl_128_ssse3);
decl_cfl_pred_fn(dav1d_ipred_cfl_top_ssse3);
decl_cfl_pred_fn(dav1d_ipred_cfl_left_ssse3);

decl_cfl_ac_fn(dav1d_ipred_cfl_ac_420_ssse3);
decl_cfl_ac_fn(dav1d_ipred_cfl_ac_422_ssse3);
decl_cfl_ac_fn(dav1d_ipred_cfl_ac_444_ssse3);

decl_pal_pred_fn(dav1d_pal_pred_ssse3);

/*
 * AVX2 routines. Compared with the SSSE3 list this adds the z1/z2/z3
 * predictors and has no 4:4:4 cfl_ac variant.
 */
decl_angular_ipred_fn(dav1d_ipred_dc_avx2);
decl_angular_ipred_fn(dav1d_ipred_dc_128_avx2);
decl_angular_ipred_fn(dav1d_ipred_dc_top_avx2);
decl_angular_ipred_fn(dav1d_ipred_dc_left_avx2);
decl_angular_ipred_fn(dav1d_ipred_h_avx2);
decl_angular_ipred_fn(dav1d_ipred_v_avx2);
decl_angular_ipred_fn(dav1d_ipred_paeth_avx2);
decl_angular_ipred_fn(dav1d_ipred_smooth_avx2);
decl_angular_ipred_fn(dav1d_ipred_smooth_v_avx2);
decl_angular_ipred_fn(dav1d_ipred_smooth_h_avx2);
decl_angular_ipred_fn(dav1d_ipred_z1_avx2);
decl_angular_ipred_fn(dav1d_ipred_z2_avx2);
decl_angular_ipred_fn(dav1d_ipred_z3_avx2);
decl_angular_ipred_fn(dav1d_ipred_filter_avx2);

decl_cfl_pred_fn(dav1d_ipred_cfl_avx2);
decl_cfl_pred_fn(dav1d_ipred_cfl_128_avx2);
decl_cfl_pred_fn(dav1d_ipred_cfl_top_avx2);
decl_cfl_pred_fn(dav1d_ipred_cfl_left_avx2);

decl_cfl_ac_fn(dav1d_ipred_cfl_ac_420_avx2);
decl_cfl_ac_fn(dav1d_ipred_cfl_ac_422_avx2);

decl_pal_pred_fn(dav1d_pal_pred_avx2);
78 
bitfn(dav1d_intra_pred_dsp_init_x86)79 COLD void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
80     const unsigned flags = dav1d_get_cpu_flags();
81 
82     if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
83 
84 #if BITDEPTH == 8
85     c->intra_pred[DC_PRED]       = dav1d_ipred_dc_ssse3;
86     c->intra_pred[DC_128_PRED]   = dav1d_ipred_dc_128_ssse3;
87     c->intra_pred[TOP_DC_PRED]   = dav1d_ipred_dc_top_ssse3;
88     c->intra_pred[LEFT_DC_PRED]  = dav1d_ipred_dc_left_ssse3;
89     c->intra_pred[HOR_PRED]      = dav1d_ipred_h_ssse3;
90     c->intra_pred[VERT_PRED]     = dav1d_ipred_v_ssse3;
91     c->intra_pred[PAETH_PRED]    = dav1d_ipred_paeth_ssse3;
92     c->intra_pred[SMOOTH_PRED]   = dav1d_ipred_smooth_ssse3;
93     c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_ssse3;
94     c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_ssse3;
95     c->intra_pred[FILTER_PRED]   = dav1d_ipred_filter_ssse3;
96 
97     c->cfl_pred[DC_PRED]         = dav1d_ipred_cfl_ssse3;
98     c->cfl_pred[DC_128_PRED]     = dav1d_ipred_cfl_128_ssse3;
99     c->cfl_pred[TOP_DC_PRED]     = dav1d_ipred_cfl_top_ssse3;
100     c->cfl_pred[LEFT_DC_PRED]    = dav1d_ipred_cfl_left_ssse3;
101 
102     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_ipred_cfl_ac_420_ssse3;
103     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_ipred_cfl_ac_422_ssse3;
104     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = dav1d_ipred_cfl_ac_444_ssse3;
105 
106     c->pal_pred                  = dav1d_pal_pred_ssse3;
107 #endif
108 
109     if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
110 
111 #if BITDEPTH == 8 && ARCH_X86_64
112     c->intra_pred[DC_PRED]       = dav1d_ipred_dc_avx2;
113     c->intra_pred[DC_128_PRED]   = dav1d_ipred_dc_128_avx2;
114     c->intra_pred[TOP_DC_PRED]   = dav1d_ipred_dc_top_avx2;
115     c->intra_pred[LEFT_DC_PRED]  = dav1d_ipred_dc_left_avx2;
116     c->intra_pred[HOR_PRED]      = dav1d_ipred_h_avx2;
117     c->intra_pred[VERT_PRED]     = dav1d_ipred_v_avx2;
118     c->intra_pred[PAETH_PRED]    = dav1d_ipred_paeth_avx2;
119     c->intra_pred[SMOOTH_PRED]   = dav1d_ipred_smooth_avx2;
120     c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_avx2;
121     c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_avx2;
122     c->intra_pred[Z1_PRED]       = dav1d_ipred_z1_avx2;
123     c->intra_pred[Z2_PRED]       = dav1d_ipred_z2_avx2;
124     c->intra_pred[Z3_PRED]       = dav1d_ipred_z3_avx2;
125     c->intra_pred[FILTER_PRED]   = dav1d_ipred_filter_avx2;
126 
127     c->cfl_pred[DC_PRED]      = dav1d_ipred_cfl_avx2;
128     c->cfl_pred[DC_128_PRED]  = dav1d_ipred_cfl_128_avx2;
129     c->cfl_pred[TOP_DC_PRED]  = dav1d_ipred_cfl_top_avx2;
130     c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_avx2;
131 
132     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_ipred_cfl_ac_420_avx2;
133     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_ipred_cfl_ac_422_avx2;
134 
135     c->pal_pred = dav1d_pal_pred_avx2;
136 #endif
137 }
138