1 /*
2 * Copyright © 2018, VideoLAN and dav1d authors
3 * Copyright © 2018, Two Orioles, LLC
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this
10 * list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "src/cpu.h"
29 #include "src/ipred.h"
30
/*
 * Declarations of the *_avx2 entry points referenced by the init function
 * in this file. The decl_* macros and the function bodies are not defined
 * in the visible part of this file.
 */

/* DC-family intra predictors. */
decl_angular_ipred_fn(dav1d_ipred_dc_avx2);
decl_angular_ipred_fn(dav1d_ipred_dc_128_avx2);
decl_angular_ipred_fn(dav1d_ipred_dc_top_avx2);
decl_angular_ipred_fn(dav1d_ipred_dc_left_avx2);

/* Horizontal, vertical and the three directional (z1/z2/z3) predictors. */
decl_angular_ipred_fn(dav1d_ipred_h_avx2);
decl_angular_ipred_fn(dav1d_ipred_v_avx2);
decl_angular_ipred_fn(dav1d_ipred_z1_avx2);
decl_angular_ipred_fn(dav1d_ipred_z2_avx2);
decl_angular_ipred_fn(dav1d_ipred_z3_avx2);

/* Paeth, smooth and filter predictors. */
decl_angular_ipred_fn(dav1d_ipred_paeth_avx2);
decl_angular_ipred_fn(dav1d_ipred_smooth_avx2);
decl_angular_ipred_fn(dav1d_ipred_smooth_v_avx2);
decl_angular_ipred_fn(dav1d_ipred_smooth_h_avx2);
decl_angular_ipred_fn(dav1d_ipred_filter_avx2);

/* CfL predictors, one per DC flavour. */
decl_cfl_pred_fn(dav1d_ipred_cfl_avx2);
decl_cfl_pred_fn(dav1d_ipred_cfl_128_avx2);
decl_cfl_pred_fn(dav1d_ipred_cfl_top_avx2);
decl_cfl_pred_fn(dav1d_ipred_cfl_left_avx2);

/* CfL AC helpers; only 420 and 422 variants are declared for AVX2 here. */
decl_cfl_ac_fn(dav1d_ipred_cfl_ac_420_avx2);
decl_cfl_ac_fn(dav1d_ipred_cfl_ac_422_avx2);

/* Palette predictor. */
decl_pal_pred_fn(dav1d_pal_pred_avx2);
55
/*
 * Declarations of the *_ssse3 entry points referenced by the init function
 * in this file. The decl_* macros and the function bodies are not defined
 * in the visible part of this file.
 */

/* DC-family intra predictors. */
decl_angular_ipred_fn(dav1d_ipred_dc_ssse3);
decl_angular_ipred_fn(dav1d_ipred_dc_128_ssse3);
decl_angular_ipred_fn(dav1d_ipred_dc_top_ssse3);
decl_angular_ipred_fn(dav1d_ipred_dc_left_ssse3);

/* Horizontal and vertical predictors (no z1/z2/z3 are declared for SSSE3). */
decl_angular_ipred_fn(dav1d_ipred_h_ssse3);
decl_angular_ipred_fn(dav1d_ipred_v_ssse3);

/* Paeth, smooth and filter predictors. */
decl_angular_ipred_fn(dav1d_ipred_paeth_ssse3);
decl_angular_ipred_fn(dav1d_ipred_smooth_ssse3);
decl_angular_ipred_fn(dav1d_ipred_smooth_v_ssse3);
decl_angular_ipred_fn(dav1d_ipred_smooth_h_ssse3);
decl_angular_ipred_fn(dav1d_ipred_filter_ssse3);

/* CfL predictors, one per DC flavour. */
decl_cfl_pred_fn(dav1d_ipred_cfl_ssse3);
decl_cfl_pred_fn(dav1d_ipred_cfl_128_ssse3);
decl_cfl_pred_fn(dav1d_ipred_cfl_top_ssse3);
decl_cfl_pred_fn(dav1d_ipred_cfl_left_ssse3);

/* CfL AC helpers for the 420, 422 and 444 layouts. */
decl_cfl_ac_fn(dav1d_ipred_cfl_ac_420_ssse3);
decl_cfl_ac_fn(dav1d_ipred_cfl_ac_422_ssse3);
decl_cfl_ac_fn(dav1d_ipred_cfl_ac_444_ssse3);

/* Palette predictor. */
decl_pal_pred_fn(dav1d_pal_pred_ssse3);
78
bitfn(dav1d_intra_pred_dsp_init_x86)79 COLD void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
80 const unsigned flags = dav1d_get_cpu_flags();
81
82 if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
83
84 #if BITDEPTH == 8
85 c->intra_pred[DC_PRED] = dav1d_ipred_dc_ssse3;
86 c->intra_pred[DC_128_PRED] = dav1d_ipred_dc_128_ssse3;
87 c->intra_pred[TOP_DC_PRED] = dav1d_ipred_dc_top_ssse3;
88 c->intra_pred[LEFT_DC_PRED] = dav1d_ipred_dc_left_ssse3;
89 c->intra_pred[HOR_PRED] = dav1d_ipred_h_ssse3;
90 c->intra_pred[VERT_PRED] = dav1d_ipred_v_ssse3;
91 c->intra_pred[PAETH_PRED] = dav1d_ipred_paeth_ssse3;
92 c->intra_pred[SMOOTH_PRED] = dav1d_ipred_smooth_ssse3;
93 c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_ssse3;
94 c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_ssse3;
95 c->intra_pred[FILTER_PRED] = dav1d_ipred_filter_ssse3;
96
97 c->cfl_pred[DC_PRED] = dav1d_ipred_cfl_ssse3;
98 c->cfl_pred[DC_128_PRED] = dav1d_ipred_cfl_128_ssse3;
99 c->cfl_pred[TOP_DC_PRED] = dav1d_ipred_cfl_top_ssse3;
100 c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_ssse3;
101
102 c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_ipred_cfl_ac_420_ssse3;
103 c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_ipred_cfl_ac_422_ssse3;
104 c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = dav1d_ipred_cfl_ac_444_ssse3;
105
106 c->pal_pred = dav1d_pal_pred_ssse3;
107 #endif
108
109 if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
110
111 #if BITDEPTH == 8 && ARCH_X86_64
112 c->intra_pred[DC_PRED] = dav1d_ipred_dc_avx2;
113 c->intra_pred[DC_128_PRED] = dav1d_ipred_dc_128_avx2;
114 c->intra_pred[TOP_DC_PRED] = dav1d_ipred_dc_top_avx2;
115 c->intra_pred[LEFT_DC_PRED] = dav1d_ipred_dc_left_avx2;
116 c->intra_pred[HOR_PRED] = dav1d_ipred_h_avx2;
117 c->intra_pred[VERT_PRED] = dav1d_ipred_v_avx2;
118 c->intra_pred[PAETH_PRED] = dav1d_ipred_paeth_avx2;
119 c->intra_pred[SMOOTH_PRED] = dav1d_ipred_smooth_avx2;
120 c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_avx2;
121 c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_avx2;
122 c->intra_pred[Z1_PRED] = dav1d_ipred_z1_avx2;
123 c->intra_pred[Z2_PRED] = dav1d_ipred_z2_avx2;
124 c->intra_pred[Z3_PRED] = dav1d_ipred_z3_avx2;
125 c->intra_pred[FILTER_PRED] = dav1d_ipred_filter_avx2;
126
127 c->cfl_pred[DC_PRED] = dav1d_ipred_cfl_avx2;
128 c->cfl_pred[DC_128_PRED] = dav1d_ipred_cfl_128_avx2;
129 c->cfl_pred[TOP_DC_PRED] = dav1d_ipred_cfl_top_avx2;
130 c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_avx2;
131
132 c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = dav1d_ipred_cfl_ac_420_avx2;
133 c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = dav1d_ipred_cfl_ac_422_avx2;
134
135 c->pal_pred = dav1d_pal_pred_avx2;
136 #endif
137 }
138