1 #ifdef ZIMG_X86
2
3 #include "common/cpuinfo.h"
4 #include "common/x86/cpuinfo_x86.h"
5 #include "common/pixel.h"
6 #include "depth_convert_x86.h"
7 #include "f16c_x86.h"
8
9 namespace zimg {
10 namespace depth {
11
12 namespace {
13
select_left_shift_func_sse2(PixelType pixel_in,PixelType pixel_out)14 left_shift_func select_left_shift_func_sse2(PixelType pixel_in, PixelType pixel_out)
15 {
16 if (pixel_in == PixelType::BYTE && pixel_out == PixelType::BYTE)
17 return left_shift_b2b_sse2;
18 else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::WORD)
19 return left_shift_b2w_sse2;
20 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::BYTE)
21 return left_shift_w2b_sse2;
22 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::WORD)
23 return left_shift_w2w_sse2;
24 else
25 return nullptr;
26 }
27
select_left_shift_func_avx2(PixelType pixel_in,PixelType pixel_out)28 left_shift_func select_left_shift_func_avx2(PixelType pixel_in, PixelType pixel_out)
29 {
30 if (pixel_in == PixelType::BYTE && pixel_out == PixelType::BYTE)
31 return left_shift_b2b_avx2;
32 else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::WORD)
33 return left_shift_b2w_avx2;
34 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::BYTE)
35 return left_shift_w2b_avx2;
36 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::WORD)
37 return left_shift_w2w_avx2;
38 else
39 return nullptr;
40 }
41
42 #ifdef ZIMG_X86_AVX512
select_left_shift_func_avx512(PixelType pixel_in,PixelType pixel_out)43 left_shift_func select_left_shift_func_avx512(PixelType pixel_in, PixelType pixel_out)
44 {
45 if (pixel_in == PixelType::BYTE && pixel_out == PixelType::BYTE)
46 return left_shift_b2b_avx512;
47 else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::WORD)
48 return left_shift_b2w_avx512;
49 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::BYTE)
50 return left_shift_w2b_avx512;
51 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::WORD)
52 return left_shift_w2w_avx512;
53 else
54 return nullptr;
55 }
56 #endif
57
select_depth_convert_func_sse2(PixelType pixel_in,PixelType pixel_out)58 depth_convert_func select_depth_convert_func_sse2(PixelType pixel_in, PixelType pixel_out)
59 {
60 if (pixel_out == PixelType::HALF)
61 pixel_out = PixelType::FLOAT;
62
63 if (pixel_in == PixelType::BYTE && pixel_out == PixelType::FLOAT)
64 return depth_convert_b2f_sse2;
65 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::FLOAT)
66 return depth_convert_w2f_sse2;
67 else
68 return nullptr;
69 }
70
select_depth_convert_func_avx2(PixelType pixel_in,PixelType pixel_out)71 depth_convert_func select_depth_convert_func_avx2(PixelType pixel_in, PixelType pixel_out)
72 {
73 if (pixel_in == PixelType::BYTE && pixel_out == PixelType::HALF)
74 return depth_convert_b2h_avx2;
75 else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::FLOAT)
76 return depth_convert_b2f_avx2;
77 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::HALF)
78 return depth_convert_w2h_avx2;
79 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::FLOAT)
80 return depth_convert_w2f_avx2;
81 else
82 return nullptr;
83 }
84
85 #ifdef ZIMG_X86_AVX512
select_depth_convert_func_avx512(PixelType pixel_in,PixelType pixel_out)86 depth_convert_func select_depth_convert_func_avx512(PixelType pixel_in, PixelType pixel_out)
87 {
88 if (pixel_in == PixelType::BYTE && pixel_out == PixelType::HALF)
89 return depth_convert_b2h_avx512;
90 else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::FLOAT)
91 return depth_convert_b2f_avx512;
92 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::HALF)
93 return depth_convert_w2h_avx512;
94 else if (pixel_in == PixelType::WORD && pixel_out == PixelType::FLOAT)
95 return depth_convert_w2f_avx512;
96 else
97 return nullptr;
98 }
99 #endif // ZIMG_X86_AVX512
100
101 } // namespace
102
103
select_left_shift_func_x86(PixelType pixel_in,PixelType pixel_out,CPUClass cpu)104 left_shift_func select_left_shift_func_x86(PixelType pixel_in, PixelType pixel_out, CPUClass cpu)
105 {
106 X86Capabilities caps = query_x86_capabilities();
107 left_shift_func func = nullptr;
108
109 if (cpu_is_autodetect(cpu)) {
110 #ifdef ZIMG_X86_AVX512
111 if (!func && cpu == CPUClass::AUTO_64B && caps.avx512f && caps.avx512bw && caps.avx512vl)
112 func = select_left_shift_func_avx512(pixel_in, pixel_out);
113 #endif
114 if (!func && caps.avx2)
115 func = select_left_shift_func_avx2(pixel_in, pixel_out);
116 if (!func && caps.sse2)
117 func = select_left_shift_func_sse2(pixel_in, pixel_out);
118 } else {
119 #ifdef ZIMG_X86_AVX512
120 if (!func && cpu >= CPUClass::X86_AVX512)
121 func = select_left_shift_func_avx512(pixel_in, pixel_out);
122 #endif
123 if (!func && cpu >= CPUClass::X86_AVX2)
124 func = select_left_shift_func_avx2(pixel_in, pixel_out);
125 if (!func && cpu >= CPUClass::X86_SSE2)
126 func = select_left_shift_func_sse2(pixel_in, pixel_out);
127 }
128
129 return func;
130 }
131
select_depth_convert_func_x86(const PixelFormat & format_in,const PixelFormat & format_out,CPUClass cpu)132 depth_convert_func select_depth_convert_func_x86(const PixelFormat &format_in, const PixelFormat &format_out, CPUClass cpu)
133 {
134 X86Capabilities caps = query_x86_capabilities();
135 depth_convert_func func = nullptr;
136
137 if (cpu_is_autodetect(cpu)) {
138 #ifdef ZIMG_X86_AVX512
139 if (!func && cpu == CPUClass::AUTO_64B && caps.avx512f && caps.avx512bw && caps.avx512vl)
140 func = select_depth_convert_func_avx512(format_in.type, format_out.type);
141 #endif
142 if (!func && caps.avx2 && caps.fma)
143 func = select_depth_convert_func_avx2(format_in.type, format_out.type);
144 if (!func && caps.sse2)
145 func = select_depth_convert_func_sse2(format_in.type, format_out.type);
146 } else {
147 #ifdef ZIMG_X86_AVX512
148 if (!func && cpu >= CPUClass::X86_AVX512)
149 func = select_depth_convert_func_avx512(format_in.type, format_out.type);
150 #endif
151 if (!func && cpu >= CPUClass::X86_AVX2)
152 func = select_depth_convert_func_avx2(format_in.type, format_out.type);
153 if (!func && cpu >= CPUClass::X86_SSE2)
154 func = select_depth_convert_func_sse2(format_in.type, format_out.type);
155 }
156
157 return func;
158 }
159
select_depth_f16c_func_x86(bool to_half,CPUClass cpu)160 depth_f16c_func select_depth_f16c_func_x86(bool to_half, CPUClass cpu)
161 {
162 X86Capabilities caps = query_x86_capabilities();
163 depth_f16c_func func = nullptr;
164
165 if (cpu_is_autodetect(cpu)) {
166 if (!func && caps.avx && caps.f16c)
167 func = to_half ? f16c_float_to_half_ivb : f16c_half_to_float_ivb;
168 if (!func && caps.sse2)
169 func = to_half ? f16c_float_to_half_sse2 : f16c_half_to_float_sse2;
170 } else {
171 if (!func && cpu >= CPUClass::X86_F16C)
172 func = to_half ? f16c_float_to_half_ivb : f16c_half_to_float_ivb;
173 if (!func && cpu >= CPUClass::X86_SSE2)
174 func = to_half ? f16c_float_to_half_sse2 : f16c_half_to_float_sse2;
175 }
176
177 return func;
178 }
179
needs_depth_f16c_func_x86(const PixelFormat & format_in,const PixelFormat & format_out,CPUClass cpu)180 bool needs_depth_f16c_func_x86(const PixelFormat &format_in, const PixelFormat &format_out, CPUClass cpu)
181 {
182 X86Capabilities caps = query_x86_capabilities();
183 bool value = format_in.type == PixelType::HALF || format_out.type == PixelType::HALF;
184
185 if ((cpu_is_autodetect(cpu) && caps.avx2) || cpu >= CPUClass::X86_AVX2)
186 value = value && pixel_is_float(format_in.type) && pixel_is_float(format_out.type);
187
188 return value;
189 }
190
191 } // namespace depth
192 } // namespace zimg
193
194 #endif // ZIMG_X86
195