1 #ifdef ZIMG_X86
2 
3 #include "common/cpuinfo.h"
4 #include "common/x86/cpuinfo_x86.h"
5 #include "common/pixel.h"
6 #include "depth_convert_x86.h"
7 #include "f16c_x86.h"
8 
9 namespace zimg {
10 namespace depth {
11 
12 namespace {
13 
select_left_shift_func_sse2(PixelType pixel_in,PixelType pixel_out)14 left_shift_func select_left_shift_func_sse2(PixelType pixel_in, PixelType pixel_out)
15 {
16 	if (pixel_in == PixelType::BYTE && pixel_out == PixelType::BYTE)
17 		return left_shift_b2b_sse2;
18 	else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::WORD)
19 		return left_shift_b2w_sse2;
20 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::BYTE)
21 		return left_shift_w2b_sse2;
22 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::WORD)
23 		return left_shift_w2w_sse2;
24 	else
25 		return nullptr;
26 }
27 
select_left_shift_func_avx2(PixelType pixel_in,PixelType pixel_out)28 left_shift_func select_left_shift_func_avx2(PixelType pixel_in, PixelType pixel_out)
29 {
30 	if (pixel_in == PixelType::BYTE && pixel_out == PixelType::BYTE)
31 		return left_shift_b2b_avx2;
32 	else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::WORD)
33 		return left_shift_b2w_avx2;
34 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::BYTE)
35 		return left_shift_w2b_avx2;
36 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::WORD)
37 		return left_shift_w2w_avx2;
38 	else
39 		return nullptr;
40 }
41 
42 #ifdef ZIMG_X86_AVX512
select_left_shift_func_avx512(PixelType pixel_in,PixelType pixel_out)43 left_shift_func select_left_shift_func_avx512(PixelType pixel_in, PixelType pixel_out)
44 {
45 	if (pixel_in == PixelType::BYTE && pixel_out == PixelType::BYTE)
46 		return left_shift_b2b_avx512;
47 	else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::WORD)
48 		return left_shift_b2w_avx512;
49 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::BYTE)
50 		return left_shift_w2b_avx512;
51 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::WORD)
52 		return left_shift_w2w_avx512;
53 	else
54 		return nullptr;
55 }
56 #endif
57 
select_depth_convert_func_sse2(PixelType pixel_in,PixelType pixel_out)58 depth_convert_func select_depth_convert_func_sse2(PixelType pixel_in, PixelType pixel_out)
59 {
60 	if (pixel_out == PixelType::HALF)
61 		pixel_out = PixelType::FLOAT;
62 
63 	if (pixel_in == PixelType::BYTE && pixel_out == PixelType::FLOAT)
64 		return depth_convert_b2f_sse2;
65 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::FLOAT)
66 		return depth_convert_w2f_sse2;
67 	else
68 		return nullptr;
69 }
70 
select_depth_convert_func_avx2(PixelType pixel_in,PixelType pixel_out)71 depth_convert_func select_depth_convert_func_avx2(PixelType pixel_in, PixelType pixel_out)
72 {
73 	if (pixel_in == PixelType::BYTE && pixel_out == PixelType::HALF)
74 		return depth_convert_b2h_avx2;
75 	else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::FLOAT)
76 		return depth_convert_b2f_avx2;
77 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::HALF)
78 		return depth_convert_w2h_avx2;
79 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::FLOAT)
80 		return depth_convert_w2f_avx2;
81 	else
82 		return nullptr;
83 }
84 
85 #ifdef ZIMG_X86_AVX512
select_depth_convert_func_avx512(PixelType pixel_in,PixelType pixel_out)86 depth_convert_func select_depth_convert_func_avx512(PixelType pixel_in, PixelType pixel_out)
87 {
88 	if (pixel_in == PixelType::BYTE && pixel_out == PixelType::HALF)
89 		return depth_convert_b2h_avx512;
90 	else if (pixel_in == PixelType::BYTE && pixel_out == PixelType::FLOAT)
91 		return depth_convert_b2f_avx512;
92 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::HALF)
93 		return depth_convert_w2h_avx512;
94 	else if (pixel_in == PixelType::WORD && pixel_out == PixelType::FLOAT)
95 		return depth_convert_w2f_avx512;
96 	else
97 		return nullptr;
98 }
99 #endif // ZIMG_X86_AVX512
100 
101 } // namespace
102 
103 
select_left_shift_func_x86(PixelType pixel_in,PixelType pixel_out,CPUClass cpu)104 left_shift_func select_left_shift_func_x86(PixelType pixel_in, PixelType pixel_out, CPUClass cpu)
105 {
106 	X86Capabilities caps = query_x86_capabilities();
107 	left_shift_func func = nullptr;
108 
109 	if (cpu_is_autodetect(cpu)) {
110 #ifdef ZIMG_X86_AVX512
111 		if (!func && cpu == CPUClass::AUTO_64B && caps.avx512f && caps.avx512bw && caps.avx512vl)
112 			func = select_left_shift_func_avx512(pixel_in, pixel_out);
113 #endif
114 		if (!func && caps.avx2)
115 			func = select_left_shift_func_avx2(pixel_in, pixel_out);
116 		if (!func && caps.sse2)
117 			func = select_left_shift_func_sse2(pixel_in, pixel_out);
118 	} else {
119 #ifdef ZIMG_X86_AVX512
120 		if (!func && cpu >= CPUClass::X86_AVX512)
121 			func = select_left_shift_func_avx512(pixel_in, pixel_out);
122 #endif
123 		if (!func && cpu >= CPUClass::X86_AVX2)
124 			func = select_left_shift_func_avx2(pixel_in, pixel_out);
125 		if (!func && cpu >= CPUClass::X86_SSE2)
126 			func = select_left_shift_func_sse2(pixel_in, pixel_out);
127 	}
128 
129 	return func;
130 }
131 
select_depth_convert_func_x86(const PixelFormat & format_in,const PixelFormat & format_out,CPUClass cpu)132 depth_convert_func select_depth_convert_func_x86(const PixelFormat &format_in, const PixelFormat &format_out, CPUClass cpu)
133 {
134 	X86Capabilities caps = query_x86_capabilities();
135 	depth_convert_func func = nullptr;
136 
137 	if (cpu_is_autodetect(cpu)) {
138 #ifdef ZIMG_X86_AVX512
139 		if (!func && cpu == CPUClass::AUTO_64B && caps.avx512f && caps.avx512bw && caps.avx512vl)
140 			func = select_depth_convert_func_avx512(format_in.type, format_out.type);
141 #endif
142 		if (!func && caps.avx2 && caps.fma)
143 			func = select_depth_convert_func_avx2(format_in.type, format_out.type);
144 		if (!func && caps.sse2)
145 			func = select_depth_convert_func_sse2(format_in.type, format_out.type);
146 	} else {
147 #ifdef ZIMG_X86_AVX512
148 		if (!func && cpu >= CPUClass::X86_AVX512)
149 			func = select_depth_convert_func_avx512(format_in.type, format_out.type);
150 #endif
151 		if (!func && cpu >= CPUClass::X86_AVX2)
152 			func = select_depth_convert_func_avx2(format_in.type, format_out.type);
153 		if (!func && cpu >= CPUClass::X86_SSE2)
154 			func = select_depth_convert_func_sse2(format_in.type, format_out.type);
155 	}
156 
157 	return func;
158 }
159 
select_depth_f16c_func_x86(bool to_half,CPUClass cpu)160 depth_f16c_func select_depth_f16c_func_x86(bool to_half, CPUClass cpu)
161 {
162 	X86Capabilities caps = query_x86_capabilities();
163 	depth_f16c_func func = nullptr;
164 
165 	if (cpu_is_autodetect(cpu)) {
166 		if (!func && caps.avx && caps.f16c)
167 			func = to_half ? f16c_float_to_half_ivb : f16c_half_to_float_ivb;
168 		if (!func && caps.sse2)
169 			func = to_half ? f16c_float_to_half_sse2 : f16c_half_to_float_sse2;
170 	} else {
171 		if (!func && cpu >= CPUClass::X86_F16C)
172 			func = to_half ? f16c_float_to_half_ivb : f16c_half_to_float_ivb;
173 		if (!func && cpu >= CPUClass::X86_SSE2)
174 			func = to_half ? f16c_float_to_half_sse2 : f16c_half_to_float_sse2;
175 	}
176 
177 	return func;
178 }
179 
needs_depth_f16c_func_x86(const PixelFormat & format_in,const PixelFormat & format_out,CPUClass cpu)180 bool needs_depth_f16c_func_x86(const PixelFormat &format_in, const PixelFormat &format_out, CPUClass cpu)
181 {
182 	X86Capabilities caps = query_x86_capabilities();
183 	bool value = format_in.type == PixelType::HALF || format_out.type == PixelType::HALF;
184 
185 	if ((cpu_is_autodetect(cpu) && caps.avx2) || cpu >= CPUClass::X86_AVX2)
186 		value = value && pixel_is_float(format_in.type) && pixel_is_float(format_out.type);
187 
188 	return value;
189 }
190 
191 } // namespace depth
192 } // namespace zimg
193 
194 #endif // ZIMG_X86
195