/**
 * Copyright 2014-2016 Andreas Schäfer
 *
 * Distributed under the Boost Software License, Version 1.0. (See accompanying
 * file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
 */
7
8 #ifndef FLAT_ARRAY_SHORT_VEC_HPP
9 #define FLAT_ARRAY_SHORT_VEC_HPP
10
11 namespace LibFlatArray {
12
/**
 * Forward declaration of the short_vec class template.
 *
 * CARGO is the element type and ARITY the compile-time element count;
 * no primary definition is given here, only the name is introduced so
 * that the free functions below can refer to it.
 */
template<typename CARGO, int ARITY>
class short_vec;
15
16 template<typename CARGO, int ARITY>
operator +(CARGO a,const short_vec<CARGO,ARITY> & b)17 inline short_vec<CARGO, ARITY> operator+(CARGO a, const short_vec<CARGO, ARITY>& b)
18 {
19 return short_vec<CARGO, ARITY>(a) + b;
20 }
21
22 template<typename CARGO, int ARITY>
operator -(CARGO a,const short_vec<CARGO,ARITY> & b)23 inline short_vec<CARGO, ARITY> operator-(CARGO a, const short_vec<CARGO, ARITY>& b)
24 {
25 return short_vec<CARGO, ARITY>(a) - b;
26 }
27
28 template<typename CARGO, int ARITY>
operator *(CARGO a,const short_vec<CARGO,ARITY> & b)29 inline short_vec<CARGO, ARITY> operator*(CARGO a, const short_vec<CARGO, ARITY>& b)
30 {
31 return short_vec<CARGO, ARITY>(a) * b;
32 }
33
34 template<typename CARGO, int ARITY>
operator /(CARGO a,const short_vec<CARGO,ARITY> & b)35 inline short_vec<CARGO, ARITY> operator/(CARGO a, const short_vec<CARGO, ARITY>& b)
36 {
37 return short_vec<CARGO, ARITY>(a) / b;
38 }
39
40 template<typename CARGO, int ARITY >
any(const short_vec<CARGO,ARITY> & vec)41 inline bool any(const short_vec<CARGO, ARITY>& vec)
42 {
43 return vec.any();
44 }
45
/**
 * Returns true iff at least one bit of the given unsigned mask is set.
 */
inline bool any(unsigned mask)
{
    return mask != 0;
}
50
/**
 * Returns true iff at least one bit of the given unsigned short mask
 * is set.
 */
inline bool any(unsigned short mask)
{
    return mask != 0;
}
55
/**
 * Returns true iff at least one bit of the given unsigned char mask
 * is set.
 */
inline bool any(unsigned char mask)
{
    return mask != 0;
}
60
61 template<typename CARGO, int ARITY >
get(const short_vec<CARGO,ARITY> & vec,const int i)62 inline CARGO get(const short_vec<CARGO, ARITY>& vec, const int i)
63 {
64 return vec.get(i);
65 }
66
/**
 * Returns bit i of the unsigned mask (bit 0 is the least significant).
 *
 * i is used directly as a shift count, so it must be non-negative and
 * smaller than the bit width of unsigned; other values are undefined
 * behavior in C++.
 */
inline bool get(unsigned mask, const int i)
{
    return ((mask >> i) & 1u) != 0;
}
71
/**
 * Returns bit i of the unsigned short mask (bit 0 is the least
 * significant).
 *
 * i is used directly as a shift count on the integer-promoted mask, so
 * it must be non-negative and smaller than the bit width of int.
 */
inline bool get(unsigned short mask, const int i)
{
    return ((mask >> i) & 1) != 0;
}
76
/**
 * Returns bit i of the unsigned char mask (bit 0 is the least
 * significant).
 *
 * i is used directly as a shift count on the integer-promoted mask, so
 * it must be non-negative and smaller than the bit width of int.
 */
inline bool get(unsigned char mask, const int i)
{
    return ((mask >> i) & 1) != 0;
}
81
/**
 * Holder for a set of empty tag classes, one per instruction-set name
 * (scalar, avx, avx2, avx512f, cuda, qpx, sse, sse2, sse4_1, mic,
 * neon). The tags carry neither data nor behavior; each is merely a
 * distinct type.
 * NOTE(review): presumably these tags are used elsewhere to select a
 * short_vec implementation -- confirm against the detail headers.
 */
class short_vec_strategy
{
public:
    class scalar {};

    // Intel/AMD ISA names:
    class avx {};
    class avx2 {};
    class avx512f {};

    class cuda {};

    class qpx {};

    class sse {};
    class sse2 {};
    class sse4_1 {};

    class mic {};

    class neon {};
};
118
119 }
120
// Numeric IDs for the vector ISAs that LIBFLATARRAY_WIDEST_VECTOR_ISA
// may be set to. The values are distinct so that the IDs can be
// compared in preprocessor conditionals.
#define LIBFLATARRAY_SCALAR   10
#define LIBFLATARRAY_QPX      11
#define LIBFLATARRAY_ARM_NEON 12
#define LIBFLATARRAY_MIC      13
#define LIBFLATARRAY_AVX512F  14
#define LIBFLATARRAY_AVX      15
#define LIBFLATARRAY_AVX2     16
#define LIBFLATARRAY_SSE      17
#define LIBFLATARRAY_SSE2     18
#define LIBFLATARRAY_SSE4_1   19

// Select LIBFLATARRAY_WIDEST_VECTOR_ISA from the compiler's predefined
// target macros.
#ifdef __CUDA_ARCH__
// Use only scalar short_vec implementations on CUDA devices:
#  define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_SCALAR
#else
// For IBM Blue Gene/Q's QPX, which is mutually exclusive to
// Intel/AMD's AVX/SSE or ARM's NEON ISAs:
#  ifdef __VECTOR4DOUBLE__
#    define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_QPX
#  endif

// Ditto for ARM NEON:
#  ifdef __ARM_NEON__
#    define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_ARM_NEON
#  endif

// Only the case of the IBM PC is complicated. No thanks to you,
// history! The chain below is skipped if the macro is already defined
// at this point (by one of the checks above or before this header).
// Its checks are ordered so that the first matching ISA wins.
#  ifndef LIBFLATARRAY_WIDEST_VECTOR_ISA
#    if defined(__MIC__)
#      define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_MIC
#    elif defined(__AVX512F__)
#      define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_AVX512F
#    elif defined(__AVX2__)
#      define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_AVX2
#    elif defined(__AVX__)
#      define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_AVX
#    elif defined(__SSE4_1__)
#      define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_SSE4_1
#    elif defined(__SSE2__)
#      define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_SSE2
#    elif defined(__SSE__)
#      define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_SSE
#    else
// Fallback: the scalar implementation always works and still yields
// code that's easy to vectorize for the compiler:
#      define LIBFLATARRAY_WIDEST_VECTOR_ISA LIBFLATARRAY_SCALAR
#    endif
#  endif

#endif
184
185 #include <sstream>
186
187 #include <libflatarray/detail/short_vec_avx512_double_8.hpp>
188 #include <libflatarray/detail/short_vec_avx512_double_16.hpp>
189 #include <libflatarray/detail/short_vec_avx512_double_32.hpp>
190
191 #include <libflatarray/detail/short_vec_avx512_float_16.hpp>
192 #include <libflatarray/detail/short_vec_avx512_float_32.hpp>
193
194 #include <libflatarray/detail/short_vec_avx_double_4.hpp>
195 #include <libflatarray/detail/short_vec_avx_double_8.hpp>
196 #include <libflatarray/detail/short_vec_avx_double_16.hpp>
197 #include <libflatarray/detail/short_vec_avx_double_32.hpp>
198
199 #include <libflatarray/detail/short_vec_avx_float_8.hpp>
200 #include <libflatarray/detail/short_vec_avx_float_16.hpp>
201 #include <libflatarray/detail/short_vec_avx_float_32.hpp>
202
203 #include <libflatarray/detail/short_vec_scalar_double_1.hpp>
204 #include <libflatarray/detail/short_vec_scalar_double_2.hpp>
205 #include <libflatarray/detail/short_vec_scalar_double_4.hpp>
206 #include <libflatarray/detail/short_vec_scalar_double_8.hpp>
207 #include <libflatarray/detail/short_vec_scalar_double_16.hpp>
208 #include <libflatarray/detail/short_vec_scalar_double_32.hpp>
209
210 #include <libflatarray/detail/short_vec_scalar_float_1.hpp>
211 #include <libflatarray/detail/short_vec_scalar_float_2.hpp>
212 #include <libflatarray/detail/short_vec_scalar_float_4.hpp>
213 #include <libflatarray/detail/short_vec_scalar_float_8.hpp>
214 #include <libflatarray/detail/short_vec_scalar_float_16.hpp>
215 #include <libflatarray/detail/short_vec_scalar_float_32.hpp>
216
217 #include <libflatarray/detail/short_vec_scalar_int_1.hpp>
218 #include <libflatarray/detail/short_vec_scalar_int_2.hpp>
219 #include <libflatarray/detail/short_vec_scalar_int_4.hpp>
220 #include <libflatarray/detail/short_vec_scalar_int_8.hpp>
221 #include <libflatarray/detail/short_vec_scalar_int_16.hpp>
222 #include <libflatarray/detail/short_vec_scalar_int_32.hpp>
223
224 #include <libflatarray/detail/short_vec_sse_int_4.hpp>
225 #include <libflatarray/detail/short_vec_sse_int_8.hpp>
226 #include <libflatarray/detail/short_vec_sse_int_16.hpp>
227 #include <libflatarray/detail/short_vec_sse_int_32.hpp>
228
229 #include <libflatarray/detail/short_vec_avx_int_8.hpp>
230 #include <libflatarray/detail/short_vec_avx_int_16.hpp>
231 #include <libflatarray/detail/short_vec_avx_int_32.hpp>
232
233 #include <libflatarray/detail/short_vec_avx512_int_16.hpp>
234 #include <libflatarray/detail/short_vec_avx512_int_32.hpp>
235
236 #include <libflatarray/detail/short_vec_sse_double_2.hpp>
237 #include <libflatarray/detail/short_vec_sse_double_4.hpp>
238 #include <libflatarray/detail/short_vec_sse_double_8.hpp>
239 #include <libflatarray/detail/short_vec_sse_double_16.hpp>
240 #include <libflatarray/detail/short_vec_sse_double_32.hpp>
241
242 #include <libflatarray/detail/short_vec_sse_float_4.hpp>
243 #include <libflatarray/detail/short_vec_sse_float_8.hpp>
244 #include <libflatarray/detail/short_vec_sse_float_16.hpp>
245 #include <libflatarray/detail/short_vec_sse_float_32.hpp>
246
247 #include <libflatarray/detail/short_vec_qpx_double_4.hpp>
248 #include <libflatarray/detail/short_vec_qpx_double_8.hpp>
249 #include <libflatarray/detail/short_vec_qpx_double_16.hpp>
250 #include <libflatarray/detail/short_vec_qpx_double_32.hpp>
251
252 #include <libflatarray/detail/short_vec_neon_float_4.hpp>
253 #include <libflatarray/detail/short_vec_neon_float_8.hpp>
254 #include <libflatarray/detail/short_vec_neon_float_16.hpp>
255 #include <libflatarray/detail/short_vec_neon_float_32.hpp>
256
257 #include <libflatarray/detail/short_vec_mic_double_8.hpp>
258 #include <libflatarray/detail/short_vec_mic_double_16.hpp>
259 #include <libflatarray/detail/short_vec_mic_double_32.hpp>
260
261 #include <libflatarray/detail/short_vec_mic_float_16.hpp>
262 #include <libflatarray/detail/short_vec_mic_float_32.hpp>
263
264 #endif
265