1 /**
2 * Copyright 2014-2016 Andreas Schäfer
3 * Copyright 2015 Kurt Kanzenbach
4 *
5 * Distributed under the Boost Software License, Version 1.0. (See accompanying
6 * file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
7 */
8
9 #ifndef FLAT_ARRAY_DETAIL_SHORT_VEC_QPX_DOUBLE_16_HPP
10 #define FLAT_ARRAY_DETAIL_SHORT_VEC_QPX_DOUBLE_16_HPP
11
12 #if LIBFLATARRAY_WIDEST_VECTOR_ISA == LIBFLATARRAY_QPX
13
14 #include <libflatarray/detail/sqrt_reference.hpp>
15 #include <libflatarray/detail/short_vec_helpers.hpp>
16
17 #ifdef LIBFLATARRAY_WITH_CPP14
18 #include <initializer_list>
19 #endif
20
21 namespace LibFlatArray {
22
23 template<typename CARGO, int ARITY>
24 class short_vec;
25
26 template<typename CARGO, int ARITY>
27 class sqrt_reference;
28
29 #ifdef __ICC
30 // disabling this warning as implicit type conversion is exactly our goal here:
31 #pragma warning push
32 #pragma warning (disable: 2304)
33 #endif
34
35 template<>
36 class short_vec<double, 16>
37 {
38 public:
39 static const int ARITY = 16;
40
41 inline
short_vec(const double data=0)42 short_vec(const double data = 0) :
43 val1(vec_splats(data)),
44 val2(vec_splats(data)),
45 val3(vec_splats(data)),
46 val4(vec_splats(data))
47 {}
48
49 inline
short_vec(const double * data)50 short_vec(const double *data) :
51 val1(vec_ld(0, const_cast<double *>(data + 0))),
52 val2(vec_ld(0, const_cast<double *>(data + 4))),
53 val3(vec_ld(0, const_cast<double *>(data + 8))),
54 val4(vec_ld(0, const_cast<double *>(data + 12)))
55 {}
56
57 inline
short_vec(const vector4double & val1,const vector4double & val2,const vector4double & val3,const vector4double & val4)58 short_vec(const vector4double& val1, const vector4double& val2,
59 const vector4double& val3, const vector4double& val4) :
60 val1(val1),
61 val2(val2),
62 val3(val3),
63 val4(val4)
64 {}
65
66 #ifdef LIBFLATARRAY_WITH_CPP14
67 inline
short_vec(const std::initializer_list<double> & il)68 short_vec(const std::initializer_list<double>& il)
69 {
70 const double *ptr = static_cast<const double *>(&(*il.begin()));
71 load(ptr);
72 }
73 #endif
74
75 inline
76 short_vec(const sqrt_reference<double, 16>& other);
77
78 inline
operator -=(const short_vec<double,16> & other)79 void operator-=(const short_vec<double, 16>& other)
80 {
81 val1 = vec_sub(val1, other.val1);
82 val2 = vec_sub(val2, other.val2);
83 val3 = vec_sub(val3, other.val3);
84 val4 = vec_sub(val4, other.val4);
85 }
86
87 inline
operator -(const short_vec<double,16> & other) const88 short_vec<double, 16> operator-(const short_vec<double, 16>& other) const
89 {
90 return short_vec<double, 16>(
91 vec_sub(val1, other.val1),
92 vec_sub(val2, other.val2),
93 vec_sub(val3, other.val3),
94 vec_sub(val4, other.val4));
95 }
96
97 inline
operator +=(const short_vec<double,16> & other)98 void operator+=(const short_vec<double, 16>& other)
99 {
100 val1 = vec_add(val1, other.val1);
101 val2 = vec_add(val2, other.val2);
102 val3 = vec_add(val3, other.val3);
103 val4 = vec_add(val4, other.val4);
104 }
105
106 inline
operator +(const short_vec<double,16> & other) const107 short_vec<double, 16> operator+(const short_vec<double, 16>& other) const
108 {
109 return short_vec<double, 16>(
110 vec_add(val1, other.val1),
111 vec_add(val2, other.val2),
112 vec_add(val3, other.val3),
113 vec_add(val4, other.val4));
114 }
115
116 inline
operator *=(const short_vec<double,16> & other)117 void operator*=(const short_vec<double, 16>& other)
118 {
119 val1 = vec_add(val1, other.val1);
120 val2 = vec_add(val2, other.val2);
121 val3 = vec_add(val3, other.val3);
122 val4 = vec_add(val4, other.val4);
123 }
124
125 inline
operator *(const short_vec<double,16> & other) const126 short_vec<double, 16> operator*(const short_vec<double, 16>& other) const
127 {
128 return short_vec<double, 16>(
129 vec_mul(val1, other.val1),
130 vec_mul(val2, other.val2),
131 vec_mul(val3, other.val3),
132 vec_mul(val4, other.val4));
133 }
134
135 inline
136 void operator/=(const sqrt_reference<double, 16>& other);
137
138 inline
operator /=(const short_vec<double,16> & other)139 void operator/=(const short_vec<double, 16>& other)
140 {
141 val1 = vec_swdiv_nochk(val1, other.val1);
142 val2 = vec_swdiv_nochk(val2, other.val2);
143 val3 = vec_swdiv_nochk(val3, other.val3);
144 val4 = vec_swdiv_nochk(val4, other.val4);
145 }
146
147 inline
operator /(const short_vec<double,16> & other) const148 short_vec<double, 16> operator/(const short_vec<double, 16>& other) const
149 {
150 return short_vec<double, 16>(
151 vec_swdiv_nochk(val1, other.val1),
152 vec_swdiv_nochk(val2, other.val2),
153 vec_swdiv_nochk(val3, other.val3),
154 vec_swdiv_nochk(val4, other.val4));
155 }
156
157 inline
158 short_vec<double, 16> operator/(const sqrt_reference<double, 16>& other) const;
159
160 inline
sqrt() const161 short_vec<double, 16> sqrt() const
162 {
163 return short_vec<double, 16>(
164 vec_swsqrt(val1),
165 vec_swsqrt(val2),
166 vec_swsqrt(val3),
167 vec_swsqrt(val4));
168 }
169
170 inline
load(const double * data)171 void load(const double *data)
172 {
173 val1 = vec_ld(0, const_cast<double *>(data + 0));
174 val2 = vec_ld(0, const_cast<double *>(data + 4));
175 val3 = vec_ld(0, const_cast<double *>(data + 8));
176 val4 = vec_ld(0, const_cast<double *>(data + 12));
177 }
178
179 inline
load_aligned(const double * data)180 void load_aligned(const double *data)
181 {
182 SHORTVEC_ASSERT_ALIGNED(data, 32);
183 val1 = vec_lda(0, const_cast<double *>(data + 0));
184 val2 = vec_lda(0, const_cast<double *>(data + 4));
185 val3 = vec_lda(0, const_cast<double *>(data + 8));
186 val4 = vec_lda(0, const_cast<double *>(data + 12));
187 }
188
189 inline
store(double * data) const190 void store(double *data) const
191 {
192 vec_st(val1, 0, data + 0);
193 vec_st(val2, 0, data + 4);
194 vec_st(val3, 0, data + 8);
195 vec_st(val4, 0, data + 12);
196 }
197
198 inline
store_aligned(double * data) const199 void store_aligned(double *data) const
200 {
201 SHORTVEC_ASSERT_ALIGNED(data, 32);
202 vec_sta(val1, 0, data + 0);
203 vec_sta(val2, 0, data + 4);
204 vec_sta(val3, 0, data + 8);
205 vec_sta(val4, 0, data + 12);
206 }
207
208 inline
store_nt(double * data) const209 void store_nt(double *data) const
210 {
211 store(data);
212 }
213
214 inline
gather(const double * ptr,const int * offsets)215 void gather(const double *ptr, const int *offsets)
216 {
217 double *base = const_cast<double *>(ptr);
218 val1 = vec_insert(base[offsets[ 0]], val1, 0);
219 val1 = vec_insert(base[offsets[ 1]], val1, 1);
220 val1 = vec_insert(base[offsets[ 2]], val1, 2);
221 val1 = vec_insert(base[offsets[ 3]], val1, 3);
222
223 val2 = vec_insert(base[offsets[ 4]], val2, 0);
224 val2 = vec_insert(base[offsets[ 5]], val2, 1);
225 val2 = vec_insert(base[offsets[ 6]], val2, 2);
226 val2 = vec_insert(base[offsets[ 7]], val2, 3);
227
228 val3 = vec_insert(base[offsets[ 8]], val3, 0);
229 val3 = vec_insert(base[offsets[ 9]], val3, 1);
230 val3 = vec_insert(base[offsets[10]], val3, 2);
231 val3 = vec_insert(base[offsets[11]], val3, 3);
232
233 val4 = vec_insert(base[offsets[12]], val4, 0);
234 val4 = vec_insert(base[offsets[13]], val4, 1);
235 val4 = vec_insert(base[offsets[14]], val4, 2);
236 val4 = vec_insert(base[offsets[15]], val4, 3);
237 }
238
239 inline
scatter(double * ptr,const int * offsets) const240 void scatter(double *ptr, const int *offsets) const
241 {
242 ptr[offsets[ 0]] = vec_extract(val1, 0);
243 ptr[offsets[ 1]] = vec_extract(val1, 1);
244 ptr[offsets[ 2]] = vec_extract(val1, 2);
245 ptr[offsets[ 3]] = vec_extract(val1, 3);
246
247 ptr[offsets[ 4]] = vec_extract(val2, 0);
248 ptr[offsets[ 5]] = vec_extract(val2, 1);
249 ptr[offsets[ 6]] = vec_extract(val2, 2);
250 ptr[offsets[ 7]] = vec_extract(val2, 3);
251
252 ptr[offsets[ 8]] = vec_extract(val3, 0);
253 ptr[offsets[ 9]] = vec_extract(val3, 1);
254 ptr[offsets[10]] = vec_extract(val3, 2);
255 ptr[offsets[11]] = vec_extract(val3, 3);
256
257 ptr[offsets[12]] = vec_extract(val4, 0);
258 ptr[offsets[13]] = vec_extract(val4, 1);
259 ptr[offsets[14]] = vec_extract(val4, 2);
260 ptr[offsets[15]] = vec_extract(val4, 3);
261 }
262
263 private:
264 vector4double val1;
265 vector4double val2;
266 vector4double val3;
267 vector4double val4;
268 };
269
270 #ifdef __ICC
271 #pragma warning pop
272 #endif
273
274 inline
operator <<(double * data,const short_vec<double,16> & vec)275 void operator<<(double *data, const short_vec<double, 16>& vec)
276 {
277 vec.store(data);
278 }
279
280 template<>
281 class sqrt_reference<double, 16>
282 {
283 public:
284 template<typename OTHER_CARGO, int OTHER_ARITY>
285 friend class short_vec;
286
sqrt_reference(const short_vec<double,16> & vec)287 sqrt_reference(const short_vec<double, 16>& vec) :
288 vec(vec)
289 {}
290
291 private:
292 short_vec<double, 16> vec;
293 };
294
295 inline
short_vec(const sqrt_reference<double,16> & other)296 short_vec<double, 16>::short_vec(const sqrt_reference<double, 16>& other) :
297 val1(vec_swsqrt(other.vec.val1)),
298 val2(vec_swsqrt(other.vec.val2)),
299 val3(vec_swsqrt(other.vec.val3)),
300 val4(vec_swsqrt(other.vec.val4))
301 {}
302
303 inline
operator /=(const sqrt_reference<double,16> & other)304 void short_vec<double, 16>::operator/=(const sqrt_reference<double, 16>& other)
305 {
306 val1 = vec_mul(val1, vec_rsqrte(other.vec.val1));
307 val2 = vec_mul(val2, vec_rsqrte(other.vec.val2));
308 val3 = vec_mul(val3, vec_rsqrte(other.vec.val3));
309 val4 = vec_mul(val4, vec_rsqrte(other.vec.val4));
310 }
311
312 inline
operator /(const sqrt_reference<double,16> & other) const313 short_vec<double, 16> short_vec<double, 16>::operator/(const sqrt_reference<double, 16>& other) const
314 {
315 return short_vec<double, 16>(
316 vec_mul(val1, vec_rsqrte(other.vec.val1)),
317 vec_mul(val2, vec_rsqrte(other.vec.val2)),
318 vec_mul(val3, vec_rsqrte(other.vec.val3)),
319 vec_mul(val4, vec_rsqrte(other.vec.val4)));
320 }
321
322 inline
sqrt(const short_vec<double,16> & vec)323 sqrt_reference<double, 16> sqrt(const short_vec<double, 16>& vec)
324 {
325 return sqrt_reference<double, 16>(vec);
326 }
327
328 template<typename _CharT, typename _Traits>
329 std::basic_ostream<_CharT, _Traits>&
operator <<(std::basic_ostream<_CharT,_Traits> & __os,const short_vec<double,16> & vec)330 operator<<(std::basic_ostream<_CharT, _Traits>& __os,
331 const short_vec<double, 16>& vec)
332 {
333 const double *data1 = reinterpret_cast<const double *>(&vec.val1);
334 const double *data2 = reinterpret_cast<const double *>(&vec.val2);
335 const double *data3 = reinterpret_cast<const double *>(&vec.val3);
336 const double *data4 = reinterpret_cast<const double *>(&vec.val4);
337 __os << "[" << data1[0] << ", " << data1[1] << ", " << data1[2] << ", " << data1[3]
338 << ", " << data2[0] << ", " << data2[1] << ", " << data2[2] << ", " << data2[3]
339 << ", " << data3[0] << ", " << data3[1] << ", " << data3[2] << ", " << data3[3]
340 << ", " << data4[0] << ", " << data4[1] << ", " << data4[2] << ", " << data4[3]
341 << "]";
342 return __os;
343 }
344
345 }
346
347 #endif
348
349 #endif
350