1 /**
2  * Copyright 2014-2016 Andreas Schäfer
3  * Copyright 2015 Kurt Kanzenbach
4  *
5  * Distributed under the Boost Software License, Version 1.0. (See accompanying
6  * file LICENSE or copy at http://www.boost.org/LICENSE_1_0.txt)
7  */
8 
9 #ifndef FLAT_ARRAY_DETAIL_SHORT_VEC_QPX_DOUBLE_16_HPP
10 #define FLAT_ARRAY_DETAIL_SHORT_VEC_QPX_DOUBLE_16_HPP
11 
12 #if LIBFLATARRAY_WIDEST_VECTOR_ISA == LIBFLATARRAY_QPX
13 
#include <libflatarray/detail/sqrt_reference.hpp>
#include <libflatarray/detail/short_vec_helpers.hpp>

#include <iosfwd>

#ifdef LIBFLATARRAY_WITH_CPP14
#include <initializer_list>
#endif
20 
21 namespace LibFlatArray {
22 
23 template<typename CARGO, int ARITY>
24 class short_vec;
25 
26 template<typename CARGO, int ARITY>
27 class sqrt_reference;
28 
29 #ifdef __ICC
30 // disabling this warning as implicit type conversion is exactly our goal here:
31 #pragma warning push
32 #pragma warning (disable: 2304)
33 #endif
34 
35 template<>
36 class short_vec<double, 16>
37 {
38 public:
39     static const int ARITY = 16;
40 
41     inline
short_vec(const double data=0)42     short_vec(const double data = 0) :
43         val1(vec_splats(data)),
44         val2(vec_splats(data)),
45         val3(vec_splats(data)),
46         val4(vec_splats(data))
47     {}
48 
49     inline
short_vec(const double * data)50     short_vec(const double *data) :
51         val1(vec_ld(0, const_cast<double *>(data +  0))),
52         val2(vec_ld(0, const_cast<double *>(data +  4))),
53         val3(vec_ld(0, const_cast<double *>(data +  8))),
54         val4(vec_ld(0, const_cast<double *>(data + 12)))
55     {}
56 
57     inline
short_vec(const vector4double & val1,const vector4double & val2,const vector4double & val3,const vector4double & val4)58     short_vec(const vector4double& val1, const vector4double& val2,
59               const vector4double& val3, const vector4double& val4) :
60         val1(val1),
61         val2(val2),
62         val3(val3),
63         val4(val4)
64     {}
65 
66 #ifdef LIBFLATARRAY_WITH_CPP14
67     inline
short_vec(const std::initializer_list<double> & il)68     short_vec(const std::initializer_list<double>& il)
69     {
70         const double *ptr = static_cast<const double *>(&(*il.begin()));
71         load(ptr);
72     }
73 #endif
74 
75     inline
76     short_vec(const sqrt_reference<double, 16>& other);
77 
78     inline
operator -=(const short_vec<double,16> & other)79     void operator-=(const short_vec<double, 16>& other)
80     {
81         val1 = vec_sub(val1, other.val1);
82         val2 = vec_sub(val2, other.val2);
83         val3 = vec_sub(val3, other.val3);
84         val4 = vec_sub(val4, other.val4);
85     }
86 
87     inline
operator -(const short_vec<double,16> & other) const88     short_vec<double, 16> operator-(const short_vec<double, 16>& other) const
89     {
90         return short_vec<double, 16>(
91             vec_sub(val1, other.val1),
92             vec_sub(val2, other.val2),
93             vec_sub(val3, other.val3),
94             vec_sub(val4, other.val4));
95     }
96 
97     inline
operator +=(const short_vec<double,16> & other)98     void operator+=(const short_vec<double, 16>& other)
99     {
100         val1 = vec_add(val1, other.val1);
101         val2 = vec_add(val2, other.val2);
102         val3 = vec_add(val3, other.val3);
103         val4 = vec_add(val4, other.val4);
104     }
105 
106     inline
operator +(const short_vec<double,16> & other) const107     short_vec<double, 16> operator+(const short_vec<double, 16>& other) const
108     {
109         return short_vec<double, 16>(
110             vec_add(val1, other.val1),
111             vec_add(val2, other.val2),
112             vec_add(val3, other.val3),
113             vec_add(val4, other.val4));
114     }
115 
116     inline
operator *=(const short_vec<double,16> & other)117     void operator*=(const short_vec<double, 16>& other)
118     {
119         val1 = vec_add(val1, other.val1);
120         val2 = vec_add(val2, other.val2);
121         val3 = vec_add(val3, other.val3);
122         val4 = vec_add(val4, other.val4);
123     }
124 
125     inline
operator *(const short_vec<double,16> & other) const126     short_vec<double, 16> operator*(const short_vec<double, 16>& other) const
127     {
128         return short_vec<double, 16>(
129             vec_mul(val1, other.val1),
130             vec_mul(val2, other.val2),
131             vec_mul(val3, other.val3),
132             vec_mul(val4, other.val4));
133     }
134 
135     inline
136     void operator/=(const sqrt_reference<double, 16>& other);
137 
138     inline
operator /=(const short_vec<double,16> & other)139     void operator/=(const short_vec<double, 16>& other)
140     {
141         val1 = vec_swdiv_nochk(val1, other.val1);
142         val2 = vec_swdiv_nochk(val2, other.val2);
143         val3 = vec_swdiv_nochk(val3, other.val3);
144         val4 = vec_swdiv_nochk(val4, other.val4);
145     }
146 
147     inline
operator /(const short_vec<double,16> & other) const148     short_vec<double, 16> operator/(const short_vec<double, 16>& other) const
149     {
150         return short_vec<double, 16>(
151             vec_swdiv_nochk(val1, other.val1),
152             vec_swdiv_nochk(val2, other.val2),
153             vec_swdiv_nochk(val3, other.val3),
154             vec_swdiv_nochk(val4, other.val4));
155     }
156 
157     inline
158     short_vec<double, 16> operator/(const sqrt_reference<double, 16>& other) const;
159 
160     inline
sqrt() const161     short_vec<double, 16> sqrt() const
162     {
163         return short_vec<double, 16>(
164             vec_swsqrt(val1),
165             vec_swsqrt(val2),
166             vec_swsqrt(val3),
167             vec_swsqrt(val4));
168     }
169 
170     inline
load(const double * data)171     void load(const double *data)
172     {
173         val1 = vec_ld(0, const_cast<double *>(data +  0));
174         val2 = vec_ld(0, const_cast<double *>(data +  4));
175         val3 = vec_ld(0, const_cast<double *>(data +  8));
176         val4 = vec_ld(0, const_cast<double *>(data + 12));
177     }
178 
179     inline
load_aligned(const double * data)180     void load_aligned(const double *data)
181     {
182         SHORTVEC_ASSERT_ALIGNED(data, 32);
183         val1 = vec_lda(0, const_cast<double *>(data +  0));
184         val2 = vec_lda(0, const_cast<double *>(data +  4));
185         val3 = vec_lda(0, const_cast<double *>(data +  8));
186         val4 = vec_lda(0, const_cast<double *>(data + 12));
187     }
188 
189     inline
store(double * data) const190     void store(double *data) const
191     {
192         vec_st(val1, 0, data +  0);
193         vec_st(val2, 0, data +  4);
194         vec_st(val3, 0, data +  8);
195         vec_st(val4, 0, data + 12);
196     }
197 
198     inline
store_aligned(double * data) const199     void store_aligned(double *data) const
200     {
201         SHORTVEC_ASSERT_ALIGNED(data, 32);
202         vec_sta(val1, 0, data +  0);
203         vec_sta(val2, 0, data +  4);
204         vec_sta(val3, 0, data +  8);
205         vec_sta(val4, 0, data + 12);
206     }
207 
208     inline
store_nt(double * data) const209     void store_nt(double *data) const
210     {
211         store(data);
212     }
213 
214     inline
gather(const double * ptr,const int * offsets)215     void gather(const double *ptr, const int *offsets)
216     {
217         double *base = const_cast<double *>(ptr);
218         val1 = vec_insert(base[offsets[ 0]], val1, 0);
219         val1 = vec_insert(base[offsets[ 1]], val1, 1);
220         val1 = vec_insert(base[offsets[ 2]], val1, 2);
221         val1 = vec_insert(base[offsets[ 3]], val1, 3);
222 
223         val2 = vec_insert(base[offsets[ 4]], val2, 0);
224         val2 = vec_insert(base[offsets[ 5]], val2, 1);
225         val2 = vec_insert(base[offsets[ 6]], val2, 2);
226         val2 = vec_insert(base[offsets[ 7]], val2, 3);
227 
228         val3 = vec_insert(base[offsets[ 8]], val3, 0);
229         val3 = vec_insert(base[offsets[ 9]], val3, 1);
230         val3 = vec_insert(base[offsets[10]], val3, 2);
231         val3 = vec_insert(base[offsets[11]], val3, 3);
232 
233         val4 = vec_insert(base[offsets[12]], val4, 0);
234         val4 = vec_insert(base[offsets[13]], val4, 1);
235         val4 = vec_insert(base[offsets[14]], val4, 2);
236         val4 = vec_insert(base[offsets[15]], val4, 3);
237     }
238 
239     inline
scatter(double * ptr,const int * offsets) const240     void scatter(double *ptr, const int *offsets) const
241     {
242         ptr[offsets[ 0]] = vec_extract(val1, 0);
243         ptr[offsets[ 1]] = vec_extract(val1, 1);
244         ptr[offsets[ 2]] = vec_extract(val1, 2);
245         ptr[offsets[ 3]] = vec_extract(val1, 3);
246 
247         ptr[offsets[ 4]] = vec_extract(val2, 0);
248         ptr[offsets[ 5]] = vec_extract(val2, 1);
249         ptr[offsets[ 6]] = vec_extract(val2, 2);
250         ptr[offsets[ 7]] = vec_extract(val2, 3);
251 
252         ptr[offsets[ 8]] = vec_extract(val3, 0);
253         ptr[offsets[ 9]] = vec_extract(val3, 1);
254         ptr[offsets[10]] = vec_extract(val3, 2);
255         ptr[offsets[11]] = vec_extract(val3, 3);
256 
257         ptr[offsets[12]] = vec_extract(val4, 0);
258         ptr[offsets[13]] = vec_extract(val4, 1);
259         ptr[offsets[14]] = vec_extract(val4, 2);
260         ptr[offsets[15]] = vec_extract(val4, 3);
261     }
262 
263 private:
264     vector4double val1;
265     vector4double val2;
266     vector4double val3;
267     vector4double val4;
268 };
269 
270 #ifdef __ICC
271 #pragma warning pop
272 #endif
273 
274 inline
operator <<(double * data,const short_vec<double,16> & vec)275 void operator<<(double *data, const short_vec<double, 16>& vec)
276 {
277     vec.store(data);
278 }
279 
280 template<>
281 class sqrt_reference<double, 16>
282 {
283 public:
284     template<typename OTHER_CARGO, int OTHER_ARITY>
285     friend class short_vec;
286 
sqrt_reference(const short_vec<double,16> & vec)287     sqrt_reference(const short_vec<double, 16>& vec) :
288         vec(vec)
289     {}
290 
291 private:
292     short_vec<double, 16> vec;
293 };
294 
295 inline
short_vec(const sqrt_reference<double,16> & other)296 short_vec<double, 16>::short_vec(const sqrt_reference<double, 16>& other) :
297     val1(vec_swsqrt(other.vec.val1)),
298     val2(vec_swsqrt(other.vec.val2)),
299     val3(vec_swsqrt(other.vec.val3)),
300     val4(vec_swsqrt(other.vec.val4))
301 {}
302 
303 inline
operator /=(const sqrt_reference<double,16> & other)304 void short_vec<double, 16>::operator/=(const sqrt_reference<double, 16>& other)
305 {
306     val1 = vec_mul(val1, vec_rsqrte(other.vec.val1));
307     val2 = vec_mul(val2, vec_rsqrte(other.vec.val2));
308     val3 = vec_mul(val3, vec_rsqrte(other.vec.val3));
309     val4 = vec_mul(val4, vec_rsqrte(other.vec.val4));
310 }
311 
312 inline
operator /(const sqrt_reference<double,16> & other) const313 short_vec<double, 16> short_vec<double, 16>::operator/(const sqrt_reference<double, 16>& other) const
314 {
315     return short_vec<double, 16>(
316         vec_mul(val1, vec_rsqrte(other.vec.val1)),
317         vec_mul(val2, vec_rsqrte(other.vec.val2)),
318         vec_mul(val3, vec_rsqrte(other.vec.val3)),
319         vec_mul(val4, vec_rsqrte(other.vec.val4)));
320 }
321 
322 inline
sqrt(const short_vec<double,16> & vec)323 sqrt_reference<double, 16> sqrt(const short_vec<double, 16>& vec)
324 {
325     return sqrt_reference<double, 16>(vec);
326 }
327 
328 template<typename _CharT, typename _Traits>
329 std::basic_ostream<_CharT, _Traits>&
operator <<(std::basic_ostream<_CharT,_Traits> & __os,const short_vec<double,16> & vec)330 operator<<(std::basic_ostream<_CharT, _Traits>& __os,
331            const short_vec<double, 16>& vec)
332 {
333     const double *data1 = reinterpret_cast<const double *>(&vec.val1);
334     const double *data2 = reinterpret_cast<const double *>(&vec.val2);
335     const double *data3 = reinterpret_cast<const double *>(&vec.val3);
336     const double *data4 = reinterpret_cast<const double *>(&vec.val4);
337     __os << "["  << data1[0] << ", " << data1[1] << ", " << data1[2] << ", " << data1[3]
338          << ", " << data2[0] << ", " << data2[1] << ", " << data2[2] << ", " << data2[3]
339          << ", " << data3[0] << ", " << data3[1] << ", " << data3[2] << ", " << data3[3]
340          << ", " << data4[0] << ", " << data4[1] << ", " << data4[2] << ", " << data4[3]
341          << "]";
342     return __os;
343 }
344 
345 }
346 
347 #endif
348 
349 #endif
350