/*  This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/
27 
28 #ifndef VC_SSE_VECTOR_H_
29 #define VC_SSE_VECTOR_H_
30 
31 #include "../scalar/vector.h"
32 #include "intrinsics.h"
33 #include "types.h"
34 #include "vectorhelper.h"
35 #include "mask.h"
36 #include "../common/writemaskedvector.h"
37 #include "../common/aliasingentryhelper.h"
38 #include "../common/memoryfwd.h"
39 #include "../common/loadstoreflags.h"
40 #include <algorithm>
41 #include <cmath>
42 #include "detail.h"
43 
44 #include "macros.h"
45 
46 #ifdef isfinite
47 #undef isfinite
48 #endif
49 #ifdef isnan
50 #undef isnan
51 #endif
52 
53 namespace Vc_VERSIONED_NAMESPACE
54 {
55 
56 #define Vc_CURRENT_CLASS_NAME Vector
57 template <typename T> class Vector<T, VectorAbi::Sse>
58 {
59     static_assert(std::is_arithmetic<T>::value,
60                   "Vector<T> only accepts arithmetic builtin types as template parameter T.");
61 
62     protected:
63 #ifdef Vc_COMPILE_BENCHMARKS
64     public:
65 #endif
66         typedef typename SSE::VectorTraits<T>::StorageType StorageType;
67         StorageType d;
68         typedef typename SSE::VectorTraits<T>::GatherMaskType GatherMask;
69         typedef SSE::VectorHelper<typename SSE::VectorTraits<T>::VectorType> HV;
70         typedef SSE::VectorHelper<T> HT;
71     public:
72         Vc_FREE_STORE_OPERATORS_ALIGNED(16);
73 
74         typedef typename SSE::VectorTraits<T>::VectorType VectorType;
75         using vector_type = VectorType;
76         static constexpr size_t Size = SSE::VectorTraits<T>::Size;
77         static constexpr size_t MemoryAlignment = alignof(VectorType);
78         typedef typename SSE::VectorTraits<T>::EntryType EntryType;
79         using value_type = EntryType;
80         using VectorEntryType = EntryType;
81         using IndexType = fixed_size_simd<int, Size>;
82         typedef typename SSE::VectorTraits<T>::MaskType Mask;
83         using MaskType = Mask;
84         using mask_type = Mask;
85         typedef typename Mask::Argument MaskArg;
86         typedef typename Mask::Argument MaskArgument;
87         typedef const Vector AsArg;
88         using abi = VectorAbi::Sse;
89         using WriteMaskedVector = Common::WriteMaskedVector<Vector, Mask>;
90         template <typename U> using V = Vector<U, abi>;
91 
92         using reference = Detail::ElementReference<Vector>;
93 
94 #include "../common/generalinterface.h"
95 
96         static Vc_INTRINSIC_L Vector Random() Vc_INTRINSIC_R;
97 
98         ///////////////////////////////////////////////////////////////////////////////////////////
99         // internal: required to enable returning objects of VectorType
Vector(VectorType x)100         Vc_ALWAYS_INLINE Vector(VectorType x) : d(x) {}
101 
102         // implict conversion from compatible Vector<U>
103         template <typename U>
104         Vc_INTRINSIC Vector(
105             V<U> x, typename std::enable_if<Traits::is_implicit_cast_allowed<U, T>::value,
106                                             void *>::type = nullptr)
107             : d(SSE::convert<U, T>(x.data()))
108         {
109         }
110 
111 #if Vc_IS_VERSION_1
112         // static_cast from the remaining Vector<U>
113         template <typename U>
114         Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
115                       "vector types") Vc_INTRINSIC
116             explicit Vector(
117                 V<U> x,
118                 typename std::enable_if<!Traits::is_implicit_cast_allowed<U, T>::value,
119                                         void *>::type = nullptr)
120             : d(SSE::convert<U, T>(x.data()))
121         {
122         }
123 #endif
124 
125         ///////////////////////////////////////////////////////////////////////////////////////////
126         // broadcast
Vector(EntryType a)127         Vc_INTRINSIC Vector(EntryType a) : d(HT::set(a)) {}
128         template <typename U>
129         Vc_INTRINSIC Vector(U a,
130                             typename std::enable_if<std::is_same<U, int>::value &&
131                                                         !std::is_same<U, EntryType>::value,
132                                                     void *>::type = nullptr)
Vector(static_cast<EntryType> (a))133             : Vector(static_cast<EntryType>(a))
134         {
135         }
136 
137 #include "../common/loadinterface.h"
138 #include "../common/storeinterface.h"
139 
140         ///////////////////////////////////////////////////////////////////////////////////////////
141         // zeroing
142         Vc_INTRINSIC_L void setZero() Vc_INTRINSIC_R;
143         Vc_INTRINSIC_L void setZero(const Mask &k) Vc_INTRINSIC_R;
144         Vc_INTRINSIC_L void setZeroInverted(const Mask &k) Vc_INTRINSIC_R;
145 
146         Vc_INTRINSIC_L void setQnan() Vc_INTRINSIC_R;
147         Vc_INTRINSIC_L void setQnan(const Mask &k) Vc_INTRINSIC_R;
148 
149 #include "../common/gatherinterface.h"
150 #include "../common/scatterinterface.h"
151 #if defined Vc_IMPL_AVX2 && !defined Vc_MSVC
152         // skip this code for MSVC because it fails to do overload resolution correctly
153 
154         ////////////////////////////////////////////////////////////////////////////////
155         // non-converting pd, ps, and epi32 gathers
156         template <class U, class A, int Scale, int N = Vector<U, A>::size(),
157                   class = enable_if<(Vector<U, A>::size() >= size() && sizeof(T) >= 4)>>
gatherImplementation(const Common::GatherArguments<T,Vector<U,A>,Scale> & args)158         Vc_INTRINSIC void gatherImplementation(
159             const Common::GatherArguments<T, Vector<U, A>, Scale> &args)
160         {
161             d.v() = SSE::gather<sizeof(T) * Scale>(
162                 args.address, simd_cast<SSE::int_v>(args.indexes).data());
163         }
164 
165         // masked overload
166         template <class U, class A, int Scale, int N = Vector<U, A>::size(),
167                   class = enable_if<(Vector<U, A>::size() >= size() && sizeof(T) >= 4)>>
gatherImplementation(const Common::GatherArguments<T,Vector<U,A>,Scale> & args,MaskArgument k)168         Vc_INTRINSIC void gatherImplementation(
169             const Common::GatherArguments<T, Vector<U, A>, Scale> &args, MaskArgument k)
170         {
171             d.v() = SSE::gather<sizeof(T) * Scale>(
172                 d.v(), k.data(), args.address,
173                 simd_cast<SSE::int_v>(args.indexes).data());
174         }
175 
176         ////////////////////////////////////////////////////////////////////////////////
177         // converting (from 8-bit and 16-bit integers only) epi16 gather emulation via
178         // epi32 gathers
179         template <
180             class MT, class U, class A, int Scale,
181             class = enable_if<(sizeof(T) == 2 && std::is_integral<MT>::value &&
182                                (sizeof(MT) <= 2) && Vector<U, A>::size() >= size())>>
gatherImplementation(const Common::GatherArguments<MT,Vector<U,A>,Scale> & args)183         Vc_INTRINSIC void gatherImplementation(
184             const Common::GatherArguments<MT, Vector<U, A>, Scale> &args)
185         {
186             using AVX2::int_v;
187             const auto idx = simd_cast<int_v>(args.indexes).data();
188             *this = simd_cast<Vector>(int_v(
189                 AVX::gather<sizeof(MT) * Scale>(aliasing_cast<int>(args.address), idx)));
190             if (sizeof(MT) == 1) {
191                 if (std::is_signed<MT>::value) {
192                     d.v() = _mm_srai_epi16(_mm_slli_epi16(d.v(), 8), 8);
193                 } else {
194                     *this &= 0xff;
195                 }
196             }
197         }
198 
199         // masked overload
200         template <
201             class MT, class U, class A, int Scale,
202             class = enable_if<(sizeof(T) == 2 && std::is_integral<MT>::value &&
203                                (sizeof(MT) <= 2) && Vector<U, A>::size() >= size())>>
gatherImplementation(const Common::GatherArguments<MT,Vector<U,A>,Scale> & args,MaskArgument k)204         Vc_INTRINSIC void gatherImplementation(
205             const Common::GatherArguments<MT, Vector<U, A>, Scale> &args, MaskArgument k)
206         {
207             using AVX2::int_v;
208             auto v = simd_cast<Vector>(int_v(AVX::gather<sizeof(MT) * Scale>(
209                 _mm256_setzero_si256(), simd_cast<AVX2::int_m>(k).data(),
210                 aliasing_cast<int>(args.address),
211                 simd_cast<int_v>(args.indexes).data())));
212             if (sizeof(MT) == 1) {
213                 if (std::is_signed<MT>::value) {
214                     v.data() = _mm_srai_epi16(_mm_slli_epi16(v.data(), 8), 8);
215                 } else {
216                     v &= 0xff;
217                 }
218             }
219             assign(v, k);
220         }
221 
222         ////////////////////////////////////////////////////////////////////////////////
223         // all remaining converting gathers
224         template <class MT, class U, class A, int Scale>
225         Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) &&
226                                 Traits::is_valid_vector_argument<MT>::value &&
227                                 !std::is_same<MT, T>::value &&
228                                 Vector<U, A>::size() >= size()),
229                                void>
gatherImplementation(const Common::GatherArguments<MT,Vector<U,A>,Scale> & args)230         gatherImplementation(const Common::GatherArguments<MT, Vector<U, A>, Scale> &args)
231         {
232             *this = simd_cast<Vector>(fixed_size_simd<MT, Size>(args));
233         }
234 
235         // masked overload
236         template <class MT, class U, class A, int Scale>
237         Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) &&
238                                 Traits::is_valid_vector_argument<MT>::value &&
239                                 !std::is_same<MT, T>::value &&
240                                 Vector<U, A>::size() >= size()),
241                                void>
gatherImplementation(const Common::GatherArguments<MT,Vector<U,A>,Scale> & args,MaskArgument k)242         gatherImplementation(const Common::GatherArguments<MT, Vector<U, A>, Scale> &args,
243                              MaskArgument k)
244         {
245             assign(simd_cast<Vector>(fixed_size_simd<MT, Size>(args, k)), k);
246         }
247 #endif  // Vc_IMPL_AVX2 && !MSVC
248 
249         //prefix
250         Vc_INTRINSIC Vector &operator++() { data() = HT::add(data(), HT::one()); return *this; }
251         Vc_INTRINSIC Vector &operator--() { data() = HT::sub(data(), HT::one()); return *this; }
252         //postfix
253         Vc_INTRINSIC Vector operator++(int) { const Vector r = *this; data() = HT::add(data(), HT::one()); return r; }
254         Vc_INTRINSIC Vector operator--(int) { const Vector r = *this; data() = HT::sub(data(), HT::one()); return r; }
255 
256     private:
257         friend reference;
get(const Vector & o,int i)258         Vc_INTRINSIC static value_type get(const Vector &o, int i) noexcept
259         {
260             return o.d.m(i);
261         }
262         template <typename U>
set(Vector & o,int i,U && v)263         Vc_INTRINSIC static void set(Vector &o, int i, U &&v) noexcept(
264             noexcept(std::declval<value_type &>() = v))
265         {
266             o.d.set(i, v);
267         }
268 
269     public:
270         /**
271          * \note the returned object models the concept of a reference and
272          * as such it can exist longer than the data it is referencing.
273          * \note to avoid lifetime issues, we strongly advice not to store
274          * any reference objects.
275          */
276         Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept
277         {
278             static_assert(noexcept(reference{std::declval<Vector &>(), int()}), "");
279             return {*this, int(index)};
280         }
281         Vc_ALWAYS_INLINE value_type operator[](size_t index) const noexcept
282         {
283             return d.m(index);
284         }
285 
286         Vc_INTRINSIC_L Vector Vc_VDECL operator[](const SSE::int_v &perm) const Vc_INTRINSIC_R;
287 
288         Vc_INTRINSIC Vc_PURE Mask operator!() const
289         {
290             return *this == Zero();
291         }
292         Vc_INTRINSIC Vc_PURE Vector operator~() const
293         {
294 #ifndef Vc_ENABLE_FLOAT_BIT_OPERATORS
295             static_assert(std::is_integral<T>::value,
296                           "bit-complement can only be used with Vectors of integral type");
297 #endif
298             return Detail::andnot_(data(), HV::allone());
299         }
300         Vc_ALWAYS_INLINE_L Vc_PURE_L Vector operator-() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
301         Vc_INTRINSIC Vc_PURE Vector operator+() const { return *this; }
302 
303         Vc_ALWAYS_INLINE Vector  Vc_VDECL operator<< (AsArg shift) const { return generate([&](int i) { return get(*this, i) << get(shift, i); }); }
304         Vc_ALWAYS_INLINE Vector  Vc_VDECL operator>> (AsArg shift) const { return generate([&](int i) { return get(*this, i) >> get(shift, i); }); }
305         Vc_ALWAYS_INLINE Vector &Vc_VDECL operator<<=(AsArg shift) { return *this = *this << shift; }
306         Vc_ALWAYS_INLINE Vector &Vc_VDECL operator>>=(AsArg shift) { return *this = *this >> shift; }
307 
308         Vc_INTRINSIC_L Vector &Vc_VDECL operator<<=(  int shift)       Vc_INTRINSIC_R;
309         Vc_INTRINSIC_L Vector  Vc_VDECL operator<< (  int shift) const Vc_INTRINSIC_R;
310         Vc_INTRINSIC_L Vector &Vc_VDECL operator>>=(  int shift)       Vc_INTRINSIC_R;
311         Vc_INTRINSIC_L Vector  Vc_VDECL operator>> (  int shift) const Vc_INTRINSIC_R;
312 
313         Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC Vc_PURE Mask
isNegative()314             isNegative() const
315         {
316             return Vc::isnegative(*this);
317         }
318 
assign(const Vector & v,const Mask & mask)319         Vc_ALWAYS_INLINE void assign(const Vector &v, const Mask &mask)
320         {
321             data() = HV::blend(data(), v.data(), mask.data());
322         }
323 
324         template <typename V2>
325         Vc_DEPRECATED("Use simd_cast instead of Vector::staticCast")
staticCast()326             Vc_ALWAYS_INLINE Vc_PURE V2 staticCast() const
327         {
328             return SSE::convert<T, typename V2::EntryType>(data());
329         }
330         template <typename V2>
331         Vc_DEPRECATED("use reinterpret_components_cast instead")
reinterpretCast()332             Vc_ALWAYS_INLINE Vc_PURE V2 reinterpretCast() const
333         {
334             return SSE::sse_cast<typename V2::VectorType>(data());
335         }
336 
operator()337         Vc_INTRINSIC WriteMaskedVector operator()(const Mask &k) { return {*this, k}; }
338 
data()339         Vc_ALWAYS_INLINE Vc_PURE VectorType &data() { return d.v(); }
data()340         Vc_ALWAYS_INLINE Vc_PURE const VectorType &data() const { return d.v(); }
341 
342         template<int Index>
343         Vc_INTRINSIC_L Vector broadcast() const Vc_INTRINSIC_R;
344 
min()345         Vc_INTRINSIC EntryType min() const { return HT::min(data()); }
max()346         Vc_INTRINSIC EntryType max() const { return HT::max(data()); }
product()347         Vc_INTRINSIC EntryType product() const { return HT::mul(data()); }
sum()348         Vc_INTRINSIC EntryType sum() const { return HT::add(data()); }
349         Vc_INTRINSIC_L Vector partialSum() const Vc_INTRINSIC_R;
350         Vc_INTRINSIC_L EntryType min(MaskArg m) const Vc_INTRINSIC_R;
351         Vc_INTRINSIC_L EntryType max(MaskArg m) const Vc_INTRINSIC_R;
352         Vc_INTRINSIC_L EntryType product(MaskArg m) const Vc_INTRINSIC_R;
353         Vc_INTRINSIC_L EntryType sum(MaskArg m) const Vc_INTRINSIC_R;
354 
355         Vc_INTRINSIC_L Vector shifted(int amount, Vector shiftIn) const Vc_INTRINSIC_R;
356         Vc_INTRINSIC_L Vector shifted(int amount) const Vc_INTRINSIC_R;
357         Vc_INTRINSIC_L Vector rotated(int amount) const Vc_INTRINSIC_R;
358         Vc_INTRINSIC_L Vc_PURE_L Vector reversed() const Vc_INTRINSIC_R Vc_PURE_R;
359         Vc_ALWAYS_INLINE_L Vc_PURE_L Vector sorted() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
360 
callWithValuesSorted(F && f)361         template <typename F> void callWithValuesSorted(F &&f)
362         {
363             EntryType value = d.m(0);
364             f(value);
365             for (std::size_t i = 1; i < Size; ++i) {
366                 if (d.m(i) != value) {
367                     value = d.m(i);
368                     f(value);
369                 }
370             }
371         }
372 
call(F && f)373         template <typename F> Vc_INTRINSIC void call(F &&f) const
374         {
375             Common::for_all_vector_entries<Size>([&](size_t i) { f(EntryType(d.m(i))); });
376         }
377 
call(F && f,const Mask & mask)378         template <typename F> Vc_INTRINSIC void call(F &&f, const Mask &mask) const
379         {
380             for(size_t i : where(mask)) {
381                 f(EntryType(d.m(i)));
382             }
383         }
384 
apply(F && f)385         template <typename F> Vc_INTRINSIC Vector apply(F &&f) const
386         {
387             Vector r;
388             Common::for_all_vector_entries<Size>(
389                 [&](size_t i) { r.d.set(i, f(EntryType(d.m(i)))); });
390             return r;
391         }
apply(F && f,const Mask & mask)392         template <typename F> Vc_INTRINSIC Vector apply(F &&f, const Mask &mask) const
393         {
394             Vector r(*this);
395             for (size_t i : where(mask)) {
396                 r.d.set(i, f(EntryType(r.d.m(i))));
397             }
398             return r;
399         }
400 
fill(EntryType (& f)(IndexT))401         template<typename IndexT> Vc_INTRINSIC void fill(EntryType (&f)(IndexT)) {
402             Common::for_all_vector_entries<Size>([&](size_t i) { d.set(i, f(i)); });
403         }
fill(EntryType (& f)())404         Vc_INTRINSIC void fill(EntryType (&f)()) {
405             Common::for_all_vector_entries<Size>([&](size_t i) { d.set(i, f()); });
406         }
407 
408         template <typename G> static Vc_INTRINSIC_L Vector generate(G gen) Vc_INTRINSIC_R;
409 
410         Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC Vector
copySign(AsArg x)411             copySign(AsArg x) const
412         {
413             return Vc::copysign(*this, x);
414         }
415 
exponent()416         Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC Vector exponent() const
417         {
418             return Vc::exponent(*this);
419         }
420 
421         Vc_INTRINSIC_L Vector interleaveLow(Vector x) const Vc_INTRINSIC_R;
422         Vc_INTRINSIC_L Vector interleaveHigh(Vector x) const Vc_INTRINSIC_R;
423 };
424 #undef Vc_CURRENT_CLASS_NAME
425 template <typename T> constexpr size_t Vector<T, VectorAbi::Sse>::Size;
426 template <typename T> constexpr size_t Vector<T, VectorAbi::Sse>::MemoryAlignment;
427 
min(const SSE::int_v & x,const SSE::int_v & y)428 static Vc_ALWAYS_INLINE Vc_PURE SSE::int_v    min(const SSE::int_v    &x, const SSE::int_v    &y) { return SSE::min_epi32(x.data(), y.data()); }
min(const SSE::uint_v & x,const SSE::uint_v & y)429 static Vc_ALWAYS_INLINE Vc_PURE SSE::uint_v   min(const SSE::uint_v   &x, const SSE::uint_v   &y) { return SSE::min_epu32(x.data(), y.data()); }
min(const SSE::short_v & x,const SSE::short_v & y)430 static Vc_ALWAYS_INLINE Vc_PURE SSE::short_v  min(const SSE::short_v  &x, const SSE::short_v  &y) { return _mm_min_epi16(x.data(), y.data()); }
min(const SSE::ushort_v & x,const SSE::ushort_v & y)431 static Vc_ALWAYS_INLINE Vc_PURE SSE::ushort_v min(const SSE::ushort_v &x, const SSE::ushort_v &y) { return SSE::min_epu16(x.data(), y.data()); }
min(const SSE::float_v & x,const SSE::float_v & y)432 static Vc_ALWAYS_INLINE Vc_PURE SSE::float_v  min(const SSE::float_v  &x, const SSE::float_v  &y) { return _mm_min_ps(x.data(), y.data()); }
min(const SSE::double_v & x,const SSE::double_v & y)433 static Vc_ALWAYS_INLINE Vc_PURE SSE::double_v min(const SSE::double_v &x, const SSE::double_v &y) { return _mm_min_pd(x.data(), y.data()); }
max(const SSE::int_v & x,const SSE::int_v & y)434 static Vc_ALWAYS_INLINE Vc_PURE SSE::int_v    max(const SSE::int_v    &x, const SSE::int_v    &y) { return SSE::max_epi32(x.data(), y.data()); }
max(const SSE::uint_v & x,const SSE::uint_v & y)435 static Vc_ALWAYS_INLINE Vc_PURE SSE::uint_v   max(const SSE::uint_v   &x, const SSE::uint_v   &y) { return SSE::max_epu32(x.data(), y.data()); }
max(const SSE::short_v & x,const SSE::short_v & y)436 static Vc_ALWAYS_INLINE Vc_PURE SSE::short_v  max(const SSE::short_v  &x, const SSE::short_v  &y) { return _mm_max_epi16(x.data(), y.data()); }
max(const SSE::ushort_v & x,const SSE::ushort_v & y)437 static Vc_ALWAYS_INLINE Vc_PURE SSE::ushort_v max(const SSE::ushort_v &x, const SSE::ushort_v &y) { return SSE::max_epu16(x.data(), y.data()); }
max(const SSE::float_v & x,const SSE::float_v & y)438 static Vc_ALWAYS_INLINE Vc_PURE SSE::float_v  max(const SSE::float_v  &x, const SSE::float_v  &y) { return _mm_max_ps(x.data(), y.data()); }
max(const SSE::double_v & x,const SSE::double_v & y)439 static Vc_ALWAYS_INLINE Vc_PURE SSE::double_v max(const SSE::double_v &x, const SSE::double_v &y) { return _mm_max_pd(x.data(), y.data()); }
440 
441 template <typename T,
442           typename = enable_if<std::is_same<T, double>::value || std::is_same<T, float>::value ||
443                                std::is_same<T, short>::value ||
444                                std::is_same<T, int>::value>>
abs(Vector<T,VectorAbi::Sse> x)445 Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> abs(Vector<T, VectorAbi::Sse> x)
446 {
447     return SSE::VectorHelper<T>::abs(x.data());
448 }
449 
sqrt(const Vector<T,VectorAbi::Sse> & x)450   template<typename T> Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> sqrt (const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::sqrt(x.data()); }
rsqrt(const Vector<T,VectorAbi::Sse> & x)451   template<typename T> Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> rsqrt(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::rsqrt(x.data()); }
reciprocal(const Vector<T,VectorAbi::Sse> & x)452   template<typename T> Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> reciprocal(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::reciprocal(x.data()); }
round(const Vector<T,VectorAbi::Sse> & x)453   template<typename T> Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> round(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::round(x.data()); }
454 
isfinite(const Vector<T,VectorAbi::Sse> & x)455   template<typename T> Vc_ALWAYS_INLINE Vc_PURE typename Vector<T, VectorAbi::Sse>::Mask isfinite(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::isFinite(x.data()); }
isinf(const Vector<T,VectorAbi::Sse> & x)456   template<typename T> Vc_ALWAYS_INLINE Vc_PURE typename Vector<T, VectorAbi::Sse>::Mask isinf(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::isInfinite(x.data()); }
isnan(const Vector<T,VectorAbi::Sse> & x)457   template<typename T> Vc_ALWAYS_INLINE Vc_PURE typename Vector<T, VectorAbi::Sse>::Mask isnan(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::isNaN(x.data()); }
458 
459 #define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
460     template <Operator O, typename T, typename M, typename U>                            \
461     Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
462         Vector<T, VectorAbi::Sse> &lhs, M &&mask, U &&rhs)                               \
463     {                                                                                    \
464         lhs(mask) op_ rhs;                                                               \
465     }                                                                                    \
466     Vc_NOTHING_EXPECTING_SEMICOLON
467 Vc_CONDITIONAL_ASSIGN(          Assign,  =);
468 Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
469 Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
470 Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
471 Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
472 Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
473 Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
474 Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
475 Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
476 Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
477 Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
478 #undef Vc_CONDITIONAL_ASSIGN
479 
480 #define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
481     template <Operator O, typename T, typename M>                                        \
482     Vc_INTRINSIC enable_if<O == Operator::name_, Vector<T, VectorAbi::Sse>>              \
483     conditional_assign(Vector<T, VectorAbi::Sse> &lhs, M &&mask)                         \
484     {                                                                                    \
485         return expr_;                                                                    \
486     }                                                                                    \
487     Vc_NOTHING_EXPECTING_SEMICOLON
488 Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
489 Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
490 Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
491 Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
492 #undef Vc_CONDITIONAL_ASSIGN
493 
494 }  // namespace Vc
495 
496 #include "vector.tcc"
497 #include "simd_cast.h"
498 
499 #endif // VC_SSE_VECTOR_H_
500