1 /* This file is part of the Vc library. {{{
2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the names of contributing organizations nor the
12 names of its contributors may be used to endorse or promote products
13 derived from this software without specific prior written permission.
14
15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
26 }}}*/
27
28 #ifndef VC_SSE_VECTOR_H_
29 #define VC_SSE_VECTOR_H_
30
31 #include "../scalar/vector.h"
32 #include "intrinsics.h"
33 #include "types.h"
34 #include "vectorhelper.h"
35 #include "mask.h"
36 #include "../common/writemaskedvector.h"
37 #include "../common/aliasingentryhelper.h"
38 #include "../common/memoryfwd.h"
39 #include "../common/loadstoreflags.h"
40 #include <algorithm>
41 #include <cmath>
42 #include "detail.h"
43
44 #include "macros.h"
45
46 #ifdef isfinite
47 #undef isfinite
48 #endif
49 #ifdef isnan
50 #undef isnan
51 #endif
52
53 namespace Vc_VERSIONED_NAMESPACE
54 {
55
#define Vc_CURRENT_CLASS_NAME Vector
/**
 * SSE implementation of Vc's SIMD vector type: a fixed-width vector of
 * arithmetic entries of type T held in a single SSE register.
 *
 * Much of the public interface (load/store, gather/scatter entry points,
 * generate, IndexesFromZero, ...) is pulled in from the shared headers that
 * are #included inside the class body below.
 */
template <typename T> class Vector<T, VectorAbi::Sse>
{
    static_assert(std::is_arithmetic<T>::value,
                  "Vector<T> only accepts arithmetic builtin types as template parameter T.");

protected:
#ifdef Vc_COMPILE_BENCHMARKS
public:
#endif
    // Storage wrapper around the raw SSE register; type chosen by the traits class.
    typedef typename SSE::VectorTraits<T>::StorageType StorageType;
    StorageType d;  // the sole data member
    typedef typename SSE::VectorTraits<T>::GatherMaskType GatherMask;
    // Helper dispatchers: HV operates on the raw vector type, HT on the entry type T.
    typedef SSE::VectorHelper<typename SSE::VectorTraits<T>::VectorType> HV;
    typedef SSE::VectorHelper<T> HT;
public:
    // Aligned new/delete so heap-allocated vectors keep 16-byte alignment.
    Vc_FREE_STORE_OPERATORS_ALIGNED(16);

    typedef typename SSE::VectorTraits<T>::VectorType VectorType;
    using vector_type = VectorType;
    // Number of entries in the vector.
    static constexpr size_t Size = SSE::VectorTraits<T>::Size;
    // Byte alignment required for aligned loads/stores of this vector type.
    static constexpr size_t MemoryAlignment = alignof(VectorType);
    typedef typename SSE::VectorTraits<T>::EntryType EntryType;
    using value_type = EntryType;
    using VectorEntryType = EntryType;
    // Index vector type used by the gather/scatter interface.
    using IndexType = fixed_size_simd<int, Size>;
    typedef typename SSE::VectorTraits<T>::MaskType Mask;
    using MaskType = Mask;
    using mask_type = Mask;
    typedef typename Mask::Argument MaskArg;
    typedef typename Mask::Argument MaskArgument;
    typedef const Vector AsArg;  // preferred type for passing a Vector argument
    using abi = VectorAbi::Sse;
    using WriteMaskedVector = Common::WriteMaskedVector<Vector, Mask>;
    template <typename U> using V = Vector<U, abi>;

    // Proxy type returned by the non-const operator[].
    using reference = Detail::ElementReference<Vector>;

#include "../common/generalinterface.h"

    // Vector of random values; defined out-of-line (vector.tcc).
    static Vc_INTRINSIC_L Vector Random() Vc_INTRINSIC_R;

    ///////////////////////////////////////////////////////////////////////////////////////////
    // internal: required to enable returning objects of VectorType
    Vc_ALWAYS_INLINE Vector(VectorType x) : d(x) {}

    // implicit conversion from compatible Vector<U>
    template <typename U>
    Vc_INTRINSIC Vector(
        V<U> x, typename std::enable_if<Traits::is_implicit_cast_allowed<U, T>::value,
                                        void *>::type = nullptr)
        : d(SSE::convert<U, T>(x.data()))
    {
    }

#if Vc_IS_VERSION_1
    // static_cast from the remaining Vector<U>
    template <typename U>
    Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
                  "vector types") Vc_INTRINSIC
        explicit Vector(
            V<U> x,
            typename std::enable_if<!Traits::is_implicit_cast_allowed<U, T>::value,
                                    void *>::type = nullptr)
        : d(SSE::convert<U, T>(x.data()))
    {
    }
#endif

    ///////////////////////////////////////////////////////////////////////////////////////////
    // broadcast: every entry is initialized to the same value a
    Vc_INTRINSIC Vector(EntryType a) : d(HT::set(a)) {}
    // Broadcast from int when EntryType is not int, so integer literals still
    // convert implicitly without ambiguity.
    template <typename U>
    Vc_INTRINSIC Vector(U a,
                        typename std::enable_if<std::is_same<U, int>::value &&
                                                    !std::is_same<U, EntryType>::value,
                                                void *>::type = nullptr)
        : Vector(static_cast<EntryType>(a))
    {
    }

#include "../common/loadinterface.h"
#include "../common/storeinterface.h"

    ///////////////////////////////////////////////////////////////////////////////////////////
    // zeroing (masked variants only touch the selected entries; "Inverted"
    // zeroes the entries NOT selected by k) — defined in vector.tcc
    Vc_INTRINSIC_L void setZero() Vc_INTRINSIC_R;
    Vc_INTRINSIC_L void setZero(const Mask &k) Vc_INTRINSIC_R;
    Vc_INTRINSIC_L void setZeroInverted(const Mask &k) Vc_INTRINSIC_R;

    Vc_INTRINSIC_L void setQnan() Vc_INTRINSIC_R;
    Vc_INTRINSIC_L void setQnan(const Mask &k) Vc_INTRINSIC_R;

#include "../common/gatherinterface.h"
#include "../common/scatterinterface.h"
#if defined Vc_IMPL_AVX2 && !defined Vc_MSVC
    // skip this code for MSVC because it fails to do overload resolution correctly

    ////////////////////////////////////////////////////////////////////////////////
    // non-converting pd, ps, and epi32 gathers
    template <class U, class A, int Scale, int N = Vector<U, A>::size(),
              class = enable_if<(Vector<U, A>::size() >= size() && sizeof(T) >= 4)>>
    Vc_INTRINSIC void gatherImplementation(
        const Common::GatherArguments<T, Vector<U, A>, Scale> &args)
    {
        d.v() = SSE::gather<sizeof(T) * Scale>(
            args.address, simd_cast<SSE::int_v>(args.indexes).data());
    }

    // masked overload: unselected entries keep their current value (d.v() is
    // passed as the fallback source)
    template <class U, class A, int Scale, int N = Vector<U, A>::size(),
              class = enable_if<(Vector<U, A>::size() >= size() && sizeof(T) >= 4)>>
    Vc_INTRINSIC void gatherImplementation(
        const Common::GatherArguments<T, Vector<U, A>, Scale> &args, MaskArgument k)
    {
        d.v() = SSE::gather<sizeof(T) * Scale>(
            d.v(), k.data(), args.address,
            simd_cast<SSE::int_v>(args.indexes).data());
    }

    ////////////////////////////////////////////////////////////////////////////////
    // converting (from 8-bit and 16-bit integers only) epi16 gather emulation via
    // epi32 gathers
    template <
        class MT, class U, class A, int Scale,
        class = enable_if<(sizeof(T) == 2 && std::is_integral<MT>::value &&
                           (sizeof(MT) <= 2) && Vector<U, A>::size() >= size())>>
    Vc_INTRINSIC void gatherImplementation(
        const Common::GatherArguments<MT, Vector<U, A>, Scale> &args)
    {
        using AVX2::int_v;
        const auto idx = simd_cast<int_v>(args.indexes).data();
        *this = simd_cast<Vector>(int_v(
            AVX::gather<sizeof(MT) * Scale>(aliasing_cast<int>(args.address), idx)));
        if (sizeof(MT) == 1) {
            if (std::is_signed<MT>::value) {
                // sign-extend the low byte of every 16-bit lane
                d.v() = _mm_srai_epi16(_mm_slli_epi16(d.v(), 8), 8);
            } else {
                // zero-extend: keep only the low byte
                *this &= 0xff;
            }
        }
    }

    // masked overload
    template <
        class MT, class U, class A, int Scale,
        class = enable_if<(sizeof(T) == 2 && std::is_integral<MT>::value &&
                           (sizeof(MT) <= 2) && Vector<U, A>::size() >= size())>>
    Vc_INTRINSIC void gatherImplementation(
        const Common::GatherArguments<MT, Vector<U, A>, Scale> &args, MaskArgument k)
    {
        using AVX2::int_v;
        auto v = simd_cast<Vector>(int_v(AVX::gather<sizeof(MT) * Scale>(
            _mm256_setzero_si256(), simd_cast<AVX2::int_m>(k).data(),
            aliasing_cast<int>(args.address),
            simd_cast<int_v>(args.indexes).data())));
        if (sizeof(MT) == 1) {
            if (std::is_signed<MT>::value) {
                v.data() = _mm_srai_epi16(_mm_slli_epi16(v.data(), 8), 8);
            } else {
                v &= 0xff;
            }
        }
        // only write back the entries selected by k
        assign(v, k);
    }

    ////////////////////////////////////////////////////////////////////////////////
    // all remaining converting gathers: gather into a fixed_size vector of the
    // memory type MT first, then convert to this vector type
    template <class MT, class U, class A, int Scale>
    Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) &&
                            Traits::is_valid_vector_argument<MT>::value &&
                            !std::is_same<MT, T>::value &&
                            Vector<U, A>::size() >= size()),
                           void>
    gatherImplementation(const Common::GatherArguments<MT, Vector<U, A>, Scale> &args)
    {
        *this = simd_cast<Vector>(fixed_size_simd<MT, Size>(args));
    }

    // masked overload
    template <class MT, class U, class A, int Scale>
    Vc_INTRINSIC enable_if<((sizeof(T) != 2 || sizeof(MT) > 2) &&
                            Traits::is_valid_vector_argument<MT>::value &&
                            !std::is_same<MT, T>::value &&
                            Vector<U, A>::size() >= size()),
                           void>
    gatherImplementation(const Common::GatherArguments<MT, Vector<U, A>, Scale> &args,
                         MaskArgument k)
    {
        assign(simd_cast<Vector>(fixed_size_simd<MT, Size>(args, k)), k);
    }
#endif  // Vc_IMPL_AVX2 && !MSVC

    // prefix increment/decrement: add/subtract 1 from every entry
    Vc_INTRINSIC Vector &operator++() { data() = HT::add(data(), HT::one()); return *this; }
    Vc_INTRINSIC Vector &operator--() { data() = HT::sub(data(), HT::one()); return *this; }
    // postfix variants return the previous value
    Vc_INTRINSIC Vector operator++(int) { const Vector r = *this; data() = HT::add(data(), HT::one()); return r; }
    Vc_INTRINSIC Vector operator--(int) { const Vector r = *this; data() = HT::sub(data(), HT::one()); return r; }

private:
    friend reference;
    // Element read accessor used by the reference proxy.
    Vc_INTRINSIC static value_type get(const Vector &o, int i) noexcept
    {
        return o.d.m(i);
    }
    // Element write accessor used by the reference proxy.
    template <typename U>
    Vc_INTRINSIC static void set(Vector &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        o.d.set(i, v);
    }

public:
    /**
     * \note the returned object models the concept of a reference and
     * as such it can exist longer than the data it is referencing.
     * \note to avoid lifetime issues, we strongly advise not to store
     * any reference objects.
     */
    Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept
    {
        static_assert(noexcept(reference{std::declval<Vector &>(), int()}), "");
        return {*this, int(index)};
    }
    // const subscript returns the entry by value
    Vc_ALWAYS_INLINE value_type operator[](size_t index) const noexcept
    {
        return d.m(index);
    }

    // Permutation by an index vector; defined in vector.tcc.
    Vc_INTRINSIC_L Vector Vc_VDECL operator[](const SSE::int_v &perm) const Vc_INTRINSIC_R;

    // Mask of entries that compare equal to zero.
    Vc_INTRINSIC Vc_PURE Mask operator!() const
    {
        return *this == Zero();
    }
    Vc_INTRINSIC Vc_PURE Vector operator~() const
    {
#ifndef Vc_ENABLE_FLOAT_BIT_OPERATORS
        static_assert(std::is_integral<T>::value,
                      "bit-complement can only be used with Vectors of integral type");
#endif
        // andnot_(a, b) computes ~a & b; with an all-ones second operand this is ~a.
        return Detail::andnot_(data(), HV::allone());
    }
    Vc_ALWAYS_INLINE_L Vc_PURE_L Vector operator-() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
    Vc_INTRINSIC Vc_PURE Vector operator+() const { return *this; }

    // Per-entry shifts by a vector of shift amounts (entry-wise via generate()).
    Vc_ALWAYS_INLINE Vector Vc_VDECL operator<< (AsArg shift) const { return generate([&](int i) { return get(*this, i) << get(shift, i); }); }
    Vc_ALWAYS_INLINE Vector Vc_VDECL operator>> (AsArg shift) const { return generate([&](int i) { return get(*this, i) >> get(shift, i); }); }
    Vc_ALWAYS_INLINE Vector &Vc_VDECL operator<<=(AsArg shift) { return *this = *this << shift; }
    Vc_ALWAYS_INLINE Vector &Vc_VDECL operator>>=(AsArg shift) { return *this = *this >> shift; }

    // Uniform shifts by a scalar amount; defined in vector.tcc.
    Vc_INTRINSIC_L Vector &Vc_VDECL operator<<=( int shift) Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vector Vc_VDECL operator<< ( int shift) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vector &Vc_VDECL operator>>=( int shift) Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vector Vc_VDECL operator>> ( int shift) const Vc_INTRINSIC_R;

    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC Vc_PURE Mask
        isNegative() const
    {
        return Vc::isnegative(*this);
    }

    // Masked assignment: entries selected by mask are taken from v, the rest
    // keep their current value.
    Vc_ALWAYS_INLINE void assign(const Vector &v, const Mask &mask)
    {
        data() = HV::blend(data(), v.data(), mask.data());
    }

    template <typename V2>
    Vc_DEPRECATED("Use simd_cast instead of Vector::staticCast")
        Vc_ALWAYS_INLINE Vc_PURE V2 staticCast() const
    {
        return SSE::convert<T, typename V2::EntryType>(data());
    }
    template <typename V2>
    Vc_DEPRECATED("use reinterpret_components_cast instead")
        Vc_ALWAYS_INLINE Vc_PURE V2 reinterpretCast() const
    {
        return SSE::sse_cast<typename V2::VectorType>(data());
    }

    // Write-masked view: `v(mask) = x;` assigns only the masked entries.
    Vc_INTRINSIC WriteMaskedVector operator()(const Mask &k) { return {*this, k}; }

    // Access to the underlying SSE register.
    Vc_ALWAYS_INLINE Vc_PURE VectorType &data() { return d.v(); }
    Vc_ALWAYS_INLINE Vc_PURE const VectorType &data() const { return d.v(); }

    // Broadcast entry Index to all positions; defined in vector.tcc.
    template<int Index>
    Vc_INTRINSIC_L Vector broadcast() const Vc_INTRINSIC_R;

    // Horizontal reductions over all entries.
    Vc_INTRINSIC EntryType min() const { return HT::min(data()); }
    Vc_INTRINSIC EntryType max() const { return HT::max(data()); }
    Vc_INTRINSIC EntryType product() const { return HT::mul(data()); }
    Vc_INTRINSIC EntryType sum() const { return HT::add(data()); }
    Vc_INTRINSIC_L Vector partialSum() const Vc_INTRINSIC_R;
    // Masked reductions; defined in vector.tcc.
    Vc_INTRINSIC_L EntryType min(MaskArg m) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L EntryType max(MaskArg m) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L EntryType product(MaskArg m) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L EntryType sum(MaskArg m) const Vc_INTRINSIC_R;

    // Entry-rearranging operations; defined in vector.tcc.
    Vc_INTRINSIC_L Vector shifted(int amount, Vector shiftIn) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vector shifted(int amount) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vector rotated(int amount) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vc_PURE_L Vector reversed() const Vc_INTRINSIC_R Vc_PURE_R;
    Vc_ALWAYS_INLINE_L Vc_PURE_L Vector sorted() const Vc_ALWAYS_INLINE_R Vc_PURE_R;

    // Calls f once for each run of equal adjacent values, in storage order.
    // Only adjacent duplicates are skipped — presumably the caller sorts the
    // vector first (see sorted()); TODO confirm against callers.
    template <typename F> void callWithValuesSorted(F &&f)
    {
        EntryType value = d.m(0);
        f(value);
        for (std::size_t i = 1; i < Size; ++i) {
            if (d.m(i) != value) {
                value = d.m(i);
                f(value);
            }
        }
    }

    // Invokes f(entry) for every entry.
    template <typename F> Vc_INTRINSIC void call(F &&f) const
    {
        Common::for_all_vector_entries<Size>([&](size_t i) { f(EntryType(d.m(i))); });
    }

    // Invokes f(entry) only for the entries selected by mask.
    template <typename F> Vc_INTRINSIC void call(F &&f, const Mask &mask) const
    {
        for(size_t i : where(mask)) {
            f(EntryType(d.m(i)));
        }
    }

    // Returns a new vector with r[i] = f((*this)[i]).
    template <typename F> Vc_INTRINSIC Vector apply(F &&f) const
    {
        Vector r;
        Common::for_all_vector_entries<Size>(
            [&](size_t i) { r.d.set(i, f(EntryType(d.m(i)))); });
        return r;
    }
    // Masked apply: unselected entries keep their current value.
    template <typename F> Vc_INTRINSIC Vector apply(F &&f, const Mask &mask) const
    {
        Vector r(*this);
        for (size_t i : where(mask)) {
            r.d.set(i, f(EntryType(r.d.m(i))));
        }
        return r;
    }

    // Fills the vector with f(i) for each entry index i.
    template<typename IndexT> Vc_INTRINSIC void fill(EntryType (&f)(IndexT)) {
        Common::for_all_vector_entries<Size>([&](size_t i) { d.set(i, f(i)); });
    }
    // Fills the vector by invoking f() once per entry.
    Vc_INTRINSIC void fill(EntryType (&f)()) {
        Common::for_all_vector_entries<Size>([&](size_t i) { d.set(i, f()); });
    }

    // Returns a vector whose entry i is gen(i); defined in vector.tcc.
    template <typename G> static Vc_INTRINSIC_L Vector generate(G gen) Vc_INTRINSIC_R;

    Vc_DEPRECATED("use copysign(x, y) instead") Vc_INTRINSIC Vector
        copySign(AsArg x) const
    {
        return Vc::copysign(*this, x);
    }

    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC Vector exponent() const
    {
        return Vc::exponent(*this);
    }

    // Interleave the entries of *this and x; defined in vector.tcc.
    Vc_INTRINSIC_L Vector interleaveLow(Vector x) const Vc_INTRINSIC_R;
    Vc_INTRINSIC_L Vector interleaveHigh(Vector x) const Vc_INTRINSIC_R;
};
#undef Vc_CURRENT_CLASS_NAME
// Out-of-class definitions of the static constexpr members, needed when they
// are ODR-used (pre-C++17 there are no implicit inline variable definitions).
template <typename T> constexpr size_t Vector<T, VectorAbi::Sse>::Size;
template <typename T> constexpr size_t Vector<T, VectorAbi::Sse>::MemoryAlignment;
427
min(const SSE::int_v & x,const SSE::int_v & y)428 static Vc_ALWAYS_INLINE Vc_PURE SSE::int_v min(const SSE::int_v &x, const SSE::int_v &y) { return SSE::min_epi32(x.data(), y.data()); }
min(const SSE::uint_v & x,const SSE::uint_v & y)429 static Vc_ALWAYS_INLINE Vc_PURE SSE::uint_v min(const SSE::uint_v &x, const SSE::uint_v &y) { return SSE::min_epu32(x.data(), y.data()); }
min(const SSE::short_v & x,const SSE::short_v & y)430 static Vc_ALWAYS_INLINE Vc_PURE SSE::short_v min(const SSE::short_v &x, const SSE::short_v &y) { return _mm_min_epi16(x.data(), y.data()); }
min(const SSE::ushort_v & x,const SSE::ushort_v & y)431 static Vc_ALWAYS_INLINE Vc_PURE SSE::ushort_v min(const SSE::ushort_v &x, const SSE::ushort_v &y) { return SSE::min_epu16(x.data(), y.data()); }
min(const SSE::float_v & x,const SSE::float_v & y)432 static Vc_ALWAYS_INLINE Vc_PURE SSE::float_v min(const SSE::float_v &x, const SSE::float_v &y) { return _mm_min_ps(x.data(), y.data()); }
min(const SSE::double_v & x,const SSE::double_v & y)433 static Vc_ALWAYS_INLINE Vc_PURE SSE::double_v min(const SSE::double_v &x, const SSE::double_v &y) { return _mm_min_pd(x.data(), y.data()); }
max(const SSE::int_v & x,const SSE::int_v & y)434 static Vc_ALWAYS_INLINE Vc_PURE SSE::int_v max(const SSE::int_v &x, const SSE::int_v &y) { return SSE::max_epi32(x.data(), y.data()); }
max(const SSE::uint_v & x,const SSE::uint_v & y)435 static Vc_ALWAYS_INLINE Vc_PURE SSE::uint_v max(const SSE::uint_v &x, const SSE::uint_v &y) { return SSE::max_epu32(x.data(), y.data()); }
max(const SSE::short_v & x,const SSE::short_v & y)436 static Vc_ALWAYS_INLINE Vc_PURE SSE::short_v max(const SSE::short_v &x, const SSE::short_v &y) { return _mm_max_epi16(x.data(), y.data()); }
max(const SSE::ushort_v & x,const SSE::ushort_v & y)437 static Vc_ALWAYS_INLINE Vc_PURE SSE::ushort_v max(const SSE::ushort_v &x, const SSE::ushort_v &y) { return SSE::max_epu16(x.data(), y.data()); }
max(const SSE::float_v & x,const SSE::float_v & y)438 static Vc_ALWAYS_INLINE Vc_PURE SSE::float_v max(const SSE::float_v &x, const SSE::float_v &y) { return _mm_max_ps(x.data(), y.data()); }
max(const SSE::double_v & x,const SSE::double_v & y)439 static Vc_ALWAYS_INLINE Vc_PURE SSE::double_v max(const SSE::double_v &x, const SSE::double_v &y) { return _mm_max_pd(x.data(), y.data()); }
440
441 template <typename T,
442 typename = enable_if<std::is_same<T, double>::value || std::is_same<T, float>::value ||
443 std::is_same<T, short>::value ||
444 std::is_same<T, int>::value>>
abs(Vector<T,VectorAbi::Sse> x)445 Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> abs(Vector<T, VectorAbi::Sse> x)
446 {
447 return SSE::VectorHelper<T>::abs(x.data());
448 }
449
sqrt(const Vector<T,VectorAbi::Sse> & x)450 template<typename T> Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> sqrt (const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::sqrt(x.data()); }
rsqrt(const Vector<T,VectorAbi::Sse> & x)451 template<typename T> Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> rsqrt(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::rsqrt(x.data()); }
reciprocal(const Vector<T,VectorAbi::Sse> & x)452 template<typename T> Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> reciprocal(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::reciprocal(x.data()); }
round(const Vector<T,VectorAbi::Sse> & x)453 template<typename T> Vc_ALWAYS_INLINE Vc_PURE Vector<T, VectorAbi::Sse> round(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::round(x.data()); }
454
isfinite(const Vector<T,VectorAbi::Sse> & x)455 template<typename T> Vc_ALWAYS_INLINE Vc_PURE typename Vector<T, VectorAbi::Sse>::Mask isfinite(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::isFinite(x.data()); }
isinf(const Vector<T,VectorAbi::Sse> & x)456 template<typename T> Vc_ALWAYS_INLINE Vc_PURE typename Vector<T, VectorAbi::Sse>::Mask isinf(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::isInfinite(x.data()); }
isnan(const Vector<T,VectorAbi::Sse> & x)457 template<typename T> Vc_ALWAYS_INLINE Vc_PURE typename Vector<T, VectorAbi::Sse>::Mask isnan(const Vector<T, VectorAbi::Sse> &x) { return SSE::VectorHelper<T>::isNaN(x.data()); }
458
// Generates the masked compound-assignment overloads of conditional_assign:
// conditional_assign<Op>(lhs, mask, rhs) performs `lhs(mask) op rhs`, i.e. the
// operation is applied only to the entries selected by the mask (via the
// write-masked proxy returned by Vector::operator()).
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, typename M, typename U>                            \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        Vector<T, VectorAbi::Sse> &lhs, M &&mask, U &&rhs)                               \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
479
// Generates the masked increment/decrement overloads of conditional_assign:
// conditional_assign<Op>(lhs, mask) applies ++/-- only to the masked entries
// and returns the value of the corresponding pre-/post- operator expression.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, typename M>                                        \
    Vc_INTRINSIC enable_if<O == Operator::name_, Vector<T, VectorAbi::Sse>>              \
    conditional_assign(Vector<T, VectorAbi::Sse> &lhs, M &&mask)                         \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
493
494 } // namespace Vc
495
496 #include "vector.tcc"
497 #include "simd_cast.h"
498
499 #endif // VC_SSE_VECTOR_H_
500