/*  This file is part of the Vc library. {{{
Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the names of contributing organizations nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

}}}*/

#ifndef VC_SSE_MASK_H_
#define VC_SSE_MASK_H_

#include "intrinsics.h"
#include "../common/maskbool.h"
#include "detail.h"
#include "macros.h"

namespace Vc_VERSIONED_NAMESPACE
{
namespace Detail
{
template <size_t Size>
Vc_INTRINSIC_L Vc_CONST_L int mask_count(__m128i) Vc_INTRINSIC_R Vc_CONST_R;
template <size_t Size>
Vc_INTRINSIC_L Vc_CONST_L int mask_to_int(__m128i) Vc_INTRINSIC_R Vc_CONST_R;
template <size_t Size>
Vc_INTRINSIC_L Vc_CONST_L bool is_equal(__m128, __m128) Vc_INTRINSIC_R Vc_CONST_R;
template <size_t Size>
Vc_INTRINSIC_L Vc_CONST_L bool is_not_equal(__m128, __m128) Vc_INTRINSIC_R Vc_CONST_R;
}  // namespace Detail

using SSE::sse_cast;

template <typename T> class Mask<T, VectorAbi::Sse>
{
    using abi = VectorAbi::Sse;
    friend class Mask<  double, abi>;
    friend class Mask<   float, abi>;
    friend class Mask< int32_t, abi>;
    friend class Mask<uint32_t, abi>;
    friend class Mask< int16_t, abi>;
    friend class Mask<uint16_t, abi>;

    /**
     * A helper type for aliasing the entries in the mask but behaving like a bool.
     */
    typedef Common::MaskBool<sizeof(T)> MaskBool;

    typedef Common::Storage<T, SSE::VectorTraits<T>::Size> Storage;

public:

    /**
     * The \c EntryType of masks is always bool, independent of \c T.
     */
    typedef bool EntryType;
    using value_type = EntryType;

    /**
     * The return type of the non-const subscript operator.
     */
    using EntryReference = Detail::ElementReference<Mask>;
    using reference = EntryReference;

    /**
     * The \c VectorEntryType, in contrast to \c EntryType, reveals information about the SIMD
     * implementation. This type is useful for the \c sizeof operator in generic functions.
     */
    typedef MaskBool VectorEntryType;
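    // A minimal sketch of the sizeof use case mentioned above (hypothetical
    // user-side helper, not part of this header): the in-memory size of one
    // mask entry can differ from sizeof(bool).
    //
    //   template <typename M> constexpr size_t entryBytes()
    //   {
    //       return sizeof(typename M::VectorEntryType); // e.g. 4 for the float mask
    //   }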

    /**
     * The \c VectorType reveals the implementation-specific internal type used for the SIMD type.
     */
    using VectorType = typename Storage::VectorType;

    /**
     * The associated Vector<T> type.
     */
    using Vector = SSE::Vector<T>;

public:
    Vc_FREE_STORE_OPERATORS_ALIGNED(16);
    static constexpr size_t Size = SSE::VectorTraits<T>::Size;
    static constexpr size_t MemoryAlignment = Size;
    static constexpr std::size_t size() { return Size; }

        // abstracts how Masks are passed to functions; it can easily be changed to a const ref here
#if defined Vc_MSVC && defined _WIN32
        typedef const Mask &Argument;
#else
        typedef Mask Argument;
#endif

        Vc_INTRINSIC Mask() = default;
        Vc_INTRINSIC Mask(const Mask &) = default;
        Vc_INTRINSIC Mask &operator=(const Mask &) = default;

        Vc_INTRINSIC Mask(const __m128  &x) : d(sse_cast<VectorType>(x)) {}
        Vc_INTRINSIC Mask(const __m128d &x) : d(sse_cast<VectorType>(x)) {}
        Vc_INTRINSIC Mask(const __m128i &x) : d(sse_cast<VectorType>(x)) {}
        Vc_INTRINSIC explicit Mask(VectorSpecialInitializerZero) : Mask(_mm_setzero_ps()) {}
        Vc_INTRINSIC explicit Mask(VectorSpecialInitializerOne) : Mask(SSE::_mm_setallone_ps()) {}
        Vc_INTRINSIC explicit Mask(bool b) : Mask(b ? SSE::_mm_setallone_ps() : _mm_setzero_ps()) {}
        Vc_INTRINSIC static Mask Zero() { return Mask{Vc::Zero}; }
        Vc_INTRINSIC static Mask One() { return Mask{Vc::One}; }
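        // Construction sketch (hedged example; SSE::float_m stands for the SSE
        // float mask and may equally be spelled Mask<float, VectorAbi::Sse>):
        //   SSE::float_m none = SSE::float_m::Zero(); // all entries false
        //   SSE::float_m all  = SSE::float_m::One();  // all entries true
        //   SSE::float_m some(true);                  // broadcast a single bool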

        // implicit cast
        template <typename U>
        Vc_INTRINSIC Mask(
            U &&rhs, Common::enable_if_mask_converts_implicitly<Mask, T, U> = nullarg)
            : d(sse_cast<VectorType>(
                  Detail::mask_cast<Traits::simd_vector_size<U>::value, Size, __m128>(
                      rhs.dataI())))
        {
        }

#if Vc_IS_VERSION_1
        // explicit cast, implemented via simd_cast (implementation in sse/simd_cast.h)
        template <typename U>
        Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between "
                      "mask types") Vc_INTRINSIC
            explicit Mask(U &&rhs,
                          Common::enable_if_mask_converts_explicitly<T, U> = nullarg);
#endif

        Vc_ALWAYS_INLINE explicit Mask(const bool *mem) { load(mem); }
        template<typename Flags> Vc_ALWAYS_INLINE explicit Mask(const bool *mem, Flags f) { load(mem, f); }

        Vc_ALWAYS_INLINE_L void load(const bool *mem) Vc_ALWAYS_INLINE_R;
        template<typename Flags> Vc_ALWAYS_INLINE void load(const bool *mem, Flags) { load(mem); }

        Vc_ALWAYS_INLINE_L void store(bool *) const Vc_ALWAYS_INLINE_R;
        template<typename Flags> Vc_ALWAYS_INLINE void store(bool *mem, Flags) const { store(mem); }
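        // Load/store usage sketch for the bool-array interface (hypothetical
        // buffer; it must provide at least Size entries):
        //   bool flags[Size];
        //   mask.store(flags);     // writes one bool per mask entry
        //   Mask restored(flags);  // same as calling restored.load(flags)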

        Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Mask &rhs) const
        {
            return Detail::is_equal<Size>(dataF(), rhs.dataF());
        }
        Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Mask &rhs) const
        {
            return Detail::is_not_equal<Size>(dataF(), rhs.dataF());
        }

        Vc_ALWAYS_INLINE Vc_PURE Mask operator!() const
        {
#ifdef Vc_GCC
            return ~dataI();
#else
            return _mm_andnot_si128(dataI(), SSE::_mm_setallone_si128());
#endif
        }

        Vc_ALWAYS_INLINE Mask &operator&=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_and_ps(dataF(), rhs.dataF())); return *this; }
        Vc_ALWAYS_INLINE Mask &operator|=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_or_ps (dataF(), rhs.dataF())); return *this; }
        Vc_ALWAYS_INLINE Mask &operator^=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_xor_ps(dataF(), rhs.dataF())); return *this; }

        Vc_ALWAYS_INLINE Vc_PURE Mask operator&(const Mask &rhs) const { return _mm_and_ps(dataF(), rhs.dataF()); }
        Vc_ALWAYS_INLINE Vc_PURE Mask operator|(const Mask &rhs) const { return _mm_or_ps (dataF(), rhs.dataF()); }
        Vc_ALWAYS_INLINE Vc_PURE Mask operator^(const Mask &rhs) const { return _mm_xor_ps(dataF(), rhs.dataF()); }

        Vc_ALWAYS_INLINE Vc_PURE Mask operator&&(const Mask &rhs) const { return _mm_and_ps(dataF(), rhs.dataF()); }
        Vc_ALWAYS_INLINE Vc_PURE Mask operator||(const Mask &rhs) const { return _mm_or_ps (dataF(), rhs.dataF()); }
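        // Note that && and || operate entry-wise on whole masks and do not
        // short-circuit. Sketch with two masks a and b of the same type:
        //   auto both   = a && b; // same result as a & b
        //   auto either = a || b; // same result as a | b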

        Vc_ALWAYS_INLINE Vc_PURE bool isFull () const { return
#ifdef Vc_USE_PTEST
            _mm_testc_si128(dataI(), SSE::_mm_setallone_si128()); // return 1 if (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff) == (~0 & d.v())
#else
            _mm_movemask_epi8(dataI()) == 0xffff;
#endif
        }
        Vc_ALWAYS_INLINE Vc_PURE bool isNotEmpty() const { return
#ifdef Vc_USE_PTEST
            0 == _mm_testz_si128(dataI(), dataI()); // return 1 if (0, 0, 0, 0) == (d.v() & d.v())
#else
            _mm_movemask_epi8(dataI()) != 0x0000;
#endif
        }
        Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const { return
#ifdef Vc_USE_PTEST
            0 != _mm_testz_si128(dataI(), dataI()); // return 1 if (0, 0, 0, 0) == (d.v() & d.v())
#else
            _mm_movemask_epi8(dataI()) == 0x0000;
#endif
        }
        Vc_ALWAYS_INLINE Vc_PURE bool isMix() const {
#ifdef Vc_USE_PTEST
            return _mm_test_mix_ones_zeros(dataI(), SSE::_mm_setallone_si128());
#else
            const int tmp = _mm_movemask_epi8(dataI());
            return tmp != 0 && (tmp ^ 0xffff) != 0;
#endif
        }
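        // Typical branching pattern over these predicates (hedged sketch, m being
        // any mask of this type):
        //   if (m.isFull())       { /* all entries true: take the fast path   */ }
        //   else if (m.isEmpty()) { /* all entries false: skip the work       */ }
        //   else                  { /* m.isMix(): handle entries individually */ }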

        Vc_ALWAYS_INLINE Vc_PURE int shiftMask() const { return _mm_movemask_epi8(dataI()); }

        Vc_ALWAYS_INLINE Vc_PURE int toInt() const { return Detail::mask_to_int<Size>(dataI()); }
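        // toInt() yields one bit per mask entry (bit i is set iff entry i is true),
        // whereas shiftMask() returns the raw per-byte movemask. Sketch:
        //   if (m.toInt() == 1) { /* only entry 0 is set */ }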

        Vc_ALWAYS_INLINE Vc_PURE VectorType  data() const { return d.v(); }
        Vc_ALWAYS_INLINE Vc_PURE __m128  dataF() const { return SSE::sse_cast<__m128 >(d.v()); }
        Vc_ALWAYS_INLINE Vc_PURE __m128i dataI() const { return SSE::sse_cast<__m128i>(d.v()); }
        Vc_ALWAYS_INLINE Vc_PURE __m128d dataD() const { return SSE::sse_cast<__m128d>(d.v()); }

private:
    friend reference;
    static Vc_INTRINSIC Vc_PURE value_type get(const Mask &m, int i) noexcept
    {
        return MaskBool(m.d.m(i));
    }
    template <typename U>
    static Vc_INTRINSIC void set(Mask &m, int i,
                                 U &&v) noexcept(noexcept(MaskBool(std::declval<U>())))
    {
        m.d.set(i, MaskBool(std::forward<U>(v)));
    }

public:
    /**
     * \note the returned object models the concept of a reference and
     * as such it can exist longer than the data it is referencing.
     * \note to avoid lifetime issues, we strongly advise not to store
     * any reference objects.
     */
    Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept
    {
        return {*this, int(index)};
    }
    Vc_ALWAYS_INLINE Vc_PURE value_type operator[](size_t index) const noexcept
    {
        return get(*this, index);
    }
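    // Subscript sketch; per the notes above, use the returned reference right
    // away rather than storing it:
    //   m[0] = true;   // set the first entry
    //   bool b = m[0]; // read it back as a plain bool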

        Vc_ALWAYS_INLINE Vc_PURE int count() const
        {
            return Detail::mask_count<Size>(dataI());
        }

        /**
         * Returns the index of the first mask entry that is \c true.
         *
         * The return value is undefined if the mask is empty.
         */
        Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R;
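        // Sketch combining count() and firstOne(); firstOne() must only be called
        // on a non-empty mask:
        //   if (m.count() > 0) {
        //       int i = m.firstOne(); // smallest index with m[i] == true
        //   }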

        template <typename G> static Vc_INTRINSIC_L Mask generate(G &&gen) Vc_INTRINSIC_R;
        Vc_INTRINSIC_L Vc_PURE_L Mask shifted(int amount) const Vc_INTRINSIC_R Vc_PURE_R;
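        // generate() builds a mask from a per-index callable; shifted() moves the
        // entries by the given amount, with vacated entries becoming false. Sketch:
        //   auto even = Mask::generate([](int i) { return i % 2 == 0; });
        //   auto next = even.shifted(1);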

    private:
#ifdef Vc_COMPILE_BENCHMARKS
    public:
#endif
        Storage d;
};
template <typename T> constexpr size_t Mask<T, VectorAbi::Sse>::Size;
template <typename T> constexpr size_t Mask<T, VectorAbi::Sse>::MemoryAlignment;

}  // namespace Vc

#include "mask.tcc"

#endif // VC_SSE_MASK_H_
