1 /* This file is part of the Vc library. {{{ 2 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org> 3 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions are met: 6 * Redistributions of source code must retain the above copyright 7 notice, this list of conditions and the following disclaimer. 8 * Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 * Neither the names of contributing organizations nor the 12 names of its contributors may be used to endorse or promote products 13 derived from this software without specific prior written permission. 14 15 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY 19 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 26 }}}*/ 27 28 #ifndef VC_SSE_MASK_H_ 29 #define VC_SSE_MASK_H_ 30 31 #include "intrinsics.h" 32 #include "../common/maskbool.h" 33 #include "detail.h" 34 #include "macros.h" 35 36 namespace Vc_VERSIONED_NAMESPACE 37 { 38 namespace Detail 39 { 40 template <size_t Size> 41 Vc_INTRINSIC_L Vc_CONST_L int mask_count(__m128i) Vc_INTRINSIC_R Vc_CONST_R; 42 template <size_t Size> 43 Vc_INTRINSIC_L Vc_CONST_L int mask_to_int(__m128i) Vc_INTRINSIC_R Vc_CONST_R; 44 template <size_t Size> 45 Vc_INTRINSIC_L Vc_CONST_L bool is_equal(__m128, __m128) Vc_INTRINSIC_R Vc_CONST_R; 46 template <size_t Size> 47 Vc_INTRINSIC_L Vc_CONST_L bool is_not_equal(__m128, __m128) Vc_INTRINSIC_R Vc_CONST_R; 48 } // namespace Detail 49 50 using SSE::sse_cast; 51 52 template <typename T> class Mask<T, VectorAbi::Sse> 53 { 54 using abi = VectorAbi::Sse; 55 friend class Mask< double, abi>; 56 friend class Mask< float, abi>; 57 friend class Mask< int32_t, abi>; 58 friend class Mask<uint32_t, abi>; 59 friend class Mask< int16_t, abi>; 60 friend class Mask<uint16_t, abi>; 61 62 /** 63 * A helper type for aliasing the entries in the mask but behaving like a bool. 64 */ 65 typedef Common::MaskBool<sizeof(T)> MaskBool; 66 67 typedef Common::Storage<T, SSE::VectorTraits<T>::Size> Storage; 68 69 public: 70 71 /** 72 * The \c EntryType of masks is always bool, independent of \c T. 73 */ 74 typedef bool EntryType; 75 using value_type = EntryType; 76 77 /** 78 * The return type of the non-const subscript operator. 79 */ 80 using EntryReference = Detail::ElementReference<Mask>; 81 using reference = EntryReference; 82 83 /** 84 * The \c VectorEntryType, in contrast to \c EntryType, reveals information about the SIMD 85 * implementation. This type is useful for the \c sizeof operator in generic functions. 86 */ 87 typedef MaskBool VectorEntryType; 88 89 /** 90 * The \c VectorType reveals the implementation-specific internal type used for the SIMD type. 91 */ 92 using VectorType = typename Storage::VectorType; 93 94 /** 95 * The associated Vector<T> type. 96 */ 97 using Vector = SSE::Vector<T>; 98 99 public: 100 Vc_FREE_STORE_OPERATORS_ALIGNED(16); 101 static constexpr size_t Size = SSE::VectorTraits<T>::Size; 102 static constexpr size_t MemoryAlignment = Size; 103 static constexpr std::size_t size() { return Size; } 104 105 // abstracts the way Masks are passed to functions, it can easily be changed to const ref here 106 #if defined Vc_MSVC && defined _WIN32 107 typedef const Mask &Argument; 108 #else 109 typedef Mask Argument; 110 #endif 111 112 Vc_INTRINSIC Mask() = default; 113 Vc_INTRINSIC Mask(const Mask &) = default; 114 Vc_INTRINSIC Mask &operator=(const Mask &) = default; 115 116 Vc_INTRINSIC Mask(const __m128 &x) : d(sse_cast<VectorType>(x)) {} 117 Vc_INTRINSIC Mask(const __m128d &x) : d(sse_cast<VectorType>(x)) {} 118 Vc_INTRINSIC Mask(const __m128i &x) : d(sse_cast<VectorType>(x)) {} 119 Vc_INTRINSIC explicit Mask(VectorSpecialInitializerZero) : Mask(_mm_setzero_ps()) {} 120 Vc_INTRINSIC explicit Mask(VectorSpecialInitializerOne) : Mask(SSE::_mm_setallone_ps()) {} 121 Vc_INTRINSIC explicit Mask(bool b) : Mask(b ? SSE::_mm_setallone_ps() : _mm_setzero_ps()) {} 122 Vc_INTRINSIC static Mask Zero() { return Mask{Vc::Zero}; } 123 Vc_INTRINSIC static Mask One() { return Mask{Vc::One}; } 124 125 // implicit cast 126 template <typename U> 127 Vc_INTRINSIC Mask( 128 U &&rhs, Common::enable_if_mask_converts_implicitly<Mask, T, U> = nullarg) 129 : d(sse_cast<VectorType>( 130 Detail::mask_cast<Traits::simd_vector_size<U>::value, Size, __m128>( 131 rhs.dataI()))) 132 { 133 } 134 135 #if Vc_IS_VERSION_1 136 // explicit cast, implemented via simd_cast (implementation in sse/simd_cast.h) 137 template <typename U> 138 Vc_DEPRECATED("use simd_cast instead of explicit type casting to convert between " 139 "mask types") Vc_INTRINSIC 140 explicit Mask(U &&rhs, 141 Common::enable_if_mask_converts_explicitly<T, U> = nullarg); 142 #endif 143 144 Vc_ALWAYS_INLINE explicit Mask(const bool *mem) { load(mem); } 145 template<typename Flags> Vc_ALWAYS_INLINE explicit Mask(const bool *mem, Flags f) { load(mem, f); } 146 147 Vc_ALWAYS_INLINE_L void load(const bool *mem) Vc_ALWAYS_INLINE_R; 148 template<typename Flags> Vc_ALWAYS_INLINE void load(const bool *mem, Flags) { load(mem); } 149 150 Vc_ALWAYS_INLINE_L void store(bool *) const Vc_ALWAYS_INLINE_R; 151 template<typename Flags> Vc_ALWAYS_INLINE void store(bool *mem, Flags) const { store(mem); } 152 153 Vc_ALWAYS_INLINE Vc_PURE bool operator==(const Mask &rhs) const 154 { 155 return Detail::is_equal<Size>(dataF(), rhs.dataF()); 156 } 157 Vc_ALWAYS_INLINE Vc_PURE bool operator!=(const Mask &rhs) const 158 { 159 return Detail::is_not_equal<Size>(dataF(), rhs.dataF()); 160 } 161 162 Vc_ALWAYS_INLINE Vc_PURE Mask operator!() const 163 { 164 #ifdef Vc_GCC 165 return ~dataI(); 166 #else 167 return _mm_andnot_si128(dataI(), SSE::_mm_setallone_si128()); 168 #endif 169 } 170 171 Vc_ALWAYS_INLINE Mask &operator&=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_and_ps(dataF(), rhs.dataF())); return *this; } 172 Vc_ALWAYS_INLINE Mask &operator|=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_or_ps (dataF(), rhs.dataF())); return *this; } 173 Vc_ALWAYS_INLINE Mask &operator^=(const Mask &rhs) { d.v() = SSE::sse_cast<VectorType>(_mm_xor_ps(dataF(), rhs.dataF())); return *this; } 174 175 Vc_ALWAYS_INLINE Vc_PURE Mask operator&(const Mask &rhs) const { return _mm_and_ps(dataF(), rhs.dataF()); } 176 Vc_ALWAYS_INLINE Vc_PURE Mask operator|(const Mask &rhs) const { return _mm_or_ps (dataF(), rhs.dataF()); } 177 Vc_ALWAYS_INLINE Vc_PURE Mask operator^(const Mask &rhs) const { return _mm_xor_ps(dataF(), rhs.dataF()); } 178 179 Vc_ALWAYS_INLINE Vc_PURE Mask operator&&(const Mask &rhs) const { return _mm_and_ps(dataF(), rhs.dataF()); } 180 Vc_ALWAYS_INLINE Vc_PURE Mask operator||(const Mask &rhs) const { return _mm_or_ps (dataF(), rhs.dataF()); } 181 182 Vc_ALWAYS_INLINE Vc_PURE bool isFull () const { return 183 #ifdef Vc_USE_PTEST 184 _mm_testc_si128(dataI(), SSE::_mm_setallone_si128()); // return 1 if (0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff) == (~0 & d.v()) 185 #else 186 _mm_movemask_epi8(dataI()) == 0xffff; 187 #endif 188 } 189 Vc_ALWAYS_INLINE Vc_PURE bool isNotEmpty() const { return 190 #ifdef Vc_USE_PTEST 191 0 == _mm_testz_si128(dataI(), dataI()); // return 1 if (0, 0, 0, 0) == (d.v() & d.v()) 192 #else 193 _mm_movemask_epi8(dataI()) != 0x0000; 194 #endif 195 } 196 Vc_ALWAYS_INLINE Vc_PURE bool isEmpty() const { return 197 #ifdef Vc_USE_PTEST 198 0 != _mm_testz_si128(dataI(), dataI()); // return 1 if (0, 0, 0, 0) == (d.v() & d.v()) 199 #else 200 _mm_movemask_epi8(dataI()) == 0x0000; 201 #endif 202 } 203 Vc_ALWAYS_INLINE Vc_PURE bool isMix() const { 204 #ifdef Vc_USE_PTEST 205 return _mm_test_mix_ones_zeros(dataI(), SSE::_mm_setallone_si128()); 206 #else 207 const int tmp = _mm_movemask_epi8(dataI()); 208 return tmp != 0 && (tmp ^ 0xffff) != 0; 209 #endif 210 } 211 212 Vc_ALWAYS_INLINE Vc_PURE int shiftMask() const { return _mm_movemask_epi8(dataI()); } 213 214 Vc_ALWAYS_INLINE Vc_PURE int toInt() const { return Detail::mask_to_int<Size>(dataI()); } 215 216 Vc_ALWAYS_INLINE Vc_PURE VectorType data() const { return d.v(); } 217 Vc_ALWAYS_INLINE Vc_PURE __m128 dataF() const { return SSE::sse_cast<__m128 >(d.v()); } 218 Vc_ALWAYS_INLINE Vc_PURE __m128i dataI() const { return SSE::sse_cast<__m128i>(d.v()); } 219 Vc_ALWAYS_INLINE Vc_PURE __m128d dataD() const { return SSE::sse_cast<__m128d>(d.v()); } 220 221 private: 222 friend reference; 223 static Vc_INTRINSIC Vc_PURE value_type get(const Mask &m, int i) noexcept 224 { 225 return MaskBool(m.d.m(i)); 226 } 227 template <typename U> 228 static Vc_INTRINSIC void set(Mask &m, int i, 229 U &&v) noexcept(noexcept(MaskBool(std::declval<U>()))) 230 { 231 m.d.set(i, MaskBool(std::forward<U>(v))); 232 } 233 234 public: 235 /** 236 * \note the returned object models the concept of a reference and 237 * as such it can exist longer than the data it is referencing. 238 * \note to avoid lifetime issues, we strongly advice not to store 239 * any reference objects. 240 */ 241 Vc_ALWAYS_INLINE reference operator[](size_t index) noexcept 242 { 243 return {*this, int(index)}; 244 } 245 Vc_ALWAYS_INLINE Vc_PURE value_type operator[](size_t index) const noexcept 246 { 247 return get(*this, index); 248 } 249 250 Vc_ALWAYS_INLINE Vc_PURE int count() const 251 { 252 return Detail::mask_count<Size>(dataI()); 253 } 254 255 /** 256 * Returns the index of the first one in the mask. 257 * 258 * The return value is undefined if the mask is empty. 259 */ 260 Vc_ALWAYS_INLINE_L Vc_PURE_L int firstOne() const Vc_ALWAYS_INLINE_R Vc_PURE_R; 261 262 template <typename G> static Vc_INTRINSIC_L Mask generate(G &&gen) Vc_INTRINSIC_R; 263 Vc_INTRINSIC_L Vc_PURE_L Mask shifted(int amount) const Vc_INTRINSIC_R Vc_PURE_R; 264 265 private: 266 #ifdef Vc_COMPILE_BENCHMARKS 267 public: 268 #endif 269 Storage d; 270 }; 271 template <typename T> constexpr size_t Mask<T, VectorAbi::Sse>::Size; 272 template <typename T> constexpr size_t Mask<T, VectorAbi::Sse>::MemoryAlignment; 273 274 } // namespace Vc 275 276 #include "mask.tcc" 277 278 #endif // VC_SSE_MASK_H_ 279