1 //=================================================================================================
2 /*!
3 // \file blaze/math/simd/Max.h
4 // \brief Header file for the SIMD max functionality
5 //
6 // Copyright (C) 2012-2020 Klaus Iglberger - All Rights Reserved
7 //
8 // This file is part of the Blaze library. You can redistribute it and/or modify it under
9 // the terms of the New (Revised) BSD License. Redistribution and use in source and binary
10 // forms, with or without modification, are permitted provided that the following conditions
11 // are met:
12 //
13 // 1. Redistributions of source code must retain the above copyright notice, this list of
14 // conditions and the following disclaimer.
15 // 2. Redistributions in binary form must reproduce the above copyright notice, this list
16 // of conditions and the following disclaimer in the documentation and/or other materials
17 // provided with the distribution.
18 // 3. Neither the names of the Blaze development group nor the names of its contributors
19 // may be used to endorse or promote products derived from this software without specific
20 // prior written permission.
21 //
22 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
23 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
25 // SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 */
33 //=================================================================================================
34
35 #ifndef _BLAZE_MATH_SIMD_MAX_H_
36 #define _BLAZE_MATH_SIMD_MAX_H_
37
38
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42
43 #include <blaze/math/simd/BasicTypes.h>
44 #include <blaze/math/typetraits/IsSIMDPack.h>
45 #include <blaze/system/Inline.h>
46 #include <blaze/system/Vectorization.h>
47 #include <blaze/util/IntegralConstant.h>
48
49
50 namespace blaze {
51
52 //=================================================================================================
53 //
54 // 8-BIT INTEGRAL SIMD TYPES
55 //
56 //=================================================================================================
57
58 //*************************************************************************************************
59 /*!\brief Componentwise maximum of two vectors of 8-bit signed integral SIMD values.
60 // \ingroup simd
61 //
62 // \param a The left-hand side SIMD operand.
63 // \param b The right-hand side SIMD operand.
64 // \return The resulting vector.
65 //
66 // This operation is only available for SSE4, AVX2, and AVX-512.
67 */
max(const SIMDint8 & a,const SIMDint8 & b)68 BLAZE_ALWAYS_INLINE const SIMDint8 max( const SIMDint8& a, const SIMDint8& b ) noexcept
69 #if BLAZE_AVX512BW_MODE
70 {
71 return _mm512_max_epi8( (*a).value, (*b).value );
72 }
73 #elif BLAZE_AVX2_MODE
74 {
75 return _mm256_max_epi8( (*a).value, (*b).value );
76 }
77 #elif BLAZE_SSE4_MODE
78 {
79 return _mm_max_epi8( (*a).value, (*b).value );
80 }
81 #else
82 = delete;
83 #endif
84 //*************************************************************************************************
85
86
87 //*************************************************************************************************
88 /*!\brief Componentwise maximum of two vectors of 8-bit unsigned integral SIMD values.
89 // \ingroup simd
90 //
91 // \param a The left-hand side SIMD operand.
92 // \param b The right-hand side SIMD operand.
93 // \return The resulting vector.
94 //
95 // This operation is only available for SSE2, AVX2, and AVX-512.
96 */
max(const SIMDuint8 & a,const SIMDuint8 & b)97 BLAZE_ALWAYS_INLINE const SIMDuint8 max( const SIMDuint8& a, const SIMDuint8& b ) noexcept
98 #if BLAZE_AVX512BW_MODE
99 {
100 return _mm512_max_epu8( (*a).value, (*b).value );
101 }
102 #elif BLAZE_AVX2_MODE
103 {
104 return _mm256_max_epu8( (*a).value, (*b).value );
105 }
106 #elif BLAZE_SSE2_MODE
107 {
108 return _mm_max_epu8( (*a).value, (*b).value );
109 }
110 #else
111 = delete;
112 #endif
113 //*************************************************************************************************
114
115
116
117
118 //=================================================================================================
119 //
120 // 16-BIT INTEGRAL SIMD TYPES
121 //
122 //=================================================================================================
123
124 //*************************************************************************************************
125 /*!\brief Componentwise maximum of two vectors of 16-bit signed integral SIMD values.
126 // \ingroup simd
127 //
128 // \param a The left-hand side SIMD operand.
129 // \param b The right-hand side SIMD operand.
130 // \return The resulting vector.
131 //
132 // This operation is only available for SSE2, AVX2, and AVX-512.
133 */
max(const SIMDint16 & a,const SIMDint16 & b)134 BLAZE_ALWAYS_INLINE const SIMDint16 max( const SIMDint16& a, const SIMDint16& b ) noexcept
135 #if BLAZE_AVX512BW_MODE
136 {
137 return _mm512_max_epi16( (*a).value, (*b).value );
138 }
139 #elif BLAZE_AVX2_MODE
140 {
141 return _mm256_max_epi16( (*a).value, (*b).value );
142 }
143 #elif BLAZE_SSE2_MODE
144 {
145 return _mm_max_epi16( (*a).value, (*b).value );
146 }
147 #else
148 = delete;
149 #endif
150 //*************************************************************************************************
151
152
153 //*************************************************************************************************
154 /*!\brief Componentwise maximum of two vectors of 16-bit unsigned integral SIMD values.
155 // \ingroup simd
156 //
157 // \param a The left-hand side SIMD operand.
158 // \param b The right-hand side SIMD operand.
159 // \return The resulting vector.
160 //
161 // This operation is only available for SSE4, AVX2, and AVX-512.
162 */
max(const SIMDuint16 & a,const SIMDuint16 & b)163 BLAZE_ALWAYS_INLINE const SIMDuint16 max( const SIMDuint16& a, const SIMDuint16& b ) noexcept
164 #if BLAZE_AVX512BW_MODE
165 {
166 return _mm512_max_epu16( (*a).value, (*b).value );
167 }
168 #elif BLAZE_AVX2_MODE
169 {
170 return _mm256_max_epu16( (*a).value, (*b).value );
171 }
172 #elif BLAZE_SSE4_MODE
173 {
174 return _mm_max_epu16( (*a).value, (*b).value );
175 }
176 #else
177 = delete;
178 #endif
179 //*************************************************************************************************
180
181
182
183
184 //=================================================================================================
185 //
186 // 32-BIT INTEGRAL SIMD TYPES
187 //
188 //=================================================================================================
189
190 //*************************************************************************************************
191 /*!\brief Componentwise maximim of two vectors of 32-bit signed integral SIMD values.
192 // \ingroup simd
193 //
194 // \param a The left-hand side SIMD operand.
195 // \param b The right-hand side SIMD operand.
196 // \return The resulting vector.
197 //
198 // This operation is only available for SSE4, AVX2, MIC, and AVX-512.
199 */
max(const SIMDint32 & a,const SIMDint32 & b)200 BLAZE_ALWAYS_INLINE const SIMDint32 max( const SIMDint32& a, const SIMDint32& b ) noexcept
201 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
202 {
203 return _mm512_max_epi32( (*a).value, (*b).value );
204 }
205 #elif BLAZE_AVX2_MODE
206 {
207 return _mm256_max_epi32( (*a).value, (*b).value );
208 }
209 #elif BLAZE_SSE4_MODE
210 {
211 return _mm_max_epi32( (*a).value, (*b).value );
212 }
213 #else
214 = delete;
215 #endif
216 //*************************************************************************************************
217
218
219 //*************************************************************************************************
220 /*!\brief Componentwise maximum of two vectors of 32-bit unsigned integral SIMD values.
221 // \ingroup simd
222 //
223 // \param a The left-hand side SIMD operand.
224 // \param b The right-hand side SIMD operand.
225 // \return The resulting vector.
226 //
227 // This operation is only available for SSE4, AVX2, MIC, and AVX-512.
228 */
max(const SIMDuint32 & a,const SIMDuint32 & b)229 BLAZE_ALWAYS_INLINE const SIMDuint32 max( const SIMDuint32& a, const SIMDuint32& b ) noexcept
230 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
231 {
232 return _mm512_max_epu32( (*a).value, (*b).value );
233 }
234 #elif BLAZE_AVX2_MODE
235 {
236 return _mm256_max_epu32( (*a).value, (*b).value );
237 }
238 #elif BLAZE_SSE4_MODE
239 {
240 return _mm_max_epu32( (*a).value, (*b).value );
241 }
242 #else
243 = delete;
244 #endif
245 //*************************************************************************************************
246
247
248
249
250 //=================================================================================================
251 //
252 // 32-BIT FLOATING POINT SIMD TYPES
253 //
254 //=================================================================================================
255
256 //*************************************************************************************************
257 /*!\brief Componentwise maximum of two vectors of single precision floating point SIMD values.
258 // \ingroup simd
259 //
260 // \param a The left-hand side SIMD operand.
261 // \param b The right-hand side SIMD operand.
262 // \return The resulting vector.
263 //
264 // This operation is only available for SSE, AVX, MIC, and AVX-512.
265 */
266 template< typename T1 // Type of the left-hand side operand
267 , typename T2 > // Type of the right-hand side operand
268 BLAZE_ALWAYS_INLINE const SIMDfloat
max(const SIMDf32<T1> & a,const SIMDf32<T2> & b)269 max( const SIMDf32<T1>& a, const SIMDf32<T2>& b ) noexcept
270 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
271 {
272 return _mm512_max_ps( (*a).eval().value, (*b).eval().value );
273 }
274 #elif BLAZE_AVX_MODE
275 {
276 return _mm256_max_ps( (*a).eval().value, (*b).eval().value );
277 }
278 #elif BLAZE_SSE_MODE
279 {
280 return _mm_max_ps( (*a).eval().value, (*b).eval().value );
281 }
282 #else
283 = delete;
284 #endif
285 //*************************************************************************************************
286
287
288
289
290 //=================================================================================================
291 //
292 // 64-BIT FLOATING POINT SIMD TYPES
293 //
294 //=================================================================================================
295
296 //*************************************************************************************************
297 /*!\brief Componentwise maximum of two vectors of double precision floating point SIMD values.
298 // \ingroup simd
299 //
300 // \param a The left-hand side SIMD operand.
301 // \param b The right-hand side SIMD operand.
302 // \return The resulting vector.
303 //
304 // This operation is only available for SSE2, AVX, MIC, and AVX-512.
305 */
306 template< typename T1 // Type of the left-hand side operand
307 , typename T2 > // Type of the right-hand side operand
308 BLAZE_ALWAYS_INLINE const SIMDdouble
max(const SIMDf64<T1> & a,const SIMDf64<T2> & b)309 max( const SIMDf64<T1>& a, const SIMDf64<T2>& b ) noexcept
310 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
311 {
312 return _mm512_max_pd( (*a).eval().value, (*b).eval().value );
313 }
314 #elif BLAZE_AVX_MODE
315 {
316 return _mm256_max_pd( (*a).eval().value, (*b).eval().value );
317 }
318 #elif BLAZE_SSE2_MODE
319 {
320 return _mm_max_pd( (*a).eval().value, (*b).eval().value );
321 }
322 #else
323 = delete;
324 #endif
325 //*************************************************************************************************
326
327 } // namespace blaze
328
329 #endif
330