1 //=================================================================================================
2 /*!
3 //  \file blaze/math/simd/Prod.h
4 //  \brief Header file for the SIMD multiplication reduction functionality
5 //
6 //  Copyright (C) 2012-2020 Klaus Iglberger - All Rights Reserved
7 //
8 //  This file is part of the Blaze library. You can redistribute it and/or modify it under
9 //  the terms of the New (Revised) BSD License. Redistribution and use in source and binary
10 //  forms, with or without modification, are permitted provided that the following conditions
11 //  are met:
12 //
13 //  1. Redistributions of source code must retain the above copyright notice, this list of
14 //     conditions and the following disclaimer.
15 //  2. Redistributions in binary form must reproduce the above copyright notice, this list
16 //     of conditions and the following disclaimer in the documentation and/or other materials
17 //     provided with the distribution.
18 //  3. Neither the names of the Blaze development group nor the names of its contributors
19 //     may be used to endorse or promote products derived from this software without specific
20 //     prior written permission.
21 //
22 //  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
23 //  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 //  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
25 //  SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 //  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 //  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 //  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 //  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 //  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 //  DAMAGE.
32 */
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SIMD_PROD_H_
36 #define _BLAZE_MATH_SIMD_PROD_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
44 #include <blaze/math/simd/BasicTypes.h>
45 #include <blaze/system/Inline.h>
46 #include <blaze/system/Vectorization.h>
47 
48 
49 namespace blaze {
50 
51 //=================================================================================================
52 //
53 //  8-BIT INTEGRAL SIMD TYPES
54 //
55 //=================================================================================================
56 
57 //*************************************************************************************************
58 /*!\brief Returns the product of all elements in the 8-bit integral SIMD vector.
59 // \ingroup simd
60 //
61 // \param a The vector to be reduced by multiplication.
62 // \return The product of all vector elements.
63 */
64 template< typename T >  // Type of the SIMD element
prod(const SIMDi8<T> & a)65 BLAZE_ALWAYS_INLINE ValueType_t<T> prod( const SIMDi8<T>& a ) noexcept
66 {
67 #if BLAZE_AVX512BW_MODE
68    return (*a)[ 0] * (*a)[ 1] * (*a)[ 2] * (*a)[ 3] * (*a)[ 4] * (*a)[ 5] * (*a)[ 6] * (*a)[ 7] *
69           (*a)[ 8] * (*a)[ 9] * (*a)[10] * (*a)[11] * (*a)[12] * (*a)[13] * (*a)[14] * (*a)[15] *
70           (*a)[16] * (*a)[17] * (*a)[18] * (*a)[19] * (*a)[20] * (*a)[21] * (*a)[22] * (*a)[23] *
71           (*a)[24] * (*a)[25] * (*a)[26] * (*a)[27] * (*a)[28] * (*a)[29] * (*a)[30] * (*a)[31] *
72           (*a)[32] * (*a)[33] * (*a)[34] * (*a)[35] * (*a)[36] * (*a)[37] * (*a)[38] * (*a)[39] *
73           (*a)[40] * (*a)[41] * (*a)[42] * (*a)[43] * (*a)[44] * (*a)[45] * (*a)[46] * (*a)[47] *
74           (*a)[48] * (*a)[49] * (*a)[50] * (*a)[51] * (*a)[52] * (*a)[53] * (*a)[54] * (*a)[55] *
75           (*a)[56] * (*a)[57] * (*a)[58] * (*a)[59] * (*a)[60] * (*a)[61] * (*a)[62] * (*a)[63];
76 #elif BLAZE_AVX2_MODE
77    return (*a)[ 0] * (*a)[ 1] * (*a)[ 2] * (*a)[ 3] * (*a)[ 4] * (*a)[ 5] * (*a)[ 6] * (*a)[ 7] *
78           (*a)[ 8] * (*a)[ 9] * (*a)[10] * (*a)[11] * (*a)[12] * (*a)[13] * (*a)[14] * (*a)[15] *
79           (*a)[16] * (*a)[17] * (*a)[18] * (*a)[19] * (*a)[20] * (*a)[21] * (*a)[22] * (*a)[23] *
80           (*a)[24] * (*a)[25] * (*a)[26] * (*a)[27] * (*a)[28] * (*a)[29] * (*a)[30] * (*a)[31];
81 #elif BLAZE_SSE2_MODE
82    return (*a)[ 0] * (*a)[ 1] * (*a)[ 2] * (*a)[ 3] * (*a)[ 4] * (*a)[ 5] * (*a)[ 6] * (*a)[ 7] *
83           (*a)[ 8] * (*a)[ 9] * (*a)[10] * (*a)[11] * (*a)[12] * (*a)[13] * (*a)[14] * (*a)[15];
84 #else
85    return (*a).value;
86 #endif
87 }
88 //*************************************************************************************************
89 
90 
91 //*************************************************************************************************
92 /*!\brief Returns the product of all elements in the 8-bit integral complex SIMD vector.
93 // \ingroup simd
94 //
95 // \param a The vector to be reduced by multiplication.
96 // \return The product of all vector elements.
97 */
98 template< typename T >  // Type of the SIMD element
prod(const SIMDci8<T> & a)99 BLAZE_ALWAYS_INLINE const ValueType_t<T> prod( const SIMDci8<T>& a ) noexcept
100 {
101 #if BLAZE_AVX512BW_MODE
102    return complex<int8_t>( (*a)[ 0] * (*a)[ 1] * (*a)[ 2] * (*a)[ 3] * (*a)[ 4] * (*a)[ 5] * (*a)[ 6] * (*a)[ 7] *
103                            (*a)[ 8] * (*a)[ 9] * (*a)[10] * (*a)[11] * (*a)[12] * (*a)[13] * (*a)[14] * (*a)[15] *
104                            (*a)[16] * (*a)[17] * (*a)[18] * (*a)[19] * (*a)[20] * (*a)[21] * (*a)[22] * (*a)[23] *
105                            (*a)[24] * (*a)[25] * (*a)[26] * (*a)[27] * (*a)[28] * (*a)[29] * (*a)[30] * (*a)[31] );
106 #elif BLAZE_AVX2_MODE
107    return complex<int8_t>( (*a)[0] * (*a)[1] * (*a)[ 2] * (*a)[ 3] * (*a)[ 4] * (*a)[ 5] * (*a)[ 6] * (*a)[ 7] *
108                            (*a)[8] * (*a)[9] * (*a)[10] * (*a)[11] * (*a)[12] * (*a)[13] * (*a)[14] * (*a)[15] );
109 #elif BLAZE_SSE2_MODE
110    return complex<int8_t>( (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3] * (*a)[4] * (*a)[5] * (*a)[6] * (*a)[7] );
111 #else
112    return (*a).value;
113 #endif
114 }
115 //*************************************************************************************************
116 
117 
118 
119 
120 //=================================================================================================
121 //
122 //  16-BIT INTEGRAL SIMD TYPES
123 //
124 //=================================================================================================
125 
126 //*************************************************************************************************
127 /*!\brief Returns the product of all elements in the 16-bit integral SIMD vector.
128 // \ingroup simd
129 //
130 // \param a The vector to be reduced by multiplication.
131 // \return The product of all vector elements.
132 */
133 template< typename T >  // Type of the SIMD element
prod(const SIMDi16<T> & a)134 BLAZE_ALWAYS_INLINE ValueType_t<T> prod( const SIMDi16<T>& a ) noexcept
135 {
136 #if BLAZE_AVX512BW_MODE
137    return (*a)[ 0] * (*a)[ 1] * (*a)[ 2] * (*a)[ 3] * (*a)[ 4] * (*a)[ 5] * (*a)[ 6] * (*a)[ 7] *
138           (*a)[ 8] * (*a)[ 9] * (*a)[10] * (*a)[11] * (*a)[12] * (*a)[13] * (*a)[14] * (*a)[15] *
139           (*a)[16] * (*a)[17] * (*a)[18] * (*a)[19] * (*a)[20] * (*a)[21] * (*a)[22] * (*a)[23] *
140           (*a)[24] * (*a)[25] * (*a)[26] * (*a)[27] * (*a)[28] * (*a)[29] * (*a)[30] * (*a)[31];
141 #elif BLAZE_AVX2_MODE
142    return (*a)[ 0] * (*a)[ 1] * (*a)[ 2] * (*a)[ 3] * (*a)[ 4] * (*a)[ 5] * (*a)[ 6] * (*a)[ 7] *
143           (*a)[ 8] * (*a)[ 9] * (*a)[10] * (*a)[11] * (*a)[12] * (*a)[13] * (*a)[14] * (*a)[15];
144 #elif BLAZE_SSE2_MODE
145    return (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3] * (*a)[4] * (*a)[5] * (*a)[6] * (*a)[7];
146 #else
147    return (*a).value;
148 #endif
149 }
150 //*************************************************************************************************
151 
152 
153 //*************************************************************************************************
154 /*!\brief Returns the product of all elements in the 16-bit integral complex SIMD vector.
155 // \ingroup simd
156 //
157 // \param a The vector to be reduced by multiplication.
158 // \return The product of all vector elements.
159 */
160 template< typename T >  // Type of the SIMD element
prod(const SIMDci16<T> & a)161 BLAZE_ALWAYS_INLINE const ValueType_t<T> prod( const SIMDci16<T>& a ) noexcept
162 {
163 #if BLAZE_AVX512BW_MODE
164    return complex<int16_t>( (*a)[0] * (*a)[1] * (*a)[ 2] * (*a)[ 3] * (*a)[ 4] * (*a)[ 5] * (*a)[ 6] * (*a)[ 7] *
165                             (*a)[8] * (*a)[9] * (*a)[10] * (*a)[11] * (*a)[12] * (*a)[13] * (*a)[14] * (*a)[15] );
166 #elif BLAZE_AVX2_MODE
167    return complex<int16_t>( (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3] * (*a)[4] * (*a)[5] * (*a)[6] * (*a)[7] );
168 #elif BLAZE_SSE2_MODE
169    return complex<int16_t>( (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3] );
170 #else
171    return (*a).value;
172 #endif
173 }
174 //*************************************************************************************************
175 
176 
177 
178 
179 //=================================================================================================
180 //
181 //  32-BIT INTEGRAL SIMD TYPES
182 //
183 //=================================================================================================
184 
185 //*************************************************************************************************
186 /*!\brief Returns the product of all elements in the 32-bit integral SIMD vector.
187 // \ingroup simd
188 //
189 // \param a The vector to be reduced by multiplication.
190 // \return The product of all vector elements.
191 */
192 template< typename T >  // Type of the SIMD element
prod(const SIMDi32<T> & a)193 BLAZE_ALWAYS_INLINE ValueType_t<T> prod( const SIMDi32<T>& a ) noexcept
194 {
195 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
196    return _mm512_reduce_mul_epi32( (*a).value );
197 #elif BLAZE_AVX2_MODE
198    const __m256i b( _mm256_mullo_epi32( (*a).value, _mm256_shuffle_epi32( (*a).value, _MM_SHUFFLE(1,0,3,2) ) ) );
199    const __m256i c( _mm256_mullo_epi32( b, _mm256_shuffle_epi32( b, _MM_SHUFFLE(2,3,0,1) ) ) );
200    const __m128i d( _mm_mullo_epi32( _mm256_extracti128_si256( c, 1 ), _mm256_castsi256_si128( c ) ) );
201    return _mm_extract_epi32( d, 0 );
202 #elif BLAZE_SSE4_MODE
203    const __m128i b( _mm_mullo_epi32( (*a).value, _mm_shuffle_epi32( (*a).value, _MM_SHUFFLE(1,0,3,2) ) ) );
204    return _mm_extract_epi32( _mm_mullo_epi32( b, _mm_shuffle_epi32( b, 1U ) ), 0 );
205 #elif BLAZE_SSE2_MODE
206    return (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3];
207 #else
208    return (*a).value;
209 #endif
210 }
211 //*************************************************************************************************
212 
213 
214 //*************************************************************************************************
215 /*!\brief Returns the product of all elements in the 32-bit integral complex SIMD vector.
216 // \ingroup simd
217 //
218 // \param a The vector to be reduced by multiplication.
219 // \return The product of all vector elements.
220 */
221 template< typename T >  // Type of the SIMD element
prod(const SIMDci32<T> & a)222 BLAZE_ALWAYS_INLINE const ValueType_t<T> prod( const SIMDci32<T>& a ) noexcept
223 {
224 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
225    return complex<int32_t>( (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3] * (*a)[4] * (*a)[5] * (*a)[6] * (*a)[7] );
226 #elif BLAZE_AVX2_MODE
227    return complex<int32_t>( (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3] );
228 #elif BLAZE_SSE2_MODE
229    return complex<int32_t>( (*a)[0] * (*a)[1] );
230 #else
231    return (*a).value;
232 #endif
233 }
234 //*************************************************************************************************
235 
236 
237 
238 
239 //=================================================================================================
240 //
241 //  64-BIT INTEGRAL SIMD TYPES
242 //
243 //=================================================================================================
244 
245 //*************************************************************************************************
246 /*!\brief Returns the product of all elements in the 64-bit integral SIMD vector.
247 // \ingroup simd
248 //
249 // \param a The vector to be reduced by multiplication.
250 // \return The product of all vector elements.
251 */
252 template< typename T >  // Type of the SIMD element
prod(const SIMDi64<T> & a)253 BLAZE_ALWAYS_INLINE ValueType_t<T> prod( const SIMDi64<T>& a ) noexcept
254 {
255 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
256    return (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3] * (*a)[4] * (*a)[5] * (*a)[6] * (*a)[7];
257 #elif BLAZE_AVX2_MODE
258    return (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3];
259 #elif BLAZE_SSE2_MODE
260    return (*a)[0] * (*a)[1];
261 #else
262    return (*a).value;
263 #endif
264 }
265 //*************************************************************************************************
266 
267 
268 //*************************************************************************************************
269 /*!\brief Returns the product of all elements in the 64-bit integral complex SIMD vector.
270 // \ingroup simd
271 //
272 // \param a The vector to be reduced by multiplication.
273 // \return The product of all vector elements.
274 */
275 template< typename T >  // Type of the SIMD element
prod(const SIMDci64<T> & a)276 BLAZE_ALWAYS_INLINE const ValueType_t<T> prod( const SIMDci64<T>& a ) noexcept
277 {
278 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
279    return complex<int64_t>( (*a)[0] * (*a)[1] * (*a)[2] * (*a)[3] );
280 #elif BLAZE_AVX2_MODE
281    return complex<int64_t>( (*a)[0] * (*a)[1] );
282 #elif BLAZE_SSE2_MODE
283    return (*a)[0];
284 #else
285    return (*a).value;
286 #endif
287 }
288 //*************************************************************************************************
289 
290 
291 
292 
293 //=================================================================================================
294 //
295 //  32-BIT FLOATING POINT SIMD TYPES
296 //
297 //=================================================================================================
298 
299 //*************************************************************************************************
300 /*!\brief Returns the product of all elements in the single precision floating point SIMD vector.
301 // \ingroup simd
302 //
303 // \param a The vector to be reduced by multiplication.
304 // \return The product of all vector elements.
305 */
prod(const SIMDfloat & a)306 BLAZE_ALWAYS_INLINE float prod( const SIMDfloat& a ) noexcept
307 {
308 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
309    return _mm512_reduce_mul_ps( a.value );
310 #elif BLAZE_AVX_MODE
311    const __m256 b( _mm256_mul_ps( a.value, _mm256_permute2f128_ps( a.value, a.value, 1 ) ) );
312    const __m256 c( _mm256_mul_ps( b, _mm256_shuffle_ps( b, b, _MM_SHUFFLE(1,0,3,2) ) ) );
313    return _mm_cvtss_f32( _mm256_castps256_ps128( _mm256_mul_ps( c, _mm256_shuffle_ps( c, c, 1 ) ) ) );
314 #elif BLAZE_SSE_MODE
315    const __m128 b = _mm_mul_ps( a.value, _mm_movehl_ps( a.value, a.value ) );
316    return _mm_cvtss_f32( _mm_mul_ss( b, _mm_shuffle_ps( b, b, 1U ) ) );
317 #else
318    return a.value;
319 #endif
320 }
321 //*************************************************************************************************
322 
323 
324 //*************************************************************************************************
325 /*!\brief Returns the product of all elements in the single precision complex SIMD vector.
326 // \ingroup simd
327 //
328 // \param a The vector to be reduced by multiplication.
329 // \return The product of all vector elements.
330 */
prod(const SIMDcfloat & a)331 BLAZE_ALWAYS_INLINE const complex<float> prod( const SIMDcfloat& a ) noexcept
332 {
333 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
334    return complex<float>( a[0] * a[1] * a[2] * a[3] * a[4] * a[5] * a[6] * a[7] );
335 #elif BLAZE_AVX_MODE
336    return complex<float>( a[0] * a[1] * a[2] * a[3] );
337 #elif BLAZE_SSE_MODE
338    return complex<float>( a[0] * a[1] );
339 #else
340    return a.value;
341 #endif
342 }
343 //*************************************************************************************************
344 
345 
346 
347 
348 //=================================================================================================
349 //
350 //  64-BIT FLOATING POINT SIMD TYPES
351 //
352 //=================================================================================================
353 
354 //*************************************************************************************************
355 /*!\brief Returns the product of all elements in the double precision floating point SIMD vector.
356 // \ingroup simd
357 //
358 // \param a The vector to be reduced by multiplication.
359 // \return The produdct of all vector elements.
360 */
prod(const SIMDdouble & a)361 BLAZE_ALWAYS_INLINE double prod( const SIMDdouble& a ) noexcept
362 {
363 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
364    return _mm512_reduce_mul_pd( a.value );
365 #elif BLAZE_AVX_MODE
366    const __m256d b( _mm256_mul_pd( a.value, _mm256_permute2f128_pd( a.value, a.value, 1 ) ) );
367    return _mm_cvtsd_f64( _mm256_castpd256_pd128( _mm256_mul_pd( b, _mm256_shuffle_pd( b, b, 1 ) ) ) );
368 #elif BLAZE_SSE2_MODE
369    return _mm_cvtsd_f64( _mm_mul_sd( a.value, _mm_unpackhi_pd( a.value, a.value ) ) );
370 #else
371    return a.value;
372 #endif
373 }
374 //*************************************************************************************************
375 
376 
377 //*************************************************************************************************
378 /*!\brief Returns the product of all elements in the double precision complex SIMD vector.
379 // \ingroup simd
380 //
381 // \param a The vector to be reduced by multiplication.
382 // \return The product of all vector elements.
383 */
prod(const SIMDcdouble & a)384 BLAZE_ALWAYS_INLINE const complex<double> prod( const SIMDcdouble& a ) noexcept
385 {
386 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
387    return complex<double>( a[0] * a[1] * a[2] * a[3] );
388 #elif BLAZE_AVX_MODE
389    return complex<double>( a[0] * a[1] );
390 #elif BLAZE_SSE2_MODE
391    return a[0];
392 #else
393    return a.value;
394 #endif
395 }
396 //*************************************************************************************************
397 
398 } // namespace blaze
399 
400 #endif
401