1 //=================================================================================================
2 /*!
3 //  \file blaze/math/simd/Sum.h
4 //  \brief Header file for the SIMD addition reduction functionality
5 //
6 //  Copyright (C) 2012-2020 Klaus Iglberger - All Rights Reserved
7 //
8 //  This file is part of the Blaze library. You can redistribute it and/or modify it under
9 //  the terms of the New (Revised) BSD License. Redistribution and use in source and binary
10 //  forms, with or without modification, are permitted provided that the following conditions
11 //  are met:
12 //
13 //  1. Redistributions of source code must retain the above copyright notice, this list of
14 //     conditions and the following disclaimer.
15 //  2. Redistributions in binary form must reproduce the above copyright notice, this list
16 //     of conditions and the following disclaimer in the documentation and/or other materials
17 //     provided with the distribution.
18 //  3. Neither the names of the Blaze development group nor the names of its contributors
19 //     may be used to endorse or promote products derived from this software without specific
20 //     prior written permission.
21 //
22 //  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
23 //  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 //  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
25 //  SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 //  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 //  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 //  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 //  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 //  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 //  DAMAGE.
32 */
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SIMD_SUM_H_
36 #define _BLAZE_MATH_SIMD_SUM_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/Aliases.h>
44 #include <blaze/math/simd/BasicTypes.h>
45 #include <blaze/system/Inline.h>
46 #include <blaze/system/Vectorization.h>
47 
48 
49 namespace blaze {
50 
51 //=================================================================================================
52 //
53 //  8-BIT INTEGRAL SIMD TYPES
54 //
55 //=================================================================================================
56 
57 //*************************************************************************************************
58 /*!\brief Returns the sum of all elements in the 8-bit integral SIMD vector.
59 // \ingroup simd
60 //
61 // \param a The vector to be summed up.
62 // \return The sum of all vector elements.
63 */
64 template< typename T >  // Type of the SIMD element
sum(const SIMDi8<T> & a)65 BLAZE_ALWAYS_INLINE ValueType_t<T> sum( const SIMDi8<T>& a ) noexcept
66 {
67 #if BLAZE_AVX512BW_MODE
68    return (*a)[ 0] + (*a)[ 1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
69           (*a)[ 8] + (*a)[ 9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15] +
70           (*a)[16] + (*a)[17] + (*a)[18] + (*a)[19] + (*a)[20] + (*a)[21] + (*a)[22] + (*a)[23] +
71           (*a)[24] + (*a)[25] + (*a)[26] + (*a)[27] + (*a)[28] + (*a)[29] + (*a)[30] + (*a)[31] +
72           (*a)[32] + (*a)[33] + (*a)[34] + (*a)[35] + (*a)[36] + (*a)[37] + (*a)[38] + (*a)[39] +
73           (*a)[40] + (*a)[41] + (*a)[42] + (*a)[43] + (*a)[44] + (*a)[45] + (*a)[46] + (*a)[47] +
74           (*a)[48] + (*a)[49] + (*a)[50] + (*a)[51] + (*a)[52] + (*a)[53] + (*a)[54] + (*a)[55] +
75           (*a)[56] + (*a)[57] + (*a)[58] + (*a)[59] + (*a)[60] + (*a)[61] + (*a)[62] + (*a)[63];
76 #elif BLAZE_AVX2_MODE
77    return (*a)[ 0] + (*a)[ 1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
78           (*a)[ 8] + (*a)[ 9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15] +
79           (*a)[16] + (*a)[17] + (*a)[18] + (*a)[19] + (*a)[20] + (*a)[21] + (*a)[22] + (*a)[23] +
80           (*a)[24] + (*a)[25] + (*a)[26] + (*a)[27] + (*a)[28] + (*a)[29] + (*a)[30] + (*a)[31];
81 #elif BLAZE_SSE2_MODE
82    return (*a)[ 0] + (*a)[ 1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
83           (*a)[ 8] + (*a)[ 9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15];
84 #else
85    return (*a).value;
86 #endif
87 }
88 //*************************************************************************************************
89 
90 
91 //*************************************************************************************************
92 /*!\brief Returns the sum of all elements in the 8-bit integral complex SIMD vector.
93 // \ingroup simd
94 //
95 // \param a The vector to be summed up.
96 // \return The sum of all vector elements.
97 */
98 template< typename T >  // Type of the SIMD element
sum(const SIMDci8<T> & a)99 BLAZE_ALWAYS_INLINE const ValueType_t<T> sum( const SIMDci8<T>& a ) noexcept
100 {
101 #if BLAZE_AVX512BW_MODE
102    return complex<int8_t>( (*a)[ 0] + (*a)[ 1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
103                            (*a)[ 8] + (*a)[ 9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15] +
104                            (*a)[16] + (*a)[17] + (*a)[18] + (*a)[19] + (*a)[20] + (*a)[21] + (*a)[22] + (*a)[23] +
105                            (*a)[24] + (*a)[25] + (*a)[26] + (*a)[27] + (*a)[28] + (*a)[29] + (*a)[30] + (*a)[31] );
106 #elif BLAZE_AVX2_MODE
107    return complex<int8_t>( (*a)[0] + (*a)[1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
108                            (*a)[8] + (*a)[9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15] );
109 #elif BLAZE_SSE2_MODE
110    return complex<int8_t>( (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7] );
111 #else
112    return (*a).value;
113 #endif
114 }
115 //*************************************************************************************************
116 
117 
118 
119 
120 //=================================================================================================
121 //
122 //  16-BIT INTEGRAL SIMD TYPES
123 //
124 //=================================================================================================
125 
126 //*************************************************************************************************
127 /*!\brief Returns the sum of all elements in the 16-bit integral SIMD vector.
128 // \ingroup simd
129 //
130 // \param a The vector to be summed up.
131 // \return The sum of all vector elements.
132 */
133 template< typename T >  // Type of the SIMD element
sum(const SIMDi16<T> & a)134 BLAZE_ALWAYS_INLINE ValueType_t<T> sum( const SIMDi16<T>& a ) noexcept
135 {
136 #if BLAZE_AVX512BW_MODE
137    const __m256i low ( _mm512_castsi512_si256( (*a).value ) );
138    const __m256i high( _mm512_extracti64x4_epi64( (*a).value, 1 ) );
139    const __m256i b   ( _mm256_hadd_epi16( low, high ) );
140    const __m256i c   ( _mm256_hadd_epi16( b, b ) );
141    const __m256i d   ( _mm256_hadd_epi16( c, c ) );
142    const __m256i e   ( _mm256_hadd_epi16( d, d ) );
143    const __m128i f   ( _mm_add_epi16( _mm256_extracti128_si256( e, 1 )
144                                     , _mm256_castsi256_si128( e ) ) );
145    return _mm_extract_epi16( f, 0 );
146 #elif BLAZE_AVX2_MODE
147    const __m256i b( _mm256_hadd_epi16( (*a).value, (*a).value ) );
148    const __m256i c( _mm256_hadd_epi16( b, b ) );
149    const __m256i d( _mm256_hadd_epi16( c, c ) );
150    const __m128i e( _mm_add_epi16( _mm256_extracti128_si256( d, 1 )
151                                  , _mm256_castsi256_si128( d ) ) );
152    return _mm_extract_epi16( e, 0 );
153 #elif BLAZE_SSSE3_MODE
154    const __m128i b( _mm_hadd_epi16( (*a).value, (*a).value ) );
155    const __m128i c( _mm_hadd_epi16( b, b ) );
156    const __m128i d( _mm_hadd_epi16( c, c ) );
157    return _mm_extract_epi16( d, 0 );
158 #elif BLAZE_SSE2_MODE
159    return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7];
160 #else
161    return (*a).value;
162 #endif
163 }
164 //*************************************************************************************************
165 
166 
167 //*************************************************************************************************
168 /*!\brief Returns the sum of all elements in the 16-bit integral complex SIMD vector.
169 // \ingroup simd
170 //
171 // \param a The vector to be summed up.
172 // \return The sum of all vector elements.
173 */
174 template< typename T >  // Type of the SIMD element
sum(const SIMDci16<T> & a)175 BLAZE_ALWAYS_INLINE const ValueType_t<T> sum( const SIMDci16<T>& a ) noexcept
176 {
177 #if BLAZE_AVX512BW_MODE
178    return complex<int16_t>( (*a)[0] + (*a)[1] + (*a)[ 2] + (*a)[ 3] + (*a)[ 4] + (*a)[ 5] + (*a)[ 6] + (*a)[ 7] +
179                             (*a)[8] + (*a)[9] + (*a)[10] + (*a)[11] + (*a)[12] + (*a)[13] + (*a)[14] + (*a)[15] );
180 #elif BLAZE_AVX2_MODE
181    return complex<int16_t>( (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7] );
182 #elif BLAZE_SSE2_MODE
183    return complex<int16_t>( (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] );
184 #else
185    return (*a).value;
186 #endif
187 }
188 //*************************************************************************************************
189 
190 
191 
192 
193 //=================================================================================================
194 //
195 //  32-BIT INTEGRAL SIMD TYPES
196 //
197 //=================================================================================================
198 
199 //*************************************************************************************************
200 /*!\brief Returns the sum of all elements in the 32-bit integral SIMD vector.
201 // \ingroup simd
202 //
203 // \param a The vector to be summed up.
204 // \return The sum of all vector elements.
205 */
206 template< typename T >  // Type of the SIMD element
sum(const SIMDi32<T> & a)207 BLAZE_ALWAYS_INLINE ValueType_t<T> sum( const SIMDi32<T>& a ) noexcept
208 {
209 #if BLAZE_AVX512F_MODE
210    const __m256i low ( _mm512_castsi512_si256( (*a).value ) );
211    const __m256i high( _mm512_extracti64x4_epi64( (*a).value, 1 ) );
212    const __m256i b   ( _mm256_hadd_epi32( low, high ) );
213    const __m256i c   ( _mm256_hadd_epi32( b, b ) );
214    const __m256i d   ( _mm256_hadd_epi32( c, c ) );
215    const __m128i e   ( _mm_add_epi32( _mm256_extracti128_si256( d, 1 )
216                                     , _mm256_castsi256_si128( d ) ) );
217    return _mm_extract_epi32( e, 0 );
218 #elif BLAZE_MIC_MODE
219    return _mm512_reduce_add_epi32( (*a).value );
220 #elif BLAZE_AVX2_MODE
221    const __m256i b( _mm256_hadd_epi32( (*a).value, (*a).value ) );
222    const __m256i c( _mm256_hadd_epi32( b, b ) );
223    const __m128i d( _mm_add_epi32( _mm256_extracti128_si256( c, 1 )
224                                  , _mm256_castsi256_si128( c ) ) );
225    return _mm_extract_epi32( d, 0 );
226 #elif BLAZE_SSSE3_MODE
227    const __m128i b( _mm_hadd_epi32( (*a).value, (*a).value ) );
228    return _mm_cvtsi128_si32( _mm_hadd_epi32( b, b ) );
229 #elif BLAZE_SSE2_MODE
230    const __m128i b( _mm_add_epi32( (*a).value, _mm_shuffle_epi32( (*a).value, 0x4E ) ) );
231    return _mm_cvtsi128_si32( _mm_add_epi32( b, _mm_shuffle_epi32( b, 0xB1 ) ) );
232 #else
233    return (*a).value;
234 #endif
235 }
236 //*************************************************************************************************
237 
238 
239 //*************************************************************************************************
240 /*!\brief Returns the sum of all elements in the 32-bit integral complex SIMD vector.
241 // \ingroup simd
242 //
243 // \param a The vector to be summed up.
244 // \return The sum of all vector elements.
245 */
246 template< typename T >  // Type of the SIMD element
sum(const SIMDci32<T> & a)247 BLAZE_ALWAYS_INLINE const ValueType_t<T> sum( const SIMDci32<T>& a ) noexcept
248 {
249 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
250    return complex<int32_t>( (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7] );
251 #elif BLAZE_AVX2_MODE
252    return complex<int32_t>( (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] );
253 #elif BLAZE_SSE2_MODE
254    return complex<int32_t>( (*a)[0] + (*a)[1] );
255 #else
256    return (*a).value;
257 #endif
258 }
259 //*************************************************************************************************
260 
261 
262 
263 
264 //=================================================================================================
265 //
266 //  64-BIT INTEGRAL SIMD TYPES
267 //
268 //=================================================================================================
269 
270 //*************************************************************************************************
271 /*!\brief Returns the sum of all elements in the 64-bit integral SIMD vector.
272 // \ingroup simd
273 //
274 // \param a The vector to be summed up.
275 // \return The sum of all vector elements.
276 */
277 template< typename T >  // Type of the SIMD element
sum(const SIMDi64<T> & a)278 BLAZE_ALWAYS_INLINE ValueType_t<T> sum( const SIMDi64<T>& a ) noexcept
279 {
280 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
281    return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] + (*a)[4] + (*a)[5] + (*a)[6] + (*a)[7];
282 #elif BLAZE_AVX2_MODE
283    return (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3];
284 #elif BLAZE_SSE2_MODE
285    return (*a)[0] + (*a)[1];
286 #else
287    return (*a).value;
288 #endif
289 }
290 //*************************************************************************************************
291 
292 
293 //*************************************************************************************************
294 /*!\brief Returns the sum of all elements in the 64-bit integral complex SIMD vector.
295 // \ingroup simd
296 //
297 // \param a The vector to be summed up.
298 // \return The sum of all vector elements.
299 */
300 template< typename T >  // Type of the SIMD element
sum(const SIMDci64<T> & a)301 BLAZE_ALWAYS_INLINE const ValueType_t<T> sum( const SIMDci64<T>& a ) noexcept
302 {
303 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
304    return complex<int64_t>( (*a)[0] + (*a)[1] + (*a)[2] + (*a)[3] );
305 #elif BLAZE_AVX2_MODE
306    return complex<int64_t>( (*a)[0] + (*a)[1] );
307 #elif BLAZE_SSE2_MODE
308    return (*a)[0];
309 #else
310    return (*a).value;
311 #endif
312 }
313 //*************************************************************************************************
314 
315 
316 
317 
318 //=================================================================================================
319 //
320 //  32-BIT FLOATING POINT SIMD TYPES
321 //
322 //=================================================================================================
323 
324 //*************************************************************************************************
325 /*!\brief Returns the sum of all elements in the single precision floating point SIMD vector.
326 // \ingroup simd
327 //
328 // \param a The vector to be summed up.
329 // \return The sum of all vector elements.
330 */
sum(const SIMDfloat & a)331 BLAZE_ALWAYS_INLINE float sum( const SIMDfloat& a ) noexcept
332 {
333 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
334    return _mm512_reduce_add_ps( a.value );
335 #elif BLAZE_AVX_MODE
336    const __m128 b( _mm_add_ps( _mm256_extractf128_ps( a.value, 1 ), _mm256_castps256_ps128( a.value ) ) );
337    const __m128 c( _mm_add_ps( b, _mm_movehl_ps( b, b ) ) );
338    return _mm_cvtss_f32( _mm_add_ss( c, _mm_shuffle_ps( c, c, 1 ) ) );
339 #elif BLAZE_SSE3_MODE
340    const __m128 b( _mm_add_ps( a.value, _mm_movehl_ps( a.value, a.value ) ) );
341    return _mm_cvtss_f32( _mm_add_ss( b, _mm_shuffle_ps( b, b, 1 ) ) );
342 #elif BLAZE_SSE_MODE
343    const __m128 b( _mm_add_ps( a.value, _mm_movehl_ps( a.value, a.value ) ) );
344    return _mm_cvtss_f32( _mm_add_ss( b, _mm_shuffle_ps( b, b, 1 ) ) );
345 #else
346    return a.value;
347 #endif
348 }
349 //*************************************************************************************************
350 
351 
352 //*************************************************************************************************
353 /*!\brief Returns the sum of all elements in the single precision complex SIMD vector.
354 // \ingroup simd
355 //
356 // \param a The vector to be summed up.
357 // \return The sum of all vector elements.
358 */
sum(const SIMDcfloat & a)359 BLAZE_ALWAYS_INLINE const complex<float> sum( const SIMDcfloat& a ) noexcept
360 {
361 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
362    return complex<float>( a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7] );
363 #elif BLAZE_AVX_MODE
364    return complex<float>( a[0] + a[1] + a[2] + a[3] );
365 #elif BLAZE_SSE_MODE
366    return complex<float>( a[0] + a[1] );
367 #else
368    return a.value;
369 #endif
370 }
371 //*************************************************************************************************
372 
373 
374 
375 
376 //=================================================================================================
377 //
378 //  64-BIT FLOATING POINT SIMD TYPES
379 //
380 //=================================================================================================
381 
382 //*************************************************************************************************
383 /*!\brief Returns the sum of all elements in the double precision floating point SIMD vector.
384 // \ingroup simd
385 //
386 // \param a The vector to be summed up.
387 // \return The sum of all vector elements.
388 */
sum(const SIMDdouble & a)389 BLAZE_ALWAYS_INLINE double sum( const SIMDdouble& a ) noexcept
390 {
391 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
392    return _mm512_reduce_add_pd( a.value );
393 #elif BLAZE_AVX_MODE
394    const __m128d b( _mm_add_pd( _mm256_castpd256_pd128( a.value ), _mm256_extractf128_pd( a.value, 1 ) ) );
395    return _mm_cvtsd_f64( _mm_add_sd( b, _mm_unpackhi_pd( b, b ) ) );
396 #elif BLAZE_SSE2_MODE
397    return _mm_cvtsd_f64( _mm_add_sd( a.value, _mm_unpackhi_pd( a.value, a.value ) ) );
398 #else
399    return a.value;
400 #endif
401 }
402 //*************************************************************************************************
403 
404 
405 //*************************************************************************************************
406 /*!\brief Returns the sum of all elements in the double precision complex SIMD vector.
407 // \ingroup simd
408 //
409 // \param a The vector to be summed up.
410 // \return The sum of all vector elements.
411 */
sum(const SIMDcdouble & a)412 BLAZE_ALWAYS_INLINE const complex<double> sum( const SIMDcdouble& a ) noexcept
413 {
414 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
415    return complex<double>( a[0] + a[1] + a[2] + a[3] );
416 #elif BLAZE_AVX_MODE
417    return complex<double>( a[0] + a[1] );
418 #elif BLAZE_SSE2_MODE
419    return a[0];
420 #else
421    return a.value;
422 #endif
423 }
424 //*************************************************************************************************
425 
426 } // namespace blaze
427 
428 #endif
429