1 //=================================================================================================
2 /*!
3 //  \file blaze/math/simd/Storea.h
4 //  \brief Header file for the SIMD aligned store functionality
5 //
6 //  Copyright (C) 2012-2020 Klaus Iglberger - All Rights Reserved
7 //
8 //  This file is part of the Blaze library. You can redistribute it and/or modify it under
9 //  the terms of the New (Revised) BSD License. Redistribution and use in source and binary
10 //  forms, with or without modification, are permitted provided that the following conditions
11 //  are met:
12 //
13 //  1. Redistributions of source code must retain the above copyright notice, this list of
14 //     conditions and the following disclaimer.
15 //  2. Redistributions in binary form must reproduce the above copyright notice, this list
16 //     of conditions and the following disclaimer in the documentation and/or other materials
17 //     provided with the distribution.
18 //  3. Neither the names of the Blaze development group nor the names of its contributors
19 //     may be used to endorse or promote products derived from this software without specific
20 //     prior written permission.
21 //
22 //  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
23 //  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 //  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
25 //  SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 //  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 //  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 //  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 //  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 //  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 //  DAMAGE.
32 */
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_SIMD_STOREA_H_
36 #define _BLAZE_MATH_SIMD_STOREA_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/simd/BasicTypes.h>
44 #include <blaze/system/Inline.h>
45 #include <blaze/system/Vectorization.h>
46 #include <blaze/util/AlignmentCheck.h>
47 #include <blaze/util/Assert.h>
48 #include <blaze/util/Complex.h>
49 #include <blaze/util/EnableIf.h>
50 #include <blaze/util/StaticAssert.h>
51 #include <blaze/util/typetraits/HasSize.h>
52 #include <blaze/util/typetraits/IsIntegral.h>
53 
54 
55 namespace blaze {
56 
57 //=================================================================================================
58 //
59 //  8-BIT INTEGRAL SIMD TYPES
60 //
61 //=================================================================================================
62 
63 //*************************************************************************************************
64 /*!\brief Aligned store of a vector of 1-byte integral values.
65 // \ingroup simd
66 //
67 // \param address The target address.
68 // \param value The 1-byte integral vector to be stored.
69 // \return void
70 //
71 // This function stores a vector of 1-byte integral values. The given address must be aligned
72 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
73 // in case of AVX, and 64-byte alignment in case of AVX-512).
74 */
75 template< typename T1    // Type of the integral value
76         , typename T2 >  // Type of the SIMD data type
77 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,1UL> >
storea(T1 * address,const SIMDi8<T2> & value)78    storea( T1* address, const SIMDi8<T2>& value ) noexcept
79 {
80    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
81 
82 #if BLAZE_AVX512BW_MODE
83    _mm512_store_si512( address, (*value).value );
84 #elif BLAZE_AVX2_MODE
85    _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
86 #elif BLAZE_SSE2_MODE
87    _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
88 #else
89    *address = (*value).value;
90 #endif
91 }
92 //*************************************************************************************************
93 
94 
95 //*************************************************************************************************
96 /*!\brief Aligned store of a vector of 1-byte integral complex values.
97 // \ingroup simd
98 //
99 // \param address The target address.
100 // \param value The 1-byte integral complex vector to be stored.
101 // \return void
102 //
103 // This function stores a vector of 1-byte integral complex values. The given address must be
104 // aligned according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte
105 // alignment in case of AVX, and 64-byte alignment in case of AVX-512).
106 */
107 template< typename T1    // Type of the integral value
108         , typename T2 >  // Type of the SIMD data type
109 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,1UL> >
storea(complex<T1> * address,const SIMDci8<T2> & value)110    storea( complex<T1>* address, const SIMDci8<T2>& value ) noexcept
111 {
112    BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );
113    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
114 
115 #if BLAZE_AVX512BW_MODE
116    _mm512_store_si512( address, (*value).value );
117 #elif BLAZE_AVX2_MODE
118    _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
119 #elif BLAZE_SSE2_MODE
120    _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
121 #else
122    *address = (*value).value;
123 #endif
124 }
125 //*************************************************************************************************
126 
127 
128 
129 
130 //=================================================================================================
131 //
132 //  16-BIT INTEGRAL SIMD TYPES
133 //
134 //=================================================================================================
135 
136 //*************************************************************************************************
137 /*!\brief Aligned store of a vector of 2-byte integral values.
138 // \ingroup simd
139 //
140 // \param address The target address.
141 // \param value The 2-byte integral vector to be stored.
142 // \return void
143 //
144 // This function stores a vector of 2-byte integral values. The given address must be aligned
145 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
146 // in case of AVX, and 64-byte alignment in case of AVX-512).
147 */
148 template< typename T1    // Type of the integral value
149         , typename T2 >  // Type of the SIMD data type
150 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,2UL> >
storea(T1 * address,const SIMDi16<T2> & value)151    storea( T1* address, const SIMDi16<T2>& value ) noexcept
152 {
153    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
154 
155 #if BLAZE_AVX512BW_MODE
156    _mm512_store_si512( address, (*value).value );
157 #elif BLAZE_AVX2_MODE
158    _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
159 #elif BLAZE_SSE2_MODE
160    _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
161 #else
162    *address = (*value).value;
163 #endif
164 }
165 //*************************************************************************************************
166 
167 
168 //*************************************************************************************************
169 /*!\brief Aligned store of a vector of 2-byte integral complex values.
170 // \ingroup simd
171 //
172 // \param address The target address.
173 // \param value The 2-byte integral complex vector to be stored.
174 // \return void
175 //
176 // This function stores a vector of 2-byte integral complex values. The given address must be
177 // aligned according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte
178 // alignment in case of AVX, and 64-byte alignment in case of AVX-512).
179 */
180 template< typename T1    // Type of the integral value
181         , typename T2 >  // Type of the SIMD data type
182 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,2UL> >
storea(complex<T1> * address,const SIMDci16<T2> & value)183    storea( complex<T1>* address, const SIMDci16<T2>& value ) noexcept
184 {
185    BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );
186    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
187 
188 #if BLAZE_AVX512BW_MODE
189    _mm512_store_si512( address, (*value).value );
190 #elif BLAZE_AVX2_MODE
191    _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
192 #elif BLAZE_SSE2_MODE
193    _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
194 #else
195    *address = (*value).value;
196 #endif
197 }
198 //*************************************************************************************************
199 
200 
201 
202 
203 //=================================================================================================
204 //
205 //  32-BIT INTEGRAL SIMD TYPES
206 //
207 //=================================================================================================
208 
209 //*************************************************************************************************
210 /*!\brief Aligned store of a vector of 4-byte integral values.
211 // \ingroup simd
212 //
213 // \param address The target address.
214 // \param value The 4-byte integral vector to be stored.
215 // \return void
216 //
217 // This function stores a vector of 4-byte integral values. The given address must be aligned
218 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
219 // in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
220 */
221 template< typename T1    // Type of the integral value
222         , typename T2 >  // Type of the SIMD data type
223 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,4UL> >
storea(T1 * address,const SIMDi32<T2> & value)224    storea( T1* address, const SIMDi32<T2>& value ) noexcept
225 {
226    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
227 
228 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
229    _mm512_store_epi32( address, (*value).value );
230 #elif BLAZE_AVX2_MODE
231    _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
232 #elif BLAZE_SSE2_MODE
233    _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
234 #else
235    *address = (*value).value;
236 #endif
237 }
238 //*************************************************************************************************
239 
240 
241 //*************************************************************************************************
242 /*!\brief Aligned store of a vector of 4-byte integral complex values.
243 // \ingroup simd
244 //
245 // \param address The target address.
246 // \param value The 4-byte integral complex vector to be stored.
247 // \return void
248 //
249 // This function stores a vector of 4-byte integral complex values. The given address must be
250 // aligned according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte
251 // alignment in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
252 */
253 template< typename T1    // Type of the integral value
254         , typename T2 >  // Type of the SIMD data type
255 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,4UL> >
storea(complex<T1> * address,const SIMDci32<T2> & value)256    storea( complex<T1>* address, const SIMDci32<T2>& value ) noexcept
257 {
258    BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );
259    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
260 
261 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
262    _mm512_store_epi32( address, (*value).value );
263 #elif BLAZE_AVX2_MODE
264    _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
265 #elif BLAZE_SSE2_MODE
266    _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
267 #else
268    *address = (*value).value;
269 #endif
270 }
271 //*************************************************************************************************
272 
273 
274 
275 
276 //=================================================================================================
277 //
278 //  64-BIT INTEGRAL SIMD TYPES
279 //
280 //=================================================================================================
281 
282 //*************************************************************************************************
283 /*!\brief Aligned store of a vector of 8-byte integral values.
284 // \ingroup simd
285 //
286 // \param address The target address.
287 // \param value The 8-byte integral vector to be stored.
288 // \return void
289 //
290 // This function stores a vector of 8-byte integral values. The given address must be aligned
291 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
292 // in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
293 */
294 template< typename T1    // Type of the integral value
295         , typename T2 >  // Type of the SIMD data type
296 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,8UL> >
storea(T1 * address,const SIMDi64<T2> & value)297    storea( T1* address, const SIMDi64<T2>& value ) noexcept
298 {
299    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
300 
301 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
302    _mm512_store_epi64( address, (*value).value );
303 #elif BLAZE_AVX2_MODE
304    _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
305 #elif BLAZE_SSE2_MODE
306    _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
307 #else
308    *address = (*value).value;
309 #endif
310 }
311 //*************************************************************************************************
312 
313 
314 //*************************************************************************************************
315 /*!\brief Aligned store of a vector of 8-byte integral complex values.
316 // \ingroup simd
317 //
318 // \param address The target address.
319 // \param value The 8-byte integral complex vector to be stored.
320 // \return void
321 //
322 // This function stores a vector of 8-byte integral complex values. The given address must be
323 // aligned according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte
324 // alignment in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
325 */
326 template< typename T1    // Type of the integral value
327         , typename T2 >  // Type of the SIMD data type
328 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,8UL> >
storea(complex<T1> * address,const SIMDci64<T2> & value)329    storea( complex<T1>* address, const SIMDci64<T2>& value ) noexcept
330 {
331    BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );
332    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
333 
334 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
335    _mm512_store_epi64( address, (*value).value );
336 #elif BLAZE_AVX2_MODE
337    _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
338 #elif BLAZE_SSE2_MODE
339    _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
340 #else
341    *address = (*value).value;
342 #endif
343 }
344 //*************************************************************************************************
345 
346 
347 
348 
349 //=================================================================================================
350 //
351 //  32-BIT FLOATING POINT SIMD TYPES
352 //
353 //=================================================================================================
354 
355 //*************************************************************************************************
356 /*!\brief Aligned store of a vector of 'float' values.
357 // \ingroup simd
358 //
359 // \param address The target address.
360 // \param value The 'float' vector to be stored.
361 // \return void
362 //
363 // This function stores a vector of 'float' values. The given address must be aligned according
364 // to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment in case of
365 // AVX, and 64-byte alignment in case of AVX-512/MIC).
366 */
367 template< typename T >  // Type of the operand
storea(float * address,const SIMDf32<T> & value)368 BLAZE_ALWAYS_INLINE void storea( float* address, const SIMDf32<T>& value ) noexcept
369 {
370    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
371 
372 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
373    _mm512_store_ps( address, (*value).eval().value );
374 #elif BLAZE_AVX_MODE
375    _mm256_store_ps( address, (*value).eval().value );
376 #elif BLAZE_SSE_MODE
377    _mm_store_ps( address, (*value).eval().value );
378 #else
379    *address = (*value).eval().value;
380 #endif
381 }
382 //*************************************************************************************************
383 
384 
385 //*************************************************************************************************
386 /*!\brief Aligned store of a vector of 'complex<float>' values.
387 // \ingroup simd
388 //
389 // \param address The target address.
390 // \param value The 'complex<float>' vector to be stored.
391 // \return void
392 //
393 // This function stores a vector of 'complex<float>' values. The given address must be aligned
394 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
395 // in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
396 */
storea(complex<float> * address,const SIMDcfloat & value)397 BLAZE_ALWAYS_INLINE void storea( complex<float>* address, const SIMDcfloat& value ) noexcept
398 {
399    BLAZE_STATIC_ASSERT  ( sizeof( complex<float> ) == 2UL*sizeof( float ) );
400    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
401 
402 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
403    _mm512_store_ps( reinterpret_cast<float*>( address ), value.value );
404 #elif BLAZE_AVX_MODE
405    _mm256_store_ps( reinterpret_cast<float*>( address ), value.value );
406 #elif BLAZE_SSE_MODE
407    _mm_store_ps( reinterpret_cast<float*>( address ), value.value );
408 #else
409    *address = value.value;
410 #endif
411 }
412 //*************************************************************************************************
413 
414 
415 
416 
417 //=================================================================================================
418 //
419 //  64-BIT FLOATING POINT SIMD TYPES
420 //
421 //=================================================================================================
422 
423 //*************************************************************************************************
424 /*!\brief Aligned store of a vector of 'double' values.
425 // \ingroup simd
426 //
427 // \param address The target address.
428 // \param value The 'double' vector to be stored.
429 // \return void
430 //
431 // This function stores a vector of 'double' values. The given address must be aligned according
432 // to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment in case of
433 // AVX, and 64-byte alignment in case of AVX-512/MIC).
434 */
435 template< typename T >  // Type of the operand
storea(double * address,const SIMDf64<T> & value)436 BLAZE_ALWAYS_INLINE void storea( double* address, const SIMDf64<T>& value ) noexcept
437 {
438    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
439 
440 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
441    _mm512_store_pd( address, (*value).eval().value );
442 #elif BLAZE_AVX_MODE
443    _mm256_store_pd( address, (*value).eval().value );
444 #elif BLAZE_SSE2_MODE
445    _mm_store_pd( address, (*value).eval().value );
446 #else
447    *address = (*value).eval().value;
448 #endif
449 }
450 //*************************************************************************************************
451 
452 
453 //*************************************************************************************************
454 /*!\brief Aligned store of a vector of 'complex<double>' values.
455 // \ingroup simd
456 //
457 // \param address The target address.
458 // \param value The 'complex<double>' vector to be stored.
459 // \return void
460 //
461 // This function stores a vector of 'complex<double>' values. The given address must be aligned
462 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
463 // in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
464 */
storea(complex<double> * address,const SIMDcdouble & value)465 BLAZE_ALWAYS_INLINE void storea( complex<double>* address, const SIMDcdouble& value ) noexcept
466 {
467    BLAZE_STATIC_ASSERT  ( sizeof( complex<double> ) == 2UL*sizeof( double ) );
468    BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
469 
470 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
471    _mm512_store_pd( reinterpret_cast<double*>( address ), value.value );
472 #elif BLAZE_AVX_MODE
473    _mm256_store_pd( reinterpret_cast<double*>( address ), value.value );
474 #elif BLAZE_SSE2_MODE
475    _mm_store_pd( reinterpret_cast<double*>( address ), value.value );
476 #else
477    *address = value.value;
478 #endif
479 }
480 //*************************************************************************************************
481 
482 } // namespace blaze
483 
484 #endif
485