1 //=================================================================================================
2 /*!
3 // \file blaze/math/simd/Storea.h
4 // \brief Header file for the SIMD aligned store functionality
5 //
6 // Copyright (C) 2012-2020 Klaus Iglberger - All Rights Reserved
7 //
8 // This file is part of the Blaze library. You can redistribute it and/or modify it under
9 // the terms of the New (Revised) BSD License. Redistribution and use in source and binary
10 // forms, with or without modification, are permitted provided that the following conditions
11 // are met:
12 //
13 // 1. Redistributions of source code must retain the above copyright notice, this list of
14 // conditions and the following disclaimer.
15 // 2. Redistributions in binary form must reproduce the above copyright notice, this list
16 // of conditions and the following disclaimer in the documentation and/or other materials
17 // provided with the distribution.
18 // 3. Neither the names of the Blaze development group nor the names of its contributors
19 // may be used to endorse or promote products derived from this software without specific
20 // prior written permission.
21 //
22 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
23 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
25 // SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
27 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 // DAMAGE.
32 */
33 //=================================================================================================
34
35 #ifndef _BLAZE_MATH_SIMD_STOREA_H_
36 #define _BLAZE_MATH_SIMD_STOREA_H_
37
38
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42
43 #include <blaze/math/simd/BasicTypes.h>
44 #include <blaze/system/Inline.h>
45 #include <blaze/system/Vectorization.h>
46 #include <blaze/util/AlignmentCheck.h>
47 #include <blaze/util/Assert.h>
48 #include <blaze/util/Complex.h>
49 #include <blaze/util/EnableIf.h>
50 #include <blaze/util/StaticAssert.h>
51 #include <blaze/util/typetraits/HasSize.h>
52 #include <blaze/util/typetraits/IsIntegral.h>
53
54
55 namespace blaze {
56
57 //=================================================================================================
58 //
59 // 8-BIT INTEGRAL SIMD TYPES
60 //
61 //=================================================================================================
62
63 //*************************************************************************************************
64 /*!\brief Aligned store of a vector of 1-byte integral values.
65 // \ingroup simd
66 //
67 // \param address The target address.
68 // \param value The 1-byte integral vector to be stored.
69 // \return void
70 //
71 // This function stores a vector of 1-byte integral values. The given address must be aligned
72 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
73 // in case of AVX, and 64-byte alignment in case of AVX-512).
74 */
75 template< typename T1 // Type of the integral value
76 , typename T2 > // Type of the SIMD data type
77 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,1UL> >
storea(T1 * address,const SIMDi8<T2> & value)78 storea( T1* address, const SIMDi8<T2>& value ) noexcept
79 {
80 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
81
82 #if BLAZE_AVX512BW_MODE
83 _mm512_store_si512( address, (*value).value );
84 #elif BLAZE_AVX2_MODE
85 _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
86 #elif BLAZE_SSE2_MODE
87 _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
88 #else
89 *address = (*value).value;
90 #endif
91 }
92 //*************************************************************************************************
93
94
95 //*************************************************************************************************
96 /*!\brief Aligned store of a vector of 1-byte integral complex values.
97 // \ingroup simd
98 //
99 // \param address The target address.
100 // \param value The 1-byte integral complex vector to be stored.
101 // \return void
102 //
103 // This function stores a vector of 1-byte integral complex values. The given address must be
104 // aligned according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte
105 // alignment in case of AVX, and 64-byte alignment in case of AVX-512).
106 */
107 template< typename T1 // Type of the integral value
108 , typename T2 > // Type of the SIMD data type
109 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,1UL> >
storea(complex<T1> * address,const SIMDci8<T2> & value)110 storea( complex<T1>* address, const SIMDci8<T2>& value ) noexcept
111 {
112 BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );
113 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
114
115 #if BLAZE_AVX512BW_MODE
116 _mm512_store_si512( address, (*value).value );
117 #elif BLAZE_AVX2_MODE
118 _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
119 #elif BLAZE_SSE2_MODE
120 _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
121 #else
122 *address = (*value).value;
123 #endif
124 }
125 //*************************************************************************************************
126
127
128
129
130 //=================================================================================================
131 //
132 // 16-BIT INTEGRAL SIMD TYPES
133 //
134 //=================================================================================================
135
136 //*************************************************************************************************
137 /*!\brief Aligned store of a vector of 2-byte integral values.
138 // \ingroup simd
139 //
140 // \param address The target address.
141 // \param value The 2-byte integral vector to be stored.
142 // \return void
143 //
144 // This function stores a vector of 2-byte integral values. The given address must be aligned
145 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
146 // in case of AVX, and 64-byte alignment in case of AVX-512).
147 */
148 template< typename T1 // Type of the integral value
149 , typename T2 > // Type of the SIMD data type
150 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,2UL> >
storea(T1 * address,const SIMDi16<T2> & value)151 storea( T1* address, const SIMDi16<T2>& value ) noexcept
152 {
153 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
154
155 #if BLAZE_AVX512BW_MODE
156 _mm512_store_si512( address, (*value).value );
157 #elif BLAZE_AVX2_MODE
158 _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
159 #elif BLAZE_SSE2_MODE
160 _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
161 #else
162 *address = (*value).value;
163 #endif
164 }
165 //*************************************************************************************************
166
167
168 //*************************************************************************************************
169 /*!\brief Aligned store of a vector of 2-byte integral complex values.
170 // \ingroup simd
171 //
172 // \param address The target address.
173 // \param value The 2-byte integral complex vector to be stored.
174 // \return void
175 //
176 // This function stores a vector of 2-byte integral complex values. The given address must be
177 // aligned according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte
178 // alignment in case of AVX, and 64-byte alignment in case of AVX-512).
179 */
180 template< typename T1 // Type of the integral value
181 , typename T2 > // Type of the SIMD data type
182 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,2UL> >
storea(complex<T1> * address,const SIMDci16<T2> & value)183 storea( complex<T1>* address, const SIMDci16<T2>& value ) noexcept
184 {
185 BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );
186 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
187
188 #if BLAZE_AVX512BW_MODE
189 _mm512_store_si512( address, (*value).value );
190 #elif BLAZE_AVX2_MODE
191 _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
192 #elif BLAZE_SSE2_MODE
193 _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
194 #else
195 *address = (*value).value;
196 #endif
197 }
198 //*************************************************************************************************
199
200
201
202
203 //=================================================================================================
204 //
205 // 32-BIT INTEGRAL SIMD TYPES
206 //
207 //=================================================================================================
208
209 //*************************************************************************************************
210 /*!\brief Aligned store of a vector of 4-byte integral values.
211 // \ingroup simd
212 //
213 // \param address The target address.
214 // \param value The 4-byte integral vector to be stored.
215 // \return void
216 //
217 // This function stores a vector of 4-byte integral values. The given address must be aligned
218 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
219 // in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
220 */
221 template< typename T1 // Type of the integral value
222 , typename T2 > // Type of the SIMD data type
223 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,4UL> >
storea(T1 * address,const SIMDi32<T2> & value)224 storea( T1* address, const SIMDi32<T2>& value ) noexcept
225 {
226 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
227
228 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
229 _mm512_store_epi32( address, (*value).value );
230 #elif BLAZE_AVX2_MODE
231 _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
232 #elif BLAZE_SSE2_MODE
233 _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
234 #else
235 *address = (*value).value;
236 #endif
237 }
238 //*************************************************************************************************
239
240
241 //*************************************************************************************************
242 /*!\brief Aligned store of a vector of 4-byte integral complex values.
243 // \ingroup simd
244 //
245 // \param address The target address.
246 // \param value The 4-byte integral complex vector to be stored.
247 // \return void
248 //
249 // This function stores a vector of 4-byte integral complex values. The given address must be
250 // aligned according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte
251 // alignment in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
252 */
253 template< typename T1 // Type of the integral value
254 , typename T2 > // Type of the SIMD data type
255 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,4UL> >
storea(complex<T1> * address,const SIMDci32<T2> & value)256 storea( complex<T1>* address, const SIMDci32<T2>& value ) noexcept
257 {
258 BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );
259 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
260
261 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
262 _mm512_store_epi32( address, (*value).value );
263 #elif BLAZE_AVX2_MODE
264 _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
265 #elif BLAZE_SSE2_MODE
266 _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
267 #else
268 *address = (*value).value;
269 #endif
270 }
271 //*************************************************************************************************
272
273
274
275
276 //=================================================================================================
277 //
278 // 64-BIT INTEGRAL SIMD TYPES
279 //
280 //=================================================================================================
281
282 //*************************************************************************************************
283 /*!\brief Aligned store of a vector of 8-byte integral values.
284 // \ingroup simd
285 //
286 // \param address The target address.
287 // \param value The 8-byte integral vector to be stored.
288 // \return void
289 //
290 // This function stores a vector of 8-byte integral values. The given address must be aligned
291 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
292 // in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
293 */
294 template< typename T1 // Type of the integral value
295 , typename T2 > // Type of the SIMD data type
296 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,8UL> >
storea(T1 * address,const SIMDi64<T2> & value)297 storea( T1* address, const SIMDi64<T2>& value ) noexcept
298 {
299 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
300
301 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
302 _mm512_store_epi64( address, (*value).value );
303 #elif BLAZE_AVX2_MODE
304 _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
305 #elif BLAZE_SSE2_MODE
306 _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
307 #else
308 *address = (*value).value;
309 #endif
310 }
311 //*************************************************************************************************
312
313
314 //*************************************************************************************************
315 /*!\brief Aligned store of a vector of 8-byte integral complex values.
316 // \ingroup simd
317 //
318 // \param address The target address.
319 // \param value The 8-byte integral complex vector to be stored.
320 // \return void
321 //
322 // This function stores a vector of 8-byte integral complex values. The given address must be
323 // aligned according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte
324 // alignment in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
325 */
326 template< typename T1 // Type of the integral value
327 , typename T2 > // Type of the SIMD data type
328 BLAZE_ALWAYS_INLINE EnableIf_t< IsIntegral_v<T1> && HasSize_v<T1,8UL> >
storea(complex<T1> * address,const SIMDci64<T2> & value)329 storea( complex<T1>* address, const SIMDci64<T2>& value ) noexcept
330 {
331 BLAZE_STATIC_ASSERT( sizeof( complex<T1> ) == 2UL*sizeof( T1 ) );
332 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
333
334 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
335 _mm512_store_epi64( address, (*value).value );
336 #elif BLAZE_AVX2_MODE
337 _mm256_store_si256( reinterpret_cast<__m256i*>( address ), (*value).value );
338 #elif BLAZE_SSE2_MODE
339 _mm_store_si128( reinterpret_cast<__m128i*>( address ), (*value).value );
340 #else
341 *address = (*value).value;
342 #endif
343 }
344 //*************************************************************************************************
345
346
347
348
349 //=================================================================================================
350 //
351 // 32-BIT FLOATING POINT SIMD TYPES
352 //
353 //=================================================================================================
354
355 //*************************************************************************************************
356 /*!\brief Aligned store of a vector of 'float' values.
357 // \ingroup simd
358 //
359 // \param address The target address.
360 // \param value The 'float' vector to be stored.
361 // \return void
362 //
363 // This function stores a vector of 'float' values. The given address must be aligned according
364 // to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment in case of
365 // AVX, and 64-byte alignment in case of AVX-512/MIC).
366 */
367 template< typename T > // Type of the operand
storea(float * address,const SIMDf32<T> & value)368 BLAZE_ALWAYS_INLINE void storea( float* address, const SIMDf32<T>& value ) noexcept
369 {
370 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
371
372 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
373 _mm512_store_ps( address, (*value).eval().value );
374 #elif BLAZE_AVX_MODE
375 _mm256_store_ps( address, (*value).eval().value );
376 #elif BLAZE_SSE_MODE
377 _mm_store_ps( address, (*value).eval().value );
378 #else
379 *address = (*value).eval().value;
380 #endif
381 }
382 //*************************************************************************************************
383
384
385 //*************************************************************************************************
386 /*!\brief Aligned store of a vector of 'complex<float>' values.
387 // \ingroup simd
388 //
389 // \param address The target address.
390 // \param value The 'complex<float>' vector to be stored.
391 // \return void
392 //
393 // This function stores a vector of 'complex<float>' values. The given address must be aligned
394 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
395 // in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
396 */
storea(complex<float> * address,const SIMDcfloat & value)397 BLAZE_ALWAYS_INLINE void storea( complex<float>* address, const SIMDcfloat& value ) noexcept
398 {
399 BLAZE_STATIC_ASSERT ( sizeof( complex<float> ) == 2UL*sizeof( float ) );
400 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
401
402 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
403 _mm512_store_ps( reinterpret_cast<float*>( address ), value.value );
404 #elif BLAZE_AVX_MODE
405 _mm256_store_ps( reinterpret_cast<float*>( address ), value.value );
406 #elif BLAZE_SSE_MODE
407 _mm_store_ps( reinterpret_cast<float*>( address ), value.value );
408 #else
409 *address = value.value;
410 #endif
411 }
412 //*************************************************************************************************
413
414
415
416
417 //=================================================================================================
418 //
419 // 64-BIT FLOATING POINT SIMD TYPES
420 //
421 //=================================================================================================
422
423 //*************************************************************************************************
424 /*!\brief Aligned store of a vector of 'double' values.
425 // \ingroup simd
426 //
427 // \param address The target address.
428 // \param value The 'double' vector to be stored.
429 // \return void
430 //
431 // This function stores a vector of 'double' values. The given address must be aligned according
432 // to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment in case of
433 // AVX, and 64-byte alignment in case of AVX-512/MIC).
434 */
435 template< typename T > // Type of the operand
storea(double * address,const SIMDf64<T> & value)436 BLAZE_ALWAYS_INLINE void storea( double* address, const SIMDf64<T>& value ) noexcept
437 {
438 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
439
440 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
441 _mm512_store_pd( address, (*value).eval().value );
442 #elif BLAZE_AVX_MODE
443 _mm256_store_pd( address, (*value).eval().value );
444 #elif BLAZE_SSE2_MODE
445 _mm_store_pd( address, (*value).eval().value );
446 #else
447 *address = (*value).eval().value;
448 #endif
449 }
450 //*************************************************************************************************
451
452
453 //*************************************************************************************************
454 /*!\brief Aligned store of a vector of 'complex<double>' values.
455 // \ingroup simd
456 //
457 // \param address The target address.
458 // \param value The 'complex<double>' vector to be stored.
459 // \return void
460 //
461 // This function stores a vector of 'complex<double>' values. The given address must be aligned
462 // according to the enabled instruction set (16-byte alignment in case of SSE, 32-byte alignment
463 // in case of AVX, and 64-byte alignment in case of AVX-512/MIC).
464 */
storea(complex<double> * address,const SIMDcdouble & value)465 BLAZE_ALWAYS_INLINE void storea( complex<double>* address, const SIMDcdouble& value ) noexcept
466 {
467 BLAZE_STATIC_ASSERT ( sizeof( complex<double> ) == 2UL*sizeof( double ) );
468 BLAZE_INTERNAL_ASSERT( checkAlignment( address ), "Invalid alignment detected" );
469
470 #if BLAZE_AVX512F_MODE || BLAZE_MIC_MODE
471 _mm512_store_pd( reinterpret_cast<double*>( address ), value.value );
472 #elif BLAZE_AVX_MODE
473 _mm256_store_pd( reinterpret_cast<double*>( address ), value.value );
474 #elif BLAZE_SSE2_MODE
475 _mm_store_pd( reinterpret_cast<double*>( address ), value.value );
476 #else
477 *address = value.value;
478 #endif
479 }
480 //*************************************************************************************************
481
482 } // namespace blaze
483
484 #endif
485