1 /*  Copyright (C) 2011-2014  Povilas Kanapickas <povilas@radix.lt>
2 
3     Distributed under the Boost Software License, Version 1.0.
4         (See accompanying file LICENSE_1_0.txt or copy at
5             http://www.boost.org/LICENSE_1_0.txt)
6 */
7 
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H
10 
11 #ifndef LIBSIMDPP_SIMD_H
12     #error "This file must be included through simd.h"
13 #endif
14 
15 #include <simdpp/types.h>
16 #include <simdpp/types/traits.h>
17 #include <simdpp/detail/align.h>
18 #include <simdpp/detail/insn/mem_unpack.h>
19 #include <simdpp/core/transpose.h>
20 #include <simdpp/detail/null/memory.h>
21 
22 namespace simdpp {
23 namespace SIMDPP_ARCH_NAMESPACE {
24 namespace detail {
25 namespace insn {
26 
27 static SIMDPP_INL
i_load(uint8x16 & a,const char * p)28 void i_load(uint8x16& a, const char* p)
29 {
30     p = detail::assume_aligned(p, 16);
31 #if SIMDPP_USE_NULL
32     detail::null::load(a, p);
33 #elif SIMDPP_USE_SSE2
34     a = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
35 #elif SIMDPP_USE_NEON
36     a = vreinterpretq_u8_u64(vld1q_u64(reinterpret_cast<const uint64_t*>(p)));
37 #elif SIMDPP_USE_ALTIVEC
38     a = vec_ld(0, reinterpret_cast<const uint8_t*>(p));
39 #elif SIMDPP_USE_MSA
40     a = (v16u8) __msa_ld_b(p, 0);
41 #endif
42 }
43 
44 static SIMDPP_INL
i_load(uint16x8 & a,const char * p)45 void i_load(uint16x8& a, const char* p) { uint8x16 r; i_load(r, p); a = r;  }
46 static SIMDPP_INL
i_load(uint32x4 & a,const char * p)47 void i_load(uint32x4& a, const char* p) { uint8x16 r; i_load(r, p); a = r;  }
48 
49 static SIMDPP_INL
i_load(uint64x2 & a,const char * p)50 void i_load(uint64x2& a, const char* p)
51 {
52 #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
53     p = detail::assume_aligned(p, 16);
54     detail::null::load(a, p);
55 #else
56     uint8x16 r; i_load(r, p); a = r;
57 #endif
58 }
59 
60 static SIMDPP_INL
i_load(float32x4 & a,const char * p)61 void i_load(float32x4& a, const char* p)
62 {
63     p = detail::assume_aligned(p, 16);
64     const float* q = reinterpret_cast<const float*>(p);
65     (void) q;
66 #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
67     detail::null::load(a, p);
68 #elif SIMDPP_USE_SSE2
69     a = _mm_load_ps(q);
70 #elif SIMDPP_USE_NEON
71     a = vld1q_f32(q);
72 #elif SIMDPP_USE_ALTIVEC
73     a = vec_ld(0, q);
74 #elif SIMDPP_USE_MSA
75     a = (v4f32) __msa_ld_w(q, 0);
76 #endif
77 }
78 
79 static SIMDPP_INL
i_load(float64x2 & a,const char * p)80 void i_load(float64x2& a, const char* p)
81 {
82     p = detail::assume_aligned(p, 16);
83     const double* q = reinterpret_cast<const double*>(p);
84     (void) q;
85 #if SIMDPP_USE_SSE2
86     a = _mm_load_pd(q);
87 #elif SIMDPP_USE_NEON64
88     a = vld1q_f64(q);
89 #elif SIMDPP_USE_VSX_206
90     a = vec_ld(0, reinterpret_cast<const __vector double*>(q));
91 #elif SIMDPP_USE_MSA
92     a = (v2f64) __msa_ld_d(q, 0);
93 #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC || SIMDPP_USE_NEON32
94     detail::null::load(a, p);
95 #endif
96 }
97 
98 #if SIMDPP_USE_AVX2
99 static SIMDPP_INL
i_load(uint8x32 & a,const char * p)100 void i_load(uint8x32& a,  const char* p)
101 {
102     a = _mm256_load_si256(reinterpret_cast<const __m256i*>(p));
103 }
104 static SIMDPP_INL
i_load(uint16x16 & a,const char * p)105 void i_load(uint16x16& a, const char* p)
106 {
107     a = _mm256_load_si256(reinterpret_cast<const __m256i*>(p));
108 }
109 static SIMDPP_INL
i_load(uint32x8 & a,const char * p)110 void i_load(uint32x8& a,  const char* p)
111 {
112     a = _mm256_load_si256(reinterpret_cast<const __m256i*>(p));
113 }
114 static SIMDPP_INL
i_load(uint64x4 & a,const char * p)115 void i_load(uint64x4& a,  const char* p)
116 {
117     a = _mm256_load_si256(reinterpret_cast<const __m256i*>(p));
118 }
119 #endif
120 #if SIMDPP_USE_AVX
121 static SIMDPP_INL
i_load(float32x8 & a,const char * p)122 void i_load(float32x8& a, const char* p)
123 {
124     a = _mm256_load_ps(reinterpret_cast<const float*>(p));
125 }
126 static SIMDPP_INL
i_load(float64x4 & a,const char * p)127 void i_load(float64x4& a, const char* p)
128 {
129     a = _mm256_load_pd(reinterpret_cast<const double*>(p));
130 }
131 #endif
132 
133 #if SIMDPP_USE_AVX512BW
i_load(uint8<64> & a,const char * p)134 SIMDPP_INL void i_load(uint8<64>& a, const char* p)
135 {
136     a = _mm512_load_epi32(p);
137 }
i_load(uint16<32> & a,const char * p)138 SIMDPP_INL void i_load(uint16<32>& a, const char* p)
139 {
140     a = _mm512_load_epi32(p);
141 }
142 #endif
143 
144 #if SIMDPP_USE_AVX512F
145 static SIMDPP_INL
i_load(uint32<16> & a,const char * p)146 void i_load(uint32<16>& a,  const char* p)
147 {
148     a = _mm512_load_epi32(p);
149 }
150 static SIMDPP_INL
i_load(uint64<8> & a,const char * p)151 void i_load(uint64<8>& a,  const char* p)
152 {
153     a = _mm512_load_epi64(p);
154 }
155 static SIMDPP_INL
i_load(float32<16> & a,const char * p)156 void i_load(float32<16>& a, const char* p)
157 {
158     a = _mm512_load_ps(reinterpret_cast<const float*>(p));
159 }
160 static SIMDPP_INL
i_load(float64<8> & a,const char * p)161 void i_load(float64<8>& a, const char* p)
162 {
163     a = _mm512_load_pd(reinterpret_cast<const double*>(p));
164 }
165 #endif
166 
167 template<class V> SIMDPP_INL
i_load(V & a,const char * p)168 void i_load(V& a, const char* p)
169 {
170     const unsigned veclen = V::base_vector_type::length_bytes;
171 
172     for (unsigned i = 0; i < V::vec_length; ++i) {
173         i_load(a.vec(i), p);
174         p += veclen;
175     }
176 }
177 
178 template<class V> SIMDPP_INL
i_load_any(const char * p)179 V i_load_any(const char* p)
180 {
181     typename detail::remove_sign<V>::type r;
182     i_load(r, p);
183     return V(r);
184 }
185 
186 } // namespace insn
187 
188 template<class V> SIMDPP_INL
construct_eval(V & v,const expr_vec_load & e)189 void construct_eval(V& v, const expr_vec_load& e)
190 {
191     v = insn::i_load_any<V>(e.a);
192 }
193 
194 } // namespace detail
195 } // namespace SIMDPP_ARCH_NAMESPACE
196 } // namespace simdpp
197 
198 #endif
199 
200