1 /* Copyright (C) 2011-2014 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H
9 #define LIBSIMDPP_SIMDPP_DETAIL_INSN_LOAD_H
10
11 #ifndef LIBSIMDPP_SIMD_H
12 #error "This file must be included through simd.h"
13 #endif
14
15 #include <simdpp/types.h>
16 #include <simdpp/types/traits.h>
17 #include <simdpp/detail/align.h>
18 #include <simdpp/detail/insn/mem_unpack.h>
19 #include <simdpp/core/transpose.h>
20 #include <simdpp/detail/null/memory.h>
21
22 namespace simdpp {
23 namespace SIMDPP_ARCH_NAMESPACE {
24 namespace detail {
25 namespace insn {
26
27 static SIMDPP_INL
i_load(uint8x16 & a,const char * p)28 void i_load(uint8x16& a, const char* p)
29 {
30 p = detail::assume_aligned(p, 16);
31 #if SIMDPP_USE_NULL
32 detail::null::load(a, p);
33 #elif SIMDPP_USE_SSE2
34 a = _mm_load_si128(reinterpret_cast<const __m128i*>(p));
35 #elif SIMDPP_USE_NEON
36 a = vreinterpretq_u8_u64(vld1q_u64(reinterpret_cast<const uint64_t*>(p)));
37 #elif SIMDPP_USE_ALTIVEC
38 a = vec_ld(0, reinterpret_cast<const uint8_t*>(p));
39 #elif SIMDPP_USE_MSA
40 a = (v16u8) __msa_ld_b(p, 0);
41 #endif
42 }
43
44 static SIMDPP_INL
i_load(uint16x8 & a,const char * p)45 void i_load(uint16x8& a, const char* p) { uint8x16 r; i_load(r, p); a = r; }
46 static SIMDPP_INL
i_load(uint32x4 & a,const char * p)47 void i_load(uint32x4& a, const char* p) { uint8x16 r; i_load(r, p); a = r; }
48
49 static SIMDPP_INL
i_load(uint64x2 & a,const char * p)50 void i_load(uint64x2& a, const char* p)
51 {
52 #if SIMDPP_USE_NULL || (SIMDPP_USE_ALTIVEC && !SIMDPP_USE_VSX_207)
53 p = detail::assume_aligned(p, 16);
54 detail::null::load(a, p);
55 #else
56 uint8x16 r; i_load(r, p); a = r;
57 #endif
58 }
59
60 static SIMDPP_INL
i_load(float32x4 & a,const char * p)61 void i_load(float32x4& a, const char* p)
62 {
63 p = detail::assume_aligned(p, 16);
64 const float* q = reinterpret_cast<const float*>(p);
65 (void) q;
66 #if SIMDPP_USE_NULL || SIMDPP_USE_NEON_NO_FLT_SP
67 detail::null::load(a, p);
68 #elif SIMDPP_USE_SSE2
69 a = _mm_load_ps(q);
70 #elif SIMDPP_USE_NEON
71 a = vld1q_f32(q);
72 #elif SIMDPP_USE_ALTIVEC
73 a = vec_ld(0, q);
74 #elif SIMDPP_USE_MSA
75 a = (v4f32) __msa_ld_w(q, 0);
76 #endif
77 }
78
79 static SIMDPP_INL
i_load(float64x2 & a,const char * p)80 void i_load(float64x2& a, const char* p)
81 {
82 p = detail::assume_aligned(p, 16);
83 const double* q = reinterpret_cast<const double*>(p);
84 (void) q;
85 #if SIMDPP_USE_SSE2
86 a = _mm_load_pd(q);
87 #elif SIMDPP_USE_NEON64
88 a = vld1q_f64(q);
89 #elif SIMDPP_USE_VSX_206
90 a = vec_ld(0, reinterpret_cast<const __vector double*>(q));
91 #elif SIMDPP_USE_MSA
92 a = (v2f64) __msa_ld_d(q, 0);
93 #elif SIMDPP_USE_NULL || SIMDPP_USE_ALTIVEC || SIMDPP_USE_NEON32
94 detail::null::load(a, p);
95 #endif
96 }
97
#if SIMDPP_USE_AVX2
// Native 256-bit integer loads. Every element width is loaded with
// _mm256_load_si256 from the address p reinterpreted as a __m256i pointer.
static SIMDPP_INL
void i_load(uint8x32& a, const char* p)
{
    const __m256i* mem = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(mem);
}

static SIMDPP_INL
void i_load(uint16x16& a, const char* p)
{
    const __m256i* mem = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(mem);
}

static SIMDPP_INL
void i_load(uint32x8& a, const char* p)
{
    const __m256i* mem = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(mem);
}

static SIMDPP_INL
void i_load(uint64x4& a, const char* p)
{
    const __m256i* mem = reinterpret_cast<const __m256i*>(p);
    a = _mm256_load_si256(mem);
}
#endif
#if SIMDPP_USE_AVX
// Native 256-bit floating-point loads from the address p.
static SIMDPP_INL
void i_load(float32x8& a, const char* p)
{
    const float* mem = reinterpret_cast<const float*>(p);
    a = _mm256_load_ps(mem);
}

static SIMDPP_INL
void i_load(float64x4& a, const char* p)
{
    const double* mem = reinterpret_cast<const double*>(p);
    a = _mm256_load_pd(mem);
}
#endif
132
#if SIMDPP_USE_AVX512BW
// Native 512-bit loads for 8-bit and 16-bit integer vectors from the
// address p. Both use _mm512_load_epi32; the loaded bits are simply
// assigned to the destination vector type.
//
// Declared `static` like every other non-template i_load overload in this
// file, so that all of them share the same (internal) linkage.
static SIMDPP_INL
void i_load(uint8<64>& a, const char* p)
{
    a = _mm512_load_epi32(p);
}

static SIMDPP_INL
void i_load(uint16<32>& a, const char* p)
{
    a = _mm512_load_epi32(p);
}
#endif
143
#if SIMDPP_USE_AVX512F
// Native 512-bit loads from the address p for 32/64-bit integer and
// floating-point vectors.
static SIMDPP_INL
void i_load(uint32<16>& a, const char* p)
{
    const void* mem = p;
    a = _mm512_load_epi32(mem);
}

static SIMDPP_INL
void i_load(uint64<8>& a, const char* p)
{
    const void* mem = p;
    a = _mm512_load_epi64(mem);
}

static SIMDPP_INL
void i_load(float32<16>& a, const char* p)
{
    const float* mem = reinterpret_cast<const float*>(p);
    a = _mm512_load_ps(mem);
}

static SIMDPP_INL
void i_load(float64<8>& a, const char* p)
{
    const double* mem = reinterpret_cast<const double*>(p);
    a = _mm512_load_pd(mem);
}
#endif
166
167 template<class V> SIMDPP_INL
i_load(V & a,const char * p)168 void i_load(V& a, const char* p)
169 {
170 const unsigned veclen = V::base_vector_type::length_bytes;
171
172 for (unsigned i = 0; i < V::vec_length; ++i) {
173 i_load(a.vec(i), p);
174 p += veclen;
175 }
176 }
177
178 template<class V> SIMDPP_INL
i_load_any(const char * p)179 V i_load_any(const char* p)
180 {
181 typename detail::remove_sign<V>::type r;
182 i_load(r, p);
183 return V(r);
184 }
185
186 } // namespace insn
187
188 template<class V> SIMDPP_INL
construct_eval(V & v,const expr_vec_load & e)189 void construct_eval(V& v, const expr_vec_load& e)
190 {
191 v = insn::i_load_any<V>(e.a);
192 }
193
194 } // namespace detail
195 } // namespace SIMDPP_ARCH_NAMESPACE
196 } // namespace simdpp
197
198 #endif
199
200