1 /* Copyright (C) 2011-2012 Povilas Kanapickas <povilas@radix.lt>
2
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at
5 http://www.boost.org/LICENSE_1_0.txt)
6 */
7
8 #ifndef LIBSIMDPP_DETAIL_NULL_MATH_H
9 #define LIBSIMDPP_DETAIL_NULL_MATH_H
10 #if SIMDPP_USE_NULL || SIMDPP_USE_NEON || SIMDPP_USE_ALTIVEC
11
12 #ifndef LIBSIMDPP_SIMD_H
13 #error "This file must be included through simd.h"
14 #endif
15
16 #include <simdpp/types.h>
17 #include <simdpp/core/cast.h>
18
19 #include <cmath>
20 #include <cstdlib>
21 #include <limits>
22
23 // On certain versions of MSVC min and max are defined as macros.
24 #if _MSC_VER
25 #ifdef min
26 #undef min
27 #endif
28
29 #ifdef max
30 #undef max
31 #endif
32 #endif
33
34 namespace simdpp {
35 namespace SIMDPP_ARCH_NAMESPACE {
36 namespace detail {
37 namespace null {
38
/**
 * Per-element NaN test.
 *
 * Element i of the returned mask vector is set to 1 when std::isnan reports
 * element i of @a a as NaN, and to 0 otherwise.
 */
template<class V> SIMDPP_INL
typename V::mask_vector_type isnan(const V& a)
{
    typedef typename V::mask_vector_type mask_type;

    mask_type result;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        if (std::isnan(a.el(idx))) {
            result.el(idx) = 1;
        } else {
            result.el(idx) = 0;
        }
    }
    return result;
}
48
/**
 * Per-element "either operand is NaN" test.
 *
 * Element i of the returned mask vector is 1 when element i of @a a or
 * element i of @a b is NaN according to std::isnan, and 0 otherwise.
 */
template<class V> SIMDPP_INL
typename V::mask_vector_type isnan2(const V& a, const V& b)
{
    typedef typename V::mask_vector_type mask_type;

    mask_type result;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        // b is only inspected when a is not NaN, as with the || operator.
        const bool either_nan = std::isnan(a.el(idx)) || std::isnan(b.el(idx));
        result.el(idx) = either_nan ? 1 : 0;
    }
    return result;
}
58
/**
 * Per-element absolute value.
 *
 * Element i of the result is std::abs applied to element i of @a a.
 */
template<class V> SIMDPP_INL
V abs(const V& a)
{
    V result;
    unsigned idx = 0;
    while (idx < V::length) {
        result.el(idx) = std::abs(a.el(idx));
        ++idx;
    }
    return result;
}
68
/**
 * Clamps @a t to the range of @a T and converts the result to @a T.
 *
 * Both bounds come from std::numeric_limits<T> and are converted to @a U
 * before the comparison, so @a U is expected to be able to represent the
 * bounds of @a T.
 *
 * The lower bound is numeric_limits<T>::lowest() rather than min(). The two
 * are the same for integer types, but for floating-point types min() is the
 * smallest positive normalized value and would clamp every negative input to
 * a positive number.
 *
 * @tparam T destination type
 * @tparam U type in which the comparison is carried out
 * @param t value to clamp
 * @return @a t limited to [lowest, max] of @a T, converted to @a T
 */
template<class T, class U> SIMDPP_INL
T saturate(U t)
{
    // Named lo/hi rather than min/max so that stray min/max macros cannot
    // interfere with the local variables.
    const U lo = static_cast<U>(std::numeric_limits<T>::lowest());
    const U hi = static_cast<U>(std::numeric_limits<T>::max());

    if (t < lo) {
        t = lo;
    }
    if (t > hi) {
        t = hi;
    }
    // After clamping the value fits into T, so the narrowing is explicit.
    return static_cast<T>(t);
}
78
/**
 * Per-element addition.
 *
 * Element i of the result is element i of @a a plus element i of @a b,
 * computed with the built-in + operator and stored back into the element.
 */
template<class V> SIMDPP_INL
V add(const V& a, const V& b)
{
    V sum;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        sum.el(idx) = a.el(idx) + b.el(idx);
    }
    return sum;
}
88
89 template<class V> SIMDPP_INL
add_sat(const V & a,const V & b)90 V add_sat(const V& a, const V& b)
91 {
92 V r;
93 for (unsigned i = 0; i < V::length; i++) {
94 r.el(i) = saturate<typename V::element_type>(int32_t(a.el(i)) + b.el(i));
95 }
96 return r;
97 }
98
/**
 * Per-element subtraction.
 *
 * Element i of the result is element i of @a a minus element i of @a b,
 * computed with the built-in - operator and stored back into the element.
 */
template<class V> SIMDPP_INL
V sub(const V& a, const V& b)
{
    V diff;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        diff.el(idx) = a.el(idx) - b.el(idx);
    }
    return diff;
}
108
109 template<class V> SIMDPP_INL
sub_sat(const V & a,const V & b)110 V sub_sat(const V& a, const V& b)
111 {
112 V r;
113 for (unsigned i = 0; i < V::length; i++) {
114 r.el(i) = saturate<typename V::element_type>(int32_t(a.el(i)) - b.el(i));
115 }
116 return r;
117 }
118
/**
 * Per-element negation.
 *
 * Element i of the result is the unary minus of element i of @a a.
 */
template<class V> SIMDPP_INL
V neg(const V& a)
{
    V negated;
    unsigned idx = 0;
    while (idx < V::length) {
        negated.el(idx) = -a.el(idx);
        ++idx;
    }
    return negated;
}
128
/**
 * Per-element multiplication.
 *
 * Element i of the result is element i of @a a times element i of @a b,
 * computed with the built-in * operator and stored back into the element.
 */
template<class V> SIMDPP_INL
V mul(const V& a, const V& b)
{
    V product;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        product.el(idx) = a.el(idx) * b.el(idx);
    }
    return product;
}
138
/**
 * Per-element fused multiply-add.
 *
 * Element i of the result is std::fma(a[i], b[i], c[i]), i.e. a[i] * b[i]
 * + c[i] as evaluated by std::fma.
 */
template<class V> SIMDPP_INL
V fmadd(const V& a, const V& b, const V& c)
{
    V result;
    unsigned idx = 0;
    while (idx < V::length) {
        result.el(idx) = std::fma(a.el(idx), b.el(idx), c.el(idx));
        ++idx;
    }
    return result;
}
148
/**
 * Per-element fused multiply-subtract.
 *
 * Element i of the result is std::fma(a[i], b[i], -c[i]), i.e. a[i] * b[i]
 * - c[i] as evaluated by std::fma with the addend negated.
 */
template<class V> SIMDPP_INL
V fmsub(const V& a, const V& b, const V& c)
{
    V result;
    unsigned idx = 0;
    while (idx < V::length) {
        result.el(idx) = std::fma(a.el(idx), b.el(idx), -c.el(idx));
        ++idx;
    }
    return result;
}
158
/**
 * Per-element division restricted to the low @a P bits of each operand.
 *
 * Each element of @a a and @a b is masked down to its P least significant
 * bits, the masked values are divided, and the quotient is masked again
 * before being stored.
 *
 * A masked divisor of zero is not handled: the division is performed as is.
 *
 * @tparam P number of low-order bits that take part in the division
 * @param a dividends
 * @param b divisors
 * @return vector of masked quotients
 */
template<unsigned P, class V> SIMDPP_INL
V div_p(const V& a, const V& b)
{
    // The least P significant bits of the mask are set. Shifting a 64-bit
    // value by 64 or more is undefined, so P >= 64 selects the all-ones mask
    // directly; "P & 63" keeps the shift count of the unused branch in range.
    const uint64_t mask = (P >= 64) ? ~uint64_t(0)
                                    : ((uint64_t(1) << (P & 63)) - 1);

    V r;
    for (unsigned i = 0; i < V::length; i++) {
        r.el(i) = (a.el(i) & mask) / (b.el(i) & mask);
        r.el(i) &= mask;
    }
    return r;
}
175
176
/**
 * Shifts every element right by the same amount.
 *
 * Element i of the result is element i of @a a shifted right by @a shift
 * using the built-in >> operator.
 *
 * NOTE(review): the built-in operator is undefined for counts at or beyond
 * the width of the promoted operand; callers presumably pass smaller counts
 * - confirm.
 */
template<class V> SIMDPP_INL
V shift_r(const V& a, unsigned shift)
{
    V shifted;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        shifted.el(idx) = a.el(idx) >> shift;
    }
    return shifted;
}
186
/**
 * Shifts every element right by a per-element amount.
 *
 * Element i of the result is element i of @a a shifted right by element i of
 * @a shift using the built-in >> operator.
 *
 * NOTE(review): the built-in operator is undefined for counts at or beyond
 * the width of the promoted operand; callers presumably pass smaller counts
 * - confirm.
 */
template<class V, class S> SIMDPP_INL
V shift_r_v(const V& a, const S& shift)
{
    V shifted;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        shifted.el(idx) = a.el(idx) >> shift.el(idx);
    }
    return shifted;
}
196
/**
 * Shifts every element left by the same amount.
 *
 * Element i of the result is element i of @a a shifted left by @a shift
 * using the built-in << operator, stored back into the element.
 *
 * NOTE(review): the built-in operator is undefined for counts at or beyond
 * the width of the promoted operand; callers presumably pass smaller counts
 * - confirm.
 */
template<class V> SIMDPP_INL
V shift_l(const V& a, unsigned shift)
{
    V shifted;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        shifted.el(idx) = a.el(idx) << shift;
    }
    return shifted;
}
206
/**
 * Shifts every element left by a per-element amount.
 *
 * Element i of the result is element i of @a a shifted left by element i of
 * @a shift using the built-in << operator, stored back into the element.
 *
 * NOTE(review): the built-in operator is undefined for counts at or beyond
 * the width of the promoted operand; callers presumably pass smaller counts
 * - confirm.
 */
template<class V, class S> SIMDPP_INL
V shift_l_v(const V& a, const S& shift)
{
    V shifted;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        shifted.el(idx) = a.el(idx) << shift.el(idx);
    }
    return shifted;
}
216
/**
 * Per-element minimum.
 *
 * Element i of the result is element i of @a a when it compares less than or
 * equal to element i of @a b, and element i of @a b otherwise (which is also
 * the outcome whenever the <= comparison is false, e.g. for unordered
 * operands).
 */
template<class V> SIMDPP_INL
V min(const V& a, const V& b)
{
    V smaller;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        if (a.el(idx) <= b.el(idx)) {
            smaller.el(idx) = a.el(idx);
        } else {
            smaller.el(idx) = b.el(idx);
        }
    }
    return smaller;
}
226
/**
 * Per-element maximum.
 *
 * Element i of the result is element i of @a a when it compares greater than
 * or equal to element i of @a b, and element i of @a b otherwise (which is
 * also the outcome whenever the >= comparison is false, e.g. for unordered
 * operands).
 */
template<class V> SIMDPP_INL
V max(const V& a, const V& b)
{
    V larger;
    for (unsigned idx = 0; idx != V::length; ++idx) {
        if (a.el(idx) >= b.el(idx)) {
            larger.el(idx) = a.el(idx);
        } else {
            larger.el(idx) = b.el(idx);
        }
    }
    return larger;
}
236
237 } // namespace null
238 } // namespace detail
239 } // namespace SIMDPP_ARCH_NAMESPACE
240 } // namespace simdpp
241
242 #endif
243 #endif
244