1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stddef.h>
16 #include <stdint.h>
17 
18 #undef HWY_TARGET_INCLUDE
19 #define HWY_TARGET_INCLUDE "highway_test.cc"
20 #include "hwy/foreach_target.h"
21 #include "hwy/highway.h"
22 #include "hwy/nanobenchmark.h"  // Unpredictable1
23 #include "hwy/tests/test_util-inl.h"
24 
25 HWY_BEFORE_NAMESPACE();
26 namespace hwy {
27 namespace HWY_NAMESPACE {
28 
29 struct TestSet {
30   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestSet31   HWY_NOINLINE void operator()(T /*unused*/, D d) {
32     // Zero
33     const auto v0 = Zero(d);
34     const size_t N = Lanes(d);
35     auto expected = AllocateAligned<T>(N);
36     std::fill(expected.get(), expected.get() + N, T(0));
37     HWY_ASSERT_VEC_EQ(d, expected.get(), v0);
38 
39     // Set
40     const auto v2 = Set(d, T(2));
41     for (size_t i = 0; i < N; ++i) {
42       expected[i] = 2;
43     }
44     HWY_ASSERT_VEC_EQ(d, expected.get(), v2);
45 
46     // Iota
47     const auto vi = Iota(d, T(5));
48     for (size_t i = 0; i < N; ++i) {
49       expected[i] = T(5 + i);
50     }
51     HWY_ASSERT_VEC_EQ(d, expected.get(), vi);
52 
53     // Undefined
54     const auto vu = Undefined(d);
55     Store(vu, d, expected.get());
56   }
57 };
58 
TestAllSet()59 HWY_NOINLINE void TestAllSet() { ForAllTypes(ForPartialVectors<TestSet>()); }
60 
61 // Ensures wraparound (mod 2^bits)
62 struct TestOverflow {
63   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestOverflow64   HWY_NOINLINE void operator()(T /*unused*/, D d) {
65     const auto v1 = Set(d, T(1));
66     const auto vmax = Set(d, LimitsMax<T>());
67     const auto vmin = Set(d, LimitsMin<T>());
68     // Unsigned underflow / negative -> positive
69     HWY_ASSERT_VEC_EQ(d, vmax, vmin - v1);
70     // Unsigned overflow / positive -> negative
71     HWY_ASSERT_VEC_EQ(d, vmin, vmax + v1);
72   }
73 };
74 
TestAllOverflow()75 HWY_NOINLINE void TestAllOverflow() {
76   ForIntegerTypes(ForPartialVectors<TestOverflow>());
77 }
78 
79 struct TestSignBitInteger {
80   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestSignBitInteger81   HWY_NOINLINE void operator()(T /*unused*/, D d) {
82     const auto v0 = Zero(d);
83     const auto all = VecFromMask(d, Eq(v0, v0));
84     const auto vs = SignBit(d);
85     const auto other = Sub(vs, Set(d, 1));
86 
87     // Shifting left by one => overflow, equal zero
88     HWY_ASSERT_VEC_EQ(d, v0, Add(vs, vs));
89     // Verify the lower bits are zero (only +/- and logical ops are available
90     // for all types)
91     HWY_ASSERT_VEC_EQ(d, all, Add(vs, other));
92   }
93 };
94 
95 struct TestSignBitFloat {
96   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestSignBitFloat97   HWY_NOINLINE void operator()(T /*unused*/, D d) {
98     const auto v0 = Zero(d);
99     const auto vs = SignBit(d);
100     const auto vp = Set(d, 2.25);
101     const auto vn = Set(d, -2.25);
102     HWY_ASSERT_VEC_EQ(d, Or(vp, vs), vn);
103     HWY_ASSERT_VEC_EQ(d, AndNot(vs, vn), vp);
104     HWY_ASSERT_VEC_EQ(d, v0, vs);
105   }
106 };
107 
TestAllSignBit()108 HWY_NOINLINE void TestAllSignBit() {
109   ForIntegerTypes(ForPartialVectors<TestSignBitInteger>());
110   ForFloatTypes(ForPartialVectors<TestSignBitFloat>());
111 }
112 
113 // std::isnan returns false for 0x7F..FF in clang AVX3 builds, so DIY.
114 template <typename TF>
IsNaN(TF f)115 bool IsNaN(TF f) {
116   MakeUnsigned<TF> bits;
117   memcpy(&bits, &f, sizeof(TF));
118   bits += bits;
119   bits >>= 1;  // clear sign bit
120   // NaN if all exponent bits are set and the mantissa is not zero.
121   return bits > ExponentMask<decltype(bits)>();
122 }
123 
124 template <class D, class V>
AssertNaN(const D d,const V v,const char * file,int line)125 HWY_NOINLINE void AssertNaN(const D d, const V v, const char* file, int line) {
126   using T = TFromD<D>;
127   const T lane = GetLane(v);
128   if (!IsNaN(lane)) {
129     const std::string type_name = TypeName(T(), Lanes(d));
130     MakeUnsigned<T> bits;
131     memcpy(&bits, &lane, sizeof(T));
132     // RVV lacks PRIu64, so use size_t; double will be truncated on 32-bit.
133     Abort(file, line, "Expected %s NaN, got %E (%zu)", type_name.c_str(), lane,
134           size_t(bits));
135   }
136 }
137 
138 #define HWY_ASSERT_NAN(d, v) AssertNaN(d, v, __FILE__, __LINE__)
139 
140 struct TestNaN {
141   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestNaN142   HWY_NOINLINE void operator()(T /*unused*/, D d) {
143     const auto v1 = Set(d, T(Unpredictable1()));
144     const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1);
145     HWY_ASSERT_NAN(d, nan);
146 
147     // Arithmetic
148     HWY_ASSERT_NAN(d, Add(nan, v1));
149     HWY_ASSERT_NAN(d, Add(v1, nan));
150     HWY_ASSERT_NAN(d, Sub(nan, v1));
151     HWY_ASSERT_NAN(d, Sub(v1, nan));
152     HWY_ASSERT_NAN(d, Mul(nan, v1));
153     HWY_ASSERT_NAN(d, Mul(v1, nan));
154     HWY_ASSERT_NAN(d, Div(nan, v1));
155     HWY_ASSERT_NAN(d, Div(v1, nan));
156 
157     // FMA
158     HWY_ASSERT_NAN(d, MulAdd(nan, v1, v1));
159     HWY_ASSERT_NAN(d, MulAdd(v1, nan, v1));
160     HWY_ASSERT_NAN(d, MulAdd(v1, v1, nan));
161     HWY_ASSERT_NAN(d, MulSub(nan, v1, v1));
162     HWY_ASSERT_NAN(d, MulSub(v1, nan, v1));
163     HWY_ASSERT_NAN(d, MulSub(v1, v1, nan));
164     HWY_ASSERT_NAN(d, NegMulAdd(nan, v1, v1));
165     HWY_ASSERT_NAN(d, NegMulAdd(v1, nan, v1));
166     HWY_ASSERT_NAN(d, NegMulAdd(v1, v1, nan));
167     HWY_ASSERT_NAN(d, NegMulSub(nan, v1, v1));
168     HWY_ASSERT_NAN(d, NegMulSub(v1, nan, v1));
169     HWY_ASSERT_NAN(d, NegMulSub(v1, v1, nan));
170 
171     // Rcp/Sqrt
172     HWY_ASSERT_NAN(d, Sqrt(nan));
173 
174     // Sign manipulation
175     HWY_ASSERT_NAN(d, Abs(nan));
176     HWY_ASSERT_NAN(d, Neg(nan));
177     HWY_ASSERT_NAN(d, CopySign(nan, v1));
178     HWY_ASSERT_NAN(d, CopySignToAbs(nan, v1));
179 
180     // Rounding
181     HWY_ASSERT_NAN(d, Ceil(nan));
182     HWY_ASSERT_NAN(d, Floor(nan));
183     HWY_ASSERT_NAN(d, Round(nan));
184     HWY_ASSERT_NAN(d, Trunc(nan));
185 
186     // Logical (And/AndNot/Xor will clear NaN!)
187     HWY_ASSERT_NAN(d, Or(nan, v1));
188 
189     // Comparison
190     HWY_ASSERT(AllFalse(Eq(nan, v1)));
191     HWY_ASSERT(AllFalse(Gt(nan, v1)));
192     HWY_ASSERT(AllFalse(Lt(nan, v1)));
193     HWY_ASSERT(AllFalse(Ge(nan, v1)));
194     HWY_ASSERT(AllFalse(Le(nan, v1)));
195 
196     // Reduction
197     HWY_ASSERT_NAN(d, SumOfLanes(nan));
198 // TODO(janwas): re-enable after QEMU is fixed
199 #if HWY_TARGET != HWY_RVV
200     HWY_ASSERT_NAN(d, MinOfLanes(nan));
201     HWY_ASSERT_NAN(d, MaxOfLanes(nan));
202 #endif
203 
204     // Min
205 #if HWY_ARCH_X86 && HWY_TARGET != HWY_SCALAR
206     // x86 SIMD returns the second operand if any input is NaN.
207     HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1));
208     HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1));
209     HWY_ASSERT_NAN(d, Min(v1, nan));
210     HWY_ASSERT_NAN(d, Max(v1, nan));
211 #elif HWY_ARCH_WASM
212     // Should return NaN if any input is NaN, but does not for scalar.
213     // TODO(janwas): remove once this is fixed.
214 #elif HWY_TARGET == HWY_NEON && HWY_ARCH_ARM_V7
215     // ARMv7 NEON returns NaN if any input is NaN.
216     HWY_ASSERT_NAN(d, Min(v1, nan));
217     HWY_ASSERT_NAN(d, Max(v1, nan));
218     HWY_ASSERT_NAN(d, Min(nan, v1));
219     HWY_ASSERT_NAN(d, Max(nan, v1));
220 #else
221     // IEEE 754-2019 minimumNumber is defined as the other argument if exactly
222     // one is NaN, and qNaN if both are.
223     HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1));
224     HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1));
225     HWY_ASSERT_VEC_EQ(d, v1, Min(v1, nan));
226     HWY_ASSERT_VEC_EQ(d, v1, Max(v1, nan));
227 #endif
228     HWY_ASSERT_NAN(d, Min(nan, nan));
229     HWY_ASSERT_NAN(d, Max(nan, nan));
230 
231     // Comparison
232     HWY_ASSERT(AllFalse(Eq(nan, v1)));
233     HWY_ASSERT(AllFalse(Gt(nan, v1)));
234     HWY_ASSERT(AllFalse(Lt(nan, v1)));
235     HWY_ASSERT(AllFalse(Ge(nan, v1)));
236     HWY_ASSERT(AllFalse(Le(nan, v1)));
237   }
238 };
239 
240 // For functions only available for float32
241 struct TestF32NaN {
242   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestF32NaN243   HWY_NOINLINE void operator()(T /*unused*/, D d) {
244     const auto v1 = Set(d, T(Unpredictable1()));
245     const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1);
246     HWY_ASSERT_NAN(d, ApproximateReciprocal(nan));
247     HWY_ASSERT_NAN(d, ApproximateReciprocalSqrt(nan));
248     HWY_ASSERT_NAN(d, AbsDiff(nan, v1));
249     HWY_ASSERT_NAN(d, AbsDiff(v1, nan));
250   }
251 };
252 
TestAllNaN()253 HWY_NOINLINE void TestAllNaN() {
254   ForFloatTypes(ForPartialVectors<TestNaN>());
255   ForPartialVectors<TestF32NaN>()(float());
256 }
257 
258 struct TestCopyAndAssign {
259   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestCopyAndAssign260   HWY_NOINLINE void operator()(T /*unused*/, D d) {
261     // copy V
262     const auto v3 = Iota(d, 3);
263     auto v3b(v3);
264     HWY_ASSERT_VEC_EQ(d, v3, v3b);
265 
266     // assign V
267     auto v3c = Undefined(d);
268     v3c = v3;
269     HWY_ASSERT_VEC_EQ(d, v3, v3c);
270   }
271 };
272 
TestAllCopyAndAssign()273 HWY_NOINLINE void TestAllCopyAndAssign() {
274   ForAllTypes(ForPartialVectors<TestCopyAndAssign>());
275 }
276 
277 struct TestGetLane {
278   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestGetLane279   HWY_NOINLINE void operator()(T /*unused*/, D d) {
280     HWY_ASSERT_EQ(T(0), GetLane(Zero(d)));
281     HWY_ASSERT_EQ(T(1), GetLane(Set(d, 1)));
282   }
283 };
284 
TestAllGetLane()285 HWY_NOINLINE void TestAllGetLane() {
286   ForAllTypes(ForPartialVectors<TestGetLane>());
287 }
288 
289 
290 // NOLINTNEXTLINE(google-readability-namespace-comments)
291 }  // namespace HWY_NAMESPACE
292 }  // namespace hwy
293 HWY_AFTER_NAMESPACE();
294 
// Test registration. NOTE(review): presumably HWY_ONCE is set for only one of
// the per-target re-inclusions done by foreach_target.h, so the suite is
// registered a single time - confirm against hwy/foreach_target.h.
#if HWY_ONCE
namespace hwy {
HWY_BEFORE_TEST(HighwayTest);
// One entry per TestAll* function defined in hwy::HWY_NAMESPACE above.
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSet);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllOverflow);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSignBit);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllNaN);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCopyAndAssign);
HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllGetLane);
}  // namespace hwy
#endif
306