1 /*
2  * Copyright (c) 2015-2017, Intel Corporation
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  *  * Redistributions of source code must retain the above copyright notice,
8  *    this list of conditions and the following disclaimer.
9  *  * Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *  * Neither the name of Intel Corporation nor the names of its contributors
13  *    may be used to endorse or promote products derived from this software
14  *    without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "config.h"
30 
31 #include "gtest/gtest.h"
32 #include "util/arch.h"
33 #include "util/bytecode_ptr.h"
34 #include "util/make_unique.h"
35 #include "util/simd_utils.h"
36 
37 using namespace std;
38 using namespace ue2;
39 
40 namespace {
41 
// Build a value of type Mask with exactly one bit switched on.
//
// Bits are numbered byte-by-byte through the object's storage: bit N lives in
// byte N / 8, at position N % 8 within that byte. The caller must pass
// bit < sizeof(Mask) * 8; no range check is performed.
template<class Mask>
Mask setbit(unsigned int bit) {
    Mask mask;
    memset(&mask, 0, sizeof(mask));

    // Poke the byte through an unsigned char pointer instead of punning via a
    // union: reading a union member other than the one last written is
    // undefined behaviour in C++, and an unsigned byte can hold 0x80 without
    // an implementation-defined narrowing.
    unsigned char *bytes = reinterpret_cast<unsigned char *>(&mask);
    bytes[bit / 8] = static_cast<unsigned char>(1U << (bit % 8));

    return mask;
}
57 
// Parameterized tests follow!
//
// Irritatingly we have to define a whole bunch of overloads here... because
// templates. One Admiration Unit for anyone able to build a better way of
// doing this.
63 
64 struct simd_zeroes {
operator m128__anoncfee90cc0111::simd_zeroes65     operator m128() { return zeroes128(); }
operator m256__anoncfee90cc0111::simd_zeroes66     operator m256() { return zeroes256(); }
operator m384__anoncfee90cc0111::simd_zeroes67     operator m384() { return zeroes384(); }
operator m512__anoncfee90cc0111::simd_zeroes68     operator m512() { return zeroes512(); }
69 };
70 
71 struct simd_ones {
operator m128__anoncfee90cc0111::simd_ones72     operator m128() { return ones128(); }
operator m256__anoncfee90cc0111::simd_ones73     operator m256() { return ones256(); }
operator m384__anoncfee90cc0111::simd_ones74     operator m384() { return ones384(); }
operator m512__anoncfee90cc0111::simd_ones75     operator m512() { return ones512(); }
76 };
77 
simd_diff(const m128 & a,const m128 & b)78 bool simd_diff(const m128 &a, const m128 &b) { return !!diff128(a, b); }
simd_diff(const m256 & a,const m256 & b)79 bool simd_diff(const m256 &a, const m256 &b) { return !!diff256(a, b); }
simd_diff(const m384 & a,const m384 & b)80 bool simd_diff(const m384 &a, const m384 &b) { return !!diff384(a, b); }
simd_diff(const m512 & a,const m512 & b)81 bool simd_diff(const m512 &a, const m512 &b) { return !!diff512(a, b); }
simd_isnonzero(const m128 & a)82 bool simd_isnonzero(const m128 &a) { return !!isnonzero128(a); }
simd_isnonzero(const m256 & a)83 bool simd_isnonzero(const m256 &a) { return !!isnonzero256(a); }
simd_isnonzero(const m384 & a)84 bool simd_isnonzero(const m384 &a) { return !!isnonzero384(a); }
simd_isnonzero(const m512 & a)85 bool simd_isnonzero(const m512 &a) { return !!isnonzero512(a); }
// One name for andNNN() at every SIMD width, selected by argument type.
m128 simd_and(const m128 &a, const m128 &b) {
    return and128(a, b);
}
m256 simd_and(const m256 &a, const m256 &b) {
    return and256(a, b);
}
m384 simd_and(const m384 &a, const m384 &b) {
    return and384(a, b);
}
m512 simd_and(const m512 &a, const m512 &b) {
    return and512(a, b);
}
// One name for orNNN() at every SIMD width, selected by argument type.
m128 simd_or(const m128 &a, const m128 &b) {
    return or128(a, b);
}
m256 simd_or(const m256 &a, const m256 &b) {
    return or256(a, b);
}
m384 simd_or(const m384 &a, const m384 &b) {
    return or384(a, b);
}
m512 simd_or(const m512 &a, const m512 &b) {
    return or512(a, b);
}
// One name for xorNNN() at every SIMD width, selected by argument type.
m128 simd_xor(const m128 &a, const m128 &b) {
    return xor128(a, b);
}
m256 simd_xor(const m256 &a, const m256 &b) {
    return xor256(a, b);
}
m384 simd_xor(const m384 &a, const m384 &b) {
    return xor384(a, b);
}
m512 simd_xor(const m512 &a, const m512 &b) {
    return xor512(a, b);
}
// One name for andnotNNN() at every SIMD width, selected by argument type.
// Argument order is passed through unchanged.
m128 simd_andnot(const m128 &a, const m128 &b) {
    return andnot128(a, b);
}
m256 simd_andnot(const m256 &a, const m256 &b) {
    return andnot256(a, b);
}
m384 simd_andnot(const m384 &a, const m384 &b) {
    return andnot384(a, b);
}
m512 simd_andnot(const m512 &a, const m512 &b) {
    return andnot512(a, b);
}
// One name for notNNN() at every SIMD width, selected by argument type.
m128 simd_not(const m128 &a) {
    return not128(a);
}
m256 simd_not(const m256 &a) {
    return not256(a);
}
m384 simd_not(const m384 &a) {
    return not384(a);
}
m512 simd_not(const m512 &a) {
    return not512(a);
}
simd_clearbit(m128 * a,unsigned int i)106 void simd_clearbit(m128 *a, unsigned int i) { return clearbit128(a, i); }
simd_clearbit(m256 * a,unsigned int i)107 void simd_clearbit(m256 *a, unsigned int i) { return clearbit256(a, i); }
simd_clearbit(m384 * a,unsigned int i)108 void simd_clearbit(m384 *a, unsigned int i) { return clearbit384(a, i); }
simd_clearbit(m512 * a,unsigned int i)109 void simd_clearbit(m512 *a, unsigned int i) { return clearbit512(a, i); }
simd_setbit(m128 * a,unsigned int i)110 void simd_setbit(m128 *a, unsigned int i) { return setbit128(a, i); }
simd_setbit(m256 * a,unsigned int i)111 void simd_setbit(m256 *a, unsigned int i) { return setbit256(a, i); }
simd_setbit(m384 * a,unsigned int i)112 void simd_setbit(m384 *a, unsigned int i) { return setbit384(a, i); }
simd_setbit(m512 * a,unsigned int i)113 void simd_setbit(m512 *a, unsigned int i) { return setbit512(a, i); }
simd_testbit(const m128 & a,unsigned int i)114 bool simd_testbit(const m128 &a, unsigned int i) { return testbit128(a, i); }
simd_testbit(const m256 & a,unsigned int i)115 bool simd_testbit(const m256 &a, unsigned int i) { return testbit256(a, i); }
simd_testbit(const m384 & a,unsigned int i)116 bool simd_testbit(const m384 &a, unsigned int i) { return testbit384(a, i); }
simd_testbit(const m512 & a,unsigned int i)117 bool simd_testbit(const m512 &a, unsigned int i) { return testbit512(a, i); }
simd_diffrich(const m128 & a,const m128 & b)118 u32 simd_diffrich(const m128 &a, const m128 &b) { return diffrich128(a, b); }
simd_diffrich(const m256 & a,const m256 & b)119 u32 simd_diffrich(const m256 &a, const m256 &b) { return diffrich256(a, b); }
simd_diffrich(const m384 & a,const m384 & b)120 u32 simd_diffrich(const m384 &a, const m384 &b) { return diffrich384(a, b); }
simd_diffrich(const m512 & a,const m512 & b)121 u32 simd_diffrich(const m512 &a, const m512 &b) { return diffrich512(a, b); }
simd_diffrich64(const m128 & a,const m128 & b)122 u32 simd_diffrich64(const m128 &a, const m128 &b) { return diffrich64_128(a, b); }
simd_diffrich64(const m256 & a,const m256 & b)123 u32 simd_diffrich64(const m256 &a, const m256 &b) { return diffrich64_256(a, b); }
simd_diffrich64(const m384 & a,const m384 & b)124 u32 simd_diffrich64(const m384 &a, const m384 &b) { return diffrich64_384(a, b); }
simd_diffrich64(const m512 & a,const m512 & b)125 u32 simd_diffrich64(const m512 &a, const m512 &b) { return diffrich64_512(a, b); }
simd_store(void * ptr,const m128 & a)126 void simd_store(void *ptr, const m128 &a) { store128(ptr, a); }
simd_store(void * ptr,const m256 & a)127 void simd_store(void *ptr, const m256 &a) { store256(ptr, a); }
simd_store(void * ptr,const m384 & a)128 void simd_store(void *ptr, const m384 &a) { store384(ptr, a); }
simd_store(void * ptr,const m512 & a)129 void simd_store(void *ptr, const m512 &a) { store512(ptr, a); }
simd_load(m128 * a,const void * ptr)130 void simd_load(m128 *a, const void *ptr) { *a = load128(ptr); }
simd_load(m256 * a,const void * ptr)131 void simd_load(m256 *a, const void *ptr) { *a = load256(ptr); }
simd_load(m384 * a,const void * ptr)132 void simd_load(m384 *a, const void *ptr) { *a = load384(ptr); }
simd_load(m512 * a,const void * ptr)133 void simd_load(m512 *a, const void *ptr) { *a = load512(ptr); }
simd_loadu(m128 * a,const void * ptr)134 void simd_loadu(m128 *a, const void *ptr) { *a = loadu128(ptr); }
simd_loadu(m256 * a,const void * ptr)135 void simd_loadu(m256 *a, const void *ptr) { *a = loadu256(ptr); }
simd_loadu(m384 * a,const void * ptr)136 void simd_loadu(m384 *a, const void *ptr) { *a = loadu384(ptr); }
simd_loadu(m512 * a,const void * ptr)137 void simd_loadu(m512 *a, const void *ptr) { *a = loadu512(ptr); }
simd_storebytes(void * ptr,const m128 & a,unsigned i)138 void simd_storebytes(void *ptr, const m128 &a, unsigned i) { storebytes128(ptr, a, i); }
simd_storebytes(void * ptr,const m256 & a,unsigned i)139 void simd_storebytes(void *ptr, const m256 &a, unsigned i) { storebytes256(ptr, a, i); }
simd_storebytes(void * ptr,const m384 & a,unsigned i)140 void simd_storebytes(void *ptr, const m384 &a, unsigned i) { storebytes384(ptr, a, i); }
simd_storebytes(void * ptr,const m512 & a,unsigned i)141 void simd_storebytes(void *ptr, const m512 &a, unsigned i) { storebytes512(ptr, a, i); }
simd_loadbytes(m128 * a,const void * ptr,unsigned i)142 void simd_loadbytes(m128 *a, const void *ptr, unsigned i) { *a = loadbytes128(ptr, i); }
simd_loadbytes(m256 * a,const void * ptr,unsigned i)143 void simd_loadbytes(m256 *a, const void *ptr, unsigned i) { *a = loadbytes256(ptr, i); }
simd_loadbytes(m384 * a,const void * ptr,unsigned i)144 void simd_loadbytes(m384 *a, const void *ptr, unsigned i) { *a = loadbytes384(ptr, i); }
simd_loadbytes(m512 * a,const void * ptr,unsigned i)145 void simd_loadbytes(m512 *a, const void *ptr, unsigned i) { *a = loadbytes512(ptr, i); }
// One name for lshift64_mNNN() at every SIMD width, selected by argument type.
m128 simd_lshift64(const m128 &a, unsigned i) {
    return lshift64_m128(a, i);
}
m256 simd_lshift64(const m256 &a, unsigned i) {
    return lshift64_m256(a, i);
}
m384 simd_lshift64(const m384 &a, unsigned i) {
    return lshift64_m384(a, i);
}
m512 simd_lshift64(const m512 &a, unsigned i) {
    return lshift64_m512(a, i);
}
150 
151 template<typename T>
152 class SimdUtilsTest : public testing::Test {
153     // empty
154 };
155 
156 typedef ::testing::Types<m128, m256, m384, m512> SimdTypes;
157 TYPED_TEST_CASE(SimdUtilsTest, SimdTypes);
158 
159 //
160 // The tests themselves.
161 //
162 
// simd_zeroes must produce a value whose every byte is 0x00.
TYPED_TEST(SimdUtilsTest, zero) {
    const TypeParam value = simd_zeroes();

    // Reference buffer with no bits on.
    char reference[sizeof(TypeParam)];
    memset(reference, 0, sizeof(reference));
    ASSERT_EQ(0, memcmp(reference, &value, sizeof(value)));
}
171 
// simd_ones must produce a value whose every byte is 0xff.
TYPED_TEST(SimdUtilsTest, ones) {
    const TypeParam value = simd_ones();

    // Reference buffer with all bits on.
    char reference[sizeof(TypeParam)];
    memset(reference, 0xff, sizeof(reference));
    ASSERT_EQ(0, memcmp(reference, &value, sizeof(value)));
}
180 
// AND truth table on the all-zeroes / all-ones extremes.
TYPED_TEST(SimdUtilsTest, and1) {
    const TypeParam all_off = simd_zeroes();
    const TypeParam all_on = simd_ones();

    // Any operand of zeroes forces the result to zeroes.
    EXPECT_FALSE(simd_diff(simd_and(all_off, all_on), all_off));
    EXPECT_FALSE(simd_diff(simd_and(all_on, all_off), all_off));
    EXPECT_FALSE(simd_diff(simd_and(all_off, all_off), all_off));

    // Only ones AND ones gives ones.
    EXPECT_FALSE(simd_diff(simd_and(all_on, all_on), all_on));
}
199 
// AND of two repeating byte patterns must match the scalar AND in every byte.
TYPED_TEST(SimdUtilsTest, and2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    // Byte-wise view of the vector result.
    union {
        TypeParam simd;
        char bytes[sizeof(TypeParam)];
    } out;
    out.simd = simd_and(lhs, rhs);

    const char expected = 0x33 & 0x55;
    for (const char byte : out.bytes) {
        EXPECT_EQ(expected, byte);
    }
}
216 
// diff256 must return 0 for equal operands and 1 when a single bit differs.
TEST(SimdUtils, diff256) {
    const unsigned total_bits = 256;

    // Identical operands: extremes first, then every single-bit mask.
    ASSERT_EQ(0U, diff256(zeroes256(), zeroes256()));
    ASSERT_EQ(0U, diff256(ones256(), ones256()));
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const m256 lhs = setbit<m256>(bit);
        const m256 rhs = setbit<m256>(bit);
        ASSERT_EQ(0U, diff256(lhs, rhs));
    }

    // Operands that differ in exactly one bit (hence one 32-bit word).
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const m256 single = setbit<m256>(bit);
        const u32 rv = diff256(zeroes256(), single);
        ASSERT_EQ(1U, rv);
    }
}
236 
// OR truth table on the all-zeroes / all-ones extremes.
TYPED_TEST(SimdUtilsTest, or1) {
    const TypeParam all_off = simd_zeroes();
    const TypeParam all_on = simd_ones();

    // Any operand of ones forces the result to ones.
    EXPECT_FALSE(simd_diff(simd_or(all_off, all_on), all_on));
    EXPECT_FALSE(simd_diff(simd_or(all_on, all_off), all_on));

    // Zeroes OR zeroes stays zeroes.
    EXPECT_FALSE(simd_diff(simd_or(all_off, all_off), all_off));

    EXPECT_FALSE(simd_diff(simd_or(all_on, all_on), all_on));
}
255 
// OR of two repeating byte patterns must match the scalar OR in every byte.
TYPED_TEST(SimdUtilsTest, or2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    // NOTE(review): this diffrich256 sweep uses neither TypeParam nor simd_or,
    // so it runs unchanged for every instantiation; it looks like it belongs
    // in a dedicated diffrich256 test. Each single-bit mask must be reported
    // as a difference in exactly the 32-bit word that holds the bit.
    for (unsigned bit = 0; bit < 8 * 32; bit++) {
        const m256 x = setbit<m256>(bit);
        const m256 y = zeroes256();
        ASSERT_EQ(1U << (bit / 32), diffrich256(x, y))
            << "bit " << bit << " not happy";
    }

    // Byte-wise view of the vector result.
    union {
        TypeParam simd;
        char bytes[sizeof(TypeParam)];
    } out;
    out.simd = simd_or(lhs, rhs);

    const char expected = 0x33 | 0x55;
    for (const char byte : out.bytes) {
        EXPECT_EQ(expected, byte);
    }
}
280 
// XOR truth table on the all-zeroes / all-ones extremes.
TYPED_TEST(SimdUtilsTest, xor1) {
    const TypeParam all_off = simd_zeroes();
    const TypeParam all_on = simd_ones();

    // Differing operands give ones.
    EXPECT_FALSE(simd_diff(simd_xor(all_off, all_on), all_on));
    EXPECT_FALSE(simd_diff(simd_xor(all_on, all_off), all_on));

    // Equal operands cancel to zeroes.
    EXPECT_FALSE(simd_diff(simd_xor(all_off, all_off), all_off));
    EXPECT_FALSE(simd_diff(simd_xor(all_on, all_on), all_off));
}
299 
// XOR of two repeating byte patterns must match the scalar XOR in every byte.
TYPED_TEST(SimdUtilsTest, xor2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    // Byte-wise view of the vector result.
    union {
        TypeParam simd;
        char bytes[sizeof(TypeParam)];
    } out;
    out.simd = simd_xor(lhs, rhs);

    const char expected = 0x33 ^ 0x55;
    for (const char byte : out.bytes) {
        EXPECT_EQ(expected, byte);
    }
}
316 
// ANDNOT truth table on the extremes: the expectations below correspond to
// (~first) & second.
TYPED_TEST(SimdUtilsTest, andnot1) {
    const TypeParam all_off = simd_zeroes();
    const TypeParam all_on = simd_ones();

    // Only (zeroes, ones) survives as ones.
    EXPECT_FALSE(simd_diff(simd_andnot(all_off, all_on), all_on));

    // Every other combination collapses to zeroes.
    EXPECT_FALSE(simd_diff(simd_andnot(all_on, all_off), all_off));
    EXPECT_FALSE(simd_diff(simd_andnot(all_off, all_off), all_off));
    EXPECT_FALSE(simd_diff(simd_andnot(all_on, all_on), all_off));
}
335 
// ANDNOT of two repeating byte patterns must equal (~first) & second in every
// byte.
TYPED_TEST(SimdUtilsTest, andnot2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    // Byte-wise view of the vector result.
    union {
        TypeParam simd;
        char bytes[sizeof(TypeParam)];
    } out;
    out.simd = simd_andnot(lhs, rhs);

    const char expected = ~0x33 & 0x55;
    for (const char byte : out.bytes) {
        EXPECT_EQ(expected, byte);
    }
}
352 
// NOT must swap the all-zeroes and all-ones extremes.
TYPED_TEST(SimdUtilsTest, not1) {
    const TypeParam all_off = simd_zeroes();
    const TypeParam all_on = simd_ones();

    EXPECT_FALSE(simd_diff(simd_not(all_off), all_on));
    EXPECT_FALSE(simd_diff(simd_not(all_on), all_off));
}
365 
// NOT of a repeating byte pattern must match the scalar complement in every
// byte.
TYPED_TEST(SimdUtilsTest, not2) {
    TypeParam input;
    memset(&input, 0x33, sizeof(input));

    // Byte-wise view of the vector result.
    union {
        TypeParam simd;
        char bytes[sizeof(TypeParam)];
    } out;
    out.simd = simd_not(input);

    const char expected = ~0x33;
    for (const char byte : out.bytes) {
        EXPECT_EQ(expected, byte);
    }
}
381 
// isnonzero must be false for zeroes and true for ones and for every value
// with a single bit set.
TYPED_TEST(SimdUtilsTest, isnonzero) {
    const TypeParam all_off = simd_zeroes();
    EXPECT_FALSE(simd_isnonzero(all_off));

    const TypeParam all_on = simd_ones();
    EXPECT_TRUE(simd_isnonzero(all_on));

    union {
        TypeParam simd;
        char bytes[sizeof(TypeParam)];
    } probe;

    // Try every 1-bit case, walking bits in byte order.
    const size_t total_bits = sizeof(TypeParam) * 8;
    for (size_t bit = 0; bit < total_bits; bit++) {
        memset(&probe.simd, 0, sizeof(probe.simd));
        probe.bytes[bit / 8] = 1 << (bit % 8);
        EXPECT_TRUE(simd_isnonzero(probe.simd));
    }
}
403 
// Clearing bit i of an all-ones value must change the value, and must change
// nothing except bit i.
TYPED_TEST(SimdUtilsTest, clearbit) {
    const unsigned int total_bits = sizeof(TypeParam) * 8;

    const TypeParam ones = simd_ones();

    for (unsigned int i = 0; i < total_bits; i++) {
        TypeParam a = simd_ones();
        simd_clearbit(&a, i);
        // simd_diff returns bool, so use the boolean assertions (as the rest
        // of this file does) rather than comparing against an int literal.
        ASSERT_TRUE(simd_diff(a, ones)) << "bit " << i << " wasn't cleared";

        // OR-ing bit i back in must restore all-ones exactly; anything else
        // means some other bit was disturbed.
        const TypeParam mask = setbit<TypeParam>(i);
        ASSERT_FALSE(simd_diff(ones, simd_or(a, mask)))
            << "clearing bit " << i << " caused collateral damage";
    }
}
419 
// testbit must report every bit of all-ones as set, and for a single-bit mask
// must report exactly that one bit.
TYPED_TEST(SimdUtilsTest, testbit) {
    const unsigned int total_bits = sizeof(TypeParam) * 8;

    const TypeParam ones = simd_ones();

    // First, all bits are on in 'ones'. The message is only printed on
    // failure, so it states the expectation rather than claiming the bit is
    // on.
    for (unsigned int i = 0; i < total_bits; i++) {
        ASSERT_TRUE(simd_testbit(ones, i)) << "bit " << i << " should be on";
    }

    // Try individual bits; only 'i' should be on. Report the bit actually
    // being queried (j) together with the bit that was set (i).
    for (unsigned int i = 0; i < total_bits; i++) {
        const TypeParam a = setbit<TypeParam>(i);
        for (unsigned int j = 0; j < total_bits; j++) {
            ASSERT_EQ(i == j, simd_testbit(a, j))
                << "bit " << j << " is wrong when only bit " << i
                << " is set";
        }
    }
}
439 
// simd_setbit must agree with the byte-level setbit<> reference for every
// single bit, and setting all bits in turn must produce all-ones.
TYPED_TEST(SimdUtilsTest, setbit) {
    const unsigned int total_bits = sizeof(TypeParam) * 8;

    // One bit at a time, starting from zeroes each round.
    for (unsigned int bit = 0; bit < total_bits; bit++) {
        const TypeParam reference = setbit<TypeParam>(bit);
        TypeParam candidate = simd_zeroes();
        simd_setbit(&candidate, bit);
        ASSERT_FALSE(simd_diff(reference, candidate));
    }

    // Accumulate every bit into one value.
    TypeParam accumulated = simd_zeroes();
    for (unsigned int bit = 0; bit < total_bits; bit++) {
        simd_setbit(&accumulated, bit);
    }
    ASSERT_FALSE(simd_diff(simd_ones(), accumulated));
}
460 
// diffrich must return 0 for equal operands and otherwise a mask with one bit
// per differing 32-bit word.
TYPED_TEST(SimdUtilsTest, diffrich) {
    const unsigned total_bits = sizeof(TypeParam) * 8;

    const TypeParam all_off = simd_zeroes();
    const TypeParam all_on = simd_ones();

    // Identical operands: extremes first, then every single-bit mask.
    EXPECT_EQ(0U, simd_diffrich(all_off, all_off));
    EXPECT_EQ(0U, simd_diffrich(all_on, all_on));
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam lhs = setbit<TypeParam>(bit);
        const TypeParam rhs = setbit<TypeParam>(bit);
        EXPECT_EQ(0U, simd_diffrich(lhs, rhs));
    }

    // and nothing is on in zeroes
    // NOTE(review): this exercises simd_testbit, not diffrich; it looks like
    // it belongs with the testbit test.
    for (unsigned int bit = 0; bit < total_bits; bit++) {
        ASSERT_EQ(0, simd_testbit(all_off, bit)) << "bit " << bit << " is off";
    }

    // All-zeroes and all-ones differ in all words: one result bit per word.
    const u32 every_word = (1U << (total_bits / 32)) - 1;
    EXPECT_EQ(every_word, simd_diffrich(all_off, all_on));

    // A single differing bit flags only the 32-bit word that contains it.
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam single = setbit<TypeParam>(bit);
        const u32 rv = simd_diffrich(all_off, single);
        EXPECT_EQ(1U << (bit / 32), rv);
    }
}
491 
// diffrich64 must return 0 for equal operands and otherwise flag each
// differing 64-bit word on an even-numbered result bit.
TYPED_TEST(SimdUtilsTest, diffrich64) {
    const unsigned total_bits = sizeof(TypeParam) * 8;

    const TypeParam all_off = simd_zeroes();
    const TypeParam all_on = simd_ones();

    // Identical operands: extremes first, then every single-bit mask.
    EXPECT_EQ(0U, simd_diffrich64(all_off, all_off));
    EXPECT_EQ(0U, simd_diffrich64(all_on, all_on));
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam lhs = setbit<TypeParam>(bit);
        const TypeParam rhs = setbit<TypeParam>(bit);
        EXPECT_EQ(0U, simd_diffrich64(lhs, rhs));
    }

    // All-zeroes and all-ones differ in all words, which will result in every
    // second bit being on.
    const u32 every_second_bit =
        ((1U << (total_bits / 32)) - 1) & 0x55555555u;
    EXPECT_EQ(every_second_bit, simd_diffrich64(all_off, all_on));

    // A single differing bit flags only the 64-bit word that contains it.
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam single = setbit<TypeParam>(bit);
        const u32 rv = simd_diffrich64(all_off, single);
        EXPECT_EQ(1U << ((bit / 64) * 2), rv);
    }
}
519 
520 // Unaligned load
TYPED_TEST(SimdUtilsTest,loadu)521 TYPED_TEST(SimdUtilsTest, loadu) {
522     const TypeParam ones = simd_ones();
523 
524     const size_t mem_len = sizeof(ones) * 2;
525     unique_ptr<char[]> mem_array = ue2::make_unique<char[]>(mem_len);
526     char *mem = mem_array.get();
527 
528     for (size_t offset = 1; offset < sizeof(ones); offset++) {
529         memset(mem, 0, mem_len);
530         memset(mem + offset, 0xff, sizeof(ones));
531         TypeParam a;
532         simd_loadu(&a, mem + offset);
533         ASSERT_EQ(0, simd_diff(a, ones));
534     }
535 }
536 
537 // Aligned load and store
TYPED_TEST(SimdUtilsTest,load_store)538 TYPED_TEST(SimdUtilsTest, load_store) {
539     union {
540         TypeParam simd;
541         char bytes[sizeof(TypeParam)];
542     } a;
543     for (size_t i = 0; i < sizeof(a); i++) {
544         a.bytes[i] = (char)(i % 256);
545     }
546 
547     auto mem_ptr = make_bytecode_ptr<char>(sizeof(a), alignof(TypeParam));
548     char *mem = mem_ptr.get();
549 
550     ASSERT_EQ(0, (size_t)mem % 16U);
551 
552     memset(mem, 0, sizeof(a));
553 
554     simd_store(mem, a.simd);
555     ASSERT_EQ(0, memcmp(mem, a.bytes, sizeof(a)));
556 
557     TypeParam b;
558     simd_load(&b, mem);
559     ASSERT_FALSE(simd_diff(a.simd, b));
560 }
561 
562 // Packed load and store
TYPED_TEST(SimdUtilsTest,loadbytes_storebytes)563 TYPED_TEST(SimdUtilsTest, loadbytes_storebytes) {
564     union {
565         TypeParam simd;
566         char bytes[sizeof(TypeParam)];
567     } a;
568     for (size_t i = 0; i < sizeof(a); i++) {
569         a.bytes[i] = (char)(i % 256);
570     }
571 
572     char mem[sizeof(TypeParam)];
573     for (size_t i = 1; i < sizeof(TypeParam); i++) {
574         memset(mem, 0xff, sizeof(TypeParam));
575 
576         simd_storebytes(mem, a.simd, i);
577 
578         union {
579             TypeParam simd;
580             char bytes[sizeof(TypeParam)];
581         } b;
582         simd_loadbytes(&b.simd, mem, i);
583 
584         // First i bytes should match a, remaining bytes are zero. (Note that
585         // this takes endianness into account)
586         for (size_t j = 0; j < sizeof(TypeParam); j++) {
587             size_t idx = j;
588             ASSERT_EQ(j < i ? a.bytes[idx] : 0, b.bytes[idx]);
589         }
590     }
591 }
592 
// lshift64 must shift every 64-bit lane independently, matching the scalar
// shift of the same lane value.
TYPED_TEST(SimdUtilsTest, lshift64) {
    TypeParam input;
    memset(&input, 0x5a, sizeof(input));

    // Value of each 64-bit lane of 'input'.
    static constexpr u64a exp_val = 0x5a5a5a5a5a5a5a5aULL;

    union {
        TypeParam simd;
        u64a qword[sizeof(TypeParam) / 8];
    } shifted;

    // Every 64-bit lane of 'shifted' must hold the given value.
    const auto expect_all_lanes = [&shifted](u64a want) {
        for (const u64a lane : shifted.qword) {
            EXPECT_EQ(want, lane);
        }
    };

    // Shift counts supplied through a loop variable.
    for (unsigned s = 0; s < 64; s++) {
        shifted.simd = simd_lshift64(input, s);
        expect_all_lanes(exp_val << s);
    }

    /* Clang 3.4 on FreeBSD 10 crashes on the following - disable for now */
#if !(defined(__FreeBSD__) && defined(__clang__) && __clang_major__ == 3)

    // test immediates: the counts are deliberately spelled as literals at
    // each call site rather than driven from a loop.
    shifted.simd = simd_lshift64(input, 1);
    expect_all_lanes(exp_val << 1);

    shifted.simd = simd_lshift64(input, 2);
    expect_all_lanes(exp_val << 2);

    shifted.simd = simd_lshift64(input, 7);
    expect_all_lanes(exp_val << 7);

    shifted.simd = simd_lshift64(input, 31);
    expect_all_lanes(exp_val << 31);
#endif
}
644 
// Pins the alignment of each SIMD type. Note that m384 is expected to have
// the same alignment as m128.
TEST(SimdUtilsTest, alignment) {
    ASSERT_EQ(16U, alignof(m128));
    ASSERT_EQ(32U, alignof(m256));
    ASSERT_EQ(16U, alignof(m384));
    ASSERT_EQ(64U, alignof(m512));
}
651 
// movq must return the low 64 bits of an m128.
TEST(SimdUtilsTest, movq) {
    m128 simd;

    // All-ones vector: the extracted quadword is all-ones too.
    simd = ones128();
    u64a r = movq(simd);
    ASSERT_EQ(~(u64a)0, r);

    // Uniform 0x80 bytes: both the vector and the extracted quadword must be
    // made of 0x80 bytes.
    char cmp[sizeof(m128)];
    memset(cmp, 0x80, sizeof(m128));
    simd = set16x8(0x80);
    r = movq(simd);
    ASSERT_EQ(0, memcmp(cmp, &simd, sizeof(simd)));
    ASSERT_EQ(0, memcmp(cmp, &r, sizeof(r)));

    // Distinct halves: only the low half (second argument of
    // _mm_set_epi64x) may be returned.
    simd = _mm_set_epi64x(~0LL, 0x123456789abcdef);
    r = movq(simd);
    // Expected value first, as elsewhere in this file, and spelled as an
    // unsigned literal so it is not a signed/unsigned comparison with u64a.
    ASSERT_EQ(0x123456789abcdefULL, r);
}
670 
671 
// set16x8 must replicate the given byte into all 16 bytes of an m128, for
// every possible byte value.
TEST(SimdUtilsTest, set16x8) {
    char reference[sizeof(m128)];

    for (unsigned value = 0; value < 256; value++) {
        const m128 broadcast = set16x8(value);
        memset(reference, value, sizeof(broadcast));
        ASSERT_EQ(0, memcmp(reference, &broadcast, sizeof(broadcast)));
    }
}
681 
// set4x32 must replicate the given 32-bit value into all four lanes of an
// m128.
TEST(SimdUtilsTest, set4x32) {
    const u32 value = 0x12345678;
    const u32 reference[4] = { value, value, value, value };

    const m128 broadcast = set4x32(value);
    ASSERT_EQ(0, memcmp(reference, &broadcast, sizeof(broadcast)));
}
687 
688 #if defined(HAVE_AVX2)
// set32x8 must replicate the given byte into all 32 bytes of an m256, for
// every possible byte value.
TEST(SimdUtilsTest, set32x8) {
    char reference[sizeof(m256)];

    for (unsigned value = 0; value < 256; value++) {
        const m256 broadcast = set32x8(value);
        memset(reference, value, sizeof(broadcast));
        ASSERT_EQ(0, memcmp(reference, &broadcast, sizeof(broadcast)));
    }
}
698 
// set2x128 of a byte-broadcast m128 must equal both a plain byte fill and the
// set32x8 broadcast of the same byte.
TEST(SimdUtilsTest, set2x128) {
    char reference[sizeof(m256)];

    for (unsigned value = 0; value < 256; value++) {
        const m128 half = set16x8(value);
        const m256 direct = set32x8(value);
        const m256 doubled = set2x128(half);

        memset(reference, value, sizeof(doubled));
        ASSERT_EQ(0, memcmp(reference, &doubled, sizeof(doubled)));
        ASSERT_EQ(0, memcmp(&direct, &doubled, sizeof(doubled)));
    }
}
711 #endif
712 
// variable_byte_shift_m128 must agree with the fixed-count byte shifts:
// negative amounts with rshiftbyte_m128, positive amounts with
// lshiftbyte_m128, and +/-16 must clear the vector. The shift counts are
// written out as literals at each call site.
TEST(SimdUtilsTest, variableByteShift128) {
    char base[] = "0123456789ABCDEF";
    const m128 in = loadu128(base);

    // Negative amounts behave as right byte shifts.
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 0),
                         variable_byte_shift_m128(in, 0)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 1),
                         variable_byte_shift_m128(in, -1)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 2),
                         variable_byte_shift_m128(in, -2)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 3),
                         variable_byte_shift_m128(in, -3)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 4),
                         variable_byte_shift_m128(in, -4)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 5),
                         variable_byte_shift_m128(in, -5)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 6),
                         variable_byte_shift_m128(in, -6)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 7),
                         variable_byte_shift_m128(in, -7)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 8),
                         variable_byte_shift_m128(in, -8)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 9),
                         variable_byte_shift_m128(in, -9)));
    EXPECT_FALSE(diff128(rshiftbyte_m128(in, 10),
                         variable_byte_shift_m128(in, -10)));

    // Positive amounts behave as left byte shifts.
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 0),
                         variable_byte_shift_m128(in, 0)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 1),
                         variable_byte_shift_m128(in, 1)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 2),
                         variable_byte_shift_m128(in, 2)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 3),
                         variable_byte_shift_m128(in, 3)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 4),
                         variable_byte_shift_m128(in, 4)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 5),
                         variable_byte_shift_m128(in, 5)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 6),
                         variable_byte_shift_m128(in, 6)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 7),
                         variable_byte_shift_m128(in, 7)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 8),
                         variable_byte_shift_m128(in, 8)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 9),
                         variable_byte_shift_m128(in, 9)));
    EXPECT_FALSE(diff128(lshiftbyte_m128(in, 10),
                         variable_byte_shift_m128(in, 10)));

    // Shifting by the full width in either direction leaves nothing behind.
    EXPECT_FALSE(diff128(zeroes128(), variable_byte_shift_m128(in, 16)));
    EXPECT_FALSE(diff128(zeroes128(), variable_byte_shift_m128(in, -16)));
}
766 
// max_u8_m128 must pick the larger byte in each lane, comparing bytes as
// unsigned (0xff beats 0xfe and any ASCII character).
TEST(SimdUtilsTest, max_u8_m128) {
    char base1[] = "0123456789ABCDE\xfe";
    char base2[] = "!!23455889aBCd\xff\xff";
    char expec[] = "0123456889aBCd\xff\xff";

    const m128 lhs = loadu128(base1);
    const m128 rhs = loadu128(base2);
    const m128 actual = max_u8_m128(lhs, rhs);
    EXPECT_FALSE(diff128(actual, loadu128(expec)));
}
776 
// min_u8_m128 must pick the smaller byte in each lane, comparing bytes as
// unsigned (0xfe is smaller than 0xff).
TEST(SimdUtilsTest, min_u8_m128) {
    char base1[] = "0123456789ABCDE\xfe";
    char base2[] = "!!23455889aBCd\xff\xff";
    char expec[] = "!!23455789ABCDE\xfe";

    const m128 lhs = loadu128(base1);
    const m128 rhs = loadu128(base2);
    const m128 actual = min_u8_m128(lhs, rhs);
    EXPECT_FALSE(diff128(actual, loadu128(expec)));
}
786 
// sadd_u8_m128 must add bytes lane by lane, saturating at 0xff instead of
// wrapping (0x80 + 0x80 and 0xff + 'b' both give 0xff).
TEST(SimdUtilsTest, sadd_u8_m128) {
    unsigned char base1[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
                             '1', '2', '3', '4', '1', '2', '3', '4'};
    unsigned char base2[] = {'a', 0x80, 'b', 'A', 0x10, 0x10, 0x10, 0x10,
                             0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};
    unsigned char expec[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
                             'a', 'b', 'c', 'd', '1', '2', '3', '4'};

    const m128 lhs = loadu128(base1);
    const m128 rhs = loadu128(base2);
    const m128 actual = sadd_u8_m128(lhs, rhs);
    EXPECT_FALSE(diff128(actual, loadu128(expec)));
}
799 
// sub_u8_m128 must subtract bytes lane by lane (first operand minus second).
// None of the lanes used here underflows.
TEST(SimdUtilsTest, sub_u8_m128) {
    unsigned char base1[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
                             'a', 'b', 'c', 'd', '1', '2', '3', '4'};
    unsigned char base2[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
                             '1', '2', '3', '4', '1', '2', '3', '4'};
    unsigned char expec[] = {'a', 0x7f, 0, 'A', 0x10, 0x10, 0x10, 0x10,
                             0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};

    const m128 lhs = loadu128(base1);
    const m128 rhs = loadu128(base2);
    const m128 actual = sub_u8_m128(lhs, rhs);
    EXPECT_FALSE(diff128(actual, loadu128(expec)));
}
812 
813 } // namespace
814