1 /*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "config.h"
30
31 #include "gtest/gtest.h"
32 #include "util/arch.h"
33 #include "util/bytecode_ptr.h"
34 #include "util/make_unique.h"
35 #include "util/simd_utils.h"
36
37 using namespace std;
38 using namespace ue2;
39
40 namespace {
41
// Build a mask of type Mask in which only bit number 'bit' is switched on.
//
// Bits are numbered byte by byte through the object representation: bit N
// lives in byte N / 8 at position N % 8 within that byte. The caller must
// pass bit < sizeof(Mask) * 8; the index is not range-checked here.
//
// The bytes are assembled in a plain buffer and copied into the result with
// memcpy, which avoids reading an inactive union member (type punning through
// a union is not defined behaviour in C++).
template<class Mask>
Mask setbit(unsigned int bit) {
    unsigned char bytes[sizeof(Mask)];
    memset(bytes, 0, sizeof(bytes));

    bytes[bit / 8] = static_cast<unsigned char>(1U << (bit % 8));

    Mask mask;
    memcpy(&mask, bytes, sizeof(mask));
    return mask;
}
57
58 // Parameterized tests follow!
59 //
// Irritatingly we have to define a whole bunch of overloads here... because
61 // templates. One Admiration Unit for anyone able to build a better way of
62 // doing this.
63
64 struct simd_zeroes {
operator m128__anoncfee90cc0111::simd_zeroes65 operator m128() { return zeroes128(); }
operator m256__anoncfee90cc0111::simd_zeroes66 operator m256() { return zeroes256(); }
operator m384__anoncfee90cc0111::simd_zeroes67 operator m384() { return zeroes384(); }
operator m512__anoncfee90cc0111::simd_zeroes68 operator m512() { return zeroes512(); }
69 };
70
71 struct simd_ones {
operator m128__anoncfee90cc0111::simd_ones72 operator m128() { return ones128(); }
operator m256__anoncfee90cc0111::simd_ones73 operator m256() { return ones256(); }
operator m384__anoncfee90cc0111::simd_ones74 operator m384() { return ones384(); }
operator m512__anoncfee90cc0111::simd_ones75 operator m512() { return ones512(); }
76 };
77
simd_diff(const m128 & a,const m128 & b)78 bool simd_diff(const m128 &a, const m128 &b) { return !!diff128(a, b); }
simd_diff(const m256 & a,const m256 & b)79 bool simd_diff(const m256 &a, const m256 &b) { return !!diff256(a, b); }
simd_diff(const m384 & a,const m384 & b)80 bool simd_diff(const m384 &a, const m384 &b) { return !!diff384(a, b); }
simd_diff(const m512 & a,const m512 & b)81 bool simd_diff(const m512 &a, const m512 &b) { return !!diff512(a, b); }
simd_isnonzero(const m128 & a)82 bool simd_isnonzero(const m128 &a) { return !!isnonzero128(a); }
simd_isnonzero(const m256 & a)83 bool simd_isnonzero(const m256 &a) { return !!isnonzero256(a); }
simd_isnonzero(const m384 & a)84 bool simd_isnonzero(const m384 &a) { return !!isnonzero384(a); }
simd_isnonzero(const m512 & a)85 bool simd_isnonzero(const m512 &a) { return !!isnonzero512(a); }
// simd_and: forwards to the andNNN() matching the argument type.
m128 simd_and(const m128 &a, const m128 &b) {
    return and128(a, b);
}
m256 simd_and(const m256 &a, const m256 &b) {
    return and256(a, b);
}
m384 simd_and(const m384 &a, const m384 &b) {
    return and384(a, b);
}
m512 simd_and(const m512 &a, const m512 &b) {
    return and512(a, b);
}
// simd_or: forwards to the orNNN() matching the argument type.
m128 simd_or(const m128 &a, const m128 &b) {
    return or128(a, b);
}
m256 simd_or(const m256 &a, const m256 &b) {
    return or256(a, b);
}
m384 simd_or(const m384 &a, const m384 &b) {
    return or384(a, b);
}
m512 simd_or(const m512 &a, const m512 &b) {
    return or512(a, b);
}
// simd_xor: forwards to the xorNNN() matching the argument type.
m128 simd_xor(const m128 &a, const m128 &b) {
    return xor128(a, b);
}
m256 simd_xor(const m256 &a, const m256 &b) {
    return xor256(a, b);
}
m384 simd_xor(const m384 &a, const m384 &b) {
    return xor384(a, b);
}
m512 simd_xor(const m512 &a, const m512 &b) {
    return xor512(a, b);
}
// simd_andnot: forwards to the andnotNNN() matching the argument type.
m128 simd_andnot(const m128 &a, const m128 &b) {
    return andnot128(a, b);
}
m256 simd_andnot(const m256 &a, const m256 &b) {
    return andnot256(a, b);
}
m384 simd_andnot(const m384 &a, const m384 &b) {
    return andnot384(a, b);
}
m512 simd_andnot(const m512 &a, const m512 &b) {
    return andnot512(a, b);
}
// simd_not: forwards to the notNNN() matching the argument type.
m128 simd_not(const m128 &a) {
    return not128(a);
}
m256 simd_not(const m256 &a) {
    return not256(a);
}
m384 simd_not(const m384 &a) {
    return not384(a);
}
m512 simd_not(const m512 &a) {
    return not512(a);
}
simd_clearbit(m128 * a,unsigned int i)106 void simd_clearbit(m128 *a, unsigned int i) { return clearbit128(a, i); }
simd_clearbit(m256 * a,unsigned int i)107 void simd_clearbit(m256 *a, unsigned int i) { return clearbit256(a, i); }
simd_clearbit(m384 * a,unsigned int i)108 void simd_clearbit(m384 *a, unsigned int i) { return clearbit384(a, i); }
simd_clearbit(m512 * a,unsigned int i)109 void simd_clearbit(m512 *a, unsigned int i) { return clearbit512(a, i); }
simd_setbit(m128 * a,unsigned int i)110 void simd_setbit(m128 *a, unsigned int i) { return setbit128(a, i); }
simd_setbit(m256 * a,unsigned int i)111 void simd_setbit(m256 *a, unsigned int i) { return setbit256(a, i); }
simd_setbit(m384 * a,unsigned int i)112 void simd_setbit(m384 *a, unsigned int i) { return setbit384(a, i); }
simd_setbit(m512 * a,unsigned int i)113 void simd_setbit(m512 *a, unsigned int i) { return setbit512(a, i); }
simd_testbit(const m128 & a,unsigned int i)114 bool simd_testbit(const m128 &a, unsigned int i) { return testbit128(a, i); }
simd_testbit(const m256 & a,unsigned int i)115 bool simd_testbit(const m256 &a, unsigned int i) { return testbit256(a, i); }
simd_testbit(const m384 & a,unsigned int i)116 bool simd_testbit(const m384 &a, unsigned int i) { return testbit384(a, i); }
simd_testbit(const m512 & a,unsigned int i)117 bool simd_testbit(const m512 &a, unsigned int i) { return testbit512(a, i); }
simd_diffrich(const m128 & a,const m128 & b)118 u32 simd_diffrich(const m128 &a, const m128 &b) { return diffrich128(a, b); }
simd_diffrich(const m256 & a,const m256 & b)119 u32 simd_diffrich(const m256 &a, const m256 &b) { return diffrich256(a, b); }
simd_diffrich(const m384 & a,const m384 & b)120 u32 simd_diffrich(const m384 &a, const m384 &b) { return diffrich384(a, b); }
simd_diffrich(const m512 & a,const m512 & b)121 u32 simd_diffrich(const m512 &a, const m512 &b) { return diffrich512(a, b); }
simd_diffrich64(const m128 & a,const m128 & b)122 u32 simd_diffrich64(const m128 &a, const m128 &b) { return diffrich64_128(a, b); }
simd_diffrich64(const m256 & a,const m256 & b)123 u32 simd_diffrich64(const m256 &a, const m256 &b) { return diffrich64_256(a, b); }
simd_diffrich64(const m384 & a,const m384 & b)124 u32 simd_diffrich64(const m384 &a, const m384 &b) { return diffrich64_384(a, b); }
simd_diffrich64(const m512 & a,const m512 & b)125 u32 simd_diffrich64(const m512 &a, const m512 &b) { return diffrich64_512(a, b); }
simd_store(void * ptr,const m128 & a)126 void simd_store(void *ptr, const m128 &a) { store128(ptr, a); }
simd_store(void * ptr,const m256 & a)127 void simd_store(void *ptr, const m256 &a) { store256(ptr, a); }
simd_store(void * ptr,const m384 & a)128 void simd_store(void *ptr, const m384 &a) { store384(ptr, a); }
simd_store(void * ptr,const m512 & a)129 void simd_store(void *ptr, const m512 &a) { store512(ptr, a); }
simd_load(m128 * a,const void * ptr)130 void simd_load(m128 *a, const void *ptr) { *a = load128(ptr); }
simd_load(m256 * a,const void * ptr)131 void simd_load(m256 *a, const void *ptr) { *a = load256(ptr); }
simd_load(m384 * a,const void * ptr)132 void simd_load(m384 *a, const void *ptr) { *a = load384(ptr); }
simd_load(m512 * a,const void * ptr)133 void simd_load(m512 *a, const void *ptr) { *a = load512(ptr); }
simd_loadu(m128 * a,const void * ptr)134 void simd_loadu(m128 *a, const void *ptr) { *a = loadu128(ptr); }
simd_loadu(m256 * a,const void * ptr)135 void simd_loadu(m256 *a, const void *ptr) { *a = loadu256(ptr); }
simd_loadu(m384 * a,const void * ptr)136 void simd_loadu(m384 *a, const void *ptr) { *a = loadu384(ptr); }
simd_loadu(m512 * a,const void * ptr)137 void simd_loadu(m512 *a, const void *ptr) { *a = loadu512(ptr); }
simd_storebytes(void * ptr,const m128 & a,unsigned i)138 void simd_storebytes(void *ptr, const m128 &a, unsigned i) { storebytes128(ptr, a, i); }
simd_storebytes(void * ptr,const m256 & a,unsigned i)139 void simd_storebytes(void *ptr, const m256 &a, unsigned i) { storebytes256(ptr, a, i); }
simd_storebytes(void * ptr,const m384 & a,unsigned i)140 void simd_storebytes(void *ptr, const m384 &a, unsigned i) { storebytes384(ptr, a, i); }
simd_storebytes(void * ptr,const m512 & a,unsigned i)141 void simd_storebytes(void *ptr, const m512 &a, unsigned i) { storebytes512(ptr, a, i); }
simd_loadbytes(m128 * a,const void * ptr,unsigned i)142 void simd_loadbytes(m128 *a, const void *ptr, unsigned i) { *a = loadbytes128(ptr, i); }
simd_loadbytes(m256 * a,const void * ptr,unsigned i)143 void simd_loadbytes(m256 *a, const void *ptr, unsigned i) { *a = loadbytes256(ptr, i); }
simd_loadbytes(m384 * a,const void * ptr,unsigned i)144 void simd_loadbytes(m384 *a, const void *ptr, unsigned i) { *a = loadbytes384(ptr, i); }
simd_loadbytes(m512 * a,const void * ptr,unsigned i)145 void simd_loadbytes(m512 *a, const void *ptr, unsigned i) { *a = loadbytes512(ptr, i); }
// simd_lshift64: forwards to the lshift64_mNNN() matching the argument type.
m128 simd_lshift64(const m128 &a, unsigned i) {
    return lshift64_m128(a, i);
}
m256 simd_lshift64(const m256 &a, unsigned i) {
    return lshift64_m256(a, i);
}
m384 simd_lshift64(const m384 &a, unsigned i) {
    return lshift64_m384(a, i);
}
m512 simd_lshift64(const m512 &a, unsigned i) {
    return lshift64_m512(a, i);
}
150
151 template<typename T>
152 class SimdUtilsTest : public testing::Test {
153 // empty
154 };
155
156 typedef ::testing::Types<m128, m256, m384, m512> SimdTypes;
157 TYPED_TEST_CASE(SimdUtilsTest, SimdTypes);
158
159 //
160 // The tests themselves.
161 //
162
// simd_zeroes() must produce a vector whose every byte is 0x00.
TYPED_TEST(SimdUtilsTest, zero) {
    const TypeParam value = simd_zeroes();

    // Reference buffer with no bits on.
    char reference[sizeof(TypeParam)];
    memset(reference, 0, sizeof(reference));

    ASSERT_EQ(0, memcmp(reference, &value, sizeof(value)));
}
171
// simd_ones() must produce a vector whose every byte is 0xff.
TYPED_TEST(SimdUtilsTest, ones) {
    const TypeParam value = simd_ones();

    // Reference buffer with all bits on.
    char reference[sizeof(TypeParam)];
    memset(reference, 0xff, sizeof(reference));

    ASSERT_EQ(0, memcmp(reference, &value, sizeof(value)));
}
180
// AND truth table on the all-zeroes / all-ones vectors.
TYPED_TEST(SimdUtilsTest, and1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    // 0 & 1, 1 & 0 and 0 & 0 all give 0.
    EXPECT_FALSE(simd_diff(simd_and(zeroes, ones), zeroes));
    EXPECT_FALSE(simd_diff(simd_and(ones, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_and(zeroes, zeroes), zeroes));

    // 1 & 1 gives 1.
    EXPECT_FALSE(simd_diff(simd_and(ones, ones), ones));
}
199
// AND of two byte-patterned vectors must match the scalar AND in every byte.
TYPED_TEST(SimdUtilsTest, and2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    const TypeParam combined = simd_and(lhs, rhs);

    // Inspect the result byte by byte.
    char out[sizeof(TypeParam)];
    memcpy(out, &combined, sizeof(combined));

    const char want = 0x33 & 0x55;
    for (size_t pos = 0; pos < sizeof(out); pos++) {
        EXPECT_EQ(want, out[pos]);
    }
}
216
// diff256() reports 0 for identical inputs and 1 when a single bit differs.
TEST(SimdUtils, diff256) {
    const unsigned total_bits = 256;

    // Identical inputs: all-zeroes, all-ones, and every single-bit mask.
    ASSERT_EQ(0U, diff256(zeroes256(), zeroes256()));
    ASSERT_EQ(0U, diff256(ones256(), ones256()));
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const m256 first = setbit<m256>(bit);
        const m256 second = setbit<m256>(bit);
        ASSERT_EQ(0U, diff256(first, second));
    }

    // Each single-bit mask differs from all-zeroes.
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const u32 rv = diff256(zeroes256(), setbit<m256>(bit));
        ASSERT_EQ(1U, rv);
    }
}
236
// OR truth table on the all-zeroes / all-ones vectors.
TYPED_TEST(SimdUtilsTest, or1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    // 0 | 1 and 1 | 0 give 1.
    EXPECT_FALSE(simd_diff(simd_or(zeroes, ones), ones));
    EXPECT_FALSE(simd_diff(simd_or(ones, zeroes), ones));

    // 0 | 0 gives 0.
    EXPECT_FALSE(simd_diff(simd_or(zeroes, zeroes), zeroes));

    // 1 | 1 gives 1.
    EXPECT_FALSE(simd_diff(simd_or(ones, ones), ones));
}
255
// OR of two byte-patterned vectors must match the scalar OR in every byte.
TYPED_TEST(SimdUtilsTest, or2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    // NOTE(review): this loop only exercises diffrich256 on m256 values and
    // does not depend on TypeParam or on OR; it looks misplaced in this test.
    // Consider moving it to a dedicated diffrich256 test.
    for (unsigned word = 0; word < 8; word++) {
        for (unsigned offset = 0; offset < 32; offset++) {
            const m256 x = setbit<m256>(word*32+offset);
            const m256 y = zeroes256();
            ASSERT_EQ(1U << word, diffrich256(x, y))
                << "bit " << word*32+offset << " not happy";
        }
    }

    const TypeParam combined = simd_or(lhs, rhs);

    // Inspect the result byte by byte.
    char out[sizeof(TypeParam)];
    memcpy(out, &combined, sizeof(combined));

    const char want = 0x33 | 0x55;
    for (size_t pos = 0; pos < sizeof(out); pos++) {
        EXPECT_EQ(want, out[pos]);
    }
}
280
// XOR truth table on the all-zeroes / all-ones vectors.
TYPED_TEST(SimdUtilsTest, xor1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    // 0 ^ 1 and 1 ^ 0 give 1.
    EXPECT_FALSE(simd_diff(simd_xor(zeroes, ones), ones));
    EXPECT_FALSE(simd_diff(simd_xor(ones, zeroes), ones));

    // 0 ^ 0 and 1 ^ 1 give 0.
    EXPECT_FALSE(simd_diff(simd_xor(zeroes, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_xor(ones, ones), zeroes));
}
299
// XOR of two byte-patterned vectors must match the scalar XOR in every byte.
TYPED_TEST(SimdUtilsTest, xor2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    const TypeParam combined = simd_xor(lhs, rhs);

    // Inspect the result byte by byte.
    char out[sizeof(TypeParam)];
    memcpy(out, &combined, sizeof(combined));

    const char want = 0x33 ^ 0x55;
    for (size_t pos = 0; pos < sizeof(out); pos++) {
        EXPECT_EQ(want, out[pos]);
    }
}
316
// ANDNOT (~a & b) truth table on the all-zeroes / all-ones vectors.
TYPED_TEST(SimdUtilsTest, andnot1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    // ~0 & 1 gives 1.
    EXPECT_FALSE(simd_diff(simd_andnot(zeroes, ones), ones));

    // ~1 & 0, ~0 & 0 and ~1 & 1 all give 0.
    EXPECT_FALSE(simd_diff(simd_andnot(ones, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_andnot(zeroes, zeroes), zeroes));
    EXPECT_FALSE(simd_diff(simd_andnot(ones, ones), zeroes));
}
335
// ANDNOT of two byte-patterned vectors must match the scalar ~a & b in every
// byte.
TYPED_TEST(SimdUtilsTest, andnot2) {
    TypeParam lhs, rhs;
    memset(&lhs, 0x33, sizeof(lhs));
    memset(&rhs, 0x55, sizeof(rhs));

    const TypeParam combined = simd_andnot(lhs, rhs);

    // Inspect the result byte by byte.
    char out[sizeof(TypeParam)];
    memcpy(out, &combined, sizeof(combined));

    const char want = ~0x33 & 0x55;
    for (size_t pos = 0; pos < sizeof(out); pos++) {
        EXPECT_EQ(want, out[pos]);
    }
}
352
// NOT swaps the all-zeroes and all-ones vectors.
TYPED_TEST(SimdUtilsTest, not1) {
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    EXPECT_FALSE(simd_diff(simd_not(zeroes), ones));
    EXPECT_FALSE(simd_diff(simd_not(ones), zeroes));
}
365
// NOT of a byte-patterned vector must match the scalar complement in every
// byte.
TYPED_TEST(SimdUtilsTest, not2) {
    TypeParam input;
    memset(&input, 0x33, sizeof(input));

    const TypeParam inverted = simd_not(input);

    // Inspect the result byte by byte.
    char out[sizeof(TypeParam)];
    memcpy(out, &inverted, sizeof(inverted));

    const char want = ~0x33;
    for (size_t pos = 0; pos < sizeof(out); pos++) {
        EXPECT_EQ(want, out[pos]);
    }
}
381
// isnonzero is false only for the all-zeroes vector; any single set bit must
// be detected.
TYPED_TEST(SimdUtilsTest, isnonzero) {
    const TypeParam none = simd_zeroes();
    EXPECT_FALSE(simd_isnonzero(none));

    const TypeParam all = simd_ones();
    EXPECT_TRUE(simd_isnonzero(all));

    // Try every 1-bit case: build the value from raw bytes so exactly one bit
    // of one byte is on.
    for (size_t byte = 0; byte < sizeof(TypeParam); byte++) {
        for (size_t bit = 0; bit < 8; bit++) {
            char raw[sizeof(TypeParam)];
            memset(raw, 0, sizeof(raw));
            raw[byte] = 1 << bit;

            TypeParam candidate;
            memcpy(&candidate, raw, sizeof(candidate));
            EXPECT_TRUE(simd_isnonzero(candidate));
        }
    }
}
403
// Clearing bit i of an all-ones vector must change it, and OR-ing bit i back
// in must restore all-ones (i.e. no other bit was touched).
TYPED_TEST(SimdUtilsTest, clearbit) {
    const unsigned int total_bits = sizeof(TypeParam) * 8;
    const TypeParam ones = simd_ones();

    for (unsigned int bit = 0; bit < total_bits; bit++) {
        TypeParam cleared = simd_ones();
        simd_clearbit(&cleared, bit);
        ASSERT_NE(0, simd_diff(cleared, ones))
            << "bit " << bit << " wasn't cleared";

        const TypeParam only_bit = setbit<TypeParam>(bit);
        ASSERT_EQ(0, simd_diff(ones, simd_or(cleared, only_bit)))
            << "clearing bit " << bit << " caused collateral damage";
    }
}
419
// testbit must report every bit of all-ones as on, and for a single-bit mask
// must report exactly that one bit.
TYPED_TEST(SimdUtilsTest, testbit) {
    const unsigned int total_bits = sizeof(TypeParam) * 8;
    const TypeParam ones = simd_ones();

    // First, all bits are on in 'ones'.
    for (unsigned int bit = 0; bit < total_bits; bit++) {
        ASSERT_EQ(1, simd_testbit(ones, bit)) << "bit " << bit << " is on";
    }

    // Try individual bits; only 'i' should be on.
    for (unsigned int i = 0; i < total_bits; i++) {
        const TypeParam single = setbit<TypeParam>(i);
        for (unsigned int probe = 0; probe < total_bits; probe++) {
            const int want = (i == probe) ? 1 : 0;
            ASSERT_EQ(want, simd_testbit(single, probe))
                << "bit " << i << " is wrong";
        }
    }
}
439
// simd_setbit on an all-zeroes vector must match the reference single-bit
// mask, and setting every bit in turn must yield all-ones.
TYPED_TEST(SimdUtilsTest, setbit) {
    const unsigned int total_bits = sizeof(TypeParam) * 8;

    // Try individual bits; only 'bit' should be on.
    for (unsigned int bit = 0; bit < total_bits; bit++) {
        const TypeParam reference = setbit<TypeParam>(bit);
        TypeParam actual = simd_zeroes();
        simd_setbit(&actual, bit);
        ASSERT_FALSE(simd_diff(reference, actual));
    }

    // Turn on all bits, one at a time.
    TypeParam accumulated = simd_zeroes();
    for (unsigned int bit = 0; bit < total_bits; bit++) {
        simd_setbit(&accumulated, bit);
    }
    ASSERT_FALSE(simd_diff(simd_ones(), accumulated));
}
460
// diffrich returns a bitmask with one bit per 32-bit word that differs
// between its two arguments.
TYPED_TEST(SimdUtilsTest, diffrich) {
    const unsigned total_bits = sizeof(TypeParam) * 8;
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    // Identical inputs produce an empty mask.
    EXPECT_EQ(0U, simd_diffrich(zeroes, zeroes));
    EXPECT_EQ(0U, simd_diffrich(ones, ones));
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam first = setbit<TypeParam>(bit);
        const TypeParam second = setbit<TypeParam>(bit);
        EXPECT_EQ(0U, simd_diffrich(first, second));
    }

    // and nothing is on in zeroes
    // NOTE(review): this exercises simd_testbit rather than diffrich; it
    // looks like it belongs in the testbit test.
    for (unsigned int bit = 0; bit < total_bits; bit++) {
        ASSERT_EQ(0, simd_testbit(zeroes, bit)) << "bit " << bit << " is off";
    }

    // All-zeroes and all-ones differ in all words, so every word bit is set.
    const unsigned all_words = (1U << (total_bits / 32)) - 1;
    EXPECT_EQ(all_words, simd_diffrich(zeroes, ones));

    // A single set bit flags only the 32-bit word that contains it.
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const u32 rv = simd_diffrich(zeroes, setbit<TypeParam>(bit));
        EXPECT_EQ(1U << (bit / 32), rv);
    }
}
491
// diffrich64 flags differing 64-bit words; each word is reported on an
// even-numbered bit of the result.
TYPED_TEST(SimdUtilsTest, diffrich64) {
    const unsigned total_bits = sizeof(TypeParam) * 8;
    const TypeParam zeroes = simd_zeroes();
    const TypeParam ones = simd_ones();

    // Identical inputs produce an empty mask.
    EXPECT_EQ(0U, simd_diffrich64(zeroes, zeroes));
    EXPECT_EQ(0U, simd_diffrich64(ones, ones));
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const TypeParam first = setbit<TypeParam>(bit);
        const TypeParam second = setbit<TypeParam>(bit);
        EXPECT_EQ(0U, simd_diffrich64(first, second));
    }

    // All-zeroes and all-ones differ in all words, which will result in every
    // second bit being on.
    const unsigned every_second = ((1U << (total_bits / 32)) - 1) & 0x55555555u;
    EXPECT_EQ(every_second, simd_diffrich64(zeroes, ones));

    // A single set bit flags only the 64-bit word that contains it.
    for (unsigned bit = 0; bit < total_bits; bit++) {
        const u32 rv = simd_diffrich64(zeroes, setbit<TypeParam>(bit));
        EXPECT_EQ(1U << ((bit / 64) * 2), rv);
    }
}
519
520 // Unaligned load
TYPED_TEST(SimdUtilsTest,loadu)521 TYPED_TEST(SimdUtilsTest, loadu) {
522 const TypeParam ones = simd_ones();
523
524 const size_t mem_len = sizeof(ones) * 2;
525 unique_ptr<char[]> mem_array = ue2::make_unique<char[]>(mem_len);
526 char *mem = mem_array.get();
527
528 for (size_t offset = 1; offset < sizeof(ones); offset++) {
529 memset(mem, 0, mem_len);
530 memset(mem + offset, 0xff, sizeof(ones));
531 TypeParam a;
532 simd_loadu(&a, mem + offset);
533 ASSERT_EQ(0, simd_diff(a, ones));
534 }
535 }
536
537 // Aligned load and store
TYPED_TEST(SimdUtilsTest,load_store)538 TYPED_TEST(SimdUtilsTest, load_store) {
539 union {
540 TypeParam simd;
541 char bytes[sizeof(TypeParam)];
542 } a;
543 for (size_t i = 0; i < sizeof(a); i++) {
544 a.bytes[i] = (char)(i % 256);
545 }
546
547 auto mem_ptr = make_bytecode_ptr<char>(sizeof(a), alignof(TypeParam));
548 char *mem = mem_ptr.get();
549
550 ASSERT_EQ(0, (size_t)mem % 16U);
551
552 memset(mem, 0, sizeof(a));
553
554 simd_store(mem, a.simd);
555 ASSERT_EQ(0, memcmp(mem, a.bytes, sizeof(a)));
556
557 TypeParam b;
558 simd_load(&b, mem);
559 ASSERT_FALSE(simd_diff(a.simd, b));
560 }
561
562 // Packed load and store
TYPED_TEST(SimdUtilsTest,loadbytes_storebytes)563 TYPED_TEST(SimdUtilsTest, loadbytes_storebytes) {
564 union {
565 TypeParam simd;
566 char bytes[sizeof(TypeParam)];
567 } a;
568 for (size_t i = 0; i < sizeof(a); i++) {
569 a.bytes[i] = (char)(i % 256);
570 }
571
572 char mem[sizeof(TypeParam)];
573 for (size_t i = 1; i < sizeof(TypeParam); i++) {
574 memset(mem, 0xff, sizeof(TypeParam));
575
576 simd_storebytes(mem, a.simd, i);
577
578 union {
579 TypeParam simd;
580 char bytes[sizeof(TypeParam)];
581 } b;
582 simd_loadbytes(&b.simd, mem, i);
583
584 // First i bytes should match a, remaining bytes are zero. (Note that
585 // this takes endianness into account)
586 for (size_t j = 0; j < sizeof(TypeParam); j++) {
587 size_t idx = j;
588 ASSERT_EQ(j < i ? a.bytes[idx] : 0, b.bytes[idx]);
589 }
590 }
591 }
592
// lshift64 shifts every 64-bit lane left independently; each lane of the
// result must equal the scalar shift of the lane pattern.
TYPED_TEST(SimdUtilsTest, lshift64) {
    TypeParam input;
    memset(&input, 0x5a, sizeof(input));

    static constexpr u64a exp_val = 0x5a5a5a5a5a5a5a5aULL;

    union {
        TypeParam simd;
        u64a qword[sizeof(TypeParam) / 8];
    } shifted;

    // Checks that every 64-bit lane of 'shifted' holds the wanted value.
    auto expect_all_lanes = [&shifted](u64a want) {
        for (size_t lane = 0; lane < sizeof(shifted) / 8; lane++) {
            EXPECT_EQ(want, shifted.qword[lane]);
        }
    };

    // Shift counts supplied through a runtime variable.
    for (unsigned s = 0; s < 64; s++) {
        shifted.simd = simd_lshift64(input, s);
        expect_all_lanes(exp_val << s);
    }

    /* Clang 3.4 on FreeBSD 10 crashes on the following - disable for now */
#if !(defined(__FreeBSD__) && defined(__clang__) && __clang_major__ == 3)

    // test immediates: the shift counts are spelled as literals at each call.
    shifted.simd = simd_lshift64(input, 1);
    expect_all_lanes(exp_val << 1);

    shifted.simd = simd_lshift64(input, 2);
    expect_all_lanes(exp_val << 2);

    shifted.simd = simd_lshift64(input, 7);
    expect_all_lanes(exp_val << 7);

    shifted.simd = simd_lshift64(input, 31);
    expect_all_lanes(exp_val << 31);
#endif
}
644
// Pins the alignment requirement of each SIMD vector type.
TEST(SimdUtilsTest, alignment) {
    // 128-bit and 384-bit vectors are 16-byte aligned.
    ASSERT_EQ(16, alignof(m128));
    // 256-bit vectors are 32-byte aligned.
    ASSERT_EQ(32, alignof(m256));
    ASSERT_EQ(16, alignof(m384));
    // 512-bit vectors are 64-byte aligned.
    ASSERT_EQ(64, alignof(m512));
}
651
// movq extracts a 64-bit value from an m128; the last case shows it is the
// low half.
TEST(SimdUtilsTest, movq) {
    // All-ones vector: the extracted value is all ones too.
    m128 vec = ones128();
    u64a low = movq(vec);
    ASSERT_EQ((u64a)(~0), low);

    // Every byte 0x80: both the vector and the extracted value match the
    // byte pattern.
    char pattern[sizeof(m128)];
    memset(pattern, 0x80, sizeof(m128));
    vec = set16x8(0x80);
    low = movq(vec);
    ASSERT_EQ(0, memcmp(pattern, &vec, sizeof(vec)));
    ASSERT_EQ(0, memcmp(pattern, &low, sizeof(low)));

    // Distinct halves: only the low 64 bits must come back.
    vec = _mm_set_epi64x(~0LL, 0x123456789abcdef);
    low = movq(vec);
    ASSERT_EQ(low, 0x123456789abcdef);
}
670
671
// set16x8 must fill every byte of an m128 with the given value, for all 256
// byte values.
TEST(SimdUtilsTest, set16x8) {
    char reference[sizeof(m128)];

    for (unsigned value = 0; value < 256; value++) {
        const m128 filled = set16x8(value);
        memset(reference, value, sizeof(filled));
        ASSERT_EQ(0, memcmp(reference, &filled, sizeof(filled)));
    }
}
681
// set4x32 must replicate one 32-bit value into all four lanes of an m128.
TEST(SimdUtilsTest, set4x32) {
    const u32 word = 0x12345678;
    const u32 reference[4] = { word, word, word, word };

    const m128 filled = set4x32(word);
    ASSERT_EQ(0, memcmp(reference, &filled, sizeof(filled)));
}
687
688 #if defined(HAVE_AVX2)
// set32x8 must fill every byte of an m256 with the given value, for all 256
// byte values.
TEST(SimdUtilsTest, set32x8) {
    char reference[sizeof(m256)];

    for (unsigned value = 0; value < 256; value++) {
        const m256 filled = set32x8(value);
        memset(reference, value, sizeof(filled));
        ASSERT_EQ(0, memcmp(reference, &filled, sizeof(filled)));
    }
}
698
// set2x128 of a byte-filled m128 must equal both the raw byte pattern and the
// m256 produced by set32x8 for the same byte value.
TEST(SimdUtilsTest, set2x128) {
    char reference[sizeof(m256)];

    for (unsigned value = 0; value < 256; value++) {
        const m128 half = set16x8(value);
        const m256 direct = set32x8(value);
        const m256 doubled = set2x128(half);

        memset(reference, value, sizeof(doubled));
        ASSERT_EQ(0, memcmp(reference, &doubled, sizeof(doubled)));
        ASSERT_EQ(0, memcmp(&direct, &doubled, sizeof(doubled)));
    }
}
711 #endif
712
// variable_byte_shift_m128 must agree with the fixed byte shifts: a negative
// amount matches rshiftbyte_m128, a positive amount matches lshiftbyte_m128,
// and a shift of +/-16 clears the vector.
TEST(SimdUtilsTest, variableByteShift128) {
    char base[] = "0123456789ABCDEF";
    const m128 in = loadu128(base);

    // True when the two vectors are bit-for-bit equal.
    const auto same = [](const m128 &lhs, const m128 &rhs) {
        return !diff128(lhs, rhs);
    };

    // Shift counts are deliberately left as literals at each call site.
    // NOTE(review): presumably the fixed-shift helpers need compile-time
    // immediates, which is why this is not a loop - confirm.
    EXPECT_TRUE(same(rshiftbyte_m128(in, 0), variable_byte_shift_m128(in, 0)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 1), variable_byte_shift_m128(in, -1)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 2), variable_byte_shift_m128(in, -2)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 3), variable_byte_shift_m128(in, -3)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 4), variable_byte_shift_m128(in, -4)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 5), variable_byte_shift_m128(in, -5)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 6), variable_byte_shift_m128(in, -6)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 7), variable_byte_shift_m128(in, -7)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 8), variable_byte_shift_m128(in, -8)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 9), variable_byte_shift_m128(in, -9)));
    EXPECT_TRUE(same(rshiftbyte_m128(in, 10), variable_byte_shift_m128(in, -10)));

    EXPECT_TRUE(same(lshiftbyte_m128(in, 0), variable_byte_shift_m128(in, 0)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 1), variable_byte_shift_m128(in, 1)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 2), variable_byte_shift_m128(in, 2)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 3), variable_byte_shift_m128(in, 3)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 4), variable_byte_shift_m128(in, 4)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 5), variable_byte_shift_m128(in, 5)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 6), variable_byte_shift_m128(in, 6)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 7), variable_byte_shift_m128(in, 7)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 8), variable_byte_shift_m128(in, 8)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 9), variable_byte_shift_m128(in, 9)));
    EXPECT_TRUE(same(lshiftbyte_m128(in, 10), variable_byte_shift_m128(in, 10)));

    // Shifting by the full vector width in either direction leaves nothing.
    EXPECT_TRUE(same(zeroes128(), variable_byte_shift_m128(in, 16)));
    EXPECT_TRUE(same(zeroes128(), variable_byte_shift_m128(in, -16)));
}
766
// max_u8_m128 picks the larger unsigned byte in each of the 16 positions.
TEST(SimdUtilsTest, max_u8_m128) {
    char lhs_bytes[] = "0123456789ABCDE\xfe";
    char rhs_bytes[] = "!!23455889aBCd\xff\xff";
    char want_bytes[] = "0123456889aBCd\xff\xff";

    const m128 lhs = loadu128(lhs_bytes);
    const m128 rhs = loadu128(rhs_bytes);
    const m128 want = loadu128(want_bytes);

    const m128 got = max_u8_m128(lhs, rhs);
    EXPECT_TRUE(!diff128(got, want));
}
776
// min_u8_m128 picks the smaller unsigned byte in each of the 16 positions.
TEST(SimdUtilsTest, min_u8_m128) {
    char lhs_bytes[] = "0123456789ABCDE\xfe";
    char rhs_bytes[] = "!!23455889aBCd\xff\xff";
    char want_bytes[] = "!!23455789ABCDE\xfe";

    const m128 lhs = loadu128(lhs_bytes);
    const m128 rhs = loadu128(rhs_bytes);
    const m128 want = loadu128(want_bytes);

    const m128 got = min_u8_m128(lhs, rhs);
    EXPECT_TRUE(!diff128(got, want));
}
786
// sadd_u8_m128 adds unsigned bytes with saturation: sums that overflow a byte
// (0x80 + 0x80, 0xff + 'b') clamp to 0xff.
TEST(SimdUtilsTest, sadd_u8_m128) {
    unsigned char lhs_bytes[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
                                 '1', '2', '3', '4', '1', '2', '3', '4'};
    unsigned char rhs_bytes[] = {'a', 0x80, 'b', 'A', 0x10, 0x10, 0x10, 0x10,
                                 0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};
    unsigned char want_bytes[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
                                  'a', 'b', 'c', 'd', '1', '2', '3', '4'};

    const m128 lhs = loadu128(lhs_bytes);
    const m128 rhs = loadu128(rhs_bytes);
    const m128 want = loadu128(want_bytes);

    const m128 got = sadd_u8_m128(lhs, rhs);
    EXPECT_TRUE(!diff128(got, want));
}
799
// sub_u8_m128 subtracts the second operand from the first in each of the 16
// byte positions.
TEST(SimdUtilsTest, sub_u8_m128) {
    unsigned char lhs_bytes[] = {'a', 0xff, 0xff, 0x82, 'A', 'B', 'C', 'D',
                                 'a', 'b', 'c', 'd', '1', '2', '3', '4'};
    unsigned char rhs_bytes[] = {0, 0x80, 0xff, 'A', '1', '2', '3', '4',
                                 '1', '2', '3', '4', '1', '2', '3', '4'};
    unsigned char want_bytes[] = {'a', 0x7f, 0, 'A', 0x10, 0x10, 0x10, 0x10,
                                  0x30, 0x30, 0x30, 0x30, 0, 0, 0, 0};

    const m128 lhs = loadu128(lhs_bytes);
    const m128 rhs = loadu128(rhs_bytes);
    const m128 want = loadu128(want_bytes);

    const m128 got = sub_u8_m128(lhs, rhs);
    EXPECT_TRUE(!diff128(got, want));
}
812
813 } // namespace
814