1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include <stddef.h>
16 #include <stdint.h>
17 #include <string.h>
18
19 #undef HWY_TARGET_INCLUDE
20 #define HWY_TARGET_INCLUDE "tests/blockwise_test.cc"
21 #include "hwy/foreach_target.h"
22 #include "hwy/highway.h"
23 #include "hwy/tests/test_util-inl.h"
24
25 HWY_BEFORE_NAMESPACE();
26 namespace hwy {
27 namespace HWY_NAMESPACE {
28
// Checks ShiftLeftBytes/ShiftRightBytes: zero stays zero, a lone boundary
// byte is shifted out entirely, and Iota data moves by one byte per block.
struct TestShiftBytes {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    // Scalar does not define Shift*Bytes.
#if HWY_TARGET != HWY_SCALAR || HWY_IDE
    // Byte view of the vector; byte shifts operate on u8 lanes.
    const Repartition<uint8_t, D> du8;
    const size_t N8 = Lanes(du8);

    // Zero remains zero
    const auto v0 = Zero(d);
    HWY_ASSERT_VEC_EQ(d, v0, ShiftLeftBytes<1>(v0));
    HWY_ASSERT_VEC_EQ(d, v0, ShiftLeftBytes<1>(d, v0));
    HWY_ASSERT_VEC_EQ(d, v0, ShiftRightBytes<1>(d, v0));

    // Zero after shifting out the high/low byte
    auto bytes = AllocateAligned<uint8_t>(N8);
    std::fill(bytes.get(), bytes.get() + N8, 0);
    bytes[N8 - 1] = 0x7F;
    const auto vhi = BitCast(d, Load(du8, bytes.get()));
    bytes[N8 - 1] = 0;
    bytes[0] = 0x7F;
    const auto vlo = BitCast(d, Load(du8, bytes.get()));
    HWY_ASSERT_VEC_EQ(d, v0, ShiftLeftBytes<1>(vhi));
    HWY_ASSERT_VEC_EQ(d, v0, ShiftLeftBytes<1>(d, vhi));
    HWY_ASSERT_VEC_EQ(d, v0, ShiftRightBytes<1>(d, vlo));

    // Check expected result with Iota
    const size_t N = Lanes(d);
    auto in = AllocateAligned<T>(N);
    // Byte view of the stored input, used to build per-byte expectations.
    const uint8_t* in_bytes = reinterpret_cast<const uint8_t*>(in.get());
    const auto v = BitCast(d, Iota(du8, 1));
    Store(v, d, in.get());

    auto expected = AllocateAligned<T>(N);
    uint8_t* expected_bytes = reinterpret_cast<uint8_t*>(expected.get());

    // Shifts act per 128-bit block (or on the whole vector if smaller).
    const size_t kBlockSize = HWY_MIN(N8, 16);
    for (size_t block = 0; block < N8; block += kBlockSize) {
      // Left shift: byte 0 of each block becomes zero, the rest move up.
      expected_bytes[block] = 0;
      memcpy(expected_bytes + block + 1, in_bytes + block, kBlockSize - 1);
    }
    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftBytes<1>(v));
    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftBytes<1>(d, v));

    for (size_t block = 0; block < N8; block += kBlockSize) {
      // Right shift: bytes move down, the top byte of each block is zeroed.
      memcpy(expected_bytes + block, in_bytes + block + 1, kBlockSize - 1);
      expected_bytes[block + kBlockSize - 1] = 0;
    }
    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightBytes<1>(d, v));
#else
    (void)d;
#endif  // #if HWY_TARGET != HWY_SCALAR
  }
};
83
// Runs TestShiftBytes for all integer types and partial vector sizes.
HWY_NOINLINE void TestAllShiftBytes() {
  ForIntegerTypes(ForPartialVectors<TestShiftBytes>());
}
87
// Checks ShiftLeftLanes/ShiftRightLanes: identity for shift 0, and for
// shift 1 verifies that zeros are inserted at each 128-bit block boundary.
struct TestShiftLanes {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    // Scalar does not define Shift*Lanes.
#if HWY_TARGET != HWY_SCALAR || HWY_IDE
    const auto v = Iota(d, T(1));
    const size_t N = Lanes(d);
    auto expected = AllocateAligned<T>(N);

    // Shifting by zero lanes is the identity.
    HWY_ASSERT_VEC_EQ(d, v, ShiftLeftLanes<0>(v));
    HWY_ASSERT_VEC_EQ(d, v, ShiftLeftLanes<0>(d, v));
    HWY_ASSERT_VEC_EQ(d, v, ShiftRightLanes<0>(d, v));

    constexpr size_t kLanesPerBlock = 16 / sizeof(T);

    // Left shift by one: lane 0 of each block becomes zero; other lanes take
    // the previous lane's value, which is i because Iota starts at 1.
    for (size_t i = 0; i < N; ++i) {
      expected[i] = (i % kLanesPerBlock) == 0 ? T(0) : T(i);
    }
    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftLanes<1>(v));
    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftLanes<1>(d, v));

    // Right shift by one: the last lane of each block (and the last lane of a
    // partial vector, i >= N - 1) becomes zero; others take the next value.
    for (size_t i = 0; i < N; ++i) {
      const size_t mod = i % kLanesPerBlock;
      expected[i] = mod == (kLanesPerBlock - 1) || i >= N - 1 ? T(0) : T(2 + i);
    }
    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightLanes<1>(d, v));
#else
    (void)d;
#endif  // #if HWY_TARGET != HWY_SCALAR
  }
};
119
// Runs TestShiftLanes for all lane types and partial vector sizes.
HWY_NOINLINE void TestAllShiftLanes() {
  ForAllTypes(ForPartialVectors<TestShiftLanes>());
}
123
// Recursively tests Broadcast<kLane> for lane indices kLane, kLane-1, ..., 0.
// Recursion is terminated by the kLane == -1 specialization below.
template <typename D, int kLane>
struct TestBroadcastR {
  HWY_NOINLINE void operator()() const {
    using T = typename D::T;
    const D d;
    const size_t N = Lanes(d);
    // Skip if the (possibly partial) vector does not have a lane kLane.
    if (kLane >= N) return;
    auto in_lanes = AllocateAligned<T>(N);
    std::fill(in_lanes.get(), in_lanes.get() + N, T(0));
    // Lanes per 128-bit block, capped by the vector size.
    const size_t blockN = HWY_MIN(N * sizeof(T), 16) / sizeof(T);
    // Need to set within each 128-bit block
    for (size_t block = 0; block < N; block += blockN) {
      // Marker value (block + 1) distinguishes blocks from each other.
      in_lanes[block + kLane] = static_cast<T>(block + 1);
    }
    const auto in = Load(d, in_lanes.get());
    auto expected = AllocateAligned<T>(N);
    // Broadcast fills every lane of a block with that block's marker.
    for (size_t block = 0; block < N; block += blockN) {
      for (size_t i = 0; i < blockN; ++i) {
        expected[block + i] = T(block + 1);
      }
    }
    HWY_ASSERT_VEC_EQ(d, expected.get(), Broadcast<kLane>(in));

    // Recurse to test the next-lower lane index.
    TestBroadcastR<D, kLane - 1>()();
  }
};
150
// Recursion terminator: no lane indices remain to test.
template <class D>
struct TestBroadcastR<D, -1> {
  void operator()() const {}
};
155
// Entry point: tests Broadcast for every valid in-block lane index of D.
struct TestBroadcast {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    // Start at the highest lane index within a 128-bit block, capped by the
    // vector's (compile-time) lane count.
    TestBroadcastR<D, HWY_MIN(MaxLanes(d), 16 / sizeof(T)) - 1>()();
  }
};
162
// Runs TestBroadcast for all supported lane types (Broadcast has no u8/i8).
HWY_NOINLINE void TestAllBroadcast() {
  const ForPartialVectors<TestBroadcast> test;
  // No u/i8.
  test(uint16_t());
  test(int16_t());
  ForUIF3264(test);
}
170
// Selects the tag used for the table operand of TableLookupBytes: by default
// the same (possibly partial) tag as the index vector...
template <bool kFull>
struct ChooseTableSize {
  template <typename T, typename DIdx>
  using type = DIdx;
};
// ...or, when kFull, a full-size (scalable) vector tag.
template <>
struct ChooseTableSize<true> {
  template <typename T, typename DIdx>
  using type = ScalableTag<T>;
};
181
// Checks TableLookupBytes and TableLookupBytesOr0 against a scalar model:
// each index byte selects a byte from the corresponding 16-byte block of the
// table. kFull chooses a full-size table vs. one matching the index vector.
template <bool kFull>
struct TestTableLookupBytes {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
#if HWY_TARGET != HWY_SCALAR
    RandomState rng;
    // Tag (and hence byte count NT8) of the table operand.
    const typename ChooseTableSize<kFull>::template type<T, D> d_tbl;
    const Repartition<uint8_t, decltype(d_tbl)> d_tbl8;
    const size_t NT8 = Lanes(d_tbl8);

    const Repartition<uint8_t, D> d8;
    const size_t N = Lanes(d);
    const size_t N8 = Lanes(d8);

    // Random input bytes
    auto in_bytes = AllocateAligned<uint8_t>(NT8);
    for (size_t i = 0; i < NT8; ++i) {
      in_bytes[i] = Random32(&rng) & 0xFF;
    }
    const auto in = BitCast(d_tbl, Load(d_tbl8, in_bytes.get()));

    // Enough test data; for larger vectors, upper lanes will be zero.
    const uint8_t index_bytes_source[64] = {
        // Same index as source, multiple outputs from same input,
        // unused input (9), ascending/descending and nonconsecutive neighbors.
        0, 2, 1, 2, 15, 12, 13, 14, 6, 7, 8, 5, 4, 3, 10, 11,
        11, 10, 3, 4, 5, 8, 7, 6, 14, 13, 12, 15, 2, 1, 2, 0,
        4, 3, 2, 2, 5, 6, 7, 7, 15, 15, 15, 15, 15, 15, 0, 1};
    auto index_bytes = AllocateAligned<uint8_t>(N8);
    const size_t max_index = HWY_MIN(N8, 16) - 1;
    for (size_t i = 0; i < N8; ++i) {
      index_bytes[i] = (i < 64) ? index_bytes_source[i] : 0;
      // Avoid asan error for partial vectors.
      index_bytes[i] = static_cast<uint8_t>(HWY_MIN(index_bytes[i], max_index));
    }
    const auto indices = Load(d, reinterpret_cast<const T*>(index_bytes.get()));

    auto expected = AllocateAligned<T>(N);
    uint8_t* expected_bytes = reinterpret_cast<uint8_t*>(expected.get());

    // Scalar model: output byte i of a block selects in_bytes from that block.
    for (size_t block = 0; block < N8; block += 16) {
      for (size_t i = 0; i < 16 && (block + i) < N8; ++i) {
        const uint8_t index = index_bytes[block + i];
        HWY_ASSERT(block + index < N8);  // indices were already capped to N8.
        // For large vectors, the lane index may wrap around due to block.
        expected_bytes[block + i] = in_bytes[(block & 0xFF) + index];
      }
    }
    HWY_ASSERT_VEC_EQ(d, expected.get(), TableLookupBytes(in, indices));

    // Individually test zeroing each byte position.
    for (size_t i = 0; i < N8; ++i) {
      const uint8_t prev_expected = expected_bytes[i];
      const uint8_t prev_index = index_bytes[i];
      expected_bytes[i] = 0;

      // Any index byte with the high bit set selects zero; vary other bits.
      const int idx = 0x80 + (int(Random32(&rng) & 7) << 4);
      HWY_ASSERT(0x80 <= idx && idx < 256);
      index_bytes[i] = static_cast<uint8_t>(idx);

      const auto indices =
          Load(d, reinterpret_cast<const T*>(index_bytes.get()));
      HWY_ASSERT_VEC_EQ(d, expected.get(), TableLookupBytesOr0(in, indices));
      // Restore so only one position is zeroed at a time.
      expected_bytes[i] = prev_expected;
      index_bytes[i] = prev_index;
    }
#else
    (void)d;
#endif
  }
};
253
// Runs TestTableLookupBytes with same-sized and (where supported) full-size
// tables, for all integer types.
HWY_NOINLINE void TestAllTableLookupBytes() {
  // Partial index, same-sized table.
  ForIntegerTypes(ForPartialVectors<TestTableLookupBytes<false>>());

  // TODO(janwas): requires LMUL trunc/ext, which is not yet implemented.
#if HWY_TARGET != HWY_RVV
  // Partial index, full-size table.
  ForIntegerTypes(ForPartialVectors<TestTableLookupBytes<true>>());
#endif
}
264
265 struct TestInterleaveLower {
266 template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestInterleaveLower267 HWY_NOINLINE void operator()(T /*unused*/, D d) {
268 using TU = MakeUnsigned<T>;
269 const size_t N = Lanes(d);
270 auto even_lanes = AllocateAligned<T>(N);
271 auto odd_lanes = AllocateAligned<T>(N);
272 auto expected = AllocateAligned<T>(N);
273 for (size_t i = 0; i < N; ++i) {
274 even_lanes[i] = static_cast<T>(2 * i + 0);
275 odd_lanes[i] = static_cast<T>(2 * i + 1);
276 }
277 const auto even = Load(d, even_lanes.get());
278 const auto odd = Load(d, odd_lanes.get());
279
280 const size_t blockN = HWY_MIN(16 / sizeof(T), N);
281 for (size_t i = 0; i < Lanes(d); ++i) {
282 const size_t block = i / blockN;
283 const size_t index = (i % blockN) + block * 2 * blockN;
284 expected[i] = static_cast<T>(index & LimitsMax<TU>());
285 }
286 HWY_ASSERT_VEC_EQ(d, expected.get(), InterleaveLower(even, odd));
287 HWY_ASSERT_VEC_EQ(d, expected.get(), InterleaveLower(d, even, odd));
288 }
289 };
290
291 struct TestInterleaveUpper {
292 template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestInterleaveUpper293 HWY_NOINLINE void operator()(T /*unused*/, D d) {
294 const size_t N = Lanes(d);
295 if (N == 1) return;
296 auto even_lanes = AllocateAligned<T>(N);
297 auto odd_lanes = AllocateAligned<T>(N);
298 auto expected = AllocateAligned<T>(N);
299 for (size_t i = 0; i < N; ++i) {
300 even_lanes[i] = static_cast<T>(2 * i + 0);
301 odd_lanes[i] = static_cast<T>(2 * i + 1);
302 }
303 const auto even = Load(d, even_lanes.get());
304 const auto odd = Load(d, odd_lanes.get());
305
306 const size_t blockN = HWY_MIN(16 / sizeof(T), N);
307 for (size_t i = 0; i < Lanes(d); ++i) {
308 const size_t block = i / blockN;
309 expected[i] = T((i % blockN) + block * 2 * blockN + blockN);
310 }
311 HWY_ASSERT_VEC_EQ(d, expected.get(), InterleaveUpper(d, even, odd));
312 }
313 };
314
// Runs the interleave tests; requires at least two lanes, hence
// ForShrinkableVectors (no HWY_SCALAR coverage).
HWY_NOINLINE void TestAllInterleave() {
  // Not DemoteVectors because this cannot be supported by HWY_SCALAR.
  ForAllTypes(ForShrinkableVectors<TestInterleaveLower>());
  ForAllTypes(ForShrinkableVectors<TestInterleaveUpper>());
}
320
// Checks ZipLower: adjacent lane pairs from the lower block halves are
// combined into double-width lanes (lower source lane supplies the low bits).
struct TestZipLower {
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    using WideT = MakeWide<T>;
    static_assert(sizeof(T) * 2 == sizeof(WideT), "Must be double-width");
    static_assert(IsSigned<T>() == IsSigned<WideT>(), "Must have same sign");
    const size_t N = Lanes(d);
    auto even_lanes = AllocateAligned<T>(N);
    auto odd_lanes = AllocateAligned<T>(N);
    for (size_t i = 0; i < N; ++i) {
      even_lanes[i] = static_cast<T>(2 * i + 0);
      odd_lanes[i] = static_cast<T>(2 * i + 1);
    }
    const auto even = Load(d, even_lanes.get());
    const auto odd = Load(d, odd_lanes.get());

    // Expected values are computed in the double-width type.
    const Repartition<WideT, D> dw;
    const size_t NW = Lanes(dw);
    auto expected = AllocateAligned<WideT>(NW);
    const size_t blockN = HWY_MIN(size_t(16) / sizeof(WideT), NW);

    for (size_t i = 0; i < NW; ++i) {
      const size_t block = i / blockN;
      // Value of least-significant lane in lo-vector.
      const size_t lo = 2u * (i % blockN) + 4u * block * blockN;
      const size_t kBits = sizeof(T) * 8;
      // Wide lane = (odd value << kBits) | even value.
      expected[i] = static_cast<WideT>((static_cast<WideT>(lo + 1) << kBits) +
                                       static_cast<WideT>(lo));
    }
    HWY_ASSERT_VEC_EQ(dw, expected.get(), ZipLower(even, odd));
    HWY_ASSERT_VEC_EQ(dw, expected.get(), ZipLower(dw, even, odd));
  }
};
354
355 struct TestZipUpper {
356 template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestZipUpper357 HWY_NOINLINE void operator()(T /*unused*/, D d) {
358 using WideT = MakeWide<T>;
359 static_assert(sizeof(T) * 2 == sizeof(WideT), "Must be double-width");
360 static_assert(IsSigned<T>() == IsSigned<WideT>(), "Must have same sign");
361 const size_t N = Lanes(d);
362 if (N < 16 / sizeof(T)) return;
363 auto even_lanes = AllocateAligned<T>(N);
364 auto odd_lanes = AllocateAligned<T>(N);
365 for (size_t i = 0; i < Lanes(d); ++i) {
366 even_lanes[i] = static_cast<T>(2 * i + 0);
367 odd_lanes[i] = static_cast<T>(2 * i + 1);
368 }
369 const auto even = Load(d, even_lanes.get());
370 const auto odd = Load(d, odd_lanes.get());
371
372 const Repartition<WideT, D> dw;
373 const size_t NW = Lanes(dw);
374 auto expected = AllocateAligned<WideT>(NW);
375 const size_t blockN = HWY_MIN(size_t(16) / sizeof(WideT), NW);
376
377 for (size_t i = 0; i < NW; ++i) {
378 const size_t block = i / blockN;
379 const size_t lo = 2u * (i % blockN) + 4u * block * blockN;
380 const size_t kBits = sizeof(T) * 8;
381 expected[i] = static_cast<WideT>(
382 (static_cast<WideT>(lo + 2 * blockN + 1) << kBits) +
383 static_cast<WideT>(lo + 2 * blockN));
384 }
385 HWY_ASSERT_VEC_EQ(dw, expected.get(), ZipUpper(dw, even, odd));
386 }
387 };
388
// Runs the Zip tests for the types whose double-width result exists on the
// current target (u64/i64 are gated behind HWY_CAP_INTEGER64).
HWY_NOINLINE void TestAllZip() {
  const ForDemoteVectors<TestZipLower> lower_unsigned;
  // TODO(janwas): enable after LowerHalf available
#if HWY_TARGET != HWY_RVV
  lower_unsigned(uint8_t());
#endif
  lower_unsigned(uint16_t());
#if HWY_CAP_INTEGER64
  lower_unsigned(uint32_t());  // generates u64
#endif

  const ForDemoteVectors<TestZipLower> lower_signed;
#if HWY_TARGET != HWY_RVV
  lower_signed(int8_t());
#endif
  lower_signed(int16_t());
#if HWY_CAP_INTEGER64
  lower_signed(int32_t());  // generates i64
#endif

  const ForShrinkableVectors<TestZipUpper> upper_unsigned;
#if HWY_TARGET != HWY_RVV
  upper_unsigned(uint8_t());
#endif
  upper_unsigned(uint16_t());
#if HWY_CAP_INTEGER64
  upper_unsigned(uint32_t());  // generates u64
#endif

  const ForShrinkableVectors<TestZipUpper> upper_signed;
#if HWY_TARGET != HWY_RVV
  upper_signed(int8_t());
#endif
  upper_signed(int16_t());
#if HWY_CAP_INTEGER64
  upper_signed(int32_t());  // generates i64
#endif

  // No float - concatenating f32 does not result in a f64
}
429
// Recursively tests CombineShiftRightBytes for shift counts kBytes down to 1,
// comparing against a scalar model over concatenated per-block buffers.
template <int kBytes>
struct TestCombineShiftRightBytesR {
  template <class T, class D>
  HWY_NOINLINE void operator()(T t, D d) {
    // Scalar does not define CombineShiftRightBytes.
#if HWY_TARGET != HWY_SCALAR || HWY_IDE
    const size_t kBlockSize = 16;
    static_assert(kBytes < kBlockSize, "Shift count is per block");
    const Repartition<uint8_t, D> d8;
    const size_t N8 = Lanes(d8);
    // Requires at least one full 128-bit block.
    if (N8 < 16) return;
    auto hi_bytes = AllocateAligned<uint8_t>(N8);
    auto lo_bytes = AllocateAligned<uint8_t>(N8);
    auto expected_bytes = AllocateAligned<uint8_t>(N8);
    uint8_t combined[2 * kBlockSize];

    // Random inputs in each lane
    RandomState rng;
    for (size_t rep = 0; rep < AdjustedReps(100); ++rep) {
      for (size_t i = 0; i < N8; ++i) {
        hi_bytes[i] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
        lo_bytes[i] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
      }
      // Model: per block, concatenate [lo, hi], then take kBlockSize bytes
      // starting at offset kBytes.
      for (size_t i = 0; i < N8; i += kBlockSize) {
        CopyBytes<kBlockSize>(&lo_bytes[i], combined);
        CopyBytes<kBlockSize>(&hi_bytes[i], combined + kBlockSize);
        CopyBytes<kBlockSize>(combined + kBytes, &expected_bytes[i]);
      }

      const auto hi = BitCast(d, Load(d8, hi_bytes.get()));
      const auto lo = BitCast(d, Load(d8, lo_bytes.get()));
      const auto expected = BitCast(d, Load(d8, expected_bytes.get()));
      HWY_ASSERT_VEC_EQ(d, expected, CombineShiftRightBytes<kBytes>(d, hi, lo));
    }

    // Recurse with the next-smaller shift count.
    TestCombineShiftRightBytesR<kBytes - 1>()(t, d);
#else
    (void)t;
    (void)d;
#endif  // #if HWY_TARGET != HWY_SCALAR
  }
};
472
// Recursively tests CombineShiftRightLanes for shift counts kLanes down to 1,
// using the same scalar concatenation model scaled by sizeof(T).
template <int kLanes>
struct TestCombineShiftRightLanesR {
  template <class T, class D>
  HWY_NOINLINE void operator()(T t, D d) {
    // Scalar does not define CombineShiftRightBytes (needed for *Lanes).
#if HWY_TARGET != HWY_SCALAR || HWY_IDE
    const Repartition<uint8_t, D> d8;
    const size_t N8 = Lanes(d8);
    // Requires at least one full 128-bit block.
    if (N8 < 16) return;

    auto hi_bytes = AllocateAligned<uint8_t>(N8);
    auto lo_bytes = AllocateAligned<uint8_t>(N8);
    auto expected_bytes = AllocateAligned<uint8_t>(N8);
    const size_t kBlockSize = 16;
    uint8_t combined[2 * kBlockSize];

    // Random inputs in each lane
    RandomState rng;
    for (size_t rep = 0; rep < AdjustedReps(100); ++rep) {
      for (size_t i = 0; i < N8; ++i) {
        hi_bytes[i] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
        lo_bytes[i] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
      }
      // Model: per block, concatenate [lo, hi], then take kBlockSize bytes
      // starting at a lane-aligned byte offset.
      for (size_t i = 0; i < N8; i += kBlockSize) {
        CopyBytes<kBlockSize>(&lo_bytes[i], combined);
        CopyBytes<kBlockSize>(&hi_bytes[i], combined + kBlockSize);
        CopyBytes<kBlockSize>(combined + kLanes * sizeof(T),
                              &expected_bytes[i]);
      }

      const auto hi = BitCast(d, Load(d8, hi_bytes.get()));
      const auto lo = BitCast(d, Load(d8, lo_bytes.get()));
      const auto expected = BitCast(d, Load(d8, expected_bytes.get()));
      HWY_ASSERT_VEC_EQ(d, expected, CombineShiftRightLanes<kLanes>(d, hi, lo));
    }

    // Recurse with the next-smaller shift count.
    TestCombineShiftRightLanesR<kLanes - 1>()(t, d);
#else
    (void)t;
    (void)d;
#endif  // #if HWY_TARGET != HWY_SCALAR
  }
};
516
// Recursion terminator for the byte-shift test (shift 0 is a no-op).
template <>
struct TestCombineShiftRightBytesR<0> {
  template <class T, class D>
  void operator()(T /*unused*/, D /*unused*/) {}
};
522
// Recursion terminator for the lane-shift test (shift 0 is a no-op).
template <>
struct TestCombineShiftRightLanesR<0> {
  template <class T, class D>
  void operator()(T /*unused*/, D /*unused*/) {}
};
528
// Entry point: tests all valid byte and lane shift counts for D.
struct TestCombineShiftRight {
  template <class T, class D>
  HWY_NOINLINE void operator()(T t, D d) {
    // Largest byte shift is one less than the block size (capped at 16).
    constexpr int kMaxBytes = HWY_MIN(16, int(MaxLanes(d) * sizeof(T)));
    TestCombineShiftRightBytesR<kMaxBytes - 1>()(t, d);
    TestCombineShiftRightLanesR<kMaxBytes / int(sizeof(T)) - 1>()(t, d);
  }
};
537
// Runs TestCombineShiftRight for all types on vectors with >= 2 lanes.
HWY_NOINLINE void TestAllCombineShiftRight() {
  // Need at least 2 lanes.
  ForAllTypes(ForShrinkableVectors<TestCombineShiftRight>());
}
542
// Checks the fixed 32-bit shuffles (Shuffle2301 etc.) against per-block
// expected lane orders.
class TestSpecialShuffle32 {
 public:
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const auto v = Iota(d, 0);
    // Arguments i3..i0 give the source lane index for output lanes 3..0.
    VerifyLanes32(d, Shuffle2301(v), 2, 3, 0, 1, __FILE__, __LINE__);
    VerifyLanes32(d, Shuffle1032(v), 1, 0, 3, 2, __FILE__, __LINE__);
    VerifyLanes32(d, Shuffle0321(v), 0, 3, 2, 1, __FILE__, __LINE__);
    VerifyLanes32(d, Shuffle2103(v), 2, 1, 0, 3, __FILE__, __LINE__);
    VerifyLanes32(d, Shuffle0123(v), 0, 1, 2, 3, __FILE__, __LINE__);
  }

 private:
  // Asserts that every 4-lane block of `actual` holds the block-relative
  // values i3..i0 (highest lane first), reporting filename:line on failure.
  template <class D, class V>
  HWY_NOINLINE void VerifyLanes32(D d, VecArg<V> actual, const size_t i3,
                                  const size_t i2, const size_t i1,
                                  const size_t i0, const char* filename,
                                  const int line) {
    using T = TFromD<D>;
    constexpr size_t kBlockN = 16 / sizeof(T);
    const size_t N = Lanes(d);
    // Requires a full block of four 32-bit lanes.
    if (N < 4) return;
    auto expected = AllocateAligned<T>(N);
    for (size_t block = 0; block < N; block += kBlockN) {
      expected[block + 3] = static_cast<T>(block + i3);
      expected[block + 2] = static_cast<T>(block + i2);
      expected[block + 1] = static_cast<T>(block + i1);
      expected[block + 0] = static_cast<T>(block + i0);
    }
    AssertVecEqual(d, expected.get(), actual, filename, line);
  }
};
575
// Checks the fixed 64-bit shuffle (Shuffle01 swaps lane pairs) against
// per-block expected lane orders.
class TestSpecialShuffle64 {
 public:
  template <class T, class D>
  HWY_NOINLINE void operator()(T /*unused*/, D d) {
    const auto v = Iota(d, 0);
    // Arguments i1, i0 give the source lane index for output lanes 1, 0.
    VerifyLanes64(d, Shuffle01(v), 0, 1, __FILE__, __LINE__);
  }

 private:
  // Asserts that every 2-lane block of `actual` holds the block-relative
  // values i1, i0 (highest lane first), reporting filename:line on failure.
  template <class D, class V>
  HWY_NOINLINE void VerifyLanes64(D d, VecArg<V> actual, const size_t i1,
                                  const size_t i0, const char* filename,
                                  const int line) {
    using T = TFromD<D>;
    constexpr size_t kBlockN = 16 / sizeof(T);
    const size_t N = Lanes(d);
    // Requires a full block of two 64-bit lanes.
    if (N < 2) return;
    auto expected = AllocateAligned<T>(N);
    for (size_t block = 0; block < N; block += kBlockN) {
      expected[block + 1] = static_cast<T>(block + i1);
      expected[block + 0] = static_cast<T>(block + i0);
    }
    AssertVecEqual(d, expected.get(), actual, filename, line);
  }
};
601
// Runs the special-shuffle tests for all 32-bit and (when the target supports
// them) 64-bit lane types, on >= 128-bit vectors.
HWY_NOINLINE void TestAllSpecialShuffles() {
  const ForGE128Vectors<TestSpecialShuffle32> test32;
  test32(uint32_t());
  test32(int32_t());
  test32(float());

#if HWY_CAP_INTEGER64
  const ForGE128Vectors<TestSpecialShuffle64> test64;
  test64(uint64_t());
  test64(int64_t());
#endif

#if HWY_CAP_FLOAT64
  const ForGE128Vectors<TestSpecialShuffle64> test_d;
  test_d(double());
#endif
}
619
620 // NOLINTNEXTLINE(google-readability-namespace-comments)
621 } // namespace HWY_NAMESPACE
622 } // namespace hwy
623 HWY_AFTER_NAMESPACE();
624
#if HWY_ONCE

namespace hwy {
HWY_BEFORE_TEST(HwyBlockwiseTest);
// Register each TestAll* function so it runs once per compiled target.
HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllShiftBytes);
HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllShiftLanes);
HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllBroadcast);
HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllTableLookupBytes);
HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllInterleave);
HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllZip);
HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllCombineShiftRight);
HWY_EXPORT_AND_TEST_P(HwyBlockwiseTest, TestAllSpecialShuffles);
}  // namespace hwy

// Ought not to be necessary, but without this, no tests run on RVV.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

#endif
646