1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <stddef.h>
16 #include <stdint.h>
17 
18 #undef HWY_TARGET_INCLUDE
19 #define HWY_TARGET_INCLUDE "tests/combine_test.cc"
20 #include "hwy/foreach_target.h"
21 
22 #include "hwy/highway.h"
23 #include "hwy/tests/test_util-inl.h"
24 
25 // Not yet implemented
26 #if HWY_TARGET != HWY_RVV
27 
28 HWY_BEFORE_NAMESPACE();
29 namespace hwy {
30 namespace HWY_NAMESPACE {
31 
32 struct TestLowerHalf {
33   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestLowerHalf34   HWY_NOINLINE void operator()(T /*unused*/, D d) {
35     const Half<D> d2;
36 
37     const size_t N = Lanes(d);
38     auto lanes = AllocateAligned<T>(N);
39     std::fill(lanes.get(), lanes.get() + N, T(0));
40     const auto v = Iota(d, 1);
41     Store(LowerHalf(v), d2, lanes.get());
42     size_t i = 0;
43     for (; i < Lanes(d2); ++i) {
44       HWY_ASSERT_EQ(T(1 + i), lanes[i]);
45     }
46     // Other half remains unchanged
47     for (; i < N; ++i) {
48       HWY_ASSERT_EQ(T(0), lanes[i]);
49     }
50   }
51 };
52 
53 struct TestLowerQuarter {
54   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestLowerQuarter55   HWY_NOINLINE void operator()(T /*unused*/, D d) {
56     const Half<Half<D>> d4;
57 
58     const size_t N = Lanes(d);
59     auto lanes = AllocateAligned<T>(N);
60     std::fill(lanes.get(), lanes.get() + N, T(0));
61     const auto v = Iota(d, 1);
62     const auto lo = LowerHalf(LowerHalf(v));
63     Store(lo, d4, lanes.get());
64     size_t i = 0;
65     for (; i < Lanes(d4); ++i) {
66       HWY_ASSERT_EQ(T(i + 1), lanes[i]);
67     }
68     // Upper 3/4 remain unchanged
69     for (; i < N; ++i) {
70       HWY_ASSERT_EQ(T(0), lanes[i]);
71     }
72   }
73 };
74 
TestAllLowerHalf()75 HWY_NOINLINE void TestAllLowerHalf() {
76   constexpr size_t kDiv = 1;
77   ForAllTypes(ForPartialVectors<TestLowerHalf, kDiv, /*kMinLanes=*/2>());
78   ForAllTypes(ForPartialVectors<TestLowerQuarter, kDiv, /*kMinLanes=*/4>());
79 }
80 
81 struct TestUpperHalf {
82   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestUpperHalf83   HWY_NOINLINE void operator()(T /*unused*/, D d) {
84     // Scalar does not define UpperHalf.
85 #if HWY_TARGET != HWY_SCALAR
86     const Half<D> d2;
87 
88     const auto v = Iota(d, 1);
89     const size_t N = Lanes(d);
90     auto lanes = AllocateAligned<T>(N);
91     std::fill(lanes.get(), lanes.get() + N, T(0));
92 
93     Store(UpperHalf(v), d2, lanes.get());
94     size_t i = 0;
95     for (; i < Lanes(d2); ++i) {
96       HWY_ASSERT_EQ(T(Lanes(d2) + 1 + i), lanes[i]);
97     }
98     // Other half remains unchanged
99     for (; i < N; ++i) {
100       HWY_ASSERT_EQ(T(0), lanes[i]);
101     }
102 #else
103     (void)d;
104 #endif
105   }
106 };
107 
TestAllUpperHalf()108 HWY_NOINLINE void TestAllUpperHalf() {
109   ForAllTypes(ForGE128Vectors<TestUpperHalf>());
110 }
111 
112 struct TestZeroExtendVector {
113   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestZeroExtendVector114   HWY_NOINLINE void operator()(T /*unused*/, D d) {
115 #if HWY_CAP_GE256
116     const Twice<D> d2;
117 
118     const auto v = Iota(d, 1);
119     const size_t N2 = Lanes(d2);
120     auto lanes = AllocateAligned<T>(N2);
121     Store(v, d, &lanes[0]);
122     Store(v, d, &lanes[N2 / 2]);
123 
124     const auto ext = ZeroExtendVector(v);
125     Store(ext, d2, lanes.get());
126 
127     size_t i = 0;
128     // Lower half is unchanged
129     for (; i < N2 / 2; ++i) {
130       HWY_ASSERT_EQ(T(1 + i), lanes[i]);
131     }
132     // Upper half is zero
133     for (; i < N2; ++i) {
134       HWY_ASSERT_EQ(T(0), lanes[i]);
135     }
136 #else
137     (void)d;
138 #endif
139   }
140 };
141 
TestAllZeroExtendVector()142 HWY_NOINLINE void TestAllZeroExtendVector() {
143   ForAllTypes(ForExtendableVectors<TestZeroExtendVector>());
144 }
145 
146 struct TestCombine {
147   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestCombine148   HWY_NOINLINE void operator()(T /*unused*/, D d) {
149 #if HWY_CAP_GE256
150     const Twice<D> d2;
151     const size_t N2 = Lanes(d2);
152     auto lanes = AllocateAligned<T>(N2);
153 
154     const auto lo = Iota(d, 1);
155     const auto hi = Iota(d, N2 / 2 + 1);
156     const auto combined = Combine(hi, lo);
157     Store(combined, d2, lanes.get());
158 
159     const auto expected = Iota(d2, 1);
160     HWY_ASSERT_VEC_EQ(d2, expected, combined);
161 #else
162     (void)d;
163 #endif
164   }
165 };
166 
TestAllCombine()167 HWY_NOINLINE void TestAllCombine() {
168   ForAllTypes(ForExtendableVectors<TestCombine>());
169 }
170 
171 
172 template <int kBytes>
173 struct TestCombineShiftRightBytesR {
174   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestCombineShiftRightBytesR175   HWY_NOINLINE void operator()(T t, D d) {
176 // Scalar does not define CombineShiftRightBytes.
177 #if HWY_TARGET != HWY_SCALAR || HWY_IDE
178     const Repartition<uint8_t, D> d8;
179     const size_t N8 = Lanes(d8);
180     const auto lo = BitCast(d, Iota(d8, 1));
181     const auto hi = BitCast(d, Iota(d8, 1 + N8));
182 
183     auto expected = AllocateAligned<T>(Lanes(d));
184     uint8_t* expected_bytes = reinterpret_cast<uint8_t*>(expected.get());
185 
186     const size_t kBlockSize = 16;
187     for (size_t i = 0; i < N8; ++i) {
188       const size_t block = i / kBlockSize;
189       const size_t lane = i % kBlockSize;
190       const size_t first_lo = block * kBlockSize;
191       const size_t idx = lane + kBytes;
192       const size_t offset = (idx < kBlockSize) ? 0 : N8 - kBlockSize;
193       const bool at_end = idx >= 2 * kBlockSize;
194       expected_bytes[i] =
195           at_end ? 0 : static_cast<uint8_t>(first_lo + idx + 1 + offset);
196     }
197     HWY_ASSERT_VEC_EQ(d, expected.get(),
198                       CombineShiftRightBytes<kBytes>(hi, lo));
199 
200     TestCombineShiftRightBytesR<kBytes - 1>()(t, d);
201 #else
202     (void)t;
203     (void)d;
204 #endif  // #if HWY_TARGET != HWY_SCALAR
205   }
206 };
207 
208 template <int kLanes>
209 struct TestCombineShiftRightLanesR {
210   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestCombineShiftRightLanesR211   HWY_NOINLINE void operator()(T t, D d) {
212 // Scalar does not define CombineShiftRightBytes (needed for *Lanes).
213 #if HWY_TARGET != HWY_SCALAR || HWY_IDE
214     const Repartition<uint8_t, D> d8;
215     const size_t N8 = Lanes(d8);
216     const auto lo = BitCast(d, Iota(d8, 1));
217     const auto hi = BitCast(d, Iota(d8, 1 + N8));
218 
219     auto expected = AllocateAligned<T>(Lanes(d));
220 
221     uint8_t* expected_bytes = reinterpret_cast<uint8_t*>(expected.get());
222 
223     const size_t kBlockSize = 16;
224     for (size_t i = 0; i < N8; ++i) {
225       const size_t block = i / kBlockSize;
226       const size_t lane = i % kBlockSize;
227       const size_t first_lo = block * kBlockSize;
228       const size_t idx = lane + kLanes * sizeof(T);
229       const size_t offset = (idx < kBlockSize) ? 0 : N8 - kBlockSize;
230       const bool at_end = idx >= 2 * kBlockSize;
231       expected_bytes[i] =
232           at_end ? 0 : static_cast<uint8_t>(first_lo + idx + 1 + offset);
233     }
234     HWY_ASSERT_VEC_EQ(d, expected.get(),
235                       CombineShiftRightLanes<kLanes>(hi, lo));
236 
237     TestCombineShiftRightBytesR<kLanes - 1>()(t, d);
238 #else
239     (void)t;
240     (void)d;
241 #endif  // #if HWY_TARGET != HWY_SCALAR
242   }
243 };
244 
245 template <>
246 struct TestCombineShiftRightBytesR<0> {
247   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestCombineShiftRightBytesR248   void operator()(T /*unused*/, D /*unused*/) {}
249 };
250 
251 template <>
252 struct TestCombineShiftRightLanesR<0> {
253   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestCombineShiftRightLanesR254   void operator()(T /*unused*/, D /*unused*/) {}
255 };
256 
257 struct TestCombineShiftRight {
258   template <class T, class D>
operator ()hwy::HWY_NAMESPACE::TestCombineShiftRight259   HWY_NOINLINE void operator()(T t, D d) {
260     TestCombineShiftRightBytesR<15>()(t, d);
261     TestCombineShiftRightLanesR<16 / sizeof(T) - 1>()(t, d);
262   }
263 };
264 
TestAllCombineShiftRight()265 HWY_NOINLINE void TestAllCombineShiftRight() {
266   ForAllTypes(ForGE128Vectors<TestCombineShiftRight>());
267 }
268 
269 // NOLINTNEXTLINE(google-readability-namespace-comments)
270 }  // namespace HWY_NAMESPACE
271 }  // namespace hwy
272 HWY_AFTER_NAMESPACE();
273 
// Registers each TestAll* function with the HwyCombineTest suite. Guarded by
// HWY_ONCE, presumably so the registration is emitted a single time even
// though foreach_target.h re-includes this file.
#if HWY_ONCE
namespace hwy {

HWY_BEFORE_TEST(HwyCombineTest);

// Half/quarter extraction.
HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllLowerHalf);
HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllUpperHalf);

// Widening and concatenation.
HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllZeroExtendVector);
HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllCombine);

// Shifts across a hi:lo pair.
HWY_EXPORT_AND_TEST_P(HwyCombineTest, TestAllCombineShiftRight);

}  // namespace hwy
#endif  // HWY_ONCE
284 
285 #else
// Stub entry point for the HWY_RVV target, where these tests are not yet
// implemented: reports success without running anything.
int main(int /*argc*/, char** /*argv*/) {
  return 0;
}
287 #endif  // HWY_TARGET != HWY_RVV
288