1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5
6 #include <string.h>
7
8 #include <cmath>
9 #include <numeric>
10
11 #undef HWY_TARGET_INCLUDE
12 #define HWY_TARGET_INCLUDE "lib/jxl/dct_test.cc"
13 #include <hwy/foreach_target.h>
14 #include <hwy/highway.h>
15 #include <hwy/tests/test_util-inl.h>
16
17 #include "lib/jxl/base/thread_pool_internal.h"
18 #include "lib/jxl/common.h"
19 #include "lib/jxl/dct-inl.h"
20 #include "lib/jxl/dct_for_test.h"
21 #include "lib/jxl/dct_scales.h"
22 #include "lib/jxl/image.h"
23 #include "lib/jxl/test_utils.h"
24
25 HWY_BEFORE_NAMESPACE();
26 namespace jxl {
27 namespace HWY_NAMESPACE {
28
29 // Computes the in-place NxN DCT of block.
30 // Requires that block is HWY_ALIGN'ed.
31 //
32 // Performs ComputeTransposedScaledDCT and then transposes and scales it to
33 // obtain "vanilla" DCT.
34 template <size_t N>
ComputeDCT(float block[N * N])35 void ComputeDCT(float block[N * N]) {
36 HWY_ALIGN float tmp_block[N * N];
37 HWY_ALIGN float scratch_space[N * N];
38 ComputeScaledDCT<N, N>()(DCTFrom(block, N), tmp_block, scratch_space);
39
40 // Untranspose.
41 Transpose<N, N>::Run(DCTFrom(tmp_block, N), DCTTo(block, N));
42 }
43
44 // Computes the in-place 8x8 iDCT of block.
45 // Requires that block is HWY_ALIGN'ed.
46 template <int N>
ComputeIDCT(float block[N * N])47 void ComputeIDCT(float block[N * N]) {
48 HWY_ALIGN float tmp_block[N * N];
49 HWY_ALIGN float scratch_space[N * N];
50 // Untranspose.
51 Transpose<N, N>::Run(DCTFrom(block, N), DCTTo(tmp_block, N));
52
53 ComputeScaledIDCT<N, N>()(tmp_block, DCTTo(block, N), scratch_space);
54 }
55
56 template <size_t N>
TransposeTestT(float accuracy)57 void TransposeTestT(float accuracy) {
58 constexpr size_t kBlockSize = N * N;
59 HWY_ALIGN float src[kBlockSize];
60 DCTTo to_src(src, N);
61 for (size_t y = 0; y < N; ++y) {
62 for (size_t x = 0; x < N; ++x) {
63 to_src.Write(y * N + x, y, x);
64 }
65 }
66 HWY_ALIGN float dst[kBlockSize];
67 Transpose<N, N>::Run(DCTFrom(src, N), DCTTo(dst, N));
68 DCTFrom from_dst(dst, N);
69 for (size_t y = 0; y < N; ++y) {
70 for (size_t x = 0; x < N; ++x) {
71 float expected = x * N + y;
72 float actual = from_dst.Read(y, x);
73 EXPECT_NEAR(expected, actual, accuracy) << "x = " << x << ", y = " << y;
74 }
75 }
76 }
77
TransposeTest()78 void TransposeTest() {
79 TransposeTestT<8>(1e-7f);
80 TransposeTestT<16>(1e-7f);
81 TransposeTestT<32>(1e-7f);
82 }
83
84 template <size_t N>
ColumnDctRoundtripT(float accuracy)85 void ColumnDctRoundtripT(float accuracy) {
86 constexpr size_t kBlockSize = N * N;
87 // Though we are only interested in single column result, dct.h has built-in
88 // limit on minimal number of columns processed. So, to be safe, we do
89 // regular 8x8 block transformation. On the bright side - we could check all
90 // 8 basis vectors at once.
91 HWY_ALIGN float block[kBlockSize];
92 DCTTo to(block, N);
93 DCTFrom from(block, N);
94 for (size_t i = 0; i < N; ++i) {
95 for (size_t j = 0; j < N; ++j) {
96 to.Write((i == j) ? 1.0f : 0.0f, i, j);
97 }
98 }
99
100 // Running (I)DCT on the same memory block seems to trigger a compiler bug on
101 // ARMv7 with clang6.
102 HWY_ALIGN float tmp[kBlockSize];
103 DCTTo to_tmp(tmp, N);
104 DCTFrom from_tmp(tmp, N);
105
106 DCT1D<N, N>()(from, to_tmp);
107 IDCT1D<N, N>()(from_tmp, to);
108
109 for (size_t i = 0; i < N; ++i) {
110 for (size_t j = 0; j < N; ++j) {
111 float expected = (i == j) ? 1.0f : 0.0f;
112 float actual = from.Read(i, j);
113 EXPECT_NEAR(expected, actual, accuracy) << " i=" << i << ", j=" << j;
114 }
115 }
116 }
117
ColumnDctRoundtrip()118 void ColumnDctRoundtrip() {
119 ColumnDctRoundtripT<8>(1e-6f);
120 ColumnDctRoundtripT<16>(1e-6f);
121 ColumnDctRoundtripT<32>(1e-6f);
122 }
123
124 template <size_t N>
TestDctAccuracy(float accuracy,size_t start=0,size_t end=N * N)125 void TestDctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
126 constexpr size_t kBlockSize = N * N;
127 for (size_t i = start; i < end; i++) {
128 HWY_ALIGN float fast[kBlockSize] = {0.0f};
129 double slow[kBlockSize] = {0.0};
130 fast[i] = 1.0;
131 slow[i] = 1.0;
132 DCTSlow<N>(slow);
133 ComputeDCT<N>(fast);
134 for (size_t k = 0; k < kBlockSize; ++k) {
135 EXPECT_NEAR(fast[k], slow[k], accuracy / N)
136 << "i = " << i << ", k = " << k << ", N = " << N;
137 }
138 }
139 }
140
141 template <size_t N>
TestIdctAccuracy(float accuracy,size_t start=0,size_t end=N * N)142 void TestIdctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
143 constexpr size_t kBlockSize = N * N;
144 for (size_t i = start; i < end; i++) {
145 HWY_ALIGN float fast[kBlockSize] = {0.0f};
146 double slow[kBlockSize] = {0.0};
147 fast[i] = 1.0;
148 slow[i] = 1.0;
149 IDCTSlow<N>(slow);
150 ComputeIDCT<N>(fast);
151 for (size_t k = 0; k < kBlockSize; ++k) {
152 EXPECT_NEAR(fast[k], slow[k], accuracy * N)
153 << "i = " << i << ", k = " << k << ", N = " << N;
154 }
155 }
156 }
157
158 template <size_t N>
TestInverseT(float accuracy)159 void TestInverseT(float accuracy) {
160 ThreadPoolInternal pool(N < 32 ? 0 : 8);
161 enum { kBlockSize = N * N };
162 EXPECT_TRUE(RunOnPool(
163 &pool, 0, kBlockSize, ThreadPool::NoInit,
164 [accuracy](const uint32_t task, size_t /*thread*/) {
165 const size_t i = static_cast<size_t>(task);
166 HWY_ALIGN float x[kBlockSize] = {0.0f};
167 x[i] = 1.0;
168
169 ComputeIDCT<N>(x);
170 ComputeDCT<N>(x);
171
172 for (size_t k = 0; k < kBlockSize; ++k) {
173 EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy)
174 << "i = " << i << ", k = " << k;
175 }
176 },
177 "TestInverse"));
178 }
179
InverseTest()180 void InverseTest() {
181 TestInverseT<8>(1e-6f);
182 TestInverseT<16>(1e-6f);
183 TestInverseT<32>(3e-6f);
184 }
185
186 template <size_t N>
TestDctTranspose(float accuracy,size_t start=0,size_t end=N * N)187 void TestDctTranspose(float accuracy, size_t start = 0, size_t end = N * N) {
188 constexpr size_t kBlockSize = N * N;
189 for (size_t i = start; i < end; i++) {
190 for (size_t j = 0; j < kBlockSize; ++j) {
191 // We check that <e_i, Me_j> = <M^\dagger{}e_i, e_j>.
192 // That means (Me_j)_i = (M^\dagger{}e_i)_j
193
194 // x := Me_j
195 HWY_ALIGN float x[kBlockSize] = {0.0f};
196 x[j] = 1.0;
197 ComputeIDCT<N>(x);
198 // y := M^\dagger{}e_i
199 HWY_ALIGN float y[kBlockSize] = {0.0f};
200 y[i] = 1.0;
201 ComputeDCT<N>(y);
202
203 EXPECT_NEAR(x[i] / N, y[j] * N, accuracy) << "i = " << i << ", j = " << j;
204 }
205 }
206 }
207
208 template <size_t N>
TestSlowInverse(float accuracy,size_t start=0,size_t end=N * N)209 void TestSlowInverse(float accuracy, size_t start = 0, size_t end = N * N) {
210 constexpr size_t kBlockSize = N * N;
211 for (size_t i = start; i < end; i++) {
212 double x[kBlockSize] = {0.0f};
213 x[i] = 1.0;
214
215 DCTSlow<N>(x);
216 IDCTSlow<N>(x);
217
218 for (size_t k = 0; k < kBlockSize; ++k) {
219 EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy)
220 << "i = " << i << ", k = " << k;
221 }
222 }
223 }
224
225 template <size_t ROWS, size_t COLS>
TestRectInverseT(float accuracy)226 void TestRectInverseT(float accuracy) {
227 constexpr size_t kBlockSize = ROWS * COLS;
228 for (size_t i = 0; i < kBlockSize; ++i) {
229 HWY_ALIGN float x[kBlockSize] = {0.0f};
230 HWY_ALIGN float out[kBlockSize] = {0.0f};
231 x[i] = 1.0;
232 HWY_ALIGN float coeffs[kBlockSize] = {0.0f};
233 HWY_ALIGN float scratch_space[kBlockSize * 2];
234
235 ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x, COLS), coeffs, scratch_space);
236 ComputeScaledIDCT<ROWS, COLS>()(coeffs, DCTTo(out, COLS), scratch_space);
237
238 for (size_t k = 0; k < kBlockSize; ++k) {
239 EXPECT_NEAR(out[k], (k == i) ? 1.0f : 0.0f, accuracy)
240 << "i = " << i << ", k = " << k << " ROWS = " << ROWS
241 << " COLS = " << COLS;
242 }
243 }
244 }
245
TestRectInverse()246 void TestRectInverse() {
247 TestRectInverseT<16, 32>(1e-6f);
248 TestRectInverseT<8, 32>(1e-6f);
249 TestRectInverseT<8, 16>(1e-6f);
250 TestRectInverseT<4, 8>(1e-6f);
251 TestRectInverseT<2, 4>(1e-6f);
252 TestRectInverseT<1, 4>(1e-6f);
253 TestRectInverseT<1, 2>(1e-6f);
254
255 TestRectInverseT<32, 16>(1e-6f);
256 TestRectInverseT<32, 8>(1e-6f);
257 TestRectInverseT<16, 8>(1e-6f);
258 TestRectInverseT<8, 4>(1e-6f);
259 TestRectInverseT<4, 2>(1e-6f);
260 TestRectInverseT<4, 1>(1e-6f);
261 TestRectInverseT<2, 1>(1e-6f);
262 }
263
264 template <size_t ROWS, size_t COLS>
TestRectTransposeT(float accuracy)265 void TestRectTransposeT(float accuracy) {
266 constexpr size_t kBlockSize = ROWS * COLS;
267 HWY_ALIGN float scratch_space[kBlockSize * 2];
268 for (size_t px = 0; px < COLS; ++px) {
269 for (size_t py = 0; py < ROWS; ++py) {
270 HWY_ALIGN float x1[kBlockSize] = {0.0f};
271 HWY_ALIGN float x2[kBlockSize] = {0.0f};
272 HWY_ALIGN float coeffs1[kBlockSize] = {0.0f};
273 HWY_ALIGN float coeffs2[kBlockSize] = {0.0f};
274 x1[py * COLS + px] = 1;
275 x2[px * ROWS + py] = 1;
276
277 constexpr size_t OUT_ROWS = ROWS < COLS ? ROWS : COLS;
278 constexpr size_t OUT_COLS = ROWS < COLS ? COLS : ROWS;
279
280 ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x1, COLS), coeffs1, scratch_space);
281 ComputeScaledDCT<COLS, ROWS>()(DCTFrom(x2, ROWS), coeffs2, scratch_space);
282
283 for (size_t x = 0; x < OUT_COLS; ++x) {
284 for (size_t y = 0; y < OUT_ROWS; ++y) {
285 EXPECT_NEAR(coeffs1[y * OUT_COLS + x], coeffs2[y * OUT_COLS + x],
286 accuracy)
287 << " px = " << px << ", py = " << py << ", x = " << x
288 << ", y = " << y;
289 }
290 }
291 }
292 }
293 }
294
TestRectTranspose()295 void TestRectTranspose() {
296 TestRectTransposeT<16, 32>(1e-6f);
297 TestRectTransposeT<8, 32>(1e-6f);
298 TestRectTransposeT<8, 16>(1e-6f);
299 TestRectTransposeT<4, 8>(1e-6f);
300 TestRectTransposeT<2, 4>(1e-6f);
301 TestRectTransposeT<1, 4>(1e-6f);
302 TestRectTransposeT<1, 2>(1e-6f);
303
304 // Identical to 8, 16
305 // TestRectTranspose<16, 8>(1e-6f);
306 }
307
TestDctAccuracyShard(size_t shard)308 void TestDctAccuracyShard(size_t shard) {
309 if (shard == 0) {
310 TestDctAccuracy<1>(1.1E-7f);
311 TestDctAccuracy<2>(1.1E-7f);
312 TestDctAccuracy<4>(1.1E-7f);
313 TestDctAccuracy<8>(1.1E-7f);
314 TestDctAccuracy<16>(1.3E-7f);
315 }
316 TestDctAccuracy<32>(1.1E-7f, 32 * shard, 32 * (shard + 1));
317 }
318
TestIdctAccuracyShard(size_t shard)319 void TestIdctAccuracyShard(size_t shard) {
320 if (shard == 0) {
321 TestIdctAccuracy<1>(1E-7f);
322 TestIdctAccuracy<2>(1E-7f);
323 TestIdctAccuracy<4>(1E-7f);
324 TestIdctAccuracy<8>(1E-7f);
325 TestIdctAccuracy<16>(1E-7f);
326 }
327 TestIdctAccuracy<32>(1E-7f, 32 * shard, 32 * (shard + 1));
328 }
329
TestDctTransposeShard(size_t shard)330 void TestDctTransposeShard(size_t shard) {
331 if (shard == 0) {
332 TestDctTranspose<8>(1E-6f);
333 TestDctTranspose<16>(1E-6f);
334 }
335 TestDctTranspose<32>(3E-6f, 32 * shard, 32 * (shard + 1));
336 }
337
TestSlowInverseShard(size_t shard)338 void TestSlowInverseShard(size_t shard) {
339 if (shard == 0) {
340 TestSlowInverse<1>(1E-5f);
341 TestSlowInverse<2>(1E-5f);
342 TestSlowInverse<4>(1E-5f);
343 TestSlowInverse<8>(1E-5f);
344 TestSlowInverse<16>(1E-5f);
345 }
346 TestSlowInverse<32>(1E-5f, 32 * shard, 32 * (shard + 1));
347 }
348
349 // NOLINTNEXTLINE(google-readability-namespace-comments)
350 } // namespace HWY_NAMESPACE
351 } // namespace jxl
352 HWY_AFTER_NAMESPACE();
353
354 #if HWY_ONCE
355 namespace jxl {
356
357 class TransposeTest : public hwy::TestWithParamTarget {};
358
359 HWY_TARGET_INSTANTIATE_TEST_SUITE_P(TransposeTest);
360
361 HWY_EXPORT_AND_TEST_P(TransposeTest, TransposeTest);
362 HWY_EXPORT_AND_TEST_P(TransposeTest, InverseTest);
363 HWY_EXPORT_AND_TEST_P(TransposeTest, ColumnDctRoundtrip);
364 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectInverse);
365 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectTranspose);
366
367 // Tests in the DctShardedTest class are sharded for N=32.
368 class DctShardedTest : public ::hwy::TestWithParamTargetAndT<uint32_t> {};
369
ShardRange(uint32_t n)370 std::vector<uint32_t> ShardRange(uint32_t n) {
371 #ifdef JXL_DISABLE_SLOW_TESTS
372 JXL_ASSERT(n > 6);
373 std::vector<uint32_t> ret = {0, 1, 3, 5, n - 1};
374 #else
375 std::vector<uint32_t> ret(n);
376 std::iota(ret.begin(), ret.end(), 0);
377 #endif // JXL_DISABLE_SLOW_TESTS
378 return ret;
379 }
380
381 HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(DctShardedTest,
382 ::testing::ValuesIn(ShardRange(32)));
383
384 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctAccuracyShard);
385 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestIdctAccuracyShard);
386 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctTransposeShard);
387 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestSlowInverseShard);
388
389 } // namespace jxl
390 #endif // HWY_ONCE
391