1 // Copyright (c) the JPEG XL Project Authors. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5 
6 #include <string.h>
7 
8 #include <cmath>
9 #include <numeric>
10 
11 #undef HWY_TARGET_INCLUDE
12 #define HWY_TARGET_INCLUDE "lib/jxl/dct_test.cc"
13 #include <hwy/foreach_target.h>
14 #include <hwy/highway.h>
15 #include <hwy/tests/test_util-inl.h>
16 
17 #include "lib/jxl/base/thread_pool_internal.h"
18 #include "lib/jxl/common.h"
19 #include "lib/jxl/dct-inl.h"
20 #include "lib/jxl/dct_for_test.h"
21 #include "lib/jxl/dct_scales.h"
22 #include "lib/jxl/image.h"
23 #include "lib/jxl/test_utils.h"
24 
25 HWY_BEFORE_NAMESPACE();
26 namespace jxl {
27 namespace HWY_NAMESPACE {
28 
29 // Computes the in-place NxN DCT of block.
30 // Requires that block is HWY_ALIGN'ed.
31 //
32 // Performs ComputeTransposedScaledDCT and then transposes and scales it to
33 // obtain "vanilla" DCT.
34 template <size_t N>
ComputeDCT(float block[N * N])35 void ComputeDCT(float block[N * N]) {
36   HWY_ALIGN float tmp_block[N * N];
37   HWY_ALIGN float scratch_space[N * N];
38   ComputeScaledDCT<N, N>()(DCTFrom(block, N), tmp_block, scratch_space);
39 
40   // Untranspose.
41   Transpose<N, N>::Run(DCTFrom(tmp_block, N), DCTTo(block, N));
42 }
43 
44 // Computes the in-place 8x8 iDCT of block.
45 // Requires that block is HWY_ALIGN'ed.
46 template <int N>
ComputeIDCT(float block[N * N])47 void ComputeIDCT(float block[N * N]) {
48   HWY_ALIGN float tmp_block[N * N];
49   HWY_ALIGN float scratch_space[N * N];
50   // Untranspose.
51   Transpose<N, N>::Run(DCTFrom(block, N), DCTTo(tmp_block, N));
52 
53   ComputeScaledIDCT<N, N>()(tmp_block, DCTTo(block, N), scratch_space);
54 }
55 
56 template <size_t N>
TransposeTestT(float accuracy)57 void TransposeTestT(float accuracy) {
58   constexpr size_t kBlockSize = N * N;
59   HWY_ALIGN float src[kBlockSize];
60   DCTTo to_src(src, N);
61   for (size_t y = 0; y < N; ++y) {
62     for (size_t x = 0; x < N; ++x) {
63       to_src.Write(y * N + x, y, x);
64     }
65   }
66   HWY_ALIGN float dst[kBlockSize];
67   Transpose<N, N>::Run(DCTFrom(src, N), DCTTo(dst, N));
68   DCTFrom from_dst(dst, N);
69   for (size_t y = 0; y < N; ++y) {
70     for (size_t x = 0; x < N; ++x) {
71       float expected = x * N + y;
72       float actual = from_dst.Read(y, x);
73       EXPECT_NEAR(expected, actual, accuracy) << "x = " << x << ", y = " << y;
74     }
75   }
76 }
77 
TransposeTest()78 void TransposeTest() {
79   TransposeTestT<8>(1e-7f);
80   TransposeTestT<16>(1e-7f);
81   TransposeTestT<32>(1e-7f);
82 }
83 
84 template <size_t N>
ColumnDctRoundtripT(float accuracy)85 void ColumnDctRoundtripT(float accuracy) {
86   constexpr size_t kBlockSize = N * N;
87   // Though we are only interested in single column result, dct.h has built-in
88   // limit on minimal number of columns processed. So, to be safe, we do
89   // regular 8x8 block transformation. On the bright side - we could check all
90   // 8 basis vectors at once.
91   HWY_ALIGN float block[kBlockSize];
92   DCTTo to(block, N);
93   DCTFrom from(block, N);
94   for (size_t i = 0; i < N; ++i) {
95     for (size_t j = 0; j < N; ++j) {
96       to.Write((i == j) ? 1.0f : 0.0f, i, j);
97     }
98   }
99 
100   // Running (I)DCT on the same memory block seems to trigger a compiler bug on
101   // ARMv7 with clang6.
102   HWY_ALIGN float tmp[kBlockSize];
103   DCTTo to_tmp(tmp, N);
104   DCTFrom from_tmp(tmp, N);
105 
106   DCT1D<N, N>()(from, to_tmp);
107   IDCT1D<N, N>()(from_tmp, to);
108 
109   for (size_t i = 0; i < N; ++i) {
110     for (size_t j = 0; j < N; ++j) {
111       float expected = (i == j) ? 1.0f : 0.0f;
112       float actual = from.Read(i, j);
113       EXPECT_NEAR(expected, actual, accuracy) << " i=" << i << ", j=" << j;
114     }
115   }
116 }
117 
ColumnDctRoundtrip()118 void ColumnDctRoundtrip() {
119   ColumnDctRoundtripT<8>(1e-6f);
120   ColumnDctRoundtripT<16>(1e-6f);
121   ColumnDctRoundtripT<32>(1e-6f);
122 }
123 
124 template <size_t N>
TestDctAccuracy(float accuracy,size_t start=0,size_t end=N * N)125 void TestDctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
126   constexpr size_t kBlockSize = N * N;
127   for (size_t i = start; i < end; i++) {
128     HWY_ALIGN float fast[kBlockSize] = {0.0f};
129     double slow[kBlockSize] = {0.0};
130     fast[i] = 1.0;
131     slow[i] = 1.0;
132     DCTSlow<N>(slow);
133     ComputeDCT<N>(fast);
134     for (size_t k = 0; k < kBlockSize; ++k) {
135       EXPECT_NEAR(fast[k], slow[k], accuracy / N)
136           << "i = " << i << ", k = " << k << ", N = " << N;
137     }
138   }
139 }
140 
141 template <size_t N>
TestIdctAccuracy(float accuracy,size_t start=0,size_t end=N * N)142 void TestIdctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
143   constexpr size_t kBlockSize = N * N;
144   for (size_t i = start; i < end; i++) {
145     HWY_ALIGN float fast[kBlockSize] = {0.0f};
146     double slow[kBlockSize] = {0.0};
147     fast[i] = 1.0;
148     slow[i] = 1.0;
149     IDCTSlow<N>(slow);
150     ComputeIDCT<N>(fast);
151     for (size_t k = 0; k < kBlockSize; ++k) {
152       EXPECT_NEAR(fast[k], slow[k], accuracy * N)
153           << "i = " << i << ", k = " << k << ", N = " << N;
154     }
155   }
156 }
157 
158 template <size_t N>
TestInverseT(float accuracy)159 void TestInverseT(float accuracy) {
160   ThreadPoolInternal pool(N < 32 ? 0 : 8);
161   enum { kBlockSize = N * N };
162   EXPECT_TRUE(RunOnPool(
163       &pool, 0, kBlockSize, ThreadPool::NoInit,
164       [accuracy](const uint32_t task, size_t /*thread*/) {
165         const size_t i = static_cast<size_t>(task);
166         HWY_ALIGN float x[kBlockSize] = {0.0f};
167         x[i] = 1.0;
168 
169         ComputeIDCT<N>(x);
170         ComputeDCT<N>(x);
171 
172         for (size_t k = 0; k < kBlockSize; ++k) {
173           EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy)
174               << "i = " << i << ", k = " << k;
175         }
176       },
177       "TestInverse"));
178 }
179 
InverseTest()180 void InverseTest() {
181   TestInverseT<8>(1e-6f);
182   TestInverseT<16>(1e-6f);
183   TestInverseT<32>(3e-6f);
184 }
185 
186 template <size_t N>
TestDctTranspose(float accuracy,size_t start=0,size_t end=N * N)187 void TestDctTranspose(float accuracy, size_t start = 0, size_t end = N * N) {
188   constexpr size_t kBlockSize = N * N;
189   for (size_t i = start; i < end; i++) {
190     for (size_t j = 0; j < kBlockSize; ++j) {
191       // We check that <e_i, Me_j> = <M^\dagger{}e_i, e_j>.
192       // That means (Me_j)_i = (M^\dagger{}e_i)_j
193 
194       // x := Me_j
195       HWY_ALIGN float x[kBlockSize] = {0.0f};
196       x[j] = 1.0;
197       ComputeIDCT<N>(x);
198       // y := M^\dagger{}e_i
199       HWY_ALIGN float y[kBlockSize] = {0.0f};
200       y[i] = 1.0;
201       ComputeDCT<N>(y);
202 
203       EXPECT_NEAR(x[i] / N, y[j] * N, accuracy) << "i = " << i << ", j = " << j;
204     }
205   }
206 }
207 
208 template <size_t N>
TestSlowInverse(float accuracy,size_t start=0,size_t end=N * N)209 void TestSlowInverse(float accuracy, size_t start = 0, size_t end = N * N) {
210   constexpr size_t kBlockSize = N * N;
211   for (size_t i = start; i < end; i++) {
212     double x[kBlockSize] = {0.0f};
213     x[i] = 1.0;
214 
215     DCTSlow<N>(x);
216     IDCTSlow<N>(x);
217 
218     for (size_t k = 0; k < kBlockSize; ++k) {
219       EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy)
220           << "i = " << i << ", k = " << k;
221     }
222   }
223 }
224 
225 template <size_t ROWS, size_t COLS>
TestRectInverseT(float accuracy)226 void TestRectInverseT(float accuracy) {
227   constexpr size_t kBlockSize = ROWS * COLS;
228   for (size_t i = 0; i < kBlockSize; ++i) {
229     HWY_ALIGN float x[kBlockSize] = {0.0f};
230     HWY_ALIGN float out[kBlockSize] = {0.0f};
231     x[i] = 1.0;
232     HWY_ALIGN float coeffs[kBlockSize] = {0.0f};
233     HWY_ALIGN float scratch_space[kBlockSize * 2];
234 
235     ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x, COLS), coeffs, scratch_space);
236     ComputeScaledIDCT<ROWS, COLS>()(coeffs, DCTTo(out, COLS), scratch_space);
237 
238     for (size_t k = 0; k < kBlockSize; ++k) {
239       EXPECT_NEAR(out[k], (k == i) ? 1.0f : 0.0f, accuracy)
240           << "i = " << i << ", k = " << k << " ROWS = " << ROWS
241           << " COLS = " << COLS;
242     }
243   }
244 }
245 
TestRectInverse()246 void TestRectInverse() {
247   TestRectInverseT<16, 32>(1e-6f);
248   TestRectInverseT<8, 32>(1e-6f);
249   TestRectInverseT<8, 16>(1e-6f);
250   TestRectInverseT<4, 8>(1e-6f);
251   TestRectInverseT<2, 4>(1e-6f);
252   TestRectInverseT<1, 4>(1e-6f);
253   TestRectInverseT<1, 2>(1e-6f);
254 
255   TestRectInverseT<32, 16>(1e-6f);
256   TestRectInverseT<32, 8>(1e-6f);
257   TestRectInverseT<16, 8>(1e-6f);
258   TestRectInverseT<8, 4>(1e-6f);
259   TestRectInverseT<4, 2>(1e-6f);
260   TestRectInverseT<4, 1>(1e-6f);
261   TestRectInverseT<2, 1>(1e-6f);
262 }
263 
264 template <size_t ROWS, size_t COLS>
TestRectTransposeT(float accuracy)265 void TestRectTransposeT(float accuracy) {
266   constexpr size_t kBlockSize = ROWS * COLS;
267   HWY_ALIGN float scratch_space[kBlockSize * 2];
268   for (size_t px = 0; px < COLS; ++px) {
269     for (size_t py = 0; py < ROWS; ++py) {
270       HWY_ALIGN float x1[kBlockSize] = {0.0f};
271       HWY_ALIGN float x2[kBlockSize] = {0.0f};
272       HWY_ALIGN float coeffs1[kBlockSize] = {0.0f};
273       HWY_ALIGN float coeffs2[kBlockSize] = {0.0f};
274       x1[py * COLS + px] = 1;
275       x2[px * ROWS + py] = 1;
276 
277       constexpr size_t OUT_ROWS = ROWS < COLS ? ROWS : COLS;
278       constexpr size_t OUT_COLS = ROWS < COLS ? COLS : ROWS;
279 
280       ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x1, COLS), coeffs1, scratch_space);
281       ComputeScaledDCT<COLS, ROWS>()(DCTFrom(x2, ROWS), coeffs2, scratch_space);
282 
283       for (size_t x = 0; x < OUT_COLS; ++x) {
284         for (size_t y = 0; y < OUT_ROWS; ++y) {
285           EXPECT_NEAR(coeffs1[y * OUT_COLS + x], coeffs2[y * OUT_COLS + x],
286                       accuracy)
287               << " px = " << px << ", py = " << py << ", x = " << x
288               << ", y = " << y;
289         }
290       }
291     }
292   }
293 }
294 
TestRectTranspose()295 void TestRectTranspose() {
296   TestRectTransposeT<16, 32>(1e-6f);
297   TestRectTransposeT<8, 32>(1e-6f);
298   TestRectTransposeT<8, 16>(1e-6f);
299   TestRectTransposeT<4, 8>(1e-6f);
300   TestRectTransposeT<2, 4>(1e-6f);
301   TestRectTransposeT<1, 4>(1e-6f);
302   TestRectTransposeT<1, 2>(1e-6f);
303 
304   // Identical to 8, 16
305   //  TestRectTranspose<16, 8>(1e-6f);
306 }
307 
TestDctAccuracyShard(size_t shard)308 void TestDctAccuracyShard(size_t shard) {
309   if (shard == 0) {
310     TestDctAccuracy<1>(1.1E-7f);
311     TestDctAccuracy<2>(1.1E-7f);
312     TestDctAccuracy<4>(1.1E-7f);
313     TestDctAccuracy<8>(1.1E-7f);
314     TestDctAccuracy<16>(1.3E-7f);
315   }
316   TestDctAccuracy<32>(1.1E-7f, 32 * shard, 32 * (shard + 1));
317 }
318 
TestIdctAccuracyShard(size_t shard)319 void TestIdctAccuracyShard(size_t shard) {
320   if (shard == 0) {
321     TestIdctAccuracy<1>(1E-7f);
322     TestIdctAccuracy<2>(1E-7f);
323     TestIdctAccuracy<4>(1E-7f);
324     TestIdctAccuracy<8>(1E-7f);
325     TestIdctAccuracy<16>(1E-7f);
326   }
327   TestIdctAccuracy<32>(1E-7f, 32 * shard, 32 * (shard + 1));
328 }
329 
TestDctTransposeShard(size_t shard)330 void TestDctTransposeShard(size_t shard) {
331   if (shard == 0) {
332     TestDctTranspose<8>(1E-6f);
333     TestDctTranspose<16>(1E-6f);
334   }
335   TestDctTranspose<32>(3E-6f, 32 * shard, 32 * (shard + 1));
336 }
337 
TestSlowInverseShard(size_t shard)338 void TestSlowInverseShard(size_t shard) {
339   if (shard == 0) {
340     TestSlowInverse<1>(1E-5f);
341     TestSlowInverse<2>(1E-5f);
342     TestSlowInverse<4>(1E-5f);
343     TestSlowInverse<8>(1E-5f);
344     TestSlowInverse<16>(1E-5f);
345   }
346   TestSlowInverse<32>(1E-5f, 32 * shard, 32 * (shard + 1));
347 }
348 
349 // NOLINTNEXTLINE(google-readability-namespace-comments)
350 }  // namespace HWY_NAMESPACE
351 }  // namespace jxl
352 HWY_AFTER_NAMESPACE();
353 
354 #if HWY_ONCE
355 namespace jxl {
356 
357 class TransposeTest : public hwy::TestWithParamTarget {};
358 
359 HWY_TARGET_INSTANTIATE_TEST_SUITE_P(TransposeTest);
360 
361 HWY_EXPORT_AND_TEST_P(TransposeTest, TransposeTest);
362 HWY_EXPORT_AND_TEST_P(TransposeTest, InverseTest);
363 HWY_EXPORT_AND_TEST_P(TransposeTest, ColumnDctRoundtrip);
364 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectInverse);
365 HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectTranspose);
366 
367 // Tests in the DctShardedTest class are sharded for N=32.
368 class DctShardedTest : public ::hwy::TestWithParamTargetAndT<uint32_t> {};
369 
ShardRange(uint32_t n)370 std::vector<uint32_t> ShardRange(uint32_t n) {
371 #ifdef JXL_DISABLE_SLOW_TESTS
372   JXL_ASSERT(n > 6);
373   std::vector<uint32_t> ret = {0, 1, 3, 5, n - 1};
374 #else
375   std::vector<uint32_t> ret(n);
376   std::iota(ret.begin(), ret.end(), 0);
377 #endif  // JXL_DISABLE_SLOW_TESTS
378   return ret;
379 }
380 
381 HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(DctShardedTest,
382                                       ::testing::ValuesIn(ShardRange(32)));
383 
384 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctAccuracyShard);
385 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestIdctAccuracyShard);
386 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctTransposeShard);
387 HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestSlowInverseShard);
388 
389 }  // namespace jxl
390 #endif  // HWY_ONCE
391