1 // Copyright 2021 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/warp.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cmath>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstdio>
23 #include <cstdlib>
24 #include <ostream>
25 #include <string>
26 #include <type_traits>
27
28 #include "absl/base/macros.h"
29 #include "absl/strings/match.h"
30 #include "absl/strings/str_format.h"
31 #include "absl/strings/string_view.h"
32 #include "absl/time/clock.h"
33 #include "absl/time/time.h"
34 #include "gtest/gtest.h"
35 #include "src/dsp/constants.h"
36 #include "src/dsp/dsp.h"
37 #include "src/post_filter.h"
38 #include "src/utils/common.h"
39 #include "src/utils/constants.h"
40 #include "src/utils/cpu.h"
41 #include "src/utils/memory.h"
42 #include "tests/block_utils.h"
43 #include "tests/third_party/libvpx/acm_random.h"
44 #include "tests/utils.h"
45
46 namespace libgav1 {
47 namespace dsp {
48 namespace {
49
50 constexpr int kSourceBorderHorizontal = 16;
51 constexpr int kSourceBorderVertical = 13;
52
53 constexpr int kMaxSourceBlockWidth =
54 kMaxSuperBlockSizeInPixels + kSourceBorderHorizontal * 2;
55 constexpr int kMaxSourceBlockHeight =
56 kMaxSuperBlockSizeInPixels + kSourceBorderVertical * 2;
57 constexpr int kMaxDestBlockWidth =
58 kMaxSuperBlockSizeInPixels + kConvolveBorderLeftTop * 2;
59 constexpr int kMaxDestBlockHeight =
60 kMaxSuperBlockSizeInPixels + kConvolveBorderLeftTop * 2;
61
// Lookup table read by GenerateApproximateDivisor() to obtain the approximate
// division factor. The entries decrease monotonically from 16384 (index 0) to
// 8192 (index 256).
constexpr uint16_t kDivisorLookup[257] = {
    16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
    15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
    15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
    14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
    13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
    13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
    13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
    12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
    12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
    11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
    11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
    11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
    10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
    10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
    10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
    9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
    9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
    9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
    9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
    9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
    8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
    8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
    8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
    8240,  8224,  8208,  8192};
87
// Returns the expected MD5 digest of the 8bpp destination buffers for the
// block size identified by |id| (see the id mapping in WarpTest::Test()).
// |is_compound| selects the table for the compound warp function.
// |id| must be a valid index into the tables (checked in debug builds).
template <bool is_compound>
const char* GetDigest8bpp(int id) {
  static const char* const kDigest[] = {
      "77ba358a0f5e19a8e69fa0a95712578e", "141b23d13a04e0b84d26d514de76d6b0",
      "b0265858454b979852ffadae323f0fb7", "9cf38e3579265b656f1f2100ba15b0e9",
      "ab51d05cc255ef8e37921182df1d89b1", "e3e96f90a4b07ca733e40f057dc01c41",
      "4eee8c1a52a62a266db9b1c9338e124c", "901a87d8f88f6324dbc0960a6de861ac",
      "da9cb6faf6adaeeae12b6784f39186c5", "14450ab05536cdb0d2f499716ccb559d",
      "566b396cbf008bbb869b364fdc81860d", "681a872baf2de4e58d73ea9ab8643a72",
      "7f17d290d513a7416761b3a01f10fd2f",
  };
  static const char* const kCompoundDigest[] = {
      "7e9339d265b7beac7bbe32fe7bb0fccb", "f747d663b427bb38a3ff36b0815a394c",
      "858cf54d2253281a919fbdb48fe91c53", "4721dd97a212c6068bd488f400259afc",
      "36878c7906492bc740112abdea77616f", "89deb68aa35764bbf3024b501a6bed50",
      "8ac5b08f9b2afd38143c357646af0f82", "bf6e2a64835ea0c9d7467394253d0eb2",
      "7b0a539acd2a27eff398dd084abad933", "61c8d81b397c1cf727ff8a9fabab90af",
      "4d412349a25a832c1fb3fb29e3f0e2b3", "2c6dd2a9a4ede9fa00adb567ba646f30",
      "b2a0ce68db3cadd207299f73112bed74",
  };
  static_assert(sizeof(kDigest) == sizeof(kCompoundDigest),
                "Both digest tables must cover the same block sizes.");
  assert(id >= 0 &&
         static_cast<size_t>(id) < sizeof(kDigest) / sizeof(kDigest[0]));
  return is_compound ? kCompoundDigest[id] : kDigest[id];
}
110
111 #if LIBGAV1_MAX_BITDEPTH >= 10
// Returns the expected MD5 digest of the 10bpp destination buffers for the
// block size identified by |id| (see the id mapping in WarpTest::Test()).
// |is_compound| selects the table for the compound warp function.
// |id| must be a valid index into the tables (checked in debug builds).
template <bool is_compound>
const char* GetDigest10bpp(int id) {
  static const char* const kDigest[] = {
      "1fef54f56a0bafccf7f8da1ac3b18b76", "8a65c72f171feafa2f393d31d6b7fe1b",
      "808019346f2f1f45f8cf2e9fc9a49320", "c28e2f2c6c830a29bcc2452166cba521",
      "f040674d6f54e8910d655f0d11fd8cdd", "473af9bb1c6023965c2284b716feef97",
      "e4f6d7babd0813d5afb0f575ebfa8166", "58f96ef8a880963a213624bb0d06d47c",
      "1ec0995fa4490628b679d03683233388", "9526fb102fde7dc1a7e160e65af6da33",
      "f0457427d0c0e31d82ea4f612f7f86f1", "ddc82ae298cccebad493ba9de0f69fbd",
      "5ed615091e2f62df26de7e91a985cb81",
  };
  static const char* const kCompoundDigest[] = {
      "8e6986ae143260e0b8b4887f15a141a1", "0a7f0db8316b8c3569f08834dd0c6f50",
      "90705b2e7dbe083e8a1f70f29d6f257e", "e428a75bea77d769d21f3f7a1d2b0b38",
      "a570b13d790c085c4ab50d71dd085d56", "e5d043c6cd6ff6dbab6e38a8877e93bd",
      "12ea96991e46e3e9aa78ab812ffa0525", "84293a94a53f1cf814fa25e793c3fe27",
      "b98a7502c84ac8437266f702dcc0a92e", "d8db5d52e9b0a5be0ad2d517d5bd16e9",
      "f3be504bbb609ce4cc71c5539252638a", "fcde83b54e14e9de23460644f244b047",
      "42eb66e752e9ef289b47053b5c73fdd6",
  };
  static_assert(sizeof(kDigest) == sizeof(kCompoundDigest),
                "Both digest tables must cover the same block sizes.");
  assert(id >= 0 &&
         static_cast<size_t>(id) < sizeof(kDigest) / sizeof(kDigest[0]));
  return is_compound ? kCompoundDigest[id] : kDigest[id];
}
134 #endif
135
RandomWarpedParam(int seed_offset,int bits)136 int RandomWarpedParam(int seed_offset, int bits) {
137 libvpx_test::ACMRandom rnd(seed_offset +
138 libvpx_test::ACMRandom::DeterministicSeed());
139 // 1 in 8 chance of generating zero (arbitrary).
140 const bool zero = (rnd.Rand16() & 7) == 0;
141 if (zero) return 0;
142 // Generate uniform values in the range [-(1 << bits), 1] U [1, 1 <<
143 // bits].
144 const int mask = (1 << bits) - 1;
145 const int value = 1 + (rnd.RandRange(1u << 31) & mask);
146 const bool sign = (rnd.Rand16() & 1) != 0;
147 return sign ? value : -value;
148 }
149
150 // This function is a copy from warp_prediction.cc.
151 template <typename T>
GenerateApproximateDivisor(T value,int16_t * division_factor,int16_t * division_shift)152 void GenerateApproximateDivisor(T value, int16_t* division_factor,
153 int16_t* division_shift) {
154 const int n = FloorLog2(std::abs(value));
155 const T e = std::abs(value) - (static_cast<T>(1) << n);
156 const int entry = (n > kDivisorLookupBits)
157 ? RightShiftWithRounding(e, n - kDivisorLookupBits)
158 : static_cast<int>(e << (kDivisorLookupBits - n));
159 *division_shift = n + kDivisorLookupPrecisionBits;
160 *division_factor =
161 (value < 0) ? -kDivisorLookup[entry] : kDivisorLookup[entry];
162 }
163
164 // This function is a copy from warp_prediction.cc.
GetShearParameter(int value)165 int16_t GetShearParameter(int value) {
166 return static_cast<int16_t>(
167 LeftShift(RightShiftWithRoundingSigned(value, kWarpParamRoundingBits),
168 kWarpParamRoundingBits));
169 }
170
171 // This function is a copy from warp_prediction.cc.
172 // This function is used here to help generate valid warp parameters.
SetupShear(const int * params,int16_t * alpha,int16_t * beta,int16_t * gamma,int16_t * delta)173 bool SetupShear(const int* params, int16_t* alpha, int16_t* beta,
174 int16_t* gamma, int16_t* delta) {
175 int16_t division_shift;
176 int16_t division_factor;
177 GenerateApproximateDivisor<int32_t>(params[2], &division_factor,
178 &division_shift);
179 const int alpha0 =
180 Clip3(params[2] - (1 << kWarpedModelPrecisionBits), INT16_MIN, INT16_MAX);
181 const int beta0 = Clip3(params[3], INT16_MIN, INT16_MAX);
182 const int64_t v = LeftShift(params[4], kWarpedModelPrecisionBits);
183 const int gamma0 =
184 Clip3(RightShiftWithRoundingSigned(v * division_factor, division_shift),
185 INT16_MIN, INT16_MAX);
186 const int64_t w = static_cast<int64_t>(params[3]) * params[4];
187 const int delta0 = Clip3(
188 params[5] -
189 RightShiftWithRoundingSigned(w * division_factor, division_shift) -
190 (1 << kWarpedModelPrecisionBits),
191 INT16_MIN, INT16_MAX);
192
193 *alpha = GetShearParameter(alpha0);
194 *beta = GetShearParameter(beta0);
195 *gamma = GetShearParameter(gamma0);
196 *delta = GetShearParameter(delta0);
197 if ((4 * std::abs(*alpha) + 7 * std::abs(*beta) >=
198 (1 << kWarpedModelPrecisionBits)) ||
199 (4 * std::abs(*gamma) + 4 * std::abs(*delta) >=
200 (1 << kWarpedModelPrecisionBits))) {
201 return false; // NOLINT (easier condition to understand).
202 }
203
204 return true;
205 }
206
GenerateWarpedModel(int * params,int16_t * alpha,int16_t * beta,int16_t * gamma,int16_t * delta,int seed)207 void GenerateWarpedModel(int* params, int16_t* alpha, int16_t* beta,
208 int16_t* gamma, int16_t* delta, int seed) {
209 do {
210 params[0] = RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6);
211 params[1] = RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6);
212 params[2] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) +
213 (1 << kWarpedModelPrecisionBits);
214 params[3] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3);
215 params[4] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3);
216 params[5] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) +
217 (1 << kWarpedModelPrecisionBits);
218 ++seed;
219 } while (params[2] == 0 || !SetupShear(params, alpha, beta, gamma, delta));
220 }
221
// Test parameter: the dimensions, in pixels, of the block to warp.
struct WarpTestParam {
  WarpTestParam(int width, int height) : width(width), height(height) {}
  int width;
  int height;
};
227
228 template <bool is_compound, int bitdepth, typename Pixel>
229 class WarpTest : public testing::TestWithParam<WarpTestParam> {
230 public:
231 WarpTest() = default;
232 ~WarpTest() override = default;
233
SetUp()234 void SetUp() override {
235 test_utils::ResetDspTable(bitdepth);
236 WarpInit_C();
237 const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
238 ASSERT_NE(dsp, nullptr);
239 const testing::TestInfo* const test_info =
240 testing::UnitTest::GetInstance()->current_test_info();
241 const absl::string_view test_case = test_info->test_suite_name();
242 if (absl::StartsWith(test_case, "C/")) {
243 } else if (absl::StartsWith(test_case, "NEON/")) {
244 WarpInit_NEON();
245 } else if (absl::StartsWith(test_case, "SSE41/")) {
246 WarpInit_SSE4_1();
247 } else {
248 FAIL() << "Unrecognized architecture prefix in test case name: "
249 << test_case;
250 }
251 func_ = is_compound ? dsp->warp_compound : dsp->warp;
252 }
253
254 protected:
255 using DestType =
256 typename std::conditional<is_compound, uint16_t, Pixel>::type;
257
258 void SetInputData(bool use_fixed_values, int value);
259 void Test(bool use_fixed_values, int value, int num_runs = 1);
260 void TestFixedValues();
261 void TestRandomValues();
262 void TestSpeed();
263
264 const WarpTestParam param_ = GetParam();
265
266 private:
267 int warp_params_[8];
268 dsp::WarpFunc func_;
269 // Warp filters are 7-tap, which needs 3 pixels (kConvolveBorderLeftTop)
270 // padding. Destination buffer indices are based on subsampling values (x+y):
271 // 0: (4:4:4), 1:(4:2:2), 2: (4:2:0).
272 Pixel source_[kMaxSourceBlockHeight * kMaxSourceBlockWidth] = {};
273 DestType dest_[3][kMaxDestBlockHeight * kMaxDestBlockWidth] = {};
274 };
275
276 template <bool is_compound, int bitdepth, typename Pixel>
SetInputData(bool use_fixed_values,int value)277 void WarpTest<is_compound, bitdepth, Pixel>::SetInputData(bool use_fixed_values,
278 int value) {
279 if (use_fixed_values) {
280 for (int y = 0; y < param_.height; ++y) {
281 const int row = kSourceBorderVertical + y;
282 Memset(source_ + row * kMaxSourceBlockWidth + kSourceBorderHorizontal,
283 value, param_.width);
284 }
285 } else {
286 const int mask = (1 << bitdepth) - 1;
287 libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
288 for (int y = 0; y < param_.height; ++y) {
289 const int row = kSourceBorderVertical + y;
290 for (int x = 0; x < param_.width; ++x) {
291 const int column = kSourceBorderHorizontal + x;
292 source_[row * kMaxSourceBlockWidth + column] = rnd.Rand16() & mask;
293 }
294 }
295 }
296 PostFilter::ExtendFrame<Pixel>(
297 &source_[kSourceBorderVertical * kMaxSourceBlockWidth +
298 kSourceBorderHorizontal],
299 param_.width, param_.height, kMaxSourceBlockWidth,
300 kSourceBorderHorizontal, kSourceBorderHorizontal, kSourceBorderVertical,
301 kSourceBorderVertical);
302 }
303
304 template <bool is_compound, int bitdepth, typename Pixel>
Test(bool use_fixed_values,int value,int num_runs)305 void WarpTest<is_compound, bitdepth, Pixel>::Test(bool use_fixed_values,
306 int value,
307 int num_runs /*= 1*/) {
308 if (func_ == nullptr) return;
309 SetInputData(use_fixed_values, value);
310 libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
311 const int source_offset =
312 kSourceBorderVertical * kMaxSourceBlockWidth + kSourceBorderHorizontal;
313 const int dest_offset =
314 kConvolveBorderLeftTop * kMaxDestBlockWidth + kConvolveBorderLeftTop;
315 const Pixel* const src = source_ + source_offset;
316 const ptrdiff_t src_stride = kMaxSourceBlockWidth * sizeof(Pixel);
317 const ptrdiff_t dst_stride =
318 is_compound ? kMaxDestBlockWidth : kMaxDestBlockWidth * sizeof(Pixel);
319
320 absl::Duration elapsed_time;
321 for (int subsampling_x = 0; subsampling_x <= 1; ++subsampling_x) {
322 for (int subsampling_y = 0; subsampling_y <= 1; ++subsampling_y) {
323 if (subsampling_x == 0 && subsampling_y == 1) {
324 // When both are 0: 4:4:4
325 // When both are 1: 4:2:0
326 // When only |subsampling_x| is 1: 4:2:2
327 // Having only |subsampling_y| == 1 is unsupported.
328 continue;
329 }
330 int params[8];
331 int16_t alpha;
332 int16_t beta;
333 int16_t gamma;
334 int16_t delta;
335 GenerateWarpedModel(params, &alpha, &beta, &gamma, &delta, rnd.Rand8());
336
337 const int dest_id = subsampling_x + subsampling_y;
338 DestType* const dst = dest_[dest_id] + dest_offset;
339 const absl::Time start = absl::Now();
340 for (int n = 0; n < num_runs; ++n) {
341 func_(src, src_stride, param_.width, param_.height, params,
342 subsampling_x, subsampling_y, 0, 0, param_.width, param_.height,
343 alpha, beta, gamma, delta, dst, dst_stride);
344 }
345 elapsed_time += absl::Now() - start;
346 }
347 }
348
349 if (use_fixed_values) {
350 // For fixed values, input and output are identical.
351 for (size_t i = 0; i < ABSL_ARRAYSIZE(dest_); ++i) {
352 // |is_compound| holds a few more bits of precision and an offset value.
353 Pixel compensated_dest[kMaxDestBlockWidth * kMaxDestBlockHeight];
354 const int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset;
355 if (is_compound) {
356 for (int y = 0; y < param_.height; ++y) {
357 for (int x = 0; x < param_.width; ++x) {
358 const int compound_value =
359 dest_[i][dest_offset + y * kMaxDestBlockWidth + x];
360 const int remove_offset = compound_value - compound_offset;
361 const int full_shift =
362 remove_offset >>
363 (kInterRoundBitsVertical - kInterRoundBitsCompoundVertical);
364 compensated_dest[y * kMaxDestBlockWidth + x] =
365 Clip3(full_shift, 0, (1 << bitdepth) - 1);
366 }
367 }
368 }
369 Pixel* pixel_dest =
370 is_compound ? compensated_dest
371 : reinterpret_cast<Pixel*>(dest_[i] + dest_offset);
372 const bool success = test_utils::CompareBlocks(
373 src, pixel_dest, param_.width, param_.height, kMaxSourceBlockWidth,
374 kMaxDestBlockWidth, false);
375 EXPECT_TRUE(success) << "subsampling_x + subsampling_y: " << i;
376 }
377 } else {
378 // (width, height):
379 // (8, 8), id = 0. (8, 16), id = 1. (16, 8), id = 2.
380 // (16, 16), id = 3. (16, 32), id = 4. (32, 16), id = 5.
381 // ...
382 // (128, 128), id = 12.
383 int id;
384 if (param_.width == param_.height) {
385 id = 3 * static_cast<int>(FloorLog2(param_.width) - 3);
386 } else if (param_.width < param_.height) {
387 id = 1 + 3 * static_cast<int>(FloorLog2(param_.width) - 3);
388 } else {
389 id = 2 + 3 * static_cast<int>(FloorLog2(param_.height) - 3);
390 }
391
392 const char* expected_digest;
393 if (bitdepth == 8) {
394 expected_digest = GetDigest8bpp<is_compound>(id);
395 } else {
396 #if LIBGAV1_MAX_BITDEPTH >= 10
397 expected_digest = GetDigest10bpp<is_compound>(id);
398 #endif
399 }
400 test_utils::CheckMd5Digest(
401 "Warp", absl::StrFormat("%dx%d", param_.width, param_.height).c_str(),
402 expected_digest, dest_, sizeof(dest_), elapsed_time);
403 }
404 }
405
406 template <bool is_compound, int bitdepth, typename Pixel>
TestFixedValues()407 void WarpTest<is_compound, bitdepth, Pixel>::TestFixedValues() {
408 Test(true, 0);
409 Test(true, 1);
410 Test(true, 128);
411 Test(true, (1 << bitdepth) - 1);
412 }
413
414 template <bool is_compound, int bitdepth, typename Pixel>
TestRandomValues()415 void WarpTest<is_compound, bitdepth, Pixel>::TestRandomValues() {
416 Test(false, 0);
417 }
418
419 template <bool is_compound, int bitdepth, typename Pixel>
TestSpeed()420 void WarpTest<is_compound, bitdepth, Pixel>::TestSpeed() {
421 const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
422 Test(false, 0, num_runs);
423 }
424
// Computes the smallest and largest sums that |filter| can produce when every
// input sample lies in [min_input, max_input]. Positive taps contribute
// |max_input| to the maximum and |min_input| to the minimum; all other taps
// contribute the opposite way.
void ApplyFilterToSignedInput(const int min_input, const int max_input,
                              const int8_t filter[kSubPixelTaps],
                              int* min_output, int* max_output) {
  int lowest = 0;
  int highest = 0;
  for (int i = 0; i < kSubPixelTaps; ++i) {
    const int tap = filter[i];
    const int with_min_input = min_input * tap;
    const int with_max_input = max_input * tap;
    if (tap > 0) {
      highest += with_max_input;
      lowest += with_min_input;
    } else {
      lowest += with_max_input;
      highest += with_min_input;
    }
  }
  *min_output = lowest;
  *max_output = highest;
}
442
// Same as ApplyFilterToSignedInput() for input samples in [0, max_input].
void ApplyFilterToUnsignedInput(const int max_input,
                                const int8_t filter[kSubPixelTaps],
                                int* min_output, int* max_output) {
  ApplyFilterToSignedInput(/*min_input=*/0, max_input, filter, min_output,
                           max_output);
}
448
449 // Validate the maximum ranges for different parts of the Warp process.
450 template <int bitdepth>
ShowRange()451 void ShowRange() {
452 constexpr int horizontal_bits = (bitdepth == kBitdepth12)
453 ? kInterRoundBitsHorizontal12bpp
454 : kInterRoundBitsHorizontal;
455 constexpr int vertical_bits = (bitdepth == kBitdepth12)
456 ? kInterRoundBitsVertical12bpp
457 : kInterRoundBitsVertical;
458 constexpr int compound_vertical_bits = kInterRoundBitsCompoundVertical;
459
460 constexpr int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset;
461
462 constexpr int max_input = (1 << bitdepth) - 1;
463
464 const int8_t* worst_warp_filter = kWarpedFilters8[93];
465
466 // First pass.
467 printf("Bitdepth: %2d Input range: [%8d, %8d]\n", bitdepth, 0,
468 max_input);
469
470 int min = 0, max = 0;
471 ApplyFilterToUnsignedInput(max_input, worst_warp_filter, &min, &max);
472
473 int first_pass_offset;
474 if (bitdepth == 8) {
475 // Derive an offset for 8 bit.
476 for (first_pass_offset = 1; - first_pass_offset > min;
477 first_pass_offset <<= 1) {
478 }
479 printf(" 8bpp intermediate offset: %d.\n", first_pass_offset);
480 min += first_pass_offset;
481 max += first_pass_offset;
482 assert(min > 0);
483 assert(max < UINT16_MAX);
484 } else {
485 // 10bpp and 12bpp require int32_t for the intermediate values. Adding an
486 // offset is not required.
487 assert(min > INT32_MIN);
488 assert(max > INT16_MAX && max < INT32_MAX);
489 }
490
491 printf(" intermediate range: [%8d, %8d]\n", min, max);
492
493 const int first_pass_min = RightShiftWithRounding(min, horizontal_bits);
494 const int first_pass_max = RightShiftWithRounding(max, horizontal_bits);
495
496 printf(" first pass output range: [%8d, %8d]\n", first_pass_min,
497 first_pass_max);
498
499 // Second pass.
500 if (bitdepth == 8) {
501 ApplyFilterToUnsignedInput(first_pass_max, worst_warp_filter, &min, &max);
502 } else {
503 ApplyFilterToSignedInput(first_pass_min, first_pass_max, worst_warp_filter,
504 &min, &max);
505 }
506
507 if (bitdepth == 8) {
508 // Remove the offset that was applied in the first pass since we must use
509 // int32_t for this phase anyway. 128 is the sum of the filter taps.
510 const int offset_removal = (first_pass_offset >> horizontal_bits) * 128;
511 printf(" 8bpp intermediate offset removal: %d.\n", offset_removal);
512 max -= offset_removal;
513 min -= offset_removal;
514 assert(min < INT16_MIN && min > INT32_MIN);
515 assert(max > INT16_MAX && max < INT32_MAX);
516 } else {
517 // 10bpp and 12bpp require int32_t for the intermediate values. Adding an
518 // offset is not required.
519 assert(min > INT32_MIN);
520 assert(max > INT16_MAX && max < INT32_MAX);
521 }
522
523 printf(" intermediate range: [%8d, %8d]\n", min, max);
524
525 // Second pass non-compound output is clipped to Pixel values.
526 const int second_pass_min =
527 Clip3(RightShiftWithRounding(min, vertical_bits), 0, max_input);
528 const int second_pass_max =
529 Clip3(RightShiftWithRounding(max, vertical_bits), 0, max_input);
530 printf(" second pass output range: [%8d, %8d]\n", second_pass_min,
531 second_pass_max);
532
533 // Output is Pixel so matches Pixel values.
534 assert(second_pass_min == 0);
535 assert(second_pass_max == max_input);
536
537 const int compound_second_pass_min =
538 RightShiftWithRounding(min, compound_vertical_bits) + compound_offset;
539 const int compound_second_pass_max =
540 RightShiftWithRounding(max, compound_vertical_bits) + compound_offset;
541
542 printf(" compound second pass output range: [%8d, %8d]\n",
543 compound_second_pass_min, compound_second_pass_max);
544
545 if (bitdepth == 8) {
546 // 8bpp output is int16_t without an offset.
547 assert(compound_second_pass_min > INT16_MIN);
548 assert(compound_second_pass_max < INT16_MAX);
549 } else {
550 // 10bpp and 12bpp use the offset to fit inside uint16_t.
551 assert(compound_second_pass_min > 0);
552 assert(compound_second_pass_max < UINT16_MAX);
553 }
554
555 printf("\n");
556 }
557
// Prints and checks the intermediate value ranges for every bitdepth.
TEST(WarpTest, ShowRange) {
  ShowRange<kBitdepth8>();
  ShowRange<kBitdepth10>();
  ShowRange<kBitdepth12>();
}
563
564 using WarpTest8bpp = WarpTest</*is_compound=*/false, 8, uint8_t>;
565 // TODO(jzern): Coverage could be added for kInterRoundBitsCompoundVertical via
566 // WarpCompoundTest.
567 // using WarpCompoundTest8bpp = WarpTest</*is_compound=*/true, 8, uint8_t>;
568
569 // Verifies the sum of the warped filter coefficients is 128 for every filter.
570 //
571 // Verifies the properties used in the calculation of ranges of variables in
572 // the block warp process:
573 // * The maximum sum of the positive warped filter coefficients is 175.
574 // * The minimum (i.e., most negative) sum of the negative warped filter
575 // coefficients is -47.
576 //
577 // NOTE: This test is independent of the bitdepth and the implementation of the
578 // block warp function, so it just needs to be a test in the WarpTest8bpp class
579 // and does not need to be defined with TEST_P.
TEST(WarpTest8bpp,WarpedFilterCoefficientSums)580 TEST(WarpTest8bpp, WarpedFilterCoefficientSums) {
581 int max_positive_sum = 0;
582 int min_negative_sum = 0;
583 for (const auto& filter : kWarpedFilters) {
584 int sum = 0;
585 int positive_sum = 0;
586 int negative_sum = 0;
587 for (const auto coefficient : filter) {
588 sum += coefficient;
589 if (coefficient > 0) {
590 positive_sum += coefficient;
591 } else {
592 negative_sum += coefficient;
593 }
594 }
595 EXPECT_EQ(sum, 128);
596 max_positive_sum = std::max(positive_sum, max_positive_sum);
597 min_negative_sum = std::min(negative_sum, min_negative_sum);
598 }
599 EXPECT_EQ(max_positive_sum, 175);
600 EXPECT_EQ(min_negative_sum, -47);
601 }
602
// Constant-valued input: the output must match the input.
TEST_P(WarpTest8bpp, FixedValues) { TestFixedValues(); }

// Pseudo-random input: the output digest must match the expected digest.
TEST_P(WarpTest8bpp, RandomValues) { TestRandomValues(); }

// Benchmark; disabled by default.
TEST_P(WarpTest8bpp, DISABLED_Speed) { TestSpeed(); }
608 const WarpTestParam warp_test_param[] = {
609 WarpTestParam(8, 8), WarpTestParam(8, 16), WarpTestParam(16, 8),
610 WarpTestParam(16, 16), WarpTestParam(16, 32), WarpTestParam(32, 16),
611 WarpTestParam(32, 32), WarpTestParam(32, 64), WarpTestParam(64, 32),
612 WarpTestParam(64, 64), WarpTestParam(64, 128), WarpTestParam(128, 64),
613 WarpTestParam(128, 128),
614 };
615
616 INSTANTIATE_TEST_SUITE_P(C, WarpTest8bpp, testing::ValuesIn(warp_test_param));
617
618 #if LIBGAV1_ENABLE_NEON
619 INSTANTIATE_TEST_SUITE_P(NEON, WarpTest8bpp,
620 testing::ValuesIn(warp_test_param));
621 #endif
622
623 #if LIBGAV1_ENABLE_SSE4_1
624 INSTANTIATE_TEST_SUITE_P(SSE41, WarpTest8bpp,
625 testing::ValuesIn(warp_test_param));
626 #endif
627
#if LIBGAV1_MAX_BITDEPTH >= 10
// Fixture for the non-compound 10bpp warp function.
using WarpTest10bpp = WarpTest</*is_compound=*/false, 10, uint16_t>;
// TODO(jzern): Coverage could be added for kInterRoundBitsCompoundVertical via
// WarpCompoundTest.
// using WarpCompoundTest10bpp = WarpTest</*is_compound=*/true, 10, uint16_t>;

// Constant-valued input: the output must match the input.
TEST_P(WarpTest10bpp, FixedValues) { TestFixedValues(); }

// Pseudo-random input: the output digest must match the expected digest.
TEST_P(WarpTest10bpp, RandomValues) { TestRandomValues(); }

// Benchmark; disabled by default.
TEST_P(WarpTest10bpp, DISABLED_Speed) { TestSpeed(); }

INSTANTIATE_TEST_SUITE_P(C, WarpTest10bpp, testing::ValuesIn(warp_test_param));

#if LIBGAV1_ENABLE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, WarpTest10bpp,
                         testing::ValuesIn(warp_test_param));
#endif  // LIBGAV1_ENABLE_NEON
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
647
operator <<(std::ostream & os,const WarpTestParam & warp_param)648 std::ostream& operator<<(std::ostream& os, const WarpTestParam& warp_param) {
649 return os << "BlockSize" << warp_param.width << "x" << warp_param.height;
650 }
651
652 } // namespace
653 } // namespace dsp
654 } // namespace libgav1
655