1 // Copyright 2021 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/warp.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cmath>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstdio>
23 #include <cstdlib>
24 #include <ostream>
25 #include <string>
26 #include <type_traits>
27 
28 #include "absl/base/macros.h"
29 #include "absl/strings/match.h"
30 #include "absl/strings/str_format.h"
31 #include "absl/strings/string_view.h"
32 #include "absl/time/clock.h"
33 #include "absl/time/time.h"
34 #include "gtest/gtest.h"
35 #include "src/dsp/constants.h"
36 #include "src/dsp/dsp.h"
37 #include "src/post_filter.h"
38 #include "src/utils/common.h"
39 #include "src/utils/constants.h"
40 #include "src/utils/cpu.h"
41 #include "src/utils/memory.h"
42 #include "tests/block_utils.h"
43 #include "tests/third_party/libvpx/acm_random.h"
44 #include "tests/utils.h"
45 
46 namespace libgav1 {
47 namespace dsp {
48 namespace {
49 
50 constexpr int kSourceBorderHorizontal = 16;
51 constexpr int kSourceBorderVertical = 13;
52 
53 constexpr int kMaxSourceBlockWidth =
54     kMaxSuperBlockSizeInPixels + kSourceBorderHorizontal * 2;
55 constexpr int kMaxSourceBlockHeight =
56     kMaxSuperBlockSizeInPixels + kSourceBorderVertical * 2;
57 constexpr int kMaxDestBlockWidth =
58     kMaxSuperBlockSizeInPixels + kConvolveBorderLeftTop * 2;
59 constexpr int kMaxDestBlockHeight =
60     kMaxSuperBlockSizeInPixels + kConvolveBorderLeftTop * 2;
61 
// Lookup table read by GenerateApproximateDivisor(). The 257 entries decrease
// monotonically from 16384 (index 0) to 8192 (index 256).
constexpr uint16_t kDivisorLookup[] = {
    16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
    15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
    15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
    14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
    13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
    13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
    13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
    12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
    12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
    11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
    11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
    11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
    10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
    10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
    10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010, 9986,
    9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
    9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
    9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
    9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
    9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
    8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
    8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
    8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
    8240,  8224,  8208,  8192};
static_assert(sizeof(kDivisorLookup) / sizeof(kDivisorLookup[0]) == 257,
              "kDivisorLookup must contain exactly 257 entries.");
87 
// Returns the expected MD5 digest string for the 8bpp test case |id|. The
// digest is read from the compound table when |is_compound| is true and from
// the non-compound table otherwise. Each table holds 13 digests; |id| is used
// as an index without a range check.
template <bool is_compound>
const char* GetDigest8bpp(int id) {
  static const char* const kDigest[] = {
      "77ba358a0f5e19a8e69fa0a95712578e", "141b23d13a04e0b84d26d514de76d6b0",
      "b0265858454b979852ffadae323f0fb7", "9cf38e3579265b656f1f2100ba15b0e9",
      "ab51d05cc255ef8e37921182df1d89b1", "e3e96f90a4b07ca733e40f057dc01c41",
      "4eee8c1a52a62a266db9b1c9338e124c", "901a87d8f88f6324dbc0960a6de861ac",
      "da9cb6faf6adaeeae12b6784f39186c5", "14450ab05536cdb0d2f499716ccb559d",
      "566b396cbf008bbb869b364fdc81860d", "681a872baf2de4e58d73ea9ab8643a72",
      "7f17d290d513a7416761b3a01f10fd2f",
  };
  static const char* const kCompoundDigest[] = {
      "7e9339d265b7beac7bbe32fe7bb0fccb", "f747d663b427bb38a3ff36b0815a394c",
      "858cf54d2253281a919fbdb48fe91c53", "4721dd97a212c6068bd488f400259afc",
      "36878c7906492bc740112abdea77616f", "89deb68aa35764bbf3024b501a6bed50",
      "8ac5b08f9b2afd38143c357646af0f82", "bf6e2a64835ea0c9d7467394253d0eb2",
      "7b0a539acd2a27eff398dd084abad933", "61c8d81b397c1cf727ff8a9fabab90af",
      "4d412349a25a832c1fb3fb29e3f0e2b3", "2c6dd2a9a4ede9fa00adb567ba646f30",
      "b2a0ce68db3cadd207299f73112bed74",
  };
  if (is_compound) {
    return kCompoundDigest[id];
  }
  return kDigest[id];
}
110 
111 #if LIBGAV1_MAX_BITDEPTH >= 10
// Returns the expected MD5 digest string for the 10bpp test case |id|. The
// digest is read from the compound table when |is_compound| is true and from
// the non-compound table otherwise. Each table holds 13 digests; |id| is used
// as an index without a range check.
template <bool is_compound>
const char* GetDigest10bpp(int id) {
  static const char* const kDigest[] = {
      "1fef54f56a0bafccf7f8da1ac3b18b76", "8a65c72f171feafa2f393d31d6b7fe1b",
      "808019346f2f1f45f8cf2e9fc9a49320", "c28e2f2c6c830a29bcc2452166cba521",
      "f040674d6f54e8910d655f0d11fd8cdd", "473af9bb1c6023965c2284b716feef97",
      "e4f6d7babd0813d5afb0f575ebfa8166", "58f96ef8a880963a213624bb0d06d47c",
      "1ec0995fa4490628b679d03683233388", "9526fb102fde7dc1a7e160e65af6da33",
      "f0457427d0c0e31d82ea4f612f7f86f1", "ddc82ae298cccebad493ba9de0f69fbd",
      "5ed615091e2f62df26de7e91a985cb81",
  };
  static const char* const kCompoundDigest[] = {
      "8e6986ae143260e0b8b4887f15a141a1", "0a7f0db8316b8c3569f08834dd0c6f50",
      "90705b2e7dbe083e8a1f70f29d6f257e", "e428a75bea77d769d21f3f7a1d2b0b38",
      "a570b13d790c085c4ab50d71dd085d56", "e5d043c6cd6ff6dbab6e38a8877e93bd",
      "12ea96991e46e3e9aa78ab812ffa0525", "84293a94a53f1cf814fa25e793c3fe27",
      "b98a7502c84ac8437266f702dcc0a92e", "d8db5d52e9b0a5be0ad2d517d5bd16e9",
      "f3be504bbb609ce4cc71c5539252638a", "fcde83b54e14e9de23460644f244b047",
      "42eb66e752e9ef289b47053b5c73fdd6",
  };
  if (is_compound) {
    return kCompoundDigest[id];
  }
  return kDigest[id];
}
134 #endif
135 
RandomWarpedParam(int seed_offset,int bits)136 int RandomWarpedParam(int seed_offset, int bits) {
137   libvpx_test::ACMRandom rnd(seed_offset +
138                              libvpx_test::ACMRandom::DeterministicSeed());
139   // 1 in 8 chance of generating zero (arbitrary).
140   const bool zero = (rnd.Rand16() & 7) == 0;
141   if (zero) return 0;
142   // Generate uniform values in the range [-(1 << bits), 1] U [1, 1 <<
143   // bits].
144   const int mask = (1 << bits) - 1;
145   const int value = 1 + (rnd.RandRange(1u << 31) & mask);
146   const bool sign = (rnd.Rand16() & 1) != 0;
147   return sign ? value : -value;
148 }
149 
150 // This function is a copy from warp_prediction.cc.
151 template <typename T>
GenerateApproximateDivisor(T value,int16_t * division_factor,int16_t * division_shift)152 void GenerateApproximateDivisor(T value, int16_t* division_factor,
153                                 int16_t* division_shift) {
154   const int n = FloorLog2(std::abs(value));
155   const T e = std::abs(value) - (static_cast<T>(1) << n);
156   const int entry = (n > kDivisorLookupBits)
157                         ? RightShiftWithRounding(e, n - kDivisorLookupBits)
158                         : static_cast<int>(e << (kDivisorLookupBits - n));
159   *division_shift = n + kDivisorLookupPrecisionBits;
160   *division_factor =
161       (value < 0) ? -kDivisorLookup[entry] : kDivisorLookup[entry];
162 }
163 
164 // This function is a copy from warp_prediction.cc.
GetShearParameter(int value)165 int16_t GetShearParameter(int value) {
166   return static_cast<int16_t>(
167       LeftShift(RightShiftWithRoundingSigned(value, kWarpParamRoundingBits),
168                 kWarpParamRoundingBits));
169 }
170 
171 // This function is a copy from warp_prediction.cc.
172 // This function is used here to help generate valid warp parameters.
SetupShear(const int * params,int16_t * alpha,int16_t * beta,int16_t * gamma,int16_t * delta)173 bool SetupShear(const int* params, int16_t* alpha, int16_t* beta,
174                 int16_t* gamma, int16_t* delta) {
175   int16_t division_shift;
176   int16_t division_factor;
177   GenerateApproximateDivisor<int32_t>(params[2], &division_factor,
178                                       &division_shift);
179   const int alpha0 =
180       Clip3(params[2] - (1 << kWarpedModelPrecisionBits), INT16_MIN, INT16_MAX);
181   const int beta0 = Clip3(params[3], INT16_MIN, INT16_MAX);
182   const int64_t v = LeftShift(params[4], kWarpedModelPrecisionBits);
183   const int gamma0 =
184       Clip3(RightShiftWithRoundingSigned(v * division_factor, division_shift),
185             INT16_MIN, INT16_MAX);
186   const int64_t w = static_cast<int64_t>(params[3]) * params[4];
187   const int delta0 = Clip3(
188       params[5] -
189           RightShiftWithRoundingSigned(w * division_factor, division_shift) -
190           (1 << kWarpedModelPrecisionBits),
191       INT16_MIN, INT16_MAX);
192 
193   *alpha = GetShearParameter(alpha0);
194   *beta = GetShearParameter(beta0);
195   *gamma = GetShearParameter(gamma0);
196   *delta = GetShearParameter(delta0);
197   if ((4 * std::abs(*alpha) + 7 * std::abs(*beta) >=
198        (1 << kWarpedModelPrecisionBits)) ||
199       (4 * std::abs(*gamma) + 4 * std::abs(*delta) >=
200        (1 << kWarpedModelPrecisionBits))) {
201     return false;  // NOLINT (easier condition to understand).
202   }
203 
204   return true;
205 }
206 
GenerateWarpedModel(int * params,int16_t * alpha,int16_t * beta,int16_t * gamma,int16_t * delta,int seed)207 void GenerateWarpedModel(int* params, int16_t* alpha, int16_t* beta,
208                          int16_t* gamma, int16_t* delta, int seed) {
209   do {
210     params[0] = RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6);
211     params[1] = RandomWarpedParam(seed, kWarpedModelPrecisionBits + 6);
212     params[2] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) +
213                 (1 << kWarpedModelPrecisionBits);
214     params[3] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3);
215     params[4] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3);
216     params[5] = RandomWarpedParam(seed, kWarpedModelPrecisionBits - 3) +
217                 (1 << kWarpedModelPrecisionBits);
218     ++seed;
219   } while (params[2] == 0 || !SetupShear(params, alpha, beta, gamma, delta));
220 }
221 
// Parameter of the warp tests: the dimensions of the block to process.
struct WarpTestParam {
  WarpTestParam(int width, int height) : width{width}, height{height} {}

  int width;   // Block width.
  int height;  // Block height.
};
227 
228 template <bool is_compound, int bitdepth, typename Pixel>
229 class WarpTest : public testing::TestWithParam<WarpTestParam> {
230  public:
231   WarpTest() = default;
232   ~WarpTest() override = default;
233 
SetUp()234   void SetUp() override {
235     test_utils::ResetDspTable(bitdepth);
236     WarpInit_C();
237     const dsp::Dsp* const dsp = dsp::GetDspTable(bitdepth);
238     ASSERT_NE(dsp, nullptr);
239     const testing::TestInfo* const test_info =
240         testing::UnitTest::GetInstance()->current_test_info();
241     const absl::string_view test_case = test_info->test_suite_name();
242     if (absl::StartsWith(test_case, "C/")) {
243     } else if (absl::StartsWith(test_case, "NEON/")) {
244       WarpInit_NEON();
245     } else if (absl::StartsWith(test_case, "SSE41/")) {
246       WarpInit_SSE4_1();
247     } else {
248       FAIL() << "Unrecognized architecture prefix in test case name: "
249              << test_case;
250     }
251     func_ = is_compound ? dsp->warp_compound : dsp->warp;
252   }
253 
254  protected:
255   using DestType =
256       typename std::conditional<is_compound, uint16_t, Pixel>::type;
257 
258   void SetInputData(bool use_fixed_values, int value);
259   void Test(bool use_fixed_values, int value, int num_runs = 1);
260   void TestFixedValues();
261   void TestRandomValues();
262   void TestSpeed();
263 
264   const WarpTestParam param_ = GetParam();
265 
266  private:
267   int warp_params_[8];
268   dsp::WarpFunc func_;
269   // Warp filters are 7-tap, which needs 3 pixels (kConvolveBorderLeftTop)
270   // padding. Destination buffer indices are based on subsampling values (x+y):
271   // 0: (4:4:4), 1:(4:2:2), 2: (4:2:0).
272   Pixel source_[kMaxSourceBlockHeight * kMaxSourceBlockWidth] = {};
273   DestType dest_[3][kMaxDestBlockHeight * kMaxDestBlockWidth] = {};
274 };
275 
276 template <bool is_compound, int bitdepth, typename Pixel>
SetInputData(bool use_fixed_values,int value)277 void WarpTest<is_compound, bitdepth, Pixel>::SetInputData(bool use_fixed_values,
278                                                           int value) {
279   if (use_fixed_values) {
280     for (int y = 0; y < param_.height; ++y) {
281       const int row = kSourceBorderVertical + y;
282       Memset(source_ + row * kMaxSourceBlockWidth + kSourceBorderHorizontal,
283              value, param_.width);
284     }
285   } else {
286     const int mask = (1 << bitdepth) - 1;
287     libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
288     for (int y = 0; y < param_.height; ++y) {
289       const int row = kSourceBorderVertical + y;
290       for (int x = 0; x < param_.width; ++x) {
291         const int column = kSourceBorderHorizontal + x;
292         source_[row * kMaxSourceBlockWidth + column] = rnd.Rand16() & mask;
293       }
294     }
295   }
296   PostFilter::ExtendFrame<Pixel>(
297       &source_[kSourceBorderVertical * kMaxSourceBlockWidth +
298                kSourceBorderHorizontal],
299       param_.width, param_.height, kMaxSourceBlockWidth,
300       kSourceBorderHorizontal, kSourceBorderHorizontal, kSourceBorderVertical,
301       kSourceBorderVertical);
302 }
303 
304 template <bool is_compound, int bitdepth, typename Pixel>
Test(bool use_fixed_values,int value,int num_runs)305 void WarpTest<is_compound, bitdepth, Pixel>::Test(bool use_fixed_values,
306                                                   int value,
307                                                   int num_runs /*= 1*/) {
308   if (func_ == nullptr) return;
309   SetInputData(use_fixed_values, value);
310   libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
311   const int source_offset =
312       kSourceBorderVertical * kMaxSourceBlockWidth + kSourceBorderHorizontal;
313   const int dest_offset =
314       kConvolveBorderLeftTop * kMaxDestBlockWidth + kConvolveBorderLeftTop;
315   const Pixel* const src = source_ + source_offset;
316   const ptrdiff_t src_stride = kMaxSourceBlockWidth * sizeof(Pixel);
317   const ptrdiff_t dst_stride =
318       is_compound ? kMaxDestBlockWidth : kMaxDestBlockWidth * sizeof(Pixel);
319 
320   absl::Duration elapsed_time;
321   for (int subsampling_x = 0; subsampling_x <= 1; ++subsampling_x) {
322     for (int subsampling_y = 0; subsampling_y <= 1; ++subsampling_y) {
323       if (subsampling_x == 0 && subsampling_y == 1) {
324         // When both are 0: 4:4:4
325         // When both are 1: 4:2:0
326         // When only |subsampling_x| is 1: 4:2:2
327         // Having only |subsampling_y| == 1 is unsupported.
328         continue;
329       }
330       int params[8];
331       int16_t alpha;
332       int16_t beta;
333       int16_t gamma;
334       int16_t delta;
335       GenerateWarpedModel(params, &alpha, &beta, &gamma, &delta, rnd.Rand8());
336 
337       const int dest_id = subsampling_x + subsampling_y;
338       DestType* const dst = dest_[dest_id] + dest_offset;
339       const absl::Time start = absl::Now();
340       for (int n = 0; n < num_runs; ++n) {
341         func_(src, src_stride, param_.width, param_.height, params,
342               subsampling_x, subsampling_y, 0, 0, param_.width, param_.height,
343               alpha, beta, gamma, delta, dst, dst_stride);
344       }
345       elapsed_time += absl::Now() - start;
346     }
347   }
348 
349   if (use_fixed_values) {
350     // For fixed values, input and output are identical.
351     for (size_t i = 0; i < ABSL_ARRAYSIZE(dest_); ++i) {
352       // |is_compound| holds a few more bits of precision and an offset value.
353       Pixel compensated_dest[kMaxDestBlockWidth * kMaxDestBlockHeight];
354       const int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset;
355       if (is_compound) {
356         for (int y = 0; y < param_.height; ++y) {
357           for (int x = 0; x < param_.width; ++x) {
358             const int compound_value =
359                 dest_[i][dest_offset + y * kMaxDestBlockWidth + x];
360             const int remove_offset = compound_value - compound_offset;
361             const int full_shift =
362                 remove_offset >>
363                 (kInterRoundBitsVertical - kInterRoundBitsCompoundVertical);
364             compensated_dest[y * kMaxDestBlockWidth + x] =
365                 Clip3(full_shift, 0, (1 << bitdepth) - 1);
366           }
367         }
368       }
369       Pixel* pixel_dest =
370           is_compound ? compensated_dest
371                       : reinterpret_cast<Pixel*>(dest_[i] + dest_offset);
372       const bool success = test_utils::CompareBlocks(
373           src, pixel_dest, param_.width, param_.height, kMaxSourceBlockWidth,
374           kMaxDestBlockWidth, false);
375       EXPECT_TRUE(success) << "subsampling_x + subsampling_y: " << i;
376     }
377   } else {
378     // (width, height):
379     // (8, 8), id = 0. (8, 16), id = 1. (16, 8), id = 2.
380     // (16, 16), id = 3. (16, 32), id = 4. (32, 16), id = 5.
381     // ...
382     // (128, 128), id = 12.
383     int id;
384     if (param_.width == param_.height) {
385       id = 3 * static_cast<int>(FloorLog2(param_.width) - 3);
386     } else if (param_.width < param_.height) {
387       id = 1 + 3 * static_cast<int>(FloorLog2(param_.width) - 3);
388     } else {
389       id = 2 + 3 * static_cast<int>(FloorLog2(param_.height) - 3);
390     }
391 
392     const char* expected_digest;
393     if (bitdepth == 8) {
394       expected_digest = GetDigest8bpp<is_compound>(id);
395     } else {
396 #if LIBGAV1_MAX_BITDEPTH >= 10
397       expected_digest = GetDigest10bpp<is_compound>(id);
398 #endif
399     }
400     test_utils::CheckMd5Digest(
401         "Warp", absl::StrFormat("%dx%d", param_.width, param_.height).c_str(),
402         expected_digest, dest_, sizeof(dest_), elapsed_time);
403   }
404 }
405 
406 template <bool is_compound, int bitdepth, typename Pixel>
TestFixedValues()407 void WarpTest<is_compound, bitdepth, Pixel>::TestFixedValues() {
408   Test(true, 0);
409   Test(true, 1);
410   Test(true, 128);
411   Test(true, (1 << bitdepth) - 1);
412 }
413 
414 template <bool is_compound, int bitdepth, typename Pixel>
TestRandomValues()415 void WarpTest<is_compound, bitdepth, Pixel>::TestRandomValues() {
416   Test(false, 0);
417 }
418 
419 template <bool is_compound, int bitdepth, typename Pixel>
TestSpeed()420 void WarpTest<is_compound, bitdepth, Pixel>::TestSpeed() {
421   const int num_runs = static_cast<int>(1.0e7 / (param_.width * param_.height));
422   Test(false, 0, num_runs);
423 }
424 
// Computes the smallest and largest sums the kSubPixelTaps-tap |filter| can
// produce when every input sample lies in [|min_input|, |max_input|].
//
// A positive tap contributes |min_input| * tap to the minimum and
// |max_input| * tap to the maximum; any other tap contributes the opposite
// way. The results are stored in |min_output| and |max_output|.
void ApplyFilterToSignedInput(const int min_input, const int max_input,
                              const int8_t filter[kSubPixelTaps],
                              int* min_output, int* max_output) {
  int lowest_sum = 0;
  int highest_sum = 0;
  for (int i = 0; i < kSubPixelTaps; ++i) {
    const int tap = filter[i];
    const int scaled_min_input = min_input * tap;
    const int scaled_max_input = max_input * tap;
    const bool is_positive_tap = tap > 0;
    lowest_sum += is_positive_tap ? scaled_min_input : scaled_max_input;
    highest_sum += is_positive_tap ? scaled_max_input : scaled_min_input;
  }
  *min_output = lowest_sum;
  *max_output = highest_sum;
}
442 
// Same as ApplyFilterToSignedInput() for input samples in the range
// [0, |max_input|].
void ApplyFilterToUnsignedInput(const int max_input,
                                const int8_t filter[kSubPixelTaps],
                                int* min_output, int* max_output) {
  const int min_unsigned_input = 0;
  ApplyFilterToSignedInput(min_unsigned_input, max_input, filter, min_output,
                           max_output);
}
448 
449 // Validate the maximum ranges for different parts of the Warp process.
450 template <int bitdepth>
ShowRange()451 void ShowRange() {
452   constexpr int horizontal_bits = (bitdepth == kBitdepth12)
453                                       ? kInterRoundBitsHorizontal12bpp
454                                       : kInterRoundBitsHorizontal;
455   constexpr int vertical_bits = (bitdepth == kBitdepth12)
456                                     ? kInterRoundBitsVertical12bpp
457                                     : kInterRoundBitsVertical;
458   constexpr int compound_vertical_bits = kInterRoundBitsCompoundVertical;
459 
460   constexpr int compound_offset = (bitdepth == 8) ? 0 : kCompoundOffset;
461 
462   constexpr int max_input = (1 << bitdepth) - 1;
463 
464   const int8_t* worst_warp_filter = kWarpedFilters8[93];
465 
466   // First pass.
467   printf("Bitdepth: %2d Input range:            [%8d, %8d]\n", bitdepth, 0,
468          max_input);
469 
470   int min = 0, max = 0;
471   ApplyFilterToUnsignedInput(max_input, worst_warp_filter, &min, &max);
472 
473   int first_pass_offset;
474   if (bitdepth == 8) {
475     // Derive an offset for 8 bit.
476     for (first_pass_offset = 1; - first_pass_offset > min;
477          first_pass_offset <<= 1) {
478     }
479     printf("  8bpp intermediate offset: %d.\n", first_pass_offset);
480     min += first_pass_offset;
481     max += first_pass_offset;
482     assert(min > 0);
483     assert(max < UINT16_MAX);
484   } else {
485     // 10bpp and 12bpp require int32_t for the intermediate values. Adding an
486     // offset is not required.
487     assert(min > INT32_MIN);
488     assert(max > INT16_MAX && max < INT32_MAX);
489   }
490 
491   printf("  intermediate range:                [%8d, %8d]\n", min, max);
492 
493   const int first_pass_min = RightShiftWithRounding(min, horizontal_bits);
494   const int first_pass_max = RightShiftWithRounding(max, horizontal_bits);
495 
496   printf("  first pass output range:           [%8d, %8d]\n", first_pass_min,
497          first_pass_max);
498 
499   // Second pass.
500   if (bitdepth == 8) {
501     ApplyFilterToUnsignedInput(first_pass_max, worst_warp_filter, &min, &max);
502   } else {
503     ApplyFilterToSignedInput(first_pass_min, first_pass_max, worst_warp_filter,
504                              &min, &max);
505   }
506 
507   if (bitdepth == 8) {
508     // Remove the offset that was applied in the first pass since we must use
509     // int32_t for this phase anyway. 128 is the sum of the filter taps.
510     const int offset_removal = (first_pass_offset >> horizontal_bits) * 128;
511     printf("  8bpp intermediate offset removal: %d.\n", offset_removal);
512     max -= offset_removal;
513     min -= offset_removal;
514     assert(min < INT16_MIN && min > INT32_MIN);
515     assert(max > INT16_MAX && max < INT32_MAX);
516   } else {
517     // 10bpp and 12bpp require int32_t for the intermediate values. Adding an
518     // offset is not required.
519     assert(min > INT32_MIN);
520     assert(max > INT16_MAX && max < INT32_MAX);
521   }
522 
523   printf("  intermediate range:                [%8d, %8d]\n", min, max);
524 
525   // Second pass non-compound output is clipped to Pixel values.
526   const int second_pass_min =
527       Clip3(RightShiftWithRounding(min, vertical_bits), 0, max_input);
528   const int second_pass_max =
529       Clip3(RightShiftWithRounding(max, vertical_bits), 0, max_input);
530   printf("  second pass output range:          [%8d, %8d]\n", second_pass_min,
531          second_pass_max);
532 
533   // Output is Pixel so matches Pixel values.
534   assert(second_pass_min == 0);
535   assert(second_pass_max == max_input);
536 
537   const int compound_second_pass_min =
538       RightShiftWithRounding(min, compound_vertical_bits) + compound_offset;
539   const int compound_second_pass_max =
540       RightShiftWithRounding(max, compound_vertical_bits) + compound_offset;
541 
542   printf("  compound second pass output range: [%8d, %8d]\n",
543          compound_second_pass_min, compound_second_pass_max);
544 
545   if (bitdepth == 8) {
546     // 8bpp output is int16_t without an offset.
547     assert(compound_second_pass_min > INT16_MIN);
548     assert(compound_second_pass_max < INT16_MAX);
549   } else {
550     // 10bpp and 12bpp use the offset to fit inside uint16_t.
551     assert(compound_second_pass_min > 0);
552     assert(compound_second_pass_max < UINT16_MAX);
553   }
554 
555   printf("\n");
556 }
557 
TEST(WarpTest,ShowRange)558 TEST(WarpTest, ShowRange) {
559   ShowRange<kBitdepth8>();
560   ShowRange<kBitdepth10>();
561   ShowRange<kBitdepth12>();
562 }
563 
564 using WarpTest8bpp = WarpTest</*is_compound=*/false, 8, uint8_t>;
565 // TODO(jzern): Coverage could be added for kInterRoundBitsCompoundVertical via
566 // WarpCompoundTest.
567 // using WarpCompoundTest8bpp = WarpTest</*is_compound=*/true, 8, uint8_t>;
568 
569 // Verifies the sum of the warped filter coefficients is 128 for every filter.
570 //
571 // Verifies the properties used in the calculation of ranges of variables in
572 // the block warp process:
573 // * The maximum sum of the positive warped filter coefficients is 175.
574 // * The minimum (i.e., most negative) sum of the negative warped filter
575 //   coefficients is -47.
576 //
577 // NOTE: This test is independent of the bitdepth and the implementation of the
578 // block warp function, so it just needs to be a test in the WarpTest8bpp class
579 // and does not need to be defined with TEST_P.
TEST(WarpTest8bpp,WarpedFilterCoefficientSums)580 TEST(WarpTest8bpp, WarpedFilterCoefficientSums) {
581   int max_positive_sum = 0;
582   int min_negative_sum = 0;
583   for (const auto& filter : kWarpedFilters) {
584     int sum = 0;
585     int positive_sum = 0;
586     int negative_sum = 0;
587     for (const auto coefficient : filter) {
588       sum += coefficient;
589       if (coefficient > 0) {
590         positive_sum += coefficient;
591       } else {
592         negative_sum += coefficient;
593       }
594     }
595     EXPECT_EQ(sum, 128);
596     max_positive_sum = std::max(positive_sum, max_positive_sum);
597     min_negative_sum = std::min(negative_sum, min_negative_sum);
598   }
599   EXPECT_EQ(max_positive_sum, 175);
600   EXPECT_EQ(min_negative_sum, -47);
601 }
602 
// Constant source blocks must be reproduced unchanged.
TEST_P(WarpTest8bpp, FixedValues) {
  TestFixedValues();
}

// Random source blocks are verified against the expected MD5 digests.
TEST_P(WarpTest8bpp, RandomValues) {
  TestRandomValues();
}

// Benchmark; disabled by default.
TEST_P(WarpTest8bpp, DISABLED_Speed) {
  TestSpeed();
}
608 const WarpTestParam warp_test_param[] = {
609     WarpTestParam(8, 8),     WarpTestParam(8, 16),   WarpTestParam(16, 8),
610     WarpTestParam(16, 16),   WarpTestParam(16, 32),  WarpTestParam(32, 16),
611     WarpTestParam(32, 32),   WarpTestParam(32, 64),  WarpTestParam(64, 32),
612     WarpTestParam(64, 64),   WarpTestParam(64, 128), WarpTestParam(128, 64),
613     WarpTestParam(128, 128),
614 };
615 
616 INSTANTIATE_TEST_SUITE_P(C, WarpTest8bpp, testing::ValuesIn(warp_test_param));
617 
618 #if LIBGAV1_ENABLE_NEON
619 INSTANTIATE_TEST_SUITE_P(NEON, WarpTest8bpp,
620                          testing::ValuesIn(warp_test_param));
621 #endif
622 
623 #if LIBGAV1_ENABLE_SSE4_1
624 INSTANTIATE_TEST_SUITE_P(SSE41, WarpTest8bpp,
625                          testing::ValuesIn(warp_test_param));
626 #endif
627 
#if LIBGAV1_MAX_BITDEPTH >= 10
// Fixture for the non-compound 10bpp warp function.
using WarpTest10bpp =
    WarpTest</*is_compound=*/false, /*bitdepth=*/10, /*Pixel=*/uint16_t>;
// TODO(jzern): Add a WarpCompoundTest to get coverage of
// kInterRoundBitsCompoundVertical.
// using WarpCompoundTest10bpp = WarpTest</*is_compound=*/true, 10, uint16_t>;

// Constant source blocks must be reproduced unchanged.
TEST_P(WarpTest10bpp, FixedValues) {
  TestFixedValues();
}

// Random source blocks are verified against the expected MD5 digests.
TEST_P(WarpTest10bpp, RandomValues) {
  TestRandomValues();
}

// Benchmark; disabled by default.
TEST_P(WarpTest10bpp, DISABLED_Speed) {
  TestSpeed();
}

INSTANTIATE_TEST_SUITE_P(C, WarpTest10bpp, testing::ValuesIn(warp_test_param));

#if LIBGAV1_ENABLE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, WarpTest10bpp,
                         testing::ValuesIn(warp_test_param));
#endif  // LIBGAV1_ENABLE_NEON
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
647 
operator <<(std::ostream & os,const WarpTestParam & warp_param)648 std::ostream& operator<<(std::ostream& os, const WarpTestParam& warp_param) {
649   return os << "BlockSize" << warp_param.width << "x" << warp_param.height;
650 }
651 
652 }  // namespace
653 }  // namespace dsp
654 }  // namespace libgav1
655