1 // Copyright 2021 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/intrapred_directional.h"
16 
17 #include <cmath>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstring>
21 #include <memory>
22 #include <ostream>
23 
24 #include "absl/strings/match.h"
25 #include "absl/time/clock.h"
26 #include "absl/time/time.h"
27 #include "gtest/gtest.h"
28 #include "src/dsp/constants.h"
29 #include "src/dsp/dsp.h"
30 #include "src/utils/common.h"
31 #include "src/utils/compiler_attributes.h"
32 #include "src/utils/constants.h"
33 #include "src/utils/cpu.h"
34 #include "src/utils/memory.h"
35 #include "tests/block_utils.h"
36 #include "tests/third_party/libvpx/acm_random.h"
37 #include "tests/utils.h"
38 
39 namespace libgav1 {
40 namespace dsp {
41 namespace {
42 
// Largest block dimension exercised by these tests, and the number of pixels
// in a square block of that dimension.
constexpr int kMaxBlockSize = 64;
constexpr int kTotalPixels = kMaxBlockSize * kMaxBlockSize;

// Number of directional predictors covered by this file (one per zone).
constexpr int kNumDirectionalIntraPredictors = 3;

// Nominal prediction angles. The tests add an angle delta to each of these to
// form the angle that is handed to a predictor.
constexpr int kBaseAngles[] = {45, 67, 90, 113, 135, 157, 180, 203};

// Names used when reporting results, one entry per predictor.
const char* const kDirectionalPredNames[kNumDirectionalIntraPredictors] = {
    "kDirectionalIntraPredictorZone1",
    "kDirectionalIntraPredictorZone2",
    "kDirectionalIntraPredictorZone3",
};
52 
GetDirectionalIntraPredictorDerivative(const int angle)53 int16_t GetDirectionalIntraPredictorDerivative(const int angle) {
54   EXPECT_GE(angle, 3);
55   EXPECT_LE(angle, 87);
56   return kDirectionalIntraPredictorDerivative[DivideBy2(angle) - 1];
57 }
58 
59 template <int bitdepth, typename Pixel>
60 class IntraPredTestBase : public testing::TestWithParam<TransformSize>,
61                           public test_utils::MaxAlignedAllocable {
62  public:
IntraPredTestBase()63   IntraPredTestBase() {
64     switch (tx_size_) {
65       case kNumTransformSizes:
66         EXPECT_NE(tx_size_, kNumTransformSizes);
67         break;
68       default:
69         block_width_ = kTransformWidth[tx_size_];
70         block_height_ = kTransformHeight[tx_size_];
71         break;
72     }
73   }
74 
75   IntraPredTestBase(const IntraPredTestBase&) = delete;
76   IntraPredTestBase& operator=(const IntraPredTestBase&) = delete;
77   ~IntraPredTestBase() override = default;
78 
79  protected:
80   struct IntraPredMem {
Resetlibgav1::dsp::__anon3849cbb50111::IntraPredTestBase::IntraPredMem81     void Reset(libvpx_test::ACMRandom* rnd) {
82       ASSERT_NE(rnd, nullptr);
83 #if LIBGAV1_MSAN
84       // Match the behavior of Tile::IntraPrediction to prevent warnings due to
85       // assembly code (safely) overreading to fill a register.
86       memset(left_mem, 0, sizeof(left_mem));
87       memset(top_mem, 0, sizeof(top_mem));
88 #endif  // LIBGAV1_MSAN
89       Pixel* const left = left_mem + 16;
90       Pixel* const top = top_mem + 16;
91       const int mask = (1 << bitdepth) - 1;
92       for (auto& r : ref_src) r = rnd->Rand16() & mask;
93       for (int i = 0; i < kMaxBlockSize; ++i) left[i] = rnd->Rand16() & mask;
94       for (int i = -1; i < kMaxBlockSize; ++i) top[i] = rnd->Rand16() & mask;
95 
96       // Some directional predictors require top-right, bottom-left.
97       for (int i = kMaxBlockSize; i < 2 * kMaxBlockSize; ++i) {
98         left[i] = rnd->Rand16() & mask;
99         top[i] = rnd->Rand16() & mask;
100       }
101       // TODO(jzern): reorder this and regenerate the digests after switching
102       // random number generators.
103       // Upsampling in the directional predictors extends left/top[-1] to [-2].
104       left[-1] = rnd->Rand16() & mask;
105       left[-2] = rnd->Rand16() & mask;
106       top[-2] = rnd->Rand16() & mask;
107       memset(left_mem, 0, sizeof(left_mem[0]) * 14);
108       memset(top_mem, 0, sizeof(top_mem[0]) * 14);
109       memset(top_mem + kMaxBlockSize * 2 + 16, 0,
110              sizeof(top_mem[0]) * kTopMemPadding);
111     }
112 
113     // Set ref_src, top-left, top and left to |pixel|.
Setlibgav1::dsp::__anon3849cbb50111::IntraPredTestBase::IntraPredMem114     void Set(const Pixel pixel) {
115 #if LIBGAV1_MSAN
116       // Match the behavior of Tile::IntraPrediction to prevent warnings due to
117       // assembly code (safely) overreading to fill a register.
118       memset(left_mem, 0, sizeof(left_mem));
119       memset(top_mem, 0, sizeof(top_mem));
120 #endif  // LIBGAV1_MSAN
121       Pixel* const left = left_mem + 16;
122       Pixel* const top = top_mem + 16;
123       for (auto& r : ref_src) r = pixel;
124       // Upsampling in the directional predictors extends left/top[-1] to [-2].
125       for (int i = -2; i < 2 * kMaxBlockSize; ++i) {
126         left[i] = top[i] = pixel;
127       }
128     }
129 
130     // DirectionalZone1_Large() overreads up to 7 pixels in |top_mem|.
131     static constexpr int kTopMemPadding = 7;
132     alignas(kMaxAlignment) Pixel dst[kTotalPixels];
133     alignas(kMaxAlignment) Pixel ref_src[kTotalPixels];
134     alignas(kMaxAlignment) Pixel left_mem[kMaxBlockSize * 2 + 16];
135     alignas(
136         kMaxAlignment) Pixel top_mem[kMaxBlockSize * 2 + 16 + kTopMemPadding];
137   };
138 
SetUp()139   void SetUp() override { test_utils::ResetDspTable(bitdepth); }
140 
141   const TransformSize tx_size_ = GetParam();
142   int block_width_;
143   int block_height_;
144   IntraPredMem intra_pred_mem_;
145 };
146 
147 //------------------------------------------------------------------------------
148 // DirectionalIntraPredTest
149 
150 template <int bitdepth, typename Pixel>
151 class DirectionalIntraPredTest : public IntraPredTestBase<bitdepth, Pixel> {
152  public:
153   DirectionalIntraPredTest() = default;
154   DirectionalIntraPredTest(const DirectionalIntraPredTest&) = delete;
155   DirectionalIntraPredTest& operator=(const DirectionalIntraPredTest&) = delete;
156   ~DirectionalIntraPredTest() override = default;
157 
158  protected:
159   using IntraPredTestBase<bitdepth, Pixel>::tx_size_;
160   using IntraPredTestBase<bitdepth, Pixel>::block_width_;
161   using IntraPredTestBase<bitdepth, Pixel>::block_height_;
162   using IntraPredTestBase<bitdepth, Pixel>::intra_pred_mem_;
163 
164   enum Zone { kZone1, kZone2, kZone3, kNumZones };
165 
166   enum { kAngleDeltaStart = -9, kAngleDeltaStop = 9, kAngleDeltaStep = 3 };
167 
SetUp()168   void SetUp() override {
169     IntraPredTestBase<bitdepth, Pixel>::SetUp();
170     IntraPredDirectionalInit_C();
171 
172     const Dsp* const dsp = GetDspTable(bitdepth);
173     ASSERT_NE(dsp, nullptr);
174     base_directional_intra_pred_zone1_ = dsp->directional_intra_predictor_zone1;
175     base_directional_intra_pred_zone2_ = dsp->directional_intra_predictor_zone2;
176     base_directional_intra_pred_zone3_ = dsp->directional_intra_predictor_zone3;
177 
178     const testing::TestInfo* const test_info =
179         testing::UnitTest::GetInstance()->current_test_info();
180     const char* const test_case = test_info->test_suite_name();
181     if (absl::StartsWith(test_case, "C/")) {
182       base_directional_intra_pred_zone1_ = nullptr;
183       base_directional_intra_pred_zone2_ = nullptr;
184       base_directional_intra_pred_zone3_ = nullptr;
185     } else if (absl::StartsWith(test_case, "NEON/")) {
186       IntraPredDirectionalInit_NEON();
187     } else if (absl::StartsWith(test_case, "SSE41/")) {
188       if ((GetCpuInfo() & kSSE4_1) != 0) {
189         IntraPredDirectionalInit_SSE4_1();
190       }
191     } else {
192       FAIL() << "Unrecognized architecture prefix in test case name: "
193              << test_case;
194     }
195 
196     cur_directional_intra_pred_zone1_ = dsp->directional_intra_predictor_zone1;
197     cur_directional_intra_pred_zone2_ = dsp->directional_intra_predictor_zone2;
198     cur_directional_intra_pred_zone3_ = dsp->directional_intra_predictor_zone3;
199 
200     // Skip functions that haven't been specialized for this particular
201     // architecture.
202     if (cur_directional_intra_pred_zone1_ ==
203         base_directional_intra_pred_zone1_) {
204       cur_directional_intra_pred_zone1_ = nullptr;
205     }
206     if (cur_directional_intra_pred_zone2_ ==
207         base_directional_intra_pred_zone2_) {
208       cur_directional_intra_pred_zone2_ = nullptr;
209     }
210     if (cur_directional_intra_pred_zone3_ ==
211         base_directional_intra_pred_zone3_) {
212       cur_directional_intra_pred_zone3_ = nullptr;
213     }
214   }
215 
IsEdgeUpsampled(int delta,const int filter_type) const216   bool IsEdgeUpsampled(int delta, const int filter_type) const {
217     delta = std::abs(delta);
218     if (delta == 0 || delta >= 40) return false;
219     const int block_wh = block_width_ + block_height_;
220     return (filter_type == 1) ? block_wh <= 8 : block_wh <= 16;
221   }
222 
223   // Returns the minimum and maximum (exclusive) range of angles that the
224   // predictor should be applied to.
GetZoneAngleRange(const Zone zone,int * const min_angle,int * const max_angle) const225   void GetZoneAngleRange(const Zone zone, int* const min_angle,
226                          int* const max_angle) const {
227     ASSERT_NE(min_angle, nullptr);
228     ASSERT_NE(max_angle, nullptr);
229     switch (zone) {
230         // The overall minimum angle comes from mode D45_PRED, yielding:
231         // min_angle = 45-(MAX_ANGLE_DELTA*ANGLE_STEP) = 36
232         // The overall maximum angle comes from mode D203_PRED, yielding:
233         // max_angle = 203+(MAX_ANGLE_DELTA*ANGLE_STEP) = 212
234         // The angles 180 and 90 are not permitted because they correspond to
235         // V_PRED and H_PRED, which are handled in distinct functions.
236       case kZone1:
237         *min_angle = 36;
238         *max_angle = 87;
239         break;
240       case kZone2:
241         *min_angle = 93;
242         *max_angle = 177;
243         break;
244       case kZone3:
245         *min_angle = 183;
246         *max_angle = 212;
247         break;
248       case kNumZones:
249         FAIL() << "Invalid zone value: " << zone;
250         break;
251     }
252   }
253 
254   // These tests modify intra_pred_mem_.
255   void TestSpeed(const char* const digests[kNumDirectionalIntraPredictors],
256                  Zone zone, int num_runs);
257   void TestSaturatedValues();
258   void TestRandomValues();
259 
260   DirectionalIntraPredictorZone1Func base_directional_intra_pred_zone1_;
261   DirectionalIntraPredictorZone2Func base_directional_intra_pred_zone2_;
262   DirectionalIntraPredictorZone3Func base_directional_intra_pred_zone3_;
263   DirectionalIntraPredictorZone1Func cur_directional_intra_pred_zone1_;
264   DirectionalIntraPredictorZone2Func cur_directional_intra_pred_zone2_;
265   DirectionalIntraPredictorZone3Func cur_directional_intra_pred_zone3_;
266 };
267 
268 template <int bitdepth, typename Pixel>
TestSpeed(const char * const digests[kNumDirectionalIntraPredictors],const Zone zone,const int num_runs)269 void DirectionalIntraPredTest<bitdepth, Pixel>::TestSpeed(
270     const char* const digests[kNumDirectionalIntraPredictors], const Zone zone,
271     const int num_runs) {
272   switch (zone) {
273     case kZone1:
274       if (cur_directional_intra_pred_zone1_ == nullptr) return;
275       break;
276     case kZone2:
277       if (cur_directional_intra_pred_zone2_ == nullptr) return;
278       break;
279     case kZone3:
280       if (cur_directional_intra_pred_zone3_ == nullptr) return;
281       break;
282     case kNumZones:
283       FAIL() << "Invalid zone value: " << zone;
284       break;
285   }
286   ASSERT_NE(digests, nullptr);
287   const Pixel* const left = intra_pred_mem_.left_mem + 16;
288   const Pixel* const top = intra_pred_mem_.top_mem + 16;
289 
290   libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
291   intra_pred_mem_.Reset(&rnd);
292 
293   // Allocate separate blocks for each angle + filter + upsampled combination.
294   // Add a 1 pixel right border to test for overwrites.
295   static constexpr int kMaxZoneAngles = 27;  // zone 2
296   static constexpr int kMaxFilterTypes = 2;
297   static constexpr int kBlockBorder = 1;
298   static constexpr int kBorderSize =
299       kBlockBorder * kMaxZoneAngles * kMaxFilterTypes;
300   const int ref_stride =
301       kMaxZoneAngles * kMaxFilterTypes * block_width_ + kBorderSize;
302   const size_t ref_alloc_size = sizeof(Pixel) * ref_stride * block_height_;
303 
304   using AlignedPtr = std::unique_ptr<Pixel[], decltype(&AlignedFree)>;
305   AlignedPtr ref_src(static_cast<Pixel*>(AlignedAlloc(16, ref_alloc_size)),
306                      &AlignedFree);
307   AlignedPtr dest(static_cast<Pixel*>(AlignedAlloc(16, ref_alloc_size)),
308                   &AlignedFree);
309   ASSERT_NE(ref_src, nullptr);
310   ASSERT_NE(dest, nullptr);
311 
312   const int mask = (1 << bitdepth) - 1;
313   for (size_t i = 0; i < ref_alloc_size / sizeof(ref_src[0]); ++i) {
314     ref_src[i] = rnd.Rand16() & mask;
315   }
316 
317   int min_angle = 0, max_angle = 0;
318   ASSERT_NO_FATAL_FAILURE(GetZoneAngleRange(zone, &min_angle, &max_angle));
319 
320   absl::Duration elapsed_time;
321   for (int run = 0; run < num_runs; ++run) {
322     Pixel* dst = dest.get();
323     memcpy(dst, ref_src.get(), ref_alloc_size);
324     for (const auto& base_angle : kBaseAngles) {
325       for (int filter_type = 0; filter_type <= 1; ++filter_type) {
326         for (int angle_delta = kAngleDeltaStart; angle_delta <= kAngleDeltaStop;
327              angle_delta += kAngleDeltaStep) {
328           const int predictor_angle = base_angle + angle_delta;
329           if (predictor_angle < min_angle || predictor_angle > max_angle) {
330             continue;
331           }
332 
333           ASSERT_GT(predictor_angle, 0) << "base_angle: " << base_angle
334                                         << " angle_delta: " << angle_delta;
335           const bool upsampled_left =
336               IsEdgeUpsampled(predictor_angle - 180, filter_type);
337           const bool upsampled_top =
338               IsEdgeUpsampled(predictor_angle - 90, filter_type);
339           const ptrdiff_t stride = ref_stride * sizeof(ref_src[0]);
340           if (predictor_angle < 90) {
341             ASSERT_EQ(zone, kZone1);
342             const int xstep =
343                 GetDirectionalIntraPredictorDerivative(predictor_angle);
344             const absl::Time start = absl::Now();
345             cur_directional_intra_pred_zone1_(dst, stride, top, block_width_,
346                                               block_height_, xstep,
347                                               upsampled_top);
348             elapsed_time += absl::Now() - start;
349           } else if (predictor_angle < 180) {
350             ASSERT_EQ(zone, kZone2);
351             const int xstep =
352                 GetDirectionalIntraPredictorDerivative(180 - predictor_angle);
353             const int ystep =
354                 GetDirectionalIntraPredictorDerivative(predictor_angle - 90);
355             const absl::Time start = absl::Now();
356             cur_directional_intra_pred_zone2_(
357                 dst, stride, top, left, block_width_, block_height_, xstep,
358                 ystep, upsampled_top, upsampled_left);
359             elapsed_time += absl::Now() - start;
360           } else {
361             ASSERT_EQ(zone, kZone3);
362             ASSERT_LT(predictor_angle, 270);
363             const int ystep =
364                 GetDirectionalIntraPredictorDerivative(270 - predictor_angle);
365             const absl::Time start = absl::Now();
366             cur_directional_intra_pred_zone3_(dst, stride, left, block_width_,
367                                               block_height_, ystep,
368                                               upsampled_left);
369             elapsed_time += absl::Now() - start;
370           }
371           dst += block_width_ + kBlockBorder;
372         }
373       }
374     }
375   }
376 
377   test_utils::CheckMd5Digest(ToString(tx_size_), kDirectionalPredNames[zone],
378                              digests[zone], dest.get(), ref_alloc_size,
379                              elapsed_time);
380 }
381 
382 template <int bitdepth, typename Pixel>
TestSaturatedValues()383 void DirectionalIntraPredTest<bitdepth, Pixel>::TestSaturatedValues() {
384   const Pixel* const left = intra_pred_mem_.left_mem + 16;
385   const Pixel* const top = intra_pred_mem_.top_mem + 16;
386   const auto kMaxPixel = static_cast<Pixel>((1 << bitdepth) - 1);
387   intra_pred_mem_.Set(kMaxPixel);
388 
389   for (int i = kZone1; i < kNumZones; ++i) {
390     switch (i) {
391       case kZone1:
392         if (cur_directional_intra_pred_zone1_ == nullptr) continue;
393         break;
394       case kZone2:
395         if (cur_directional_intra_pred_zone2_ == nullptr) continue;
396         break;
397       case kZone3:
398         if (cur_directional_intra_pred_zone3_ == nullptr) continue;
399         break;
400       case kNumZones:
401         FAIL() << "Invalid zone value: " << i;
402         break;
403     }
404     int min_angle = 0, max_angle = 0;
405     ASSERT_NO_FATAL_FAILURE(
406         GetZoneAngleRange(static_cast<Zone>(i), &min_angle, &max_angle));
407 
408     for (const auto& base_angle : kBaseAngles) {
409       for (int filter_type = 0; filter_type <= 1; ++filter_type) {
410         for (int angle_delta = kAngleDeltaStart; angle_delta <= kAngleDeltaStop;
411              angle_delta += kAngleDeltaStep) {
412           const int predictor_angle = base_angle + angle_delta;
413           if (predictor_angle <= min_angle || predictor_angle >= max_angle) {
414             continue;
415           }
416           ASSERT_GT(predictor_angle, 0) << "base_angle: " << base_angle
417                                         << " angle_delta: " << angle_delta;
418 
419           memcpy(intra_pred_mem_.dst, intra_pred_mem_.ref_src,
420                  sizeof(intra_pred_mem_.dst));
421 
422           const bool upsampled_left =
423               IsEdgeUpsampled(predictor_angle - 180, filter_type);
424           const bool upsampled_top =
425               IsEdgeUpsampled(predictor_angle - 90, filter_type);
426           const ptrdiff_t stride = kMaxBlockSize * sizeof(Pixel);
427           if (predictor_angle < 90) {
428             const int xstep =
429                 GetDirectionalIntraPredictorDerivative(predictor_angle);
430             cur_directional_intra_pred_zone1_(intra_pred_mem_.dst, stride, top,
431                                               block_width_, block_height_,
432                                               xstep, upsampled_top);
433           } else if (predictor_angle < 180) {
434             const int xstep =
435                 GetDirectionalIntraPredictorDerivative(180 - predictor_angle);
436             const int ystep =
437                 GetDirectionalIntraPredictorDerivative(predictor_angle - 90);
438             cur_directional_intra_pred_zone2_(
439                 intra_pred_mem_.dst, stride, top, left, block_width_,
440                 block_height_, xstep, ystep, upsampled_top, upsampled_left);
441           } else {
442             ASSERT_LT(predictor_angle, 270);
443             const int ystep =
444                 GetDirectionalIntraPredictorDerivative(270 - predictor_angle);
445             cur_directional_intra_pred_zone3_(intra_pred_mem_.dst, stride, left,
446                                               block_width_, block_height_,
447                                               ystep, upsampled_left);
448           }
449 
450           if (!test_utils::CompareBlocks(
451                   intra_pred_mem_.dst, intra_pred_mem_.ref_src, block_width_,
452                   block_height_, kMaxBlockSize, kMaxBlockSize, true)) {
453             ADD_FAILURE() << "Expected " << kDirectionalPredNames[i]
454                           << " (angle: " << predictor_angle
455                           << " filter type: " << filter_type
456                           << ") to produce a block containing '"
457                           << static_cast<int>(kMaxPixel) << "'";
458             return;
459           }
460         }
461       }
462     }
463   }
464 }
465 
466 template <int bitdepth, typename Pixel>
TestRandomValues()467 void DirectionalIntraPredTest<bitdepth, Pixel>::TestRandomValues() {
468   const Pixel* const left = intra_pred_mem_.left_mem + 16;
469   const Pixel* const top = intra_pred_mem_.top_mem + 16;
470   // Use an alternate seed to differentiate this test from TestSpeed().
471   libvpx_test::ACMRandom rnd(test_utils::kAlternateDeterministicSeed);
472 
473   for (int i = kZone1; i < kNumZones; ++i) {
474     // Only run when there is a reference version (base) and a different
475     // optimized version (cur).
476     switch (i) {
477       case kZone1:
478         if (base_directional_intra_pred_zone1_ == nullptr ||
479             cur_directional_intra_pred_zone1_ == nullptr) {
480           continue;
481         }
482         break;
483       case kZone2:
484         if (base_directional_intra_pred_zone2_ == nullptr ||
485             cur_directional_intra_pred_zone2_ == nullptr) {
486           continue;
487         }
488         break;
489       case kZone3:
490         if (base_directional_intra_pred_zone3_ == nullptr ||
491             cur_directional_intra_pred_zone3_ == nullptr) {
492           continue;
493         }
494         break;
495       case kNumZones:
496         FAIL() << "Invalid zone value: " << i;
497         break;
498     }
499     int min_angle = 0, max_angle = 0;
500     ASSERT_NO_FATAL_FAILURE(
501         GetZoneAngleRange(static_cast<Zone>(i), &min_angle, &max_angle));
502 
503     for (const auto& base_angle : kBaseAngles) {
504       for (int n = 0; n < 1000; ++n) {
505         for (int filter_type = 0; filter_type <= 1; ++filter_type) {
506           for (int angle_delta = kAngleDeltaStart;
507                angle_delta <= kAngleDeltaStop; angle_delta += kAngleDeltaStep) {
508             const int predictor_angle = base_angle + angle_delta;
509             if (predictor_angle <= min_angle || predictor_angle >= max_angle) {
510               continue;
511             }
512             ASSERT_GT(predictor_angle, 0) << "base_angle: " << base_angle
513                                           << " angle_delta: " << angle_delta;
514 
515             intra_pred_mem_.Reset(&rnd);
516             memcpy(intra_pred_mem_.dst, intra_pred_mem_.ref_src,
517                    sizeof(intra_pred_mem_.dst));
518 
519             const bool upsampled_left =
520                 IsEdgeUpsampled(predictor_angle - 180, filter_type);
521             const bool upsampled_top =
522                 IsEdgeUpsampled(predictor_angle - 90, filter_type);
523             const ptrdiff_t stride = kMaxBlockSize * sizeof(Pixel);
524             if (predictor_angle < 90) {
525               const int xstep =
526                   GetDirectionalIntraPredictorDerivative(predictor_angle);
527               base_directional_intra_pred_zone1_(
528                   intra_pred_mem_.ref_src, stride, top, block_width_,
529                   block_height_, xstep, upsampled_top);
530               cur_directional_intra_pred_zone1_(
531                   intra_pred_mem_.dst, stride, top, block_width_, block_height_,
532                   xstep, upsampled_top);
533             } else if (predictor_angle < 180) {
534               const int xstep =
535                   GetDirectionalIntraPredictorDerivative(180 - predictor_angle);
536               const int ystep =
537                   GetDirectionalIntraPredictorDerivative(predictor_angle - 90);
538               base_directional_intra_pred_zone2_(
539                   intra_pred_mem_.ref_src, stride, top, left, block_width_,
540                   block_height_, xstep, ystep, upsampled_top, upsampled_left);
541               cur_directional_intra_pred_zone2_(
542                   intra_pred_mem_.dst, stride, top, left, block_width_,
543                   block_height_, xstep, ystep, upsampled_top, upsampled_left);
544             } else {
545               ASSERT_LT(predictor_angle, 270);
546               const int ystep =
547                   GetDirectionalIntraPredictorDerivative(270 - predictor_angle);
548               base_directional_intra_pred_zone3_(
549                   intra_pred_mem_.ref_src, stride, left, block_width_,
550                   block_height_, ystep, upsampled_left);
551               cur_directional_intra_pred_zone3_(
552                   intra_pred_mem_.dst, stride, left, block_width_,
553                   block_height_, ystep, upsampled_left);
554             }
555 
556             if (!test_utils::CompareBlocks(
557                     intra_pred_mem_.dst, intra_pred_mem_.ref_src, block_width_,
558                     block_height_, kMaxBlockSize, kMaxBlockSize, true)) {
559               ADD_FAILURE() << "Result from optimized version of "
560                             << kDirectionalPredNames[i]
561                             << " differs from reference at angle "
562                             << predictor_angle << " with filter type "
563                             << filter_type << " in iteration #" << n;
564               return;
565             }
566           }
567         }
568       }
569     }
570   }
571 }
572 
573 using DirectionalIntraPredTest8bpp = DirectionalIntraPredTest<8, uint8_t>;
574 
GetDirectionalIntraPredDigests8bpp(TransformSize tx_size)575 const char* const* GetDirectionalIntraPredDigests8bpp(TransformSize tx_size) {
576   static const char* const kDigests4x4[kNumDirectionalIntraPredictors] = {
577       "9cfc1da729ad08682e165826c29b280b",
578       "bb73539c7afbda7bddd2184723b932d6",
579       "9d2882800ffe948196e984a26a2da72c",
580   };
581   static const char* const kDigests4x8[kNumDirectionalIntraPredictors] = {
582       "090efe6f83cc6fa301f65d3bbd5c38d2",
583       "d0fba4cdfb90f8bd293a94cae9db1a15",
584       "f7ad0eeab4389d0baa485d30fec87617",
585   };
586   static const char* const kDigests4x16[kNumDirectionalIntraPredictors] = {
587       "1d32b33c75fe85248c48cdc8caa78d84",
588       "7000e18159443d366129a6cc6ef8fcee",
589       "06c02fac5f8575f687abb3f634eb0b4c",
590   };
591   static const char* const kDigests8x4[kNumDirectionalIntraPredictors] = {
592       "1b591799685bc135982114b731293f78",
593       "5cd9099acb9f7b2618dafa6712666580",
594       "d023883efede88f99c19d006044d9fa1",
595   };
596   static const char* const kDigests8x8[kNumDirectionalIntraPredictors] = {
597       "f1e46ecf62a2516852f30c5025adb7ea",
598       "864442a209c16998065af28d8cdd839a",
599       "411a6e554868982af577de69e53f12e8",
600   };
601   static const char* const kDigests8x16[kNumDirectionalIntraPredictors] = {
602       "89278302be913a85cfb06feaea339459",
603       "6c42f1a9493490cd4529fd40729cec3c",
604       "2516b5e1c681e5dcb1acedd5f3d41106",
605   };
606   static const char* const kDigests8x32[kNumDirectionalIntraPredictors] = {
607       "aea7078f3eeaa8afbfe6c959c9e676f1",
608       "cad30babf12729dda5010362223ba65c",
609       "ff384ebdc832007775af418a2aae1463",
610   };
611   static const char* const kDigests16x4[kNumDirectionalIntraPredictors] = {
612       "964a821c313c831e12f4d32e616c0b55",
613       "adf6dad3a84ab4d16c16eea218bec57a",
614       "a54fa008d43895e523474686c48a81c2",
615   };
616   static const char* const kDigests16x8[kNumDirectionalIntraPredictors] = {
617       "fe2851b4e4f9fcf924cf17d50415a4c0",
618       "50a0e279c481437ff315d08eb904c733",
619       "0682065c8fb6cbf9be4949316c87c9e5",
620   };
621   static const char* const kDigests16x16[kNumDirectionalIntraPredictors] = {
622       "ef15503b1943642e7a0bace1616c0e11",
623       "bf1a4d3f855f1072a902a88ec6ce0350",
624       "7e87a03e29cd7fd843fd71b729a18f3f",
625   };
626   static const char* const kDigests16x32[kNumDirectionalIntraPredictors] = {
627       "f7b636615d2e5bf289b5db452a6f188d",
628       "e95858c532c10d00b0ce7a02a02121dd",
629       "34a18ccf58ef490f32268e85ce8c7de4",
630   };
631   static const char* const kDigests16x64[kNumDirectionalIntraPredictors] = {
632       "b250099986c2fab9670748598058846b",
633       "f25d80af4da862a9b6b72979f1e17cb4",
634       "5347dc7bc346733b4887f6c8ad5e0898",
635   };
636   static const char* const kDigests32x8[kNumDirectionalIntraPredictors] = {
637       "72e4c9f8af043b1cb1263490351818ab",
638       "1fc010d2df011b9e4e3d0957107c78df",
639       "f4cbfa3ca941ef08b972a68d7e7bafc4",
640   };
641   static const char* const kDigests32x16[kNumDirectionalIntraPredictors] = {
642       "37e5a1aaf7549d2bce08eece9d20f0f6",
643       "6a2794025d0aca414ab17baa3cf8251a",
644       "63dd37a6efdc91eeefef166c99ce2db1",
645   };
646   static const char* const kDigests32x32[kNumDirectionalIntraPredictors] = {
647       "198aabc958992eb49cceab97d1acb43e",
648       "aee88b6c8bacfcf38799fe338e6c66e7",
649       "01e8f8f96696636f6d79d33951907a16",
650   };
651   static const char* const kDigests32x64[kNumDirectionalIntraPredictors] = {
652       "0611390202c4f90f7add7aec763ded58",
653       "960240c7ceda2ccfac7c90b71460578a",
654       "7e7d97594aab8ad56e8c01c340335607",
655   };
656   static const char* const kDigests64x16[kNumDirectionalIntraPredictors] = {
657       "7e1f567e7fc510757f2d89d638bc826f",
658       "c929d687352ce40a58670be2ce3c8c90",
659       "f6881e6a9ba3c3d3d730b425732656b1",
660   };
661   static const char* const kDigests64x32[kNumDirectionalIntraPredictors] = {
662       "27b4c2a7081d4139f22003ba8b6dfdf2",
663       "301e82740866b9274108a04c872fa848",
664       "98d3aa4fef838f4abf00dac33806659f",
665   };
666   static const char* const kDigests64x64[kNumDirectionalIntraPredictors] = {
667       "b31816db8fade3accfd975b21aa264c7",
668       "2adce01a03b9452633d5830e1a9b4e23",
669       "7b988fadba8b07c36e88d7be6b270494",
670   };
671 
672   switch (tx_size) {
673     case kTransformSize4x4:
674       return kDigests4x4;
675     case kTransformSize4x8:
676       return kDigests4x8;
677     case kTransformSize4x16:
678       return kDigests4x16;
679     case kTransformSize8x4:
680       return kDigests8x4;
681     case kTransformSize8x8:
682       return kDigests8x8;
683     case kTransformSize8x16:
684       return kDigests8x16;
685     case kTransformSize8x32:
686       return kDigests8x32;
687     case kTransformSize16x4:
688       return kDigests16x4;
689     case kTransformSize16x8:
690       return kDigests16x8;
691     case kTransformSize16x16:
692       return kDigests16x16;
693     case kTransformSize16x32:
694       return kDigests16x32;
695     case kTransformSize16x64:
696       return kDigests16x64;
697     case kTransformSize32x8:
698       return kDigests32x8;
699     case kTransformSize32x16:
700       return kDigests32x16;
701     case kTransformSize32x32:
702       return kDigests32x32;
703     case kTransformSize32x64:
704       return kDigests32x64;
705     case kTransformSize64x16:
706       return kDigests64x16;
707     case kTransformSize64x32:
708       return kDigests64x32;
709     case kTransformSize64x64:
710       return kDigests64x64;
711     default:
712       ADD_FAILURE() << "Unknown transform size: " << tx_size;
713       return nullptr;
714   }
715 }
716 
// Benchmark (disabled by default). The number of runs is scaled by the block
// area; the NEON build uses half the budget of the other builds.
TEST_P(DirectionalIntraPredTest8bpp, DISABLED_Speed) {
#if LIBGAV1_ENABLE_NEON
  constexpr double kRunBudget = 2e7;
#else
  constexpr double kRunBudget = 4e7;
#endif
  const int num_runs =
      static_cast<int>(kRunBudget / (block_width_ * block_height_));
  for (int zone = kZone1; zone < kNumZones; ++zone) {
    TestSpeed(GetDirectionalIntraPredDigests8bpp(tx_size_),
              static_cast<Zone>(zone), num_runs);
  }
}
728 
// Calls TestSpeed() with a single run for every zone in [kZone1, kNumZones),
// handing it the 8bpp digest table for the current transform size.
// NOTE(review): presumably TestSpeed() compares its output against these
// digests -- confirm in the fixture.
TEST_P(DirectionalIntraPredTest8bpp, FixedInput) {
  constexpr int kSingleRun = 1;
  for (int zone_index = kZone1; zone_index < kNumZones; ++zone_index) {
    const auto zone = static_cast<Zone>(zone_index);
    TestSpeed(GetDirectionalIntraPredDigests8bpp(tx_size_), zone, kSingleRun);
  }
}
735 
// Delegates to the fixture's TestSaturatedValues() for the 8bpp suite.
TEST_P(DirectionalIntraPredTest8bpp, Overflow) {
  TestSaturatedValues();
}
// Delegates to the fixture's TestRandomValues() for the 8bpp suite.
TEST_P(DirectionalIntraPredTest8bpp, Random) {
  TestRandomValues();
}
738 
739 //------------------------------------------------------------------------------
740 #if LIBGAV1_MAX_BITDEPTH >= 10
741 
// Fixture alias used by the 10bpp tests below: the DirectionalIntraPredTest
// template instantiated with <10, uint16_t> (presumably bitdepth and pixel
// type -- confirm against the template declaration).
using DirectionalIntraPredTest10bpp = DirectionalIntraPredTest<10, uint16_t>;
743 
GetDirectionalIntraPredDigests10bpp(TransformSize tx_size)744 const char* const* GetDirectionalIntraPredDigests10bpp(TransformSize tx_size) {
745   static const char* const kDigests4x4[kNumDirectionalIntraPredictors] = {
746       "a683f4d7ccd978737615f61ecb4d638d",
747       "90c94374eaf7e9501f197863937b8639",
748       "0d3969cd081523ac6a906eecc7980c43",
749   };
750   static const char* const kDigests4x8[kNumDirectionalIntraPredictors] = {
751       "c3ffa2979b325644e4a56c882fe27347",
752       "1f61f5ee413a9a3b8d1d93869ec2aee0",
753       "4795ea944779ec4a783408769394d874",
754   };
755   static const char* const kDigests4x16[kNumDirectionalIntraPredictors] = {
756       "45c3282c9aa51024c1d64a40f230aa45",
757       "5cd47dd69f8bd0b15365a0c5cfc0a49a",
758       "06336c507b05f98c1d6a21abc43e6182",
759   };
760   static const char* const kDigests8x4[kNumDirectionalIntraPredictors] = {
761       "7370476ff0abbdc5e92f811b8879c861",
762       "a239a50adb28a4791b52a0dfff3bee06",
763       "4779a17f958a9ca04e8ec08c5aba1d36",
764   };
765   static const char* const kDigests8x8[kNumDirectionalIntraPredictors] = {
766       "305463f346c376594f82aad8304e0362",
767       "0cd481e5bda286c87a645417569fd948",
768       "48c7899dc9b7163b0b1f61b3a2b4b73e",
769   };
770   static const char* const kDigests8x16[kNumDirectionalIntraPredictors] = {
771       "5c18fd5339be90628c82b1fb6af50d5e",
772       "35eaa566ebd3bb7c903cfead5dc9ac78",
773       "9fdb0e790e5965810d02c02713c84071",
774   };
775   static const char* const kDigests8x32[kNumDirectionalIntraPredictors] = {
776       "2168d6cc858c704748b7b343ced2ac3a",
777       "1d3ce273107447faafd2e55877e48ffb",
778       "d344164049d1fe9b65a3ae8764bbbd37",
779   };
780   static const char* const kDigests16x4[kNumDirectionalIntraPredictors] = {
781       "dcef2cf51abe3fe150f388a14c762d30",
782       "6a810b289b1c14f8eab8ca1274e91ecd",
783       "c94da7c11f3fb11963d85c8804fce2d9",
784   };
785   static const char* const kDigests16x8[kNumDirectionalIntraPredictors] = {
786       "50a0d08b0d99b7a574bad2cfb36efc39",
787       "2dcb55874db39da70c8ca1318559f9fe",
788       "6390bcd30ff3bc389ecc0a0952bea531",
789   };
790   static const char* const kDigests16x16[kNumDirectionalIntraPredictors] = {
791       "7146c83c2620935606d49f3cb5876f41",
792       "2318ddf30c070a53c9b9cf199cd1b2c5",
793       "e9042e2124925aa7c1b6110617cb10e8",
794   };
795   static const char* const kDigests16x32[kNumDirectionalIntraPredictors] = {
796       "c970f401de7b7c5bb4e3ad447fcbef8f",
797       "a18cc70730eecdaa31dbcf4306ff490f",
798       "32c1528ad4a576a2210399d6b4ccd46e",
799   };
800   static const char* const kDigests16x64[kNumDirectionalIntraPredictors] = {
801       "00b3f0007da2e5d01380594a3d7162d5",
802       "1971af519e4a18967b7311f93efdd1b8",
803       "e6139769ce5a9c4982cfab9363004516",
804   };
805   static const char* const kDigests32x8[kNumDirectionalIntraPredictors] = {
806       "08107ad971179cc9f465ae5966bd4901",
807       "b215212a3c0dfe9182c4f2e903d731f7",
808       "791274416a0da87c674e1ae318b3ce09",
809   };
810   static const char* const kDigests32x16[kNumDirectionalIntraPredictors] = {
811       "94ea6cccae35b5d08799aa003ac08ccf",
812       "ae105e20e63fb55d4fd9d9e59dc62dde",
813       "973d0b2358ea585e4f486e7e645c5310",
814   };
815   static const char* const kDigests32x32[kNumDirectionalIntraPredictors] = {
816       "d14c695c4853ddf5e5d8256bc1d1ed60",
817       "6bd0ebeb53adecc11442b1218b870cb7",
818       "e03bc402a9999aba8272275dce93e89f",
819   };
820   static const char* const kDigests32x64[kNumDirectionalIntraPredictors] = {
821       "b21a8a8723758392ee659eeeae518a1e",
822       "e50285454896210ce44d6f04dfde05a7",
823       "f0f8ea0c6c2acc8d7d390927c3a90370",
824   };
825   static const char* const kDigests64x16[kNumDirectionalIntraPredictors] = {
826       "ce51db16fd4fa56e601631397b098c89",
827       "aa87a8635e02c1e91d13158c61e443f6",
828       "4c1ee3afd46ef34bd711a34d0bf86f13",
829   };
830   static const char* const kDigests64x32[kNumDirectionalIntraPredictors] = {
831       "25aaf5971e24e543e3e69a47254af777",
832       "eb6f444b3df127d69460778ab5bf8fc1",
833       "2f846cc0d506f90c0a58438600819817",
834   };
835   static const char* const kDigests64x64[kNumDirectionalIntraPredictors] = {
836       "b26ce5b5f4b5d4a438b52e5987877fb8",
837       "35721a00a70938111939cf69988d928e",
838       "0af7ec35939483fac82c246a13845806",
839   };
840 
841   switch (tx_size) {
842     case kTransformSize4x4:
843       return kDigests4x4;
844     case kTransformSize4x8:
845       return kDigests4x8;
846     case kTransformSize4x16:
847       return kDigests4x16;
848     case kTransformSize8x4:
849       return kDigests8x4;
850     case kTransformSize8x8:
851       return kDigests8x8;
852     case kTransformSize8x16:
853       return kDigests8x16;
854     case kTransformSize8x32:
855       return kDigests8x32;
856     case kTransformSize16x4:
857       return kDigests16x4;
858     case kTransformSize16x8:
859       return kDigests16x8;
860     case kTransformSize16x16:
861       return kDigests16x16;
862     case kTransformSize16x32:
863       return kDigests16x32;
864     case kTransformSize16x64:
865       return kDigests16x64;
866     case kTransformSize32x8:
867       return kDigests32x8;
868     case kTransformSize32x16:
869       return kDigests32x16;
870     case kTransformSize32x32:
871       return kDigests32x32;
872     case kTransformSize32x64:
873       return kDigests32x64;
874     case kTransformSize64x16:
875       return kDigests64x16;
876     case kTransformSize64x32:
877       return kDigests64x32;
878     case kTransformSize64x64:
879       return kDigests64x64;
880     default:
881       ADD_FAILURE() << "Unknown transform size: " << tx_size;
882       return nullptr;
883   }
884 }
885 
// Benchmark entry point (disabled by default through the DISABLED_ prefix).
// Calls TestSpeed() once per zone in [kZone1, kNumZones), with a run count
// derived from a fixed pixel budget divided by the block area.
TEST_P(DirectionalIntraPredTest10bpp, DISABLED_Speed) {
  // The budget is halved when LIBGAV1_ENABLE_NEON is set.
#if LIBGAV1_ENABLE_NEON
  constexpr double kPixelBudget = 2e7;
#else
  constexpr double kPixelBudget = 4e7;
#endif
  const int num_runs =
      static_cast<int>(kPixelBudget / (block_width_ * block_height_));
  for (int zone_index = kZone1; zone_index < kNumZones; ++zone_index) {
    const auto zone = static_cast<Zone>(zone_index);
    TestSpeed(GetDirectionalIntraPredDigests10bpp(tx_size_), zone, num_runs);
  }
}
897 
// Calls TestSpeed() with a single run for every zone in [kZone1, kNumZones),
// handing it the 10bpp digest table for the current transform size.
// NOTE(review): presumably TestSpeed() compares its output against these
// digests -- confirm in the fixture.
TEST_P(DirectionalIntraPredTest10bpp, FixedInput) {
  constexpr int kSingleRun = 1;
  for (int zone_index = kZone1; zone_index < kNumZones; ++zone_index) {
    const auto zone = static_cast<Zone>(zone_index);
    TestSpeed(GetDirectionalIntraPredDigests10bpp(tx_size_), zone, kSingleRun);
  }
}
904 
// Delegates to the fixture's TestSaturatedValues() for the 10bpp suite.
TEST_P(DirectionalIntraPredTest10bpp, Overflow) {
  TestSaturatedValues();
}
// Delegates to the fixture's TestRandomValues() for the 10bpp suite.
TEST_P(DirectionalIntraPredTest10bpp, Random) {
  TestRandomValues();
}
907 
908 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
909 
// Transform sizes used as the parameter set for every test suite instantiated
// below (passed to testing::ValuesIn()).
constexpr TransformSize kTransformSizes[] = {
    kTransformSize4x4,   kTransformSize4x8,   kTransformSize4x16,
    kTransformSize8x4,   kTransformSize8x8,   kTransformSize8x16,
    kTransformSize8x32,  kTransformSize16x4,  kTransformSize16x8,
    kTransformSize16x16, kTransformSize16x32, kTransformSize16x64,
    kTransformSize32x8,  kTransformSize32x16, kTransformSize32x32,
    kTransformSize32x64, kTransformSize64x16, kTransformSize64x32,
    kTransformSize64x64};
918 
// Instantiate the 8bpp suite over kTransformSizes once per instantiation
// prefix. "C" is always instantiated; "SSE41" and "NEON" are added only when
// the corresponding LIBGAV1_ENABLE_* macro is set.
INSTANTIATE_TEST_SUITE_P(C, DirectionalIntraPredTest8bpp,
                         testing::ValuesIn(kTransformSizes));
#if LIBGAV1_ENABLE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE41, DirectionalIntraPredTest8bpp,
                         testing::ValuesIn(kTransformSizes));
#endif  // LIBGAV1_ENABLE_SSE4_1
#if LIBGAV1_ENABLE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, DirectionalIntraPredTest8bpp,
                         testing::ValuesIn(kTransformSizes));
#endif  // LIBGAV1_ENABLE_NEON
929 
// Same instantiation scheme for the 10bpp suite; compiled only when
// LIBGAV1_MAX_BITDEPTH is at least 10.
#if LIBGAV1_MAX_BITDEPTH >= 10
INSTANTIATE_TEST_SUITE_P(C, DirectionalIntraPredTest10bpp,
                         testing::ValuesIn(kTransformSizes));
#if LIBGAV1_ENABLE_SSE4_1
INSTANTIATE_TEST_SUITE_P(SSE41, DirectionalIntraPredTest10bpp,
                         testing::ValuesIn(kTransformSizes));
#endif  // LIBGAV1_ENABLE_SSE4_1
#if LIBGAV1_ENABLE_NEON
INSTANTIATE_TEST_SUITE_P(NEON, DirectionalIntraPredTest10bpp,
                         testing::ValuesIn(kTransformSizes));
#endif  // LIBGAV1_ENABLE_NEON

#endif  // LIBGAV1_MAX_BITDEPTH >= 10
943 
944 }  // namespace
945 }  // namespace dsp
946 
operator <<(std::ostream & os,const TransformSize tx_size)947 static std::ostream& operator<<(std::ostream& os, const TransformSize tx_size) {
948   return os << ToString(tx_size);
949 }
950 
951 }  // namespace libgav1
952