1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <vector>
16 
17 #include "config/av1_rtcd.h"
18 
19 #include "test/acm_random.h"
20 #include "test/util.h"
21 #include "test/av1_txfm_test.h"
22 #include "av1/common/av1_txfm.h"
23 #include "av1/encoder/hybrid_fwd_txfm.h"
24 
25 using libaom_test::ACMRandom;
26 using libaom_test::TYPE_TXFM;
27 using libaom_test::bd;
28 using libaom_test::compute_avg_abs_error;
29 using libaom_test::input_base;
30 
31 using std::vector;
32 
33 namespace {
34 // tx_type_, tx_size_, max_error_, max_avg_error_
35 typedef ::testing::tuple<TX_TYPE, TX_SIZE, double, double> AV1FwdTxfm2dParam;
36 
37 class AV1FwdTxfm2d : public ::testing::TestWithParam<AV1FwdTxfm2dParam> {
38  public:
SetUp()39   virtual void SetUp() {
40     tx_type_ = GET_PARAM(0);
41     tx_size_ = GET_PARAM(1);
42     max_error_ = GET_PARAM(2);
43     max_avg_error_ = GET_PARAM(3);
44     count_ = 500;
45     TXFM_2D_FLIP_CFG fwd_txfm_flip_cfg;
46     av1_get_fwd_txfm_cfg(tx_type_, tx_size_, &fwd_txfm_flip_cfg);
47     amplify_factor_ = libaom_test::get_amplification_factor(tx_type_, tx_size_);
48     tx_width_ = tx_size_wide[fwd_txfm_flip_cfg.tx_size];
49     tx_height_ = tx_size_high[fwd_txfm_flip_cfg.tx_size];
50     ud_flip_ = fwd_txfm_flip_cfg.ud_flip;
51     lr_flip_ = fwd_txfm_flip_cfg.lr_flip;
52 
53     fwd_txfm_ = libaom_test::fwd_txfm_func_ls[tx_size_];
54     txfm2d_size_ = tx_width_ * tx_height_;
55     input_ = reinterpret_cast<int16_t *>(
56         aom_memalign(16, sizeof(input_[0]) * txfm2d_size_));
57     output_ = reinterpret_cast<int32_t *>(
58         aom_memalign(16, sizeof(output_[0]) * txfm2d_size_));
59     ref_input_ = reinterpret_cast<double *>(
60         aom_memalign(16, sizeof(ref_input_[0]) * txfm2d_size_));
61     ref_output_ = reinterpret_cast<double *>(
62         aom_memalign(16, sizeof(ref_output_[0]) * txfm2d_size_));
63   }
64 
RunFwdAccuracyCheck()65   void RunFwdAccuracyCheck() {
66     ACMRandom rnd(ACMRandom::DeterministicSeed());
67     double avg_abs_error = 0;
68     for (int ci = 0; ci < count_; ci++) {
69       for (int ni = 0; ni < txfm2d_size_; ++ni) {
70         input_[ni] = rnd.Rand16() % input_base;
71         ref_input_[ni] = static_cast<double>(input_[ni]);
72         output_[ni] = 0;
73         ref_output_[ni] = 0;
74       }
75 
76       fwd_txfm_(input_, output_, tx_width_, tx_type_, bd);
77 
78       if (lr_flip_ && ud_flip_) {
79         libaom_test::fliplrud(ref_input_, tx_width_, tx_height_, tx_width_);
80       } else if (lr_flip_) {
81         libaom_test::fliplr(ref_input_, tx_width_, tx_height_, tx_width_);
82       } else if (ud_flip_) {
83         libaom_test::flipud(ref_input_, tx_width_, tx_height_, tx_width_);
84       }
85 
86       libaom_test::reference_hybrid_2d(ref_input_, ref_output_, tx_type_,
87                                        tx_size_);
88 
89       double actual_max_error = 0;
90       for (int ni = 0; ni < txfm2d_size_; ++ni) {
91         ref_output_[ni] = round(ref_output_[ni]);
92         const double this_error =
93             fabs(output_[ni] - ref_output_[ni]) / amplify_factor_;
94         actual_max_error = AOMMAX(actual_max_error, this_error);
95       }
96       EXPECT_GE(max_error_, actual_max_error)
97           << "tx_size = " << tx_size_ << ", tx_type = " << tx_type_;
98       if (actual_max_error > max_error_) {  // exit early.
99         break;
100       }
101 
102       avg_abs_error += compute_avg_abs_error<int32_t, double>(
103           output_, ref_output_, txfm2d_size_);
104     }
105 
106     avg_abs_error /= amplify_factor_;
107     avg_abs_error /= count_;
108     EXPECT_GE(max_avg_error_, avg_abs_error)
109         << "tx_size = " << tx_size_ << ", tx_type = " << tx_type_;
110   }
111 
TearDown()112   virtual void TearDown() {
113     aom_free(input_);
114     aom_free(output_);
115     aom_free(ref_input_);
116     aom_free(ref_output_);
117   }
118 
119  private:
120   double max_error_;
121   double max_avg_error_;
122   int count_;
123   double amplify_factor_;
124   TX_TYPE tx_type_;
125   TX_SIZE tx_size_;
126   int tx_width_;
127   int tx_height_;
128   int txfm2d_size_;
129   FwdTxfm2dFunc fwd_txfm_;
130   int16_t *input_;
131   int32_t *output_;
132   double *ref_input_;
133   double *ref_output_;
134   int ud_flip_;  // flip upside down
135   int lr_flip_;  // flip left to right
136 };
137 
138 static double avg_error_ls[TX_SIZES_ALL] = {
139   0.5,   // 4x4 transform
140   0.5,   // 8x8 transform
141   1.2,   // 16x16 transform
142   6.1,   // 32x32 transform
143   3.4,   // 64x64 transform
144   0.57,  // 4x8 transform
145   0.68,  // 8x4 transform
146   0.92,  // 8x16 transform
147   1.1,   // 16x8 transform
148   4.1,   // 16x32 transform
149   6,     // 32x16 transform
150   3.5,   // 32x64 transform
151   5.7,   // 64x32 transform
152   0.6,   // 4x16 transform
153   0.9,   // 16x4 transform
154   1.2,   // 8x32 transform
155   1.7,   // 32x8 transform
156   2.0,   // 16x64 transform
157   4.7,   // 64x16 transform
158 };
159 
160 static double max_error_ls[TX_SIZES_ALL] = {
161   3,    // 4x4 transform
162   5,    // 8x8 transform
163   11,   // 16x16 transform
164   70,   // 32x32 transform
165   64,   // 64x64 transform
166   3.9,  // 4x8 transform
167   4.3,  // 8x4 transform
168   12,   // 8x16 transform
169   12,   // 16x8 transform
170   32,   // 16x32 transform
171   46,   // 32x16 transform
172   136,  // 32x64 transform
173   136,  // 64x32 transform
174   5,    // 4x16 transform
175   6,    // 16x4 transform
176   21,   // 8x32 transform
177   13,   // 32x8 transform
178   30,   // 16x64 transform
179   36,   // 64x16 transform
180 };
181 
GetTxfm2dParamList()182 vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() {
183   vector<AV1FwdTxfm2dParam> param_list;
184   for (int s = 0; s < TX_SIZES; ++s) {
185     const double max_error = max_error_ls[s];
186     const double avg_error = avg_error_ls[s];
187     for (int t = 0; t < TX_TYPES; ++t) {
188       const TX_TYPE tx_type = static_cast<TX_TYPE>(t);
189       const TX_SIZE tx_size = static_cast<TX_SIZE>(s);
190       if (libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) {
191         param_list.push_back(
192             AV1FwdTxfm2dParam(tx_type, tx_size, max_error, avg_error));
193       }
194     }
195   }
196   return param_list;
197 }
198 
199 INSTANTIATE_TEST_CASE_P(C, AV1FwdTxfm2d,
200                         ::testing::ValuesIn(GetTxfm2dParamList()));
201 
TEST_P(AV1FwdTxfm2d,RunFwdAccuracyCheck)202 TEST_P(AV1FwdTxfm2d, RunFwdAccuracyCheck) { RunFwdAccuracyCheck(); }
203 
// For every bit depth in libaom_test::bd_arr and every valid size/type pair,
// generates the column and row stage ranges of the forward transform
// configuration and hands them to libaom_test::txfm_stage_range_check together
// with the range limits for that bit depth.
TEST(AV1FwdTxfm2d, CfgTest) {
  for (int bd_idx = 0; bd_idx < BD_NUM; ++bd_idx) {
    int bit_depth = libaom_test::bd_arr[bd_idx];
    int8_t range_lo = libaom_test::low_range_arr[bd_idx];
    int8_t range_hi = libaom_test::high_range_arr[bd_idx];
    for (int size_idx = 0; size_idx < TX_SIZES_ALL; ++size_idx) {
      const TX_SIZE tx_size = static_cast<TX_SIZE>(size_idx);
      for (int type_idx = 0; type_idx < TX_TYPES; ++type_idx) {
        const TX_TYPE tx_type = static_cast<TX_TYPE>(type_idx);
        if (!libaom_test::IsTxSizeTypeValid(tx_size, tx_type)) continue;

        TXFM_2D_FLIP_CFG cfg;
        av1_get_fwd_txfm_cfg(tx_type, tx_size, &cfg);

        int8_t stage_range_col[MAX_TXFM_STAGE_NUM];
        int8_t stage_range_row[MAX_TXFM_STAGE_NUM];
        av1_gen_fwd_stage_range(stage_range_col, stage_range_row, &cfg,
                                bit_depth);

        // Column pass first, then row pass.
        libaom_test::txfm_stage_range_check(stage_range_col, cfg.stage_num_col,
                                            cfg.cos_bit_col, range_lo,
                                            range_hi);
        libaom_test::txfm_stage_range_check(stage_range_row, cfg.stage_num_row,
                                            cfg.cos_bit_row, range_lo,
                                            range_hi);
      }
    }
  }
}
232 
233 typedef void (*lowbd_fwd_txfm_func)(const int16_t *src_diff, tran_low_t *coeff,
234                                     int diff_stride, TxfmParam *txfm_param);
235 
AV1FwdTxfm2dMatchTest(TX_SIZE tx_size,lowbd_fwd_txfm_func target_func)236 void AV1FwdTxfm2dMatchTest(TX_SIZE tx_size, lowbd_fwd_txfm_func target_func) {
237   const int bd = 8;
238   TxfmParam param;
239   memset(&param, 0, sizeof(param));
240   const int rows = tx_size_high[tx_size];
241   const int cols = tx_size_wide[tx_size];
242   // printf("%d x %d\n", cols, rows);
243   for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
244     if (libaom_test::IsTxSizeTypeValid(
245             tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
246       continue;
247     }
248 
249     FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
250     if (ref_func != NULL) {
251       DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
252       DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
253       DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
254       int input_stride = 64;
255       ACMRandom rnd(ACMRandom::DeterministicSeed());
256       for (int cnt = 0; cnt < 500; ++cnt) {
257         if (cnt == 0) {
258           for (int r = 0; r < rows; ++r) {
259             for (int c = 0; c < cols; ++c) {
260               input[r * input_stride + c] = (1 << bd) - 1;
261             }
262           }
263         } else {
264           for (int r = 0; r < rows; ++r) {
265             for (int c = 0; c < cols; ++c) {
266               input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
267             }
268           }
269         }
270         param.tx_type = (TX_TYPE)tx_type;
271         param.tx_size = (TX_SIZE)tx_size;
272         param.tx_set_type = EXT_TX_SET_ALL16;
273         param.bd = bd;
274         ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
275         target_func(input, output, input_stride, &param);
276         const int check_rows = AOMMIN(32, rows);
277         const int check_cols = AOMMIN(32, rows * cols / check_rows);
278         for (int r = 0; r < check_rows; ++r) {
279           for (int c = 0; c < check_cols; ++c) {
280             ASSERT_EQ(ref_output[r * check_cols + c],
281                       output[r * check_cols + c])
282                 << "[" << r << "," << c << "] cnt:" << cnt
283                 << " tx_size: " << tx_size << " tx_type: " << tx_type;
284           }
285         }
286       }
287     }
288   }
289 }
290 
291 typedef ::testing::tuple<TX_SIZE, lowbd_fwd_txfm_func> LbdFwdTxfm2dParam;
292 
293 class AV1FwdTxfm2dTest : public ::testing::TestWithParam<LbdFwdTxfm2dParam> {};
294 
TEST_P(AV1FwdTxfm2dTest,match)295 TEST_P(AV1FwdTxfm2dTest, match) {
296   AV1FwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1));
297 }
298 
299 using ::testing::Combine;
300 using ::testing::Values;
301 using ::testing::ValuesIn;
302 
#if HAVE_SSE2
// Transform sizes for which av1_lowbd_fwd_txfm_sse2 is compared against the
// reference. TX_64X64, TX_32X64 and TX_64X32 are commented out of this list.
// The table is only read by the instantiation below, hence const.
static const TX_SIZE fwd_txfm_for_sse2[] = {
  TX_4X4,
  TX_8X8,
  TX_16X16,
  TX_32X32,
  // TX_64X64,
  TX_4X8,
  TX_8X4,
  TX_8X16,
  TX_16X8,
  TX_16X32,
  TX_32X16,
  // TX_32X64,
  // TX_64X32,
  TX_4X16,
  TX_16X4,
  TX_8X32,
  TX_32X8,
  TX_16X64,
  TX_64X16,
};

INSTANTIATE_TEST_CASE_P(SSE2, AV1FwdTxfm2dTest,
                        Combine(ValuesIn(fwd_txfm_for_sse2),
                                Values(av1_lowbd_fwd_txfm_sse2)));
#endif  // HAVE_SSE2
330 
#if HAVE_SSE4_1
// Transform sizes for which av1_lowbd_fwd_txfm_sse4_1 is compared against the
// reference. The table is only read by the instantiation below, hence const.
static const TX_SIZE fwd_txfm_for_sse41[] = {
  TX_4X4,
  TX_64X64,
  TX_32X64,
  TX_64X32,
};

INSTANTIATE_TEST_CASE_P(SSE4_1, AV1FwdTxfm2dTest,
                        Combine(ValuesIn(fwd_txfm_for_sse41),
                                Values(av1_lowbd_fwd_txfm_sse4_1)));
#endif  // HAVE_SSE4_1
343 
#if HAVE_AVX2
// Transform sizes for which av1_lowbd_fwd_txfm_avx2 is compared against the
// reference. The table is only read by the instantiation below, hence const.
static const TX_SIZE fwd_txfm_for_avx2[] = {
  TX_4X4,  TX_8X8,  TX_16X16, TX_32X32, TX_64X64, TX_4X8,   TX_8X4,
  TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16,
  TX_16X4, TX_8X32, TX_32X8,  TX_16X64, TX_64X16,
};

INSTANTIATE_TEST_CASE_P(AVX2, AV1FwdTxfm2dTest,
                        Combine(ValuesIn(fwd_txfm_for_avx2),
                                Values(av1_lowbd_fwd_txfm_avx2)));
#endif  // HAVE_AVX2
355 
356 typedef void (*Highbd_fwd_txfm_func)(const int16_t *src_diff, tran_low_t *coeff,
357                                      int diff_stride, TxfmParam *txfm_param);
358 
AV1HighbdFwdTxfm2dMatchTest(TX_SIZE tx_size,Highbd_fwd_txfm_func target_func)359 void AV1HighbdFwdTxfm2dMatchTest(TX_SIZE tx_size,
360                                  Highbd_fwd_txfm_func target_func) {
361   const int bd_ar[2] = { 10, 12 };
362   TxfmParam param;
363   memset(&param, 0, sizeof(param));
364   const int rows = tx_size_high[tx_size];
365   const int cols = tx_size_wide[tx_size];
366   for (int i = 0; i < 2; ++i) {
367     const int bd = bd_ar[i];
368     for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
369       if (libaom_test::IsTxSizeTypeValid(
370               tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
371         continue;
372       }
373 
374       FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
375       if (ref_func != NULL) {
376         DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
377         DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
378         DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
379         int input_stride = 64;
380         ACMRandom rnd(ACMRandom::DeterministicSeed());
381         for (int cnt = 0; cnt < 500; ++cnt) {
382           if (cnt == 0) {
383             for (int r = 0; r < rows; ++r) {
384               for (int c = 0; c < cols; ++c) {
385                 input[r * input_stride + c] = (1 << bd) - 1;
386               }
387             }
388           } else {
389             for (int r = 0; r < rows; ++r) {
390               for (int c = 0; c < cols; ++c) {
391                 input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
392               }
393             }
394           }
395           param.tx_type = (TX_TYPE)tx_type;
396           param.tx_size = (TX_SIZE)tx_size;
397           param.tx_set_type = EXT_TX_SET_ALL16;
398           param.bd = bd;
399 
400           ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
401           target_func(input, output, input_stride, &param);
402           const int check_rows = AOMMIN(32, rows);
403           const int check_cols = AOMMIN(32, rows * cols / check_rows);
404           for (int r = 0; r < check_rows; ++r) {
405             for (int c = 0; c < check_cols; ++c) {
406               ASSERT_EQ(ref_output[r * check_cols + c],
407                         output[r * check_cols + c])
408                   << "[" << r << "," << c << "] cnt:" << cnt
409                   << " tx_size: " << tx_size << " tx_type: " << tx_type;
410             }
411           }
412         }
413       }
414     }
415   }
416 }
417 
AV1HighbdFwdTxfm2dSpeedTest(TX_SIZE tx_size,Highbd_fwd_txfm_func target_func)418 void AV1HighbdFwdTxfm2dSpeedTest(TX_SIZE tx_size,
419                                  Highbd_fwd_txfm_func target_func) {
420   const int bd_ar[2] = { 10, 12 };
421   TxfmParam param;
422   memset(&param, 0, sizeof(param));
423   const int rows = tx_size_high[tx_size];
424   const int cols = tx_size_wide[tx_size];
425   const int num_loops = 1000000 / (rows * cols);
426 
427   for (int i = 0; i < 2; ++i) {
428     const int bd = bd_ar[i];
429     for (int tx_type = 0; tx_type < TX_TYPES; ++tx_type) {
430       if (libaom_test::IsTxSizeTypeValid(
431               tx_size, static_cast<TX_TYPE>(tx_type)) == false) {
432         continue;
433       }
434 
435       FwdTxfm2dFunc ref_func = libaom_test::fwd_txfm_func_ls[tx_size];
436       if (ref_func != NULL) {
437         DECLARE_ALIGNED(32, int16_t, input[64 * 64]) = { 0 };
438         DECLARE_ALIGNED(32, int32_t, output[64 * 64]);
439         DECLARE_ALIGNED(32, int32_t, ref_output[64 * 64]);
440         int input_stride = 64;
441         ACMRandom rnd(ACMRandom::DeterministicSeed());
442 
443         for (int r = 0; r < rows; ++r) {
444           for (int c = 0; c < cols; ++c) {
445             input[r * input_stride + c] = rnd.Rand16() % (1 << bd);
446           }
447         }
448 
449         param.tx_type = (TX_TYPE)tx_type;
450         param.tx_size = (TX_SIZE)tx_size;
451         param.tx_set_type = EXT_TX_SET_ALL16;
452         param.bd = bd;
453 
454         aom_usec_timer ref_timer, test_timer;
455 
456         aom_usec_timer_start(&ref_timer);
457         for (int i = 0; i < num_loops; ++i) {
458           ref_func(input, ref_output, input_stride, (TX_TYPE)tx_type, bd);
459         }
460         aom_usec_timer_mark(&ref_timer);
461         const int elapsed_time_c =
462             static_cast<int>(aom_usec_timer_elapsed(&ref_timer));
463 
464         aom_usec_timer_start(&test_timer);
465         for (int i = 0; i < num_loops; ++i) {
466           target_func(input, output, input_stride, &param);
467         }
468         aom_usec_timer_mark(&test_timer);
469         const int elapsed_time_simd =
470             static_cast<int>(aom_usec_timer_elapsed(&test_timer));
471 
472         printf(
473             "txfm_size[%d] \t txfm_type[%d] \t c_time=%d \t simd_time=%d \t "
474             "gain=%d \n",
475             tx_size, tx_type, elapsed_time_c, elapsed_time_simd,
476             (elapsed_time_c / elapsed_time_simd));
477       }
478     }
479   }
480 }
481 
482 typedef ::testing::tuple<TX_SIZE, Highbd_fwd_txfm_func> HighbdFwdTxfm2dParam;
483 
484 class AV1HighbdFwdTxfm2dTest
485     : public ::testing::TestWithParam<HighbdFwdTxfm2dParam> {};
486 
TEST_P(AV1HighbdFwdTxfm2dTest,match)487 TEST_P(AV1HighbdFwdTxfm2dTest, match) {
488   AV1HighbdFwdTxfm2dMatchTest(GET_PARAM(0), GET_PARAM(1));
489 }
490 
TEST_P(AV1HighbdFwdTxfm2dTest,DISABLED_Speed)491 TEST_P(AV1HighbdFwdTxfm2dTest, DISABLED_Speed) {
492   AV1HighbdFwdTxfm2dSpeedTest(GET_PARAM(0), GET_PARAM(1));
493 }
494 
495 using ::testing::Combine;
496 using ::testing::Values;
497 using ::testing::ValuesIn;
498 
#if HAVE_SSE4_1
// Transform sizes for which av1_highbd_fwd_txfm is compared against the
// reference when SSE4.1 is available. The table is only read by the
// instantiation below, hence const.
static const TX_SIZE Highbd_fwd_txfm_for_sse4_1[] = {
  TX_4X4,  TX_8X8,  TX_16X16, TX_32X32, TX_64X64, TX_4X8,   TX_8X4,
  TX_8X16, TX_16X8, TX_16X32, TX_32X16, TX_32X64, TX_64X32, TX_4X16,
  TX_16X4, TX_8X32, TX_32X8,  TX_16X64, TX_64X16,
};

INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdFwdTxfm2dTest,
                        Combine(ValuesIn(Highbd_fwd_txfm_for_sse4_1),
                                Values(av1_highbd_fwd_txfm)));
#endif  // HAVE_SSE4_1
510 
511 }  // namespace
512