1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  *
11  *  This code was originally written by: Gregory Maxwell, at the Daala
12  *  project.
13  */
14 
15 #include <assert.h>
16 #include <math.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 
20 #include "config/aom_config.h"
21 #include "config/aom_dsp_rtcd.h"
22 
23 #include "aom_dsp/psnr.h"
24 #include "aom_dsp/ssim.h"
25 #include "aom_ports/system_state.h"
26 
od_bin_fdct8x8(tran_low_t * y,int ystride,const int16_t * x,int xstride)27 static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
28                            int xstride) {
29   int i, j;
30   (void)xstride;
31   aom_fdct8x8(x, y, ystride);
32   for (i = 0; i < 8; i++)
33     for (j = 0; j < 8; j++)
34       *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
35 }
36 
hbd_od_bin_fdct8x8(tran_low_t * y,int ystride,const int16_t * x,int xstride)37 static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x,
38                                int xstride) {
39   int i, j;
40   (void)xstride;
41   aom_highbd_fdct8x8(x, y, ystride);
42   for (i = 0; i < 8; i++)
43     for (j = 0; j < 8; j++)
44       *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3;
45 }
46 
47 /* Normalized inverse quantization matrix for 8x8 DCT at the point of
48  * transparency. This is not the JPEG based matrix from the paper,
49  this one gives a slightly higher MOS agreement.*/
50 static const double csf_y[8][8] = {
51   { 1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334,
52     0.678296995242, 0.466224900598, 0.3265091542 },
53   { 2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963,
54     0.868920337363, 0.61280991668, 0.436405793551 },
55   { 2.08509755623, 2.04793073064, 1.34329019223, 1.09205635862, 0.875748795257,
56     0.670882927016, 0.501731932449, 0.372504254596 },
57   { 1.48366094411, 1.68731108984, 1.09205635862, 0.772819797575, 0.605636379554,
58     0.48309405692, 0.380429446972, 0.295774038565 },
59   { 1.00227514334, 1.2305666963, 0.875748795257, 0.605636379554, 0.448996256676,
60     0.352889268808, 0.283006984131, 0.226951348204 },
61   { 0.678296995242, 0.868920337363, 0.670882927016, 0.48309405692,
62     0.352889268808, 0.27032073436, 0.215017739696, 0.17408067321 },
63   { 0.466224900598, 0.61280991668, 0.501731932449, 0.380429446972,
64     0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001 },
65   { 0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565,
66     0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276 }
67 };
68 static const double csf_cb420[8][8] = {
69   { 1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788,
70     0.898018824055, 0.74725392039, 0.615105596242 },
71   { 2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972,
72     1.17428548929, 0.996404342439, 0.830890433625 },
73   { 1.18284184739, 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362,
74     0.960060382087, 0.849823426169, 0.731221236837 },
75   { 1.14982565193, 1.38190029285, 1.02624506078, 0.861317501629, 0.801821139099,
76     0.751437590932, 0.685398513368, 0.608694761374 },
77   { 1.05017074788, 1.33100189972, 1.03145147362, 0.801821139099, 0.676555426187,
78     0.605503172737, 0.55002013668, 0.495804539034 },
79   { 0.898018824055, 1.17428548929, 0.960060382087, 0.751437590932,
80     0.605503172737, 0.514674450957, 0.454353482512, 0.407050308965 },
81   { 0.74725392039, 0.996404342439, 0.849823426169, 0.685398513368,
82     0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733 },
83   { 0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374,
84     0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237 }
85 };
86 static const double csf_cr420[8][8] = {
87   { 2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469,
88     0.867069376285, 0.721500455585, 0.593906509971 },
89   { 2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198,
90     1.13381474809, 0.962064122248, 0.802254508198 },
91   { 1.26180942886, 1.17180569821, 0.944981930573, 0.990876405848,
92     0.995903384143, 0.926972725286, 0.820534991409, 0.706020324706 },
93   { 1.11019789803, 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195,
94     0.725539939514, 0.661776842059, 0.587716619023 },
95   { 1.01397751469, 1.28513006198, 0.995903384143, 0.77418706195, 0.653238524286,
96     0.584635025748, 0.531064164893, 0.478717061273 },
97   { 0.867069376285, 1.13381474809, 0.926972725286, 0.725539939514,
98     0.584635025748, 0.496936637883, 0.438694579826, 0.393021669543 },
99   { 0.721500455585, 0.962064122248, 0.820534991409, 0.661776842059,
100     0.531064164893, 0.438694579826, 0.375820256136, 0.330555063063 },
101   { 0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023,
102     0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658 }
103 };
104 
convert_score_db(double _score,double _weight,int bit_depth)105 static double convert_score_db(double _score, double _weight, int bit_depth) {
106   int16_t pix_max = 255;
107   assert(_score * _weight >= 0.0);
108   if (bit_depth == 10)
109     pix_max = 1023;
110   else if (bit_depth == 12)
111     pix_max = 4095;
112 
113   if (_weight * _score < pix_max * pix_max * 1e-10) return MAX_PSNR;
114   return 10 * (log10(pix_max * pix_max) - log10(_weight * _score));
115 }
116 
calc_psnrhvs(const unsigned char * src,int _systride,const unsigned char * dst,int _dystride,double _par,int _w,int _h,int _step,const double _csf[8][8],uint32_t _shift,int buf_is_hbd)117 static double calc_psnrhvs(const unsigned char *src, int _systride,
118                            const unsigned char *dst, int _dystride, double _par,
119                            int _w, int _h, int _step, const double _csf[8][8],
120                            uint32_t _shift, int buf_is_hbd) {
121   double ret;
122   const uint8_t *_src8 = src;
123   const uint8_t *_dst8 = dst;
124   const uint16_t *_src16 = CONVERT_TO_SHORTPTR(src);
125   const uint16_t *_dst16 = CONVERT_TO_SHORTPTR(dst);
126   DECLARE_ALIGNED(16, int16_t, dct_s[8 * 8]);
127   DECLARE_ALIGNED(16, int16_t, dct_d[8 * 8]);
128   DECLARE_ALIGNED(16, tran_low_t, dct_s_coef[8 * 8]);
129   DECLARE_ALIGNED(16, tran_low_t, dct_d_coef[8 * 8]);
130   double mask[8][8];
131   int pixels;
132   int x;
133   int y;
134   (void)_par;
135   ret = pixels = 0;
136   /*In the PSNR-HVS-M paper[1] the authors describe the construction of
137    their masking table as "we have used the quantization table for the
138    color component Y of JPEG [6] that has been also obtained on the
139    basis of CSF. Note that the values in quantization table JPEG have
140    been normalized and then squared." Their CSF matrix (from PSNR-HVS)
141    was also constructed from the JPEG matrices. I can not find any obvious
142    scheme of normalizing to produce their table, but if I multiply their
143    CSF by 0.38857 and square the result I get their masking table.
144    I have no idea where this constant comes from, but deviating from it
145    too greatly hurts MOS agreement.
146 
147    [1] Nikolay Ponomarenko, Flavia Silvestri, Karen Egiazarian, Marco Carli,
148    Jaakko Astola, Vladimir Lukin, "On between-coefficient contrast masking
149    of DCT basis functions", CD-ROM Proceedings of the Third
150    International Workshop on Video Processing and Quality Metrics for Consumer
151    Electronics VPQM-07, Scottsdale, Arizona, USA, 25-26 January, 2007, 4 p.*/
152   for (x = 0; x < 8; x++)
153     for (y = 0; y < 8; y++)
154       mask[x][y] =
155           (_csf[x][y] * 0.3885746225901003) * (_csf[x][y] * 0.3885746225901003);
156   for (y = 0; y < _h - 7; y += _step) {
157     for (x = 0; x < _w - 7; x += _step) {
158       int i;
159       int j;
160       double s_means[4];
161       double d_means[4];
162       double s_vars[4];
163       double d_vars[4];
164       double s_gmean = 0;
165       double d_gmean = 0;
166       double s_gvar = 0;
167       double d_gvar = 0;
168       double s_mask = 0;
169       double d_mask = 0;
170       for (i = 0; i < 4; i++)
171         s_means[i] = d_means[i] = s_vars[i] = d_vars[i] = 0;
172       for (i = 0; i < 8; i++) {
173         for (j = 0; j < 8; j++) {
174           int sub = ((i & 12) >> 2) + ((j & 12) >> 1);
175           if (!buf_is_hbd) {
176             dct_s[i * 8 + j] = _src8[(y + i) * _systride + (j + x)];
177             dct_d[i * 8 + j] = _dst8[(y + i) * _dystride + (j + x)];
178           } else {
179             dct_s[i * 8 + j] = _src16[(y + i) * _systride + (j + x)] >> _shift;
180             dct_d[i * 8 + j] = _dst16[(y + i) * _dystride + (j + x)] >> _shift;
181           }
182           s_gmean += dct_s[i * 8 + j];
183           d_gmean += dct_d[i * 8 + j];
184           s_means[sub] += dct_s[i * 8 + j];
185           d_means[sub] += dct_d[i * 8 + j];
186         }
187       }
188       s_gmean /= 64.f;
189       d_gmean /= 64.f;
190       for (i = 0; i < 4; i++) s_means[i] /= 16.f;
191       for (i = 0; i < 4; i++) d_means[i] /= 16.f;
192       for (i = 0; i < 8; i++) {
193         for (j = 0; j < 8; j++) {
194           int sub = ((i & 12) >> 2) + ((j & 12) >> 1);
195           s_gvar += (dct_s[i * 8 + j] - s_gmean) * (dct_s[i * 8 + j] - s_gmean);
196           d_gvar += (dct_d[i * 8 + j] - d_gmean) * (dct_d[i * 8 + j] - d_gmean);
197           s_vars[sub] += (dct_s[i * 8 + j] - s_means[sub]) *
198                          (dct_s[i * 8 + j] - s_means[sub]);
199           d_vars[sub] += (dct_d[i * 8 + j] - d_means[sub]) *
200                          (dct_d[i * 8 + j] - d_means[sub]);
201         }
202       }
203       s_gvar *= 1 / 63.f * 64;
204       d_gvar *= 1 / 63.f * 64;
205       for (i = 0; i < 4; i++) s_vars[i] *= 1 / 15.f * 16;
206       for (i = 0; i < 4; i++) d_vars[i] *= 1 / 15.f * 16;
207       if (s_gvar > 0)
208         s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar;
209       if (d_gvar > 0)
210         d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar;
211       if (!buf_is_hbd) {
212         od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
213         od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
214       } else {
215         hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8);
216         hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8);
217       }
218       for (i = 0; i < 8; i++)
219         for (j = (i == 0); j < 8; j++)
220           s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j];
221       for (i = 0; i < 8; i++)
222         for (j = (i == 0); j < 8; j++)
223           d_mask += dct_d_coef[i * 8 + j] * dct_d_coef[i * 8 + j] * mask[i][j];
224       s_mask = sqrt(s_mask * s_gvar) / 32.f;
225       d_mask = sqrt(d_mask * d_gvar) / 32.f;
226       if (d_mask > s_mask) s_mask = d_mask;
227       for (i = 0; i < 8; i++) {
228         for (j = 0; j < 8; j++) {
229           double err;
230           err = fabs((double)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j]));
231           if (i != 0 || j != 0)
232             err = err < s_mask / mask[i][j] ? 0 : err - s_mask / mask[i][j];
233           ret += (err * _csf[i][j]) * (err * _csf[i][j]);
234           pixels++;
235         }
236       }
237     }
238   }
239   if (pixels <= 0) return 0;
240   ret /= pixels;
241   return ret;
242 }
243 
aom_psnrhvs(const YV12_BUFFER_CONFIG * src,const YV12_BUFFER_CONFIG * dst,double * y_psnrhvs,double * u_psnrhvs,double * v_psnrhvs,uint32_t bd,uint32_t in_bd)244 double aom_psnrhvs(const YV12_BUFFER_CONFIG *src, const YV12_BUFFER_CONFIG *dst,
245                    double *y_psnrhvs, double *u_psnrhvs, double *v_psnrhvs,
246                    uint32_t bd, uint32_t in_bd) {
247   double psnrhvs;
248   const double par = 1.0;
249   const int step = 7;
250   uint32_t bd_shift = 0;
251   aom_clear_system_state();
252   assert(bd == 8 || bd == 10 || bd == 12);
253   assert(bd >= in_bd);
254   assert(src->flags == dst->flags);
255   const int buf_is_hbd = src->flags & YV12_FLAG_HIGHBITDEPTH;
256 
257   bd_shift = bd - in_bd;
258 
259   *y_psnrhvs = calc_psnrhvs(
260       src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, par,
261       src->y_crop_width, src->y_crop_height, step, csf_y, bd_shift, buf_is_hbd);
262   *u_psnrhvs =
263       calc_psnrhvs(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride,
264                    par, src->uv_crop_width, src->uv_crop_height, step,
265                    csf_cb420, bd_shift, buf_is_hbd);
266   *v_psnrhvs =
267       calc_psnrhvs(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride,
268                    par, src->uv_crop_width, src->uv_crop_height, step,
269                    csf_cr420, bd_shift, buf_is_hbd);
270   psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs));
271   return convert_score_db(psnrhvs, 1.0, in_bd);
272 }
273