1 /*
2  * Copyright (c) 2017, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10  */
11 
12 #include <math.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include "noise_model.h"
16 #include "noise_util.h"
17 #include "mathutils.h"
18 #include "EbLog.h"
19 
20 #define kLowPolyNumParams 3
21 
22 static const int32_t k_max_lag = 4;
23 
24 void *svt_aom_memalign(size_t align, size_t size);
25 void  svt_aom_free(void *memblk);
26 
27 void un_pack2d(uint16_t *in16_bit_buffer, uint32_t in_stride, uint8_t *out8_bit_buffer,
28                uint32_t out8_stride, uint8_t *outn_bit_buffer, uint32_t outn_stride, uint32_t width,
29                uint32_t height);
30 
31 void pack2d_src(uint8_t *in8_bit_buffer, uint32_t in8_stride, uint8_t *inn_bit_buffer,
32                 uint32_t inn_stride, uint16_t *out16_bit_buffer, uint32_t out_stride,
33                 uint32_t width, uint32_t height);
34 
35 // Defines a function that can be used to obtain the mean of a block for the
36 // provided data type (uint8_t, or uint16_t)
37 #define GET_BLOCK_MEAN(INT_TYPE, suffix)                          \
38     static double get_block_mean_##suffix(const INT_TYPE *data,   \
39                                           int32_t         w,      \
40                                           int32_t         h,      \
41                                           int32_t         stride, \
42                                           int32_t         x_o,    \
43                                           int32_t         y_o,    \
44                                           int32_t         block_size) {   \
45         const int32_t max_h      = AOMMIN(h - y_o, block_size);   \
46         const int32_t max_w      = AOMMIN(w - x_o, block_size);   \
47         double        block_mean = 0;                             \
48         for (int32_t y = 0; y < max_h; ++y) {                     \
49             for (int32_t x = 0; x < max_w; ++x) {                 \
50                 block_mean += data[(y_o + y) * stride + x_o + x]; \
51             }                                                     \
52         }                                                         \
53         return block_mean / (max_w * max_h);                      \
54     }
55 
56 GET_BLOCK_MEAN(uint8_t, lowbd);
57 GET_BLOCK_MEAN(uint16_t, highbd);
58 
get_block_mean(const uint8_t * data,int32_t w,int32_t h,int32_t stride,int32_t x_o,int32_t y_o,int32_t block_size,int32_t use_highbd)59 static INLINE double get_block_mean(const uint8_t *data, int32_t w, int32_t h, int32_t stride,
60                                     int32_t x_o, int32_t y_o, int32_t block_size,
61                                     int32_t use_highbd) {
62     if (use_highbd)
63         return get_block_mean_highbd((const uint16_t *)data, w, h, stride, x_o, y_o, block_size);
64     return get_block_mean_lowbd(data, w, h, stride, x_o, y_o, block_size);
65 }
66 
67 // Defines a function that can be used to obtain the variance of a block
68 // for the provided data type (uint8_t, or uint16_t)
69 #define GET_NOISE_VAR(INT_TYPE, suffix)                                     \
70     static double get_noise_var_##suffix(const INT_TYPE *data,              \
71                                          const INT_TYPE *denoised,          \
72                                          int32_t         stride,            \
73                                          int32_t         w,                 \
74                                          int32_t         h,                 \
75                                          int32_t         x_o,               \
76                                          int32_t         y_o,               \
77                                          int32_t         block_size_x,      \
78                                          int32_t         block_size_y) {            \
79         const int32_t max_h      = AOMMIN(h - y_o, block_size_y);           \
80         const int32_t max_w      = AOMMIN(w - x_o, block_size_x);           \
81         double        noise_var  = 0;                                       \
82         double        noise_mean = 0;                                       \
83         for (int32_t y = 0; y < max_h; ++y) {                               \
84             for (int32_t x = 0; x < max_w; ++x) {                           \
85                 double noise = (double)data[(y_o + y) * stride + x_o + x] - \
86                     denoised[(y_o + y) * stride + x_o + x];                 \
87                 noise_mean += noise;                                        \
88                 noise_var += noise * noise;                                 \
89             }                                                               \
90         }                                                                   \
91         noise_mean /= (max_w * max_h);                                      \
92         return noise_var / (max_w * max_h) - noise_mean * noise_mean;       \
93     }
94 
95 GET_NOISE_VAR(uint8_t, lowbd);
96 GET_NOISE_VAR(uint16_t, highbd);
97 
get_noise_var(const uint8_t * data,const uint8_t * denoised,int32_t w,int32_t h,int32_t stride,int32_t x_o,int32_t y_o,int32_t block_size_x,int32_t block_size_y,int32_t use_highbd)98 static INLINE double get_noise_var(const uint8_t *data, const uint8_t *denoised, int32_t w,
99                                    int32_t h, int32_t stride, int32_t x_o, int32_t y_o,
100                                    int32_t block_size_x, int32_t block_size_y, int32_t use_highbd) {
101     if (use_highbd)
102         return get_noise_var_highbd((const uint16_t *)data,
103                                     (const uint16_t *)denoised,
104                                     w,
105                                     h,
106                                     stride,
107                                     x_o,
108                                     y_o,
109                                     block_size_x,
110                                     block_size_y);
111     return get_noise_var_lowbd(data, denoised, w, h, stride, x_o, y_o, block_size_x, block_size_y);
112 }
113 
equation_system_free(AomEquationSystem * eqns)114 static void equation_system_free(AomEquationSystem *eqns) {
115     if (!eqns)
116         return;
117     free(eqns->A);
118     eqns->A = NULL;
119     free(eqns->b);
120     eqns->b = NULL;
121     free(eqns->x);
122     eqns->x = NULL;
123     eqns->n = 0;
124 }
125 
equation_system_clear(AomEquationSystem * eqns)126 static void equation_system_clear(AomEquationSystem *eqns) {
127     const int32_t n = eqns->n;
128     memset(eqns->A, 0, sizeof(*eqns->A) * n * n);
129     memset(eqns->x, 0, sizeof(*eqns->x) * n);
130     memset(eqns->b, 0, sizeof(*eqns->b) * n);
131 }
132 
equation_system_copy(AomEquationSystem * dst,const AomEquationSystem * src)133 static void equation_system_copy(AomEquationSystem *dst, const AomEquationSystem *src) {
134     const int32_t n = dst->n;
135     if (svt_memcpy != NULL) {
136         svt_memcpy(dst->A, src->A, sizeof(*dst->A) * n * n);
137         svt_memcpy(dst->x, src->x, sizeof(*dst->x) * n);
138         svt_memcpy(dst->b, src->b, sizeof(*dst->b) * n);
139     } else {
140         svt_memcpy_c(dst->A, src->A, sizeof(*dst->A) * n * n);
141         svt_memcpy_c(dst->x, src->x, sizeof(*dst->x) * n);
142         svt_memcpy_c(dst->b, src->b, sizeof(*dst->b) * n);
143     }
144 }
145 
equation_system_init(AomEquationSystem * eqns,int32_t n)146 static int32_t equation_system_init(AomEquationSystem *eqns, int32_t n) {
147     eqns->A = (double *)malloc(sizeof(*eqns->A) * n * n);
148     eqns->b = (double *)malloc(sizeof(*eqns->b) * n);
149     eqns->x = (double *)malloc(sizeof(*eqns->x) * n);
150     eqns->n = n;
151     if (!eqns->A || !eqns->b || !eqns->x) {
152         SVT_ERROR("Failed to allocate system of equations of size %d\n", n);
153         equation_system_free(eqns);
154         return 0;
155     }
156     equation_system_clear(eqns);
157     return 1;
158 }
159 
equation_system_solve(AomEquationSystem * eqns)160 static int32_t equation_system_solve(AomEquationSystem *eqns) {
161     const int32_t n   = eqns->n;
162     double *      b   = (double *)malloc(sizeof(*b) * n);
163     double *      A   = (double *)malloc(sizeof(*A) * n * n);
164     int32_t       ret = 0;
165     if (A == NULL || b == NULL) {
166         SVT_ERROR("Unable to allocate temp values of size %dx%d\n", n, n);
167         free(b);
168         free(A);
169         return 0;
170     }
171     if (svt_memcpy != NULL) {
172         svt_memcpy(A, eqns->A, sizeof(*eqns->A) * n * n);
173         svt_memcpy(b, eqns->b, sizeof(*eqns->b) * n);
174     } else {
175         svt_memcpy_c(A, eqns->A, sizeof(*eqns->A) * n * n);
176         svt_memcpy_c(b, eqns->b, sizeof(*eqns->b) * n);
177     }
178     ret = linsolve(n, A, eqns->n, b, eqns->x);
179     free(b);
180     free(A);
181 
182     if (ret == 0)
183         return 0;
184     return 1;
185 }
186 /*
187 static void equation_system_add(AomEquationSystem *dest,
188                                 AomEquationSystem *src) {
189   const int32_t n = dest->n;
190   int32_t i, j;
191   for (i = 0; i < n; ++i) {
192     for (j = 0; j < n; ++j)
193       dest->A[i * n + j] += src->A[i * n + j];
194     dest->b[i] += src->b[i];
195   }
196 }
197 */
198 
noise_strength_solver_clear(AomNoiseStrengthSolver * solver)199 static void noise_strength_solver_clear(AomNoiseStrengthSolver *solver) {
200     equation_system_clear(&solver->eqns);
201     solver->num_equations = 0;
202     solver->total         = 0;
203 }
204 
205 /*
206 static void noise_strength_solver_add(AomNoiseStrengthSolver *dest,
207                                       AomNoiseStrengthSolver *src) {
208   equation_system_add(&dest->eqns, &src->eqns);
209   dest->num_equations += src->num_equations;
210   dest->total += src->total;
211 }
212 */
213 
noise_strength_solver_copy(AomNoiseStrengthSolver * dest,AomNoiseStrengthSolver * src)214 static void noise_strength_solver_copy(AomNoiseStrengthSolver *dest, AomNoiseStrengthSolver *src) {
215     equation_system_copy(&dest->eqns, &src->eqns);
216     dest->num_equations = src->num_equations;
217     dest->total         = src->total;
218 }
219 
220 // Return the number of coefficients required for the given parameters
num_coeffs(const AomNoiseModelParams params)221 static int32_t num_coeffs(const AomNoiseModelParams params) {
222     const int32_t n = 2 * params.lag + 1;
223     switch (params.shape) {
224     case AOM_NOISE_SHAPE_DIAMOND: return params.lag * (params.lag + 1);
225     case AOM_NOISE_SHAPE_SQUARE: return (n * n) / 2;
226     }
227     return 0;
228 }
229 
noise_state_init(AomNoiseState * state,int32_t n,int32_t bit_depth)230 static int32_t noise_state_init(AomNoiseState *state, int32_t n, int32_t bit_depth) {
231     const int32_t k_num_bins = 20;
232     if (!equation_system_init(&state->eqns, n)) {
233         SVT_ERROR("Failed initialization noise state with size %d\n", n);
234         return 0;
235     }
236     state->ar_gain          = 1.0;
237     state->num_observations = 0;
238     return svt_aom_noise_strength_solver_init(&state->strength_solver, k_num_bins, bit_depth);
239 }
240 
set_chroma_coefficient_fallback_soln(AomEquationSystem * eqns)241 static void set_chroma_coefficient_fallback_soln(AomEquationSystem *eqns) {
242     const double  k_tolerance = 1e-6;
243     const int32_t last        = eqns->n - 1;
244     // Set all of the AR coefficients to zero, but try to solve for correlation
245     // with the luma channel
246     memset(eqns->x, 0, sizeof(*eqns->x) * eqns->n);
247     if (fabs(eqns->A[last * eqns->n + last]) > k_tolerance)
248         eqns->x[last] = eqns->b[last] / eqns->A[last * eqns->n + last];
249 }
250 
svt_aom_noise_strength_lut_init(AomNoiseStrengthLut * lut,int32_t num_points)251 int32_t svt_aom_noise_strength_lut_init(AomNoiseStrengthLut *lut, int32_t num_points) {
252     if (!lut)
253         return 0;
254     lut->points = (double(*)[2])malloc(num_points * sizeof(*lut->points));
255     if (!lut->points)
256         return 0;
257     lut->num_points = num_points;
258     memset(lut->points, 0, sizeof(*lut->points) * num_points);
259     return 1;
260 }
261 
svt_aom_noise_strength_lut_free(AomNoiseStrengthLut * lut)262 void svt_aom_noise_strength_lut_free(AomNoiseStrengthLut *lut) {
263     if (!lut)
264         return;
265     free(lut->points);
266     memset(lut, 0, sizeof(*lut));
267 }
268 
noise_strength_solver_get_bin_index(const AomNoiseStrengthSolver * solver,double value)269 static double noise_strength_solver_get_bin_index(const AomNoiseStrengthSolver *solver,
270                                                   double                        value) {
271     const double val   = fclamp(value, solver->min_intensity, solver->max_intensity);
272     const double range = solver->max_intensity - solver->min_intensity;
273     return (solver->num_bins - 1) * (val - solver->min_intensity) / range;
274 }
275 
noise_strength_solver_get_value(const AomNoiseStrengthSolver * solver,double x)276 static double noise_strength_solver_get_value(const AomNoiseStrengthSolver *solver, double x) {
277     const double  bin    = noise_strength_solver_get_bin_index(solver, x);
278     const int32_t bin_i0 = (int32_t)floor(bin);
279     const int32_t bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
280     const double  a      = bin - bin_i0;
281     return (1.0 - a) * solver->eqns.x[bin_i0] + a * solver->eqns.x[bin_i1];
282 }
283 
svt_aom_noise_strength_solver_add_measurement(AomNoiseStrengthSolver * solver,double block_mean,double noise_std)284 void svt_aom_noise_strength_solver_add_measurement(AomNoiseStrengthSolver *solver,
285                                                    double block_mean, double noise_std) {
286     const double  bin    = noise_strength_solver_get_bin_index(solver, block_mean);
287     const int32_t bin_i0 = (int32_t)floor(bin);
288     const int32_t bin_i1 = AOMMIN(solver->num_bins - 1, bin_i0 + 1);
289     const double  a      = bin - bin_i0;
290     const int32_t n      = solver->num_bins;
291     solver->eqns.A[bin_i0 * n + bin_i0] += (1.0 - a) * (1.0 - a);
292     solver->eqns.A[bin_i1 * n + bin_i0] += a * (1.0 - a);
293     solver->eqns.A[bin_i1 * n + bin_i1] += a * a;
294     solver->eqns.A[bin_i0 * n + bin_i1] += a * (1.0 - a);
295     solver->eqns.b[bin_i0] += (1.0 - a) * noise_std;
296     solver->eqns.b[bin_i1] += a * noise_std;
297     solver->total += noise_std;
298     solver->num_equations++;
299 }
300 
svt_aom_noise_strength_solver_solve(AomNoiseStrengthSolver * solver)301 int32_t svt_aom_noise_strength_solver_solve(AomNoiseStrengthSolver *solver) {
302     // Add regularization proportional to the number of constraints
303     const int32_t n       = solver->num_bins;
304     const double  k_alpha = 2.0 * (double)(solver->num_equations) / n;
305     int32_t       result  = 0;
306     double        mean    = 0;
307 
308     // Do this in a non-destructive manner so it is not confusing to the caller
309     double *old_a = solver->eqns.A;
310     double *A     = (double *)malloc(sizeof(*A) * n * n);
311     if (!A) {
312         SVT_ERROR("Unable to allocate copy of A\n");
313         return 0;
314     }
315     if (svt_memcpy != NULL)
316         svt_memcpy(A, old_a, sizeof(*A) * n * n);
317     else
318         svt_memcpy_c(A, old_a, sizeof(*A) * n * n);
319 
320     for (int32_t i = 0; i < n; ++i) {
321         const int32_t i_lo = AOMMAX(0, i - 1);
322         const int32_t i_hi = AOMMIN(n - 1, i + 1);
323         A[i * n + i_lo] -= k_alpha;
324         A[i * n + i] += 2 * k_alpha;
325         A[i * n + i_hi] -= k_alpha;
326     }
327 
328     // Small regularization to give average noise strength
329     mean = solver->total / solver->num_equations;
330     for (int32_t i = 0; i < n; ++i) {
331         A[i * n + i] += 1.0 / 8192.;
332         solver->eqns.b[i] += mean / 8192.;
333     }
334     solver->eqns.A = A;
335     result         = equation_system_solve(&solver->eqns);
336     solver->eqns.A = old_a;
337 
338     free(A);
339     return result;
340 }
341 
svt_aom_noise_strength_solver_init(AomNoiseStrengthSolver * solver,int32_t num_bins,int32_t bit_depth)342 int32_t svt_aom_noise_strength_solver_init(AomNoiseStrengthSolver *solver, int32_t num_bins,
343                                            int32_t bit_depth) {
344     if (!solver)
345         return 0;
346     memset(solver, 0, sizeof(*solver));
347     solver->num_bins      = num_bins;
348     solver->min_intensity = 0;
349     solver->max_intensity = (1 << bit_depth) - 1;
350     solver->total         = 0;
351     solver->num_equations = 0;
352     return equation_system_init(&solver->eqns, num_bins);
353 }
354 
svt_aom_noise_strength_solver_get_center(const AomNoiseStrengthSolver * solver,int32_t i)355 double svt_aom_noise_strength_solver_get_center(const AomNoiseStrengthSolver *solver, int32_t i) {
356     const double  range = solver->max_intensity - solver->min_intensity;
357     const int32_t n     = solver->num_bins;
358     return ((double)i) / (n - 1) * range + solver->min_intensity;
359 }
360 
361 // Computes the residual if a point were to be removed from the lut. This is
362 // calculated as the area between the output of the solver and the line segment
363 // that would be formed between [x_{i - 1}, x_{i + 1}).
update_piecewise_linear_residual(const AomNoiseStrengthSolver * solver,const AomNoiseStrengthLut * lut,double * residual,int32_t start,int32_t end)364 static void update_piecewise_linear_residual(const AomNoiseStrengthSolver *solver,
365                                              const AomNoiseStrengthLut *lut, double *residual,
366                                              int32_t start, int32_t end) {
367     const double dx = 255. / solver->num_bins;
368     for (int32_t i = AOMMAX(start, 1); i < AOMMIN(end, lut->num_points - 1); ++i) {
369         const int32_t lower = AOMMAX(
370             0, (int32_t)floor(noise_strength_solver_get_bin_index(solver, lut->points[i - 1][0])));
371         const int32_t upper = AOMMIN(
372             solver->num_bins - 1,
373             (int32_t)ceil(noise_strength_solver_get_bin_index(solver, lut->points[i + 1][0])));
374         double r = 0;
375         for (int32_t j = lower; j <= upper; ++j) {
376             const double x = svt_aom_noise_strength_solver_get_center(solver, j);
377             if (x < lut->points[i - 1][0])
378                 continue;
379             if (x >= lut->points[i + 1][0])
380                 continue;
381             const double y = solver->eqns.x[j];
382             const double a = (x - lut->points[i - 1][0]) /
383                 (lut->points[i + 1][0] - lut->points[i - 1][0]);
384             const double estimate_y = lut->points[i - 1][1] * (1.0 - a) + lut->points[i + 1][1] * a;
385             r += fabs(y - estimate_y);
386         }
387         residual[i] = r * dx;
388     }
389 }
390 
svt_aom_noise_strength_solver_fit_piecewise(const AomNoiseStrengthSolver * solver,int32_t max_output_points,AomNoiseStrengthLut * lut)391 int32_t svt_aom_noise_strength_solver_fit_piecewise(const AomNoiseStrengthSolver *solver,
392                                                     int32_t                       max_output_points,
393                                                     AomNoiseStrengthLut *         lut) {
394     // The tolerance is normalized to be give consistent results between
395     // different bit-depths.
396     const double k_tolerance = solver->max_intensity * 0.00625 / 255.0;
397     if (!svt_aom_noise_strength_lut_init(lut, solver->num_bins)) {
398         SVT_ERROR("Failed to init lut\n");
399         return 0;
400     }
401     for (int32_t i = 0; i < solver->num_bins; ++i) {
402         lut->points[i][0] = svt_aom_noise_strength_solver_get_center(solver, i);
403         lut->points[i][1] = solver->eqns.x[i];
404     }
405     if (max_output_points < 0)
406         max_output_points = solver->num_bins;
407     double *residual = malloc(solver->num_bins * sizeof(*residual));
408     ASSERT(residual != NULL);
409     memset(residual, 0, sizeof(*residual) * solver->num_bins);
410 
411     update_piecewise_linear_residual(solver, lut, residual, 0, solver->num_bins);
412 
413     // Greedily remove points if there are too many or if it doesn't hurt local
414     // approximation (never remove the end points)
415     while (lut->num_points > 2) {
416         int32_t min_index = 1;
417         for (int32_t j = 1; j < lut->num_points - 1; ++j) {
418             if (residual[j] < residual[min_index])
419                 min_index = j;
420         }
421         const double dx           = lut->points[min_index + 1][0] - lut->points[min_index - 1][0];
422         const double avg_residual = residual[min_index] / dx;
423         if (lut->num_points <= max_output_points && avg_residual > k_tolerance)
424             break;
425         const int32_t num_remaining = lut->num_points - min_index - 1;
426         memmove(lut->points + min_index,
427                 lut->points + min_index + 1,
428                 sizeof(lut->points[0]) * num_remaining);
429         lut->num_points--;
430 
431         update_piecewise_linear_residual(solver, lut, residual, min_index - 1, min_index + 1);
432     }
433     free(residual);
434     return 1;
435 }
436 
svt_aom_flat_block_finder_init(AomFlatBlockFinder * block_finder,int32_t block_size,int32_t bit_depth,int32_t use_highbd)437 int32_t svt_aom_flat_block_finder_init(AomFlatBlockFinder *block_finder, int32_t block_size,
438                                        int32_t bit_depth, int32_t use_highbd) {
439     const int32_t     n = block_size * block_size;
440     AomEquationSystem eqns;
441     if (!equation_system_init(&eqns, kLowPolyNumParams)) {
442         SVT_ERROR("Failed to init equation system for block_size=%d\n", block_size);
443         return 0;
444     }
445 
446     double *at_a_inv = (double *)malloc(kLowPolyNumParams * kLowPolyNumParams * sizeof(*at_a_inv));
447     double *A        = (double *)malloc(kLowPolyNumParams * n * sizeof(*A));
448     if (at_a_inv == NULL || A == NULL) {
449         SVT_ERROR("Failed to alloc A or at_a_inv for block_size=%d\n", block_size);
450         free(at_a_inv);
451         free(A);
452         equation_system_free(&eqns);
453         return 0;
454     }
455 
456     block_finder->A             = A;
457     block_finder->at_a_inv      = at_a_inv;
458     block_finder->block_size    = block_size;
459     block_finder->normalization = (1 << bit_depth) - 1;
460     block_finder->use_highbd    = use_highbd;
461 
462     for (int32_t y = 0; y < block_size; ++y) {
463         const double yd = ((double)y - block_size / 2.) / (block_size / 2.);
464         for (int32_t x = 0; x < block_size; ++x) {
465             const double  xd               = ((double)x - block_size / 2.) / (block_size / 2.);
466             const double  coords[3]        = {yd, xd, 1};
467             const int32_t row              = y * block_size + x;
468             A[kLowPolyNumParams * row + 0] = yd;
469             A[kLowPolyNumParams * row + 1] = xd;
470             A[kLowPolyNumParams * row + 2] = 1;
471 
472             for (int i = 0; i < kLowPolyNumParams; ++i)
473                 for (int j = 0; j < kLowPolyNumParams; ++j)
474                     eqns.A[kLowPolyNumParams * i + j] += coords[i] * coords[j];
475         }
476     }
477 
478     // Lazy inverse using existing equation solver.
479     for (int i = 0; i < kLowPolyNumParams; ++i) {
480         memset(eqns.b, 0, sizeof(*eqns.b) * kLowPolyNumParams);
481         eqns.b[i] = 1;
482         equation_system_solve(&eqns);
483 
484         for (int j = 0; j < kLowPolyNumParams; ++j) at_a_inv[j * kLowPolyNumParams + i] = eqns.x[j];
485     }
486     equation_system_free(&eqns);
487     return 1;
488 }
489 
svt_aom_flat_block_finder_free(AomFlatBlockFinder * block_finder)490 void svt_aom_flat_block_finder_free(AomFlatBlockFinder *block_finder) {
491     if (!block_finder)
492         return;
493     free(block_finder->A);
494     free(block_finder->at_a_inv);
495     memset(block_finder, 0, sizeof(*block_finder));
496 }
497 
svt_aom_flat_block_finder_extract_block(const AomFlatBlockFinder * block_finder,const uint8_t * const data,int32_t w,int32_t h,int32_t stride,int32_t offsx,int32_t offsy,double * plane,double * block)498 void svt_aom_flat_block_finder_extract_block(const AomFlatBlockFinder *block_finder,
499                                              const uint8_t *const data, int32_t w, int32_t h,
500                                              int32_t stride, int32_t offsx, int32_t offsy,
501                                              double *plane, double *block) {
502     const int32_t block_size = block_finder->block_size;
503     const int32_t n          = block_size * block_size;
504     const double *A          = block_finder->A;
505     const double *at_a_inv   = block_finder->at_a_inv;
506     double        plane_coords[kLowPolyNumParams];
507     double        at_a_inv__b[kLowPolyNumParams];
508     int32_t       xi, yi, i;
509 
510     if (block_finder->use_highbd) {
511         const uint16_t *const data16 = (const uint16_t *const)data;
512         for (yi = 0; yi < block_size; ++yi) {
513             const int32_t y = clamp(offsy + yi, 0, h - 1);
514             for (xi = 0; xi < block_size; ++xi) {
515                 const int32_t x             = clamp(offsx + xi, 0, w - 1);
516                 block[yi * block_size + xi] = ((double)data16[y * stride + x]) /
517                     block_finder->normalization;
518             }
519         }
520     } else {
521         for (yi = 0; yi < block_size; ++yi) {
522             const int32_t y = clamp(offsy + yi, 0, h - 1);
523             for (xi = 0; xi < block_size; ++xi) {
524                 const int32_t x             = clamp(offsx + xi, 0, w - 1);
525                 block[yi * block_size + xi] = ((double)data[y * stride + x]) /
526                     block_finder->normalization;
527             }
528         }
529     }
530     multiply_mat(block, A, at_a_inv__b, 1, n, kLowPolyNumParams);
531     multiply_mat(at_a_inv, at_a_inv__b, plane_coords, kLowPolyNumParams, kLowPolyNumParams, 1);
532     multiply_mat(A, plane_coords, plane, n, kLowPolyNumParams, 1);
533 
534     for (i = 0; i < n; ++i) block[i] -= plane[i];
535 }
536 
537 typedef struct {
538     int32_t index;
539     float   score;
540 } IndexAndscore;
541 
compare_scores(const void * a,const void * b)542 static int compare_scores(const void *a, const void *b) {
543     const float diff = ((IndexAndscore *)a)->score - ((IndexAndscore *)b)->score;
544     return diff < 0 ? -1 : diff > 0;
545 }
546 
svt_aom_flat_block_finder_run(const AomFlatBlockFinder * block_finder,const uint8_t * const data,int32_t w,int32_t h,int32_t stride,uint8_t * flat_blocks)547 int32_t svt_aom_flat_block_finder_run(const AomFlatBlockFinder *block_finder,
548                                       const uint8_t *const data, int32_t w, int32_t h,
549                                       int32_t stride, uint8_t *flat_blocks) {
550     // The gradient-based features used in this code are based on:
551     //  A. Kokaram, D. Kelly, H. Denman and A. Crawford, "Measuring noise
552     //  correlation for improved video denoising," 2012 19th, ICIP.
553     // The thresholds are more lenient to allow for correct grain modeling
554     // if extreme cases.
555     const int32_t  block_size        = block_finder->block_size;
556     const int32_t  n                 = block_size * block_size;
557     const double   k_trace_threshold = 0.15 / (32 * 32);
558     const double   k_ratio_threshold = 1.25;
559     const double   k_norm_threshold  = 0.08 / (32 * 32);
560     const double   k_var_threshold   = 0.005 / (double)n;
561     const int32_t  num_blocks_w      = (w + block_size - 1) / block_size;
562     const int32_t  num_blocks_h      = (h + block_size - 1) / block_size;
563     int32_t        num_flat          = 0;
564     double *       plane             = (double *)malloc(n * sizeof(*plane));
565     double *       block             = (double *)malloc(n * sizeof(*block));
566     IndexAndscore *scores = (IndexAndscore *)malloc(num_blocks_w * num_blocks_h * sizeof(*scores));
567     if (plane == NULL || block == NULL || scores == NULL) {
568         SVT_ERROR("Failed to allocate memory for block of size %d\n", n);
569         free(plane);
570         free(block);
571         free(scores);
572         return -1;
573     }
574 
575 #ifdef NOISE_MODEL_LOG_SCORE
576     SVT_ERROR("score = [");
577 #endif
578     for (int32_t by = 0; by < num_blocks_h; ++by) {
579         for (int32_t bx = 0; bx < num_blocks_w; ++bx) {
580             // Compute gradient covariance matrix.
581             double g_xx = 0, g_xy = 0, g_yy = 0;
582             double var  = 0;
583             double mean = 0;
584             svt_aom_flat_block_finder_extract_block(
585                 block_finder, data, w, h, stride, bx * block_size, by * block_size, plane, block);
586 
587             for (int32_t yi = 1; yi < block_size - 1; ++yi) {
588                 for (int32_t xi = 1; xi < block_size - 1; ++xi) {
589                     const double gx = (block[yi * block_size + xi + 1] -
590                                        block[yi * block_size + xi - 1]) /
591                         2;
592                     const double gy = (block[yi * block_size + xi + block_size] -
593                                        block[yi * block_size + xi - block_size]) /
594                         2;
595                     g_xx += gx * gx;
596                     g_xy += gx * gy;
597                     g_yy += gy * gy;
598 
599                     mean += block[yi * block_size + xi];
600                     var += block[yi * block_size + xi] * block[yi * block_size + xi];
601                 }
602             }
603             mean /= (block_size - 2) * (block_size - 2);
604 
605             // Normalize gradients by BlockSize.
606             g_xx /= ((block_size - 2) * (block_size - 2));
607             g_xy /= ((block_size - 2) * (block_size - 2));
608             g_yy /= ((block_size - 2) * (block_size - 2));
609             var = var / ((block_size - 2) * (block_size - 2)) - mean * mean;
610 
611             {
612                 const double  trace   = g_xx + g_yy;
613                 const double  det     = g_xx * g_yy - g_xy * g_xy;
614                 const double  e1      = (trace + sqrt(trace * trace - 4 * det)) / 2.;
615                 const double  e2      = (trace - sqrt(trace * trace - 4 * det)) / 2.;
616                 const double  norm    = e1; // Spectral norm
617                 const double  ratio   = (e1 / AOMMAX(e2, 1e-6));
618                 const int32_t is_flat = (trace < k_trace_threshold) &&
619                     (ratio < k_ratio_threshold) && (norm < k_norm_threshold) &&
620                     (var > k_var_threshold);
621                 // The following weights are used to combine the above features to give
622                 // a sigmoid score for flatness. If the input was normalized to [0,100]
623                 // the magnitude of these values would be close to 1 (e.g., weights
624                 // corresponding to variance would be a factor of 10000x smaller).
625                 // The weights are given in the following order:
626                 //    [{var}, {ratio}, {trace}, {norm}, offset]
627                 // with one of the most discriminative being simply the variance.
628                 const double weights[5]              = {-6682, -0.2056, 13087, -12434, 2.5694};
629                 const float  score                   = (float)(1.0 /
630                                             (1 +
631                                              exp(-(weights[0] * var + weights[1] * ratio +
632                                                    weights[2] * trace + weights[3] * norm +
633                                                    weights[4]))));
634                 flat_blocks[by * num_blocks_w + bx]  = is_flat ? 255 : 0;
635                 scores[by * num_blocks_w + bx].score = var > k_var_threshold ? score : 0;
636                 scores[by * num_blocks_w + bx].index = by * num_blocks_w + bx;
637 #ifdef NOISE_MODEL_LOG_SCORE
638                 SVT_ERROR("%g %g %g %g %g %d ", score, var, ratio, trace, norm, is_flat);
639 #endif
640                 num_flat += is_flat;
641             }
642         }
643 #ifdef NOISE_MODEL_LOG_SCORE
644         SVT_ERROR("\n");
645 #endif
646     }
647 #ifdef NOISE_MODEL_LOG_SCORE
648     SVT_ERROR("];\n");
649 #endif
650     // Find the top-scored blocks (most likely to be flat) and set the flat blocks
651     // be the union of the thresholded results and the top 10th percentile of the
652     // scored results.
653     qsort(scores, num_blocks_w * num_blocks_h, sizeof(*scores), &compare_scores);
654     const int32_t top_nth_percentile = num_blocks_w * num_blocks_h * 90 / 100;
655     const float   score_threshold    = scores[top_nth_percentile].score;
656     for (int32_t i = 0; i < num_blocks_w * num_blocks_h; ++i) {
657         if (scores[i].score >= score_threshold) {
658             num_flat += flat_blocks[scores[i].index] == 0;
659             flat_blocks[scores[i].index] |= 1;
660         }
661     }
662     free(block);
663     free(plane);
664     free(scores);
665     return num_flat;
666 }
667 
svt_aom_noise_model_init(AomNoiseModel * model,const AomNoiseModelParams params)668 int32_t svt_aom_noise_model_init(AomNoiseModel *model, const AomNoiseModelParams params) {
669     const int32_t n         = num_coeffs(params);
670     const int32_t lag       = params.lag;
671     const int32_t bit_depth = params.bit_depth;
672     int32_t       i         = 0;
673 
674     memset(model, 0, sizeof(*model));
675     if (params.lag < 1) {
676         SVT_ERROR("Invalid noise param: lag = %d must be >= 1\n", params.lag);
677         return 0;
678     }
679     if (params.lag > k_max_lag) {
680         SVT_ERROR("Invalid noise param: lag = %d must be <= %d\n", params.lag, k_max_lag);
681         return 0;
682     }
683     if (svt_memcpy != NULL)
684         svt_memcpy(&model->params, &params, sizeof(params));
685     else
686         svt_memcpy_c(&model->params, &params, sizeof(params));
687 
688     for (int c = 0; c < 3; ++c) {
689         if (!noise_state_init(&model->combined_state[c], n + (c > 0), bit_depth)) {
690             SVT_ERROR("Failed to allocate noise state for channel %d\n", c);
691             svt_aom_noise_model_free(model);
692             return 0;
693         }
694         if (!noise_state_init(&model->latest_state[c], n + (c > 0), bit_depth)) {
695             SVT_ERROR("Failed to allocate noise state for channel %d\n", c);
696             svt_aom_noise_model_free(model);
697             return 0;
698         }
699     }
700     model->n      = n;
701     model->coords = (int32_t(*)[2])malloc(sizeof(*model->coords) * n);
702     if (!model->coords) {
703         SVT_ERROR("Failed to allocate memory for coords\n");
704         svt_aom_noise_model_free(model);
705         return 0;
706     }
707     for (int32_t y = -lag; y <= 0; ++y) {
708         const int32_t max_x = y == 0 ? -1 : lag;
709         for (int32_t x = -lag; x <= max_x; ++x) {
710             switch (params.shape) {
711             case AOM_NOISE_SHAPE_DIAMOND:
712                 if (abs(x) <= y + lag) {
713                     model->coords[i][0] = x;
714                     model->coords[i][1] = y;
715                     ++i;
716                 }
717                 break;
718             case AOM_NOISE_SHAPE_SQUARE:
719                 model->coords[i][0] = x;
720                 model->coords[i][1] = y;
721                 ++i;
722                 break;
723             default:
724                 SVT_ERROR("Invalid shape\n");
725                 svt_aom_noise_model_free(model);
726                 return 0;
727             }
728         }
729     }
730     assert(i == n);
731     return 1;
732 }
733 
svt_aom_noise_model_free(AomNoiseModel * model)734 void svt_aom_noise_model_free(AomNoiseModel *model) {
735     int32_t c = 0;
736     if (!model)
737         return;
738 
739     free(model->coords);
740     for (c = 0; c < 3; ++c) {
741         equation_system_free(&model->latest_state[c].eqns);
742         equation_system_free(&model->combined_state[c].eqns);
743 
744         equation_system_free(&model->latest_state[c].strength_solver.eqns);
745         equation_system_free(&model->combined_state[c].strength_solver.eqns);
746     }
747     memset(model, 0, sizeof(*model));
748 }
749 
750 // Extracts the neighborhood defined by coords around point (x, y) from
751 // the difference between the data and denoised images. Also extracts the
752 // entry (possibly downsampled) for (x, y) in the alt_data (e.g., luma).
753 #define EXTRACT_AR_ROW(INT_TYPE, suffix)                                                 \
754     static double extract_ar_row_##suffix(int32_t(*coords)[2],                           \
755                                           int32_t               num_coords,              \
756                                           const INT_TYPE *const data,                    \
757                                           const INT_TYPE *const denoised,                \
758                                           int32_t               stride,                  \
759                                           int32_t               sub_log2[2],             \
760                                           const INT_TYPE *const alt_data,                \
761                                           const INT_TYPE *const alt_denoised,            \
762                                           int32_t               alt_stride,              \
763                                           int32_t               x,                       \
764                                           int32_t               y,                       \
765                                           double *              buffer) {                              \
766         for (int32_t i = 0; i < num_coords; ++i) {                                       \
767             const int32_t x_i = x + coords[i][0], y_i = y + coords[i][1];                \
768             buffer[i] = (double)data[y_i * stride + x_i] - denoised[y_i * stride + x_i]; \
769         }                                                                                \
770         const double val = (double)data[y * stride + x] - denoised[y * stride + x];      \
771                                                                                          \
772         if (alt_data && alt_denoised) {                                                  \
773             double  avg_data = 0, avg_denoised = 0;                                      \
774             int32_t num_samples = 0;                                                     \
775             for (int32_t dy_i = 0; dy_i < (1 << sub_log2[1]); dy_i++) {                  \
776                 const int32_t y_up = (y << sub_log2[1]) + dy_i;                          \
777                 for (int32_t dx_i = 0; dx_i < (1 << sub_log2[0]); dx_i++) {              \
778                     const int32_t x_up = (x << sub_log2[0]) + dx_i;                      \
779                     avg_data += alt_data[y_up * alt_stride + x_up];                      \
780                     avg_denoised += alt_denoised[y_up * alt_stride + x_up];              \
781                     num_samples++;                                                       \
782                 }                                                                        \
783             }                                                                            \
784             assert(num_samples > 0);                                                     \
785             buffer[num_coords] = (avg_data - avg_denoised) / num_samples;                \
786         }                                                                                \
787         return val;                                                                      \
788     }
789 
790 EXTRACT_AR_ROW(uint8_t, lowbd);
791 EXTRACT_AR_ROW(uint16_t, highbd);
792 
add_block_observations(AomNoiseModel * noise_model,int32_t c,const uint8_t * const data,const uint8_t * const denoised,int32_t w,int32_t h,int32_t stride,int32_t sub_log2[2],const uint8_t * const alt_data,const uint8_t * const alt_denoised,int32_t alt_stride,const uint8_t * const flat_blocks,int32_t block_size,int32_t num_blocks_w,int32_t num_blocks_h)793 static int32_t add_block_observations(AomNoiseModel *noise_model, int32_t c,
794                                       const uint8_t *const data, const uint8_t *const denoised,
795                                       int32_t w, int32_t h, int32_t stride, int32_t sub_log2[2],
796                                       const uint8_t *const alt_data,
797                                       const uint8_t *const alt_denoised, int32_t alt_stride,
798                                       const uint8_t *const flat_blocks, int32_t block_size,
799                                       int32_t num_blocks_w, int32_t num_blocks_h) {
800     const int32_t lag           = noise_model->params.lag;
801     const int32_t num_coords    = noise_model->n;
802     const double  normalization = (1 << noise_model->params.bit_depth) - 1;
803     double *      A             = noise_model->latest_state[c].eqns.A;
804     double *      b             = noise_model->latest_state[c].eqns.b;
805     double *      buffer        = (double *)malloc(sizeof(*buffer) * (num_coords + 1));
806     const int32_t n             = noise_model->latest_state[c].eqns.n;
807 
808     if (!buffer) {
809         SVT_ERROR("Unable to allocate buffer of size %d\n", num_coords + 1);
810         return 0;
811     }
812     for (int32_t by = 0; by < num_blocks_h; ++by) {
813         const int32_t y_o = by * (block_size >> sub_log2[1]);
814         for (int32_t bx = 0; bx < num_blocks_w; ++bx) {
815             const int32_t x_o = bx * (block_size >> sub_log2[0]);
816             if (!flat_blocks[by * num_blocks_w + bx])
817                 continue;
818             int32_t y_start = (by > 0 && flat_blocks[(by - 1) * num_blocks_w + bx]) ? 0 : lag;
819             int32_t x_start = (bx > 0 && flat_blocks[by * num_blocks_w + bx - 1]) ? 0 : lag;
820             int32_t y_end   = AOMMIN((h >> sub_log2[1]) - by * (block_size >> sub_log2[1]),
821                                    block_size >> sub_log2[1]);
822             int32_t x_end   = AOMMIN(
823                 (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]) - lag,
824                 (bx + 1 < num_blocks_w && flat_blocks[by * num_blocks_w + bx + 1])
825                       ? (block_size >> sub_log2[0])
826                       : ((block_size >> sub_log2[0]) - lag));
827             for (int32_t y = y_start; y < y_end; ++y) {
828                 for (int32_t x = x_start; x < x_end; ++x) {
829                     const double val = noise_model->params.use_highbd
830                         ? extract_ar_row_highbd(noise_model->coords,
831                                                 num_coords,
832                                                 (const uint16_t *const)data,
833                                                 (const uint16_t *const)denoised,
834                                                 stride,
835                                                 sub_log2,
836                                                 (const uint16_t *const)alt_data,
837                                                 (const uint16_t *const)alt_denoised,
838                                                 alt_stride,
839                                                 x + x_o,
840                                                 y + y_o,
841                                                 buffer)
842                         : extract_ar_row_lowbd(noise_model->coords,
843                                                num_coords,
844                                                data,
845                                                denoised,
846                                                stride,
847                                                sub_log2,
848                                                alt_data,
849                                                alt_denoised,
850                                                alt_stride,
851                                                x + x_o,
852                                                y + y_o,
853                                                buffer);
854                     for (int32_t i = 0; i < n; ++i) {
855                         for (int32_t j = 0; j < n; ++j) {
856                             A[i * n + j] += (buffer[i] * buffer[j]) /
857                                 (normalization * normalization);
858                         }
859                         b[i] += (buffer[i] * val) / (normalization * normalization);
860                     }
861                     noise_model->latest_state[c].num_observations++;
862                 }
863             }
864         }
865     }
866     free(buffer);
867     return 1;
868 }
869 
add_noise_std_observations(AomNoiseModel * noise_model,int32_t c,const double * coeffs,const uint8_t * const data,const uint8_t * const denoised,int32_t w,int32_t h,int32_t stride,int32_t sub_log2[2],const uint8_t * const alt_data,int32_t alt_stride,const uint8_t * const flat_blocks,int32_t block_size,int32_t num_blocks_w,int32_t num_blocks_h)870 static void add_noise_std_observations(AomNoiseModel *noise_model, int32_t c, const double *coeffs,
871                                        const uint8_t *const data, const uint8_t *const denoised,
872                                        int32_t w, int32_t h, int32_t stride, int32_t sub_log2[2],
873                                        const uint8_t *const alt_data, int32_t alt_stride,
874                                        const uint8_t *const flat_blocks, int32_t block_size,
875                                        int32_t num_blocks_w, int32_t num_blocks_h) {
876     const int32_t           num_coords            = noise_model->n;
877     AomNoiseStrengthSolver *noise_strength_solver = &noise_model->latest_state[c].strength_solver;
878 
879     const AomNoiseStrengthSolver *noise_strength_luma =
880         &noise_model->latest_state[0].strength_solver;
881     const double luma_gain  = noise_model->latest_state[0].ar_gain;
882     const double noise_gain = noise_model->latest_state[c].ar_gain;
883     for (int32_t by = 0; by < num_blocks_h; ++by) {
884         const int32_t y_o = by * (block_size >> sub_log2[1]);
885         for (int32_t bx = 0; bx < num_blocks_w; ++bx) {
886             const int32_t x_o = bx * (block_size >> sub_log2[0]);
887             if (!flat_blocks[by * num_blocks_w + bx])
888                 continue;
889             const int32_t num_samples_h = AOMMIN(
890                 (h >> sub_log2[1]) - by * (block_size >> sub_log2[1]), block_size >> sub_log2[1]);
891             const int32_t num_samples_w = AOMMIN(
892                 (w >> sub_log2[0]) - bx * (block_size >> sub_log2[0]), (block_size >> sub_log2[0]));
893             // Make sure that we have a reasonable amount of samples to consider the
894             // block
895             if (num_samples_w * num_samples_h > block_size) {
896                 const double block_mean = get_block_mean(alt_data ? alt_data : data,
897                                                          w,
898                                                          h,
899                                                          alt_data ? alt_stride : stride,
900                                                          x_o << sub_log2[0],
901                                                          y_o << sub_log2[1],
902                                                          block_size,
903                                                          noise_model->params.use_highbd);
904                 const double noise_var = get_noise_var(data,
905                                                        denoised,
906                                                        stride,
907                                                        w >> sub_log2[0],
908                                                        h >> sub_log2[1],
909                                                        x_o,
910                                                        y_o,
911                                                        block_size >> sub_log2[0],
912                                                        block_size >> sub_log2[1],
913                                                        noise_model->params.use_highbd);
914                 // We want to remove the part of the noise that came from being
915                 // correlated with luma. Note that the noise solver for luma must
916                 // have already been run.
917                 const double luma_strength = c > 0
918                     ? luma_gain * noise_strength_solver_get_value(noise_strength_luma, block_mean)
919                     : 0;
920                 const double corr          = c > 0 ? coeffs[num_coords] : 0;
921                 // Chroma noise:
922                 //    N(0, noise_var) = N(0, uncorr_var) + corr * N(0, luma_strength^2)
923                 // The uncorrelated component:
924                 //   uncorr_var = noise_var - (corr * luma_strength)^2
925                 // But don't allow fully correlated noise (hence the max), since the
926                 // synthesis cannot model it.
927                 const double uncorr_std = sqrt(
928                     AOMMAX(noise_var / 16, noise_var - pow(corr * luma_strength, 2)));
929                 // After we've removed correlation with luma, undo the gain that will
930                 // come from running the IIR filter.
931                 const double adjusted_strength = uncorr_std / noise_gain;
932                 svt_aom_noise_strength_solver_add_measurement(
933                     noise_strength_solver, block_mean, adjusted_strength);
934             }
935         }
936     }
937 }
938 
ar_equation_system_solve(AomNoiseState * state,int32_t is_chroma)939 static int32_t ar_equation_system_solve(AomNoiseState *state, int32_t is_chroma) {
940     const int32_t ret = equation_system_solve(&state->eqns);
941     state->ar_gain    = 1.0;
942     if (!ret)
943         return ret;
944 
945     // Update the AR gain from the equation system as it will be used to fit
946     // the noise strength as a function of intensity.  In the Yule-Walker
947     // equations, the diagonal should be the variance of the correlated noise.
948     // In the case of the least squares estimate, there will be some variability
949     // in the diagonal. So use the mean of the diagonal as the estimate of
950     // overall variance (this works for least squares or Yule-Walker formulation).
951     double        var = 0;
952     const int32_t n   = state->eqns.n;
953     for (int32_t i = 0; i < (state->eqns.n - is_chroma); ++i)
954         var += state->eqns.A[i * n + i] / state->num_observations;
955     var /= (n - is_chroma);
956 
957     // Keep track of E(Y^2) = <b, x> + E(X^2)
958     // In the case that we are using chroma and have an estimate of correlation
959     // with luma we adjust that estimate slightly to remove the correlated bits by
960     // subtracting out the last column of a scaled by our correlation estimate
961     // from b. E(y^2) = <b - A(:, end)*x(end), x>
962     double sum_covar = 0;
963     for (int32_t i = 0; i < state->eqns.n - is_chroma; ++i) {
964         double bi = state->eqns.b[i];
965         if (is_chroma)
966             bi -= state->eqns.A[i * n + (n - 1)] * state->eqns.x[n - 1];
967         sum_covar += (bi * state->eqns.x[i]) / state->num_observations;
968     }
969     // Now, get an estimate of the variance of uncorrelated noise signal and use
970     // it to determine the gain of the AR filter.
971     const double noise_var = AOMMAX(var - sum_covar, 1e-6);
972     state->ar_gain         = AOMMAX(1, sqrt(AOMMAX(var / noise_var, 1e-6)));
973     return ret;
974 }
975 
svt_aom_noise_model_update(AomNoiseModel * const noise_model,const uint8_t * const data[3],const uint8_t * const denoised[3],int32_t w,int32_t h,int32_t stride[3],int32_t chroma_sub_log2[2],const uint8_t * const flat_blocks,int32_t block_size)976 AomNoiseStatus svt_aom_noise_model_update(AomNoiseModel *const noise_model,
977                                           const uint8_t *const data[3],
978                                           const uint8_t *const denoised[3], int32_t w, int32_t h,
979                                           int32_t stride[3], int32_t chroma_sub_log2[2],
980                                           const uint8_t *const flat_blocks, int32_t block_size) {
981     const int32_t num_blocks_w = (w + block_size - 1) / block_size;
982     const int32_t num_blocks_h = (h + block_size - 1) / block_size;
983     //  int32_t y_model_different = 0;
984     int32_t num_blocks = 0;
985     int32_t i = 0, channel = 0;
986 
987     if (block_size <= 1) {
988         SVT_ERROR("BlockSize = %d must be > 1\n", block_size);
989         return AOM_NOISE_STATUS_INVALID_ARGUMENT;
990     }
991 
992     if (block_size < noise_model->params.lag * 2 + 1) {
993         SVT_ERROR("BlockSize = %d must be >= %d\n", block_size, noise_model->params.lag * 2 + 1);
994         return AOM_NOISE_STATUS_INVALID_ARGUMENT;
995     }
996 
997     // Clear the latest equation system
998     for (i = 0; i < 3; ++i) {
999         equation_system_clear(&noise_model->latest_state[i].eqns);
1000         noise_model->latest_state[i].num_observations = 0;
1001         noise_strength_solver_clear(&noise_model->latest_state[i].strength_solver);
1002     }
1003 
1004     // Check that we have enough flat blocks
1005     for (i = 0; i < num_blocks_h * num_blocks_w; ++i) {
1006         if (flat_blocks[i])
1007             num_blocks++;
1008     }
1009 
1010     if (num_blocks <= 1) {
1011         SVT_ERROR("Not enough flat blocks to update noise estimate\n");
1012         return AOM_NOISE_STATUS_INSUFFICIENT_FLAT_BLOCKS;
1013     }
1014 
1015     for (channel = 0; channel < 3; ++channel) {
1016         int32_t        no_subsampling[2] = {0, 0};
1017         const uint8_t *alt_data          = channel > 0 ? data[0] : 0;
1018         const uint8_t *alt_denoised      = channel > 0 ? denoised[0] : 0;
1019         int32_t *      sub               = channel > 0 ? chroma_sub_log2 : no_subsampling;
1020         const int32_t  is_chroma         = channel != 0;
1021         if (!data[channel] || !denoised[channel])
1022             break;
1023         if (!add_block_observations(noise_model,
1024                                     channel,
1025                                     data[channel],
1026                                     denoised[channel],
1027                                     w,
1028                                     h,
1029                                     stride[channel],
1030                                     sub,
1031                                     alt_data,
1032                                     alt_denoised,
1033                                     stride[0],
1034                                     flat_blocks,
1035                                     block_size,
1036                                     num_blocks_w,
1037                                     num_blocks_h)) {
1038             SVT_ERROR("Adding block observation failed\n");
1039             return AOM_NOISE_STATUS_INTERNAL_ERROR;
1040         }
1041 
1042         if (!ar_equation_system_solve(&noise_model->latest_state[channel], is_chroma)) {
1043             if (is_chroma) {
1044                 set_chroma_coefficient_fallback_soln(&noise_model->latest_state[channel].eqns);
1045             } else {
1046                 SVT_ERROR("Solving latest noise equation system failed %d!\n", channel);
1047                 return AOM_NOISE_STATUS_INTERNAL_ERROR;
1048             }
1049         }
1050 
1051         add_noise_std_observations(noise_model,
1052                                    channel,
1053                                    noise_model->latest_state[channel].eqns.x,
1054                                    data[channel],
1055                                    denoised[channel],
1056                                    w,
1057                                    h,
1058                                    stride[channel],
1059                                    sub,
1060                                    alt_data,
1061                                    stride[0],
1062                                    flat_blocks,
1063                                    block_size,
1064                                    num_blocks_w,
1065                                    num_blocks_h);
1066 
1067         if (!svt_aom_noise_strength_solver_solve(
1068                 &noise_model->latest_state[channel].strength_solver)) {
1069             SVT_ERROR("Solving latest noise strength failed!\n");
1070             return AOM_NOISE_STATUS_INTERNAL_ERROR;
1071         }
1072 
1073         // Check noise characteristics and return if error.
1074         //    if (channel == 0 &&
1075         //        noise_model->combined_state[channel].strength_solver.num_equations >
1076         //            0 &&
1077         //        is_noise_model_different(noise_model)) {
1078         //      y_model_different = 1;
1079         //    }
1080 
1081         noise_model->combined_state[channel].num_observations =
1082             noise_model->latest_state[channel].num_observations;
1083         equation_system_copy(&noise_model->combined_state[channel].eqns,
1084                              &noise_model->latest_state[channel].eqns);
1085         if (!ar_equation_system_solve(&noise_model->combined_state[channel], is_chroma)) {
1086             if (is_chroma) {
1087                 set_chroma_coefficient_fallback_soln(&noise_model->combined_state[channel].eqns);
1088             } else {
1089                 SVT_ERROR("Solving combined noise equation system failed %d!\n", channel);
1090                 return AOM_NOISE_STATUS_INTERNAL_ERROR;
1091             }
1092         }
1093 
1094         noise_strength_solver_copy(&noise_model->combined_state[channel].strength_solver,
1095                                    &noise_model->latest_state[channel].strength_solver);
1096 
1097         if (!svt_aom_noise_strength_solver_solve(
1098                 &noise_model->combined_state[channel].strength_solver)) {
1099             SVT_ERROR("Solving combined noise strength failed!\n");
1100             return AOM_NOISE_STATUS_INTERNAL_ERROR;
1101         }
1102     }
1103 
1104     return AOM_NOISE_STATUS_OK;
1105 }
1106 
svt_aom_noise_model_save_latest(AomNoiseModel * noise_model)1107 void svt_aom_noise_model_save_latest(AomNoiseModel *noise_model) {
1108     for (int32_t c = 0; c < 3; c++) {
1109         equation_system_copy(&noise_model->combined_state[c].eqns,
1110                              &noise_model->latest_state[c].eqns);
1111         equation_system_copy(&noise_model->combined_state[c].strength_solver.eqns,
1112                              &noise_model->latest_state[c].strength_solver.eqns);
1113         noise_model->combined_state[c].strength_solver.num_equations =
1114             noise_model->latest_state[c].strength_solver.num_equations;
1115         noise_model->combined_state[c].num_observations =
1116             noise_model->latest_state[c].num_observations;
1117         noise_model->combined_state[c].ar_gain = noise_model->latest_state[c].ar_gain;
1118     }
1119 }
1120 
svt_aom_noise_model_get_grain_parameters(AomNoiseModel * const noise_model,AomFilmGrain * film_grain)1121 int32_t svt_aom_noise_model_get_grain_parameters(AomNoiseModel *const noise_model,
1122                                                  AomFilmGrain *       film_grain) {
1123     if (noise_model->params.lag > 3) {
1124         SVT_ERROR("params.lag = %d > 3\n", noise_model->params.lag);
1125         return 0;
1126     }
1127     uint16_t random_seed = film_grain->random_seed;
1128     memset(film_grain, 0, sizeof(*film_grain));
1129     film_grain->random_seed = random_seed;
1130 
1131     film_grain->apply_grain       = 1;
1132     film_grain->update_parameters = 1;
1133 
1134     film_grain->ar_coeff_lag = noise_model->params.lag;
1135 
1136     // Convert the scaling functions to 8 bit values
1137     AomNoiseStrengthLut scaling_points[3];
1138     svt_aom_noise_strength_solver_fit_piecewise(
1139         &noise_model->combined_state[0].strength_solver, 14, scaling_points + 0);
1140     svt_aom_noise_strength_solver_fit_piecewise(
1141         &noise_model->combined_state[1].strength_solver, 10, scaling_points + 1);
1142     svt_aom_noise_strength_solver_fit_piecewise(
1143         &noise_model->combined_state[2].strength_solver, 10, scaling_points + 2);
1144 
1145     // Both the domain and the range of the scaling functions in the film_grain
1146     // are normalized to 8-bit (e.g., they are implicitly scaled during grain
1147     // synthesis).
1148     const double strength_divisor  = 1 << (noise_model->params.bit_depth - 8);
1149     double       max_scaling_value = 1e-4;
1150     for (int32_t c = 0; c < 3; ++c) {
1151         for (int32_t i = 0; i < scaling_points[c].num_points; ++i) {
1152             scaling_points[c].points[i][0] = AOMMIN(
1153                 255, scaling_points[c].points[i][0] / strength_divisor);
1154             scaling_points[c].points[i][1] = AOMMIN(
1155                 255, scaling_points[c].points[i][1] / strength_divisor);
1156             max_scaling_value = AOMMAX(scaling_points[c].points[i][1], max_scaling_value);
1157         }
1158     }
1159 
1160     // Scaling_shift values are in the range [8,11]
1161     const int32_t max_scaling_value_log2 = clamp((int32_t)floor(log2(max_scaling_value) + 1), 2, 5);
1162     film_grain->scaling_shift            = 5 + (8 - max_scaling_value_log2);
1163 
1164     const double scale_factor = 1 << (8 - max_scaling_value_log2);
1165     film_grain->num_y_points  = scaling_points[0].num_points;
1166     film_grain->num_cb_points = scaling_points[1].num_points;
1167     film_grain->num_cr_points = scaling_points[2].num_points;
1168 
1169     int32_t(*film_grain_scaling[3])[2] = {
1170         film_grain->scaling_points_y,
1171         film_grain->scaling_points_cb,
1172         film_grain->scaling_points_cr,
1173     };
1174     for (int32_t c = 0; c < 3; c++) {
1175         for (int32_t i = 0; i < scaling_points[c].num_points; ++i) {
1176             film_grain_scaling[c][i][0] = (int32_t)(scaling_points[c].points[i][0] + 0.5);
1177             film_grain_scaling[c][i][1] = clamp(
1178                 (int32_t)(scale_factor * scaling_points[c].points[i][1] + 0.5), 0, 255);
1179         }
1180     }
1181     svt_aom_noise_strength_lut_free(scaling_points + 0);
1182     svt_aom_noise_strength_lut_free(scaling_points + 1);
1183     svt_aom_noise_strength_lut_free(scaling_points + 2);
1184 
1185     // Convert the ar_coeffs into 8-bit values
1186     const int32_t n_coeff   = noise_model->combined_state[0].eqns.n;
1187     double        max_coeff = 1e-4, min_coeff = -1e-4;
1188     double        y_corr[2]         = {0, 0};
1189     double        avg_luma_strength = 0;
1190     for (int32_t c = 0; c < 3; c++) {
1191         AomEquationSystem *eqns = &noise_model->combined_state[c].eqns;
1192         for (int32_t i = 0; i < n_coeff; ++i) {
1193             max_coeff = AOMMAX(max_coeff, eqns->x[i]);
1194             min_coeff = AOMMIN(min_coeff, eqns->x[i]);
1195         }
1196         // Since the correlation between luma/chroma was computed in an already
1197         // scaled space, we adjust it in the un-scaled space.
1198         AomNoiseStrengthSolver *solver = &noise_model->combined_state[c].strength_solver;
1199         // Compute a weighted average of the strength for the channel.
1200         double average_strength = 0, total_weight = 0;
1201         for (int32_t i = 0; i < solver->eqns.n; ++i) {
1202             double w = 0;
1203             for (int32_t j = 0; j < solver->eqns.n; ++j)
1204                 w += solver->eqns.A[i * solver->eqns.n + j];
1205             w = sqrt(w);
1206             average_strength += solver->eqns.x[i] * w;
1207             total_weight += w;
1208         }
1209         if (total_weight == 0)
1210             average_strength = 1;
1211         else
1212             average_strength /= total_weight;
1213         if (c == 0)
1214             avg_luma_strength = average_strength;
1215         else {
1216             y_corr[c - 1] = avg_luma_strength * eqns->x[n_coeff] / average_strength;
1217             max_coeff     = AOMMAX(max_coeff, y_corr[c - 1]);
1218             min_coeff     = AOMMIN(min_coeff, y_corr[c - 1]);
1219         }
1220     }
1221     // Shift value: AR coeffs range (values 6-9)
1222     // 6: [-2, 2),  7: [-1, 1), 8: [-0.5, 0.5), 9: [-0.25, 0.25)
1223     film_grain->ar_coeff_shift = clamp(
1224         7 - (int32_t)AOMMAX(1 + floor(log2(max_coeff)), ceil(log2(-min_coeff))), 6, 9);
1225     double   scale_ar_coeff = 1 << film_grain->ar_coeff_shift;
1226     int32_t *ar_coeffs[3]   = {
1227         film_grain->ar_coeffs_y,
1228         film_grain->ar_coeffs_cb,
1229         film_grain->ar_coeffs_cr,
1230     };
1231     for (int32_t c = 0; c < 3; ++c) {
1232         AomEquationSystem *eqns = &noise_model->combined_state[c].eqns;
1233         for (int32_t i = 0; i < n_coeff; ++i) {
1234             ar_coeffs[c][i] = clamp((int32_t)round(scale_ar_coeff * eqns->x[i]), -128, 127);
1235         }
1236         if (c > 0) {
1237             ar_coeffs[c][n_coeff] = clamp(
1238                 (int32_t)round(scale_ar_coeff * y_corr[c - 1]), -128, 127);
1239         }
1240     }
1241 
1242     // At the moment, the noise modeling code assumes that the chroma scaling
1243     // functions are a function of luma.
1244     film_grain->cb_mult      = 128; // 8 bits
1245     film_grain->cb_luma_mult = 192; // 8 bits
1246     film_grain->cb_offset    = 256; // 9 bits
1247 
1248     film_grain->cr_mult      = 128; // 8 bits
1249     film_grain->cr_luma_mult = 192; // 8 bits
1250     film_grain->cr_offset    = 256; // 9 bits
1251 
1252     film_grain->chroma_scaling_from_luma = 0;
1253     film_grain->grain_scale_shift        = 0;
1254     film_grain->overlap_flag             = 1;
1255     return 1;
1256 }
1257 
pointwise_multiply(const float * a,float * b,int32_t n)1258 static void pointwise_multiply(const float *a, float *b, int32_t n) {
1259     for (int32_t i = 0; i < n; ++i) b[i] *= a[i];
1260 }
1261 
get_half_cos_window(int32_t block_size)1262 static float *get_half_cos_window(int32_t block_size) {
1263     float *window_function = (float *)malloc(block_size * block_size * sizeof(*window_function));
1264     ASSERT(window_function);
1265     for (int32_t y = 0; y < block_size; ++y) {
1266         const double cos_yd = cos((.5 + y) * PI / block_size - PI / 2);
1267         for (int32_t x = 0; x < block_size; ++x) {
1268             const double cos_xd                 = cos((.5 + x) * PI / block_size - PI / 2);
1269             window_function[y * block_size + x] = (float)(cos_yd * cos_xd);
1270         }
1271     }
1272     return window_function;
1273 }
1274 
1275 #define DITHER_AND_QUANTIZE(INT_TYPE, suffix)                                                     \
1276     static void dither_and_quantize_##suffix(float *   result,                                    \
1277                                              int32_t   result_stride,                             \
1278                                              INT_TYPE *denoised,                                  \
1279                                              int32_t   w,                                         \
1280                                              int32_t   h,                                         \
1281                                              int32_t   stride,                                    \
1282                                              int32_t   chroma_sub_w,                              \
1283                                              int32_t   chroma_sub_h,                              \
1284                                              int32_t   block_size,                                \
1285                                              float     block_normalization) {                         \
1286         for (int32_t y = 0; y < (h >> chroma_sub_h); ++y) {                                       \
1287             for (int32_t x = 0; x < (w >> chroma_sub_w); ++x) {                                   \
1288                 const int32_t result_idx = (y + (block_size >> chroma_sub_h)) * result_stride +   \
1289                     x + (block_size >> chroma_sub_w);                                             \
1290                 INT_TYPE new_val = (INT_TYPE)AOMMIN(                                              \
1291                     AOMMAX(result[result_idx] * block_normalization + 0.5f, 0),                   \
1292                     block_normalization);                                                         \
1293                 const float err = -(((float)new_val) / block_normalization - result[result_idx]); \
1294                 denoised[y * stride + x] = new_val;                                               \
1295                 if (x + 1 < (w >> chroma_sub_w)) {                                                \
1296                     result[result_idx + 1] += err * 7.0f / 16.0f;                                 \
1297                 }                                                                                 \
1298                 if (y + 1 < (h >> chroma_sub_h)) {                                                \
1299                     if (x > 0) {                                                                  \
1300                         result[result_idx + result_stride - 1] += err * 3.0f / 16.0f;             \
1301                     }                                                                             \
1302                     result[result_idx + result_stride] += err * 5.0f / 16.0f;                     \
1303                     if (x + 1 < (w >> chroma_sub_w)) {                                            \
1304                         result[result_idx + result_stride + 1] += err * 1.0f / 16.0f;             \
1305                     }                                                                             \
1306                 }                                                                                 \
1307             }                                                                                     \
1308         }                                                                                         \
1309     }
1310 
1311 DITHER_AND_QUANTIZE(uint8_t, lowbd);
1312 DITHER_AND_QUANTIZE(uint16_t, highbd);
1313 
svt_aom_wiener_denoise_2d(const uint8_t * const data[3],uint8_t * denoised[3],int32_t w,int32_t h,int32_t stride[3],int32_t chroma_sub[2],float * noise_psd[3],int32_t block_size,int32_t bit_depth,int32_t use_highbd)1314 int32_t svt_aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3], int32_t w,
1315                                   int32_t h, int32_t stride[3], int32_t chroma_sub[2],
1316                                   float *noise_psd[3], int32_t block_size, int32_t bit_depth,
1317                                   int32_t use_highbd) {
1318     float *plane = NULL, *window_full = NULL, *window_chroma = NULL;
1319     DECLARE_ALIGNED(32, float, *block);
1320     block                          = NULL;
1321     double *               block_d = NULL, *plane_d = NULL;
1322     struct aom_noise_tx_t *tx_full       = NULL;
1323     struct aom_noise_tx_t *tx_chroma     = NULL;
1324     const int32_t          num_blocks_w  = (w + block_size - 1) / block_size;
1325     const int32_t          num_blocks_h  = (h + block_size - 1) / block_size;
1326     const int32_t          result_stride = (num_blocks_w + 2) * block_size;
1327     const int32_t          result_height = (num_blocks_h + 2) * block_size;
1328     float *                result        = NULL;
1329     int32_t                init_success  = 1;
1330     AomFlatBlockFinder     block_finder_full;
1331     AomFlatBlockFinder     block_finder_chroma;
1332     const float            k_block_normalization = (float)((1 << bit_depth) - 1);
1333     if (chroma_sub[0] != chroma_sub[1]) {
1334         SVT_ERROR(
1335             "svt_aom_wiener_denoise_2d doesn't handle different chroma "
1336             "subsampling");
1337         return 0;
1338     }
1339     init_success &= svt_aom_flat_block_finder_init(
1340         &block_finder_full, block_size, bit_depth, use_highbd);
1341     result  = (float *)malloc((num_blocks_h + 2) * block_size * result_stride * sizeof(*result));
1342     plane   = (float *)malloc(block_size * block_size * sizeof(*plane));
1343     block   = (float *)svt_aom_memalign(32, 2 * block_size * block_size * sizeof(*block));
1344     block_d = (double *)malloc(block_size * block_size * sizeof(*block_d));
1345     plane_d = (double *)malloc(block_size * block_size * sizeof(*plane_d));
1346     window_full = get_half_cos_window(block_size);
1347     tx_full     = svt_aom_noise_tx_malloc(block_size);
1348 
1349     if (chroma_sub[0] != 0) {
1350         init_success &= svt_aom_flat_block_finder_init(
1351             &block_finder_chroma, block_size >> chroma_sub[0], bit_depth, use_highbd);
1352         window_chroma = get_half_cos_window(block_size >> chroma_sub[0]);
1353         tx_chroma     = svt_aom_noise_tx_malloc(block_size >> chroma_sub[0]);
1354     } else {
1355         window_chroma = window_full;
1356         tx_chroma     = tx_full;
1357     }
1358 
1359     init_success &= (int32_t)((tx_full != NULL) && (tx_chroma != NULL) && (plane != NULL) &&
1360                               (plane_d != NULL) && (block != NULL) && (block_d != NULL) &&
1361                               (window_full != NULL) && (window_chroma != NULL) && (result != NULL));
1362     for (int32_t c = init_success ? 0 : 3; c < 3; ++c) {
1363         float *                window_function = c == 0 ? window_full : window_chroma;
1364         AomFlatBlockFinder *   block_finder    = &block_finder_full;
1365         const int32_t          chroma_sub_h    = c > 0 ? chroma_sub[1] : 0;
1366         const int32_t          chroma_sub_w    = c > 0 ? chroma_sub[0] : 0;
1367         struct aom_noise_tx_t *tx              = (c > 0 && chroma_sub[0] > 0) ? tx_chroma : tx_full;
1368         if (!data[c] || !denoised[c])
1369             continue;
1370         if (c > 0 && chroma_sub[0] != 0)
1371             block_finder = &block_finder_chroma;
1372         memset(result, 0, sizeof(*result) * result_stride * result_height);
1373         // Do overlapped block processing (half overlapped). The block rows can
1374         // easily be done in parallel
1375         for (int32_t offsy = 0; offsy < (block_size >> chroma_sub_h);
1376              offsy += (block_size >> chroma_sub_h) / 2) {
1377             for (int32_t offsx = 0; offsx < (block_size >> chroma_sub_w);
1378                  offsx += (block_size >> chroma_sub_w) / 2) {
1379                 // Pad the boundary when processing each block-set.
1380                 for (int32_t by = -1; by < num_blocks_h; ++by) {
1381                     for (int32_t bx = -1; bx < num_blocks_w; ++bx) {
1382                         const int32_t pixels_per_block = (block_size >> chroma_sub_w) *
1383                             (block_size >> chroma_sub_h);
1384                         svt_aom_flat_block_finder_extract_block(
1385                             block_finder,
1386                             data[c],
1387                             w >> chroma_sub_w,
1388                             h >> chroma_sub_h,
1389                             stride[c],
1390                             bx * (block_size >> chroma_sub_w) + offsx,
1391                             by * (block_size >> chroma_sub_h) + offsy,
1392                             plane_d,
1393                             block_d);
1394                         for (int32_t j = 0; j < pixels_per_block; ++j) {
1395                             block[j] = (float)block_d[j];
1396                             plane[j] = (float)plane_d[j];
1397                         }
1398                         pointwise_multiply(window_function, block, pixels_per_block);
1399                         svt_aom_noise_tx_forward(tx, block);
1400                         svt_aom_noise_tx_filter(tx, noise_psd[c]);
1401                         svt_aom_noise_tx_inverse(tx, block);
1402 
1403                         // Apply window function to the plane approximation (we will apply
1404                         // it to the sum of plane + block when composing the results).
1405                         pointwise_multiply(window_function, plane, pixels_per_block);
1406 
1407                         for (int32_t y = 0; y < (block_size >> chroma_sub_h); ++y) {
1408                             const int32_t y_result = y + (by + 1) * (block_size >> chroma_sub_h) +
1409                                 offsy;
1410                             for (int32_t x = 0; x < (block_size >> chroma_sub_w); ++x) {
1411                                 const int32_t x_result = x +
1412                                     (bx + 1) * (block_size >> chroma_sub_w) + offsx;
1413                                 result[y_result * result_stride + x_result] +=
1414                                     (block[y * (block_size >> chroma_sub_w) + x] +
1415                                      plane[y * (block_size >> chroma_sub_w) + x]) *
1416                                     window_function[y * (block_size >> chroma_sub_w) + x];
1417                             }
1418                         }
1419                     }
1420                 }
1421             }
1422         }
1423         if (use_highbd) {
1424             dither_and_quantize_highbd(result,
1425                                        result_stride,
1426                                        (uint16_t *)denoised[c],
1427                                        w,
1428                                        h,
1429                                        stride[c],
1430                                        chroma_sub_w,
1431                                        chroma_sub_h,
1432                                        block_size,
1433                                        k_block_normalization);
1434         } else {
1435             dither_and_quantize_lowbd(result,
1436                                       result_stride,
1437                                       denoised[c],
1438                                       w,
1439                                       h,
1440                                       stride[c],
1441                                       chroma_sub_w,
1442                                       chroma_sub_h,
1443                                       block_size,
1444                                       k_block_normalization);
1445         }
1446     }
1447     free(result);
1448     free(plane);
1449     svt_aom_free(block);
1450     free(plane_d);
1451     free(block_d);
1452     free(window_full);
1453 
1454     svt_aom_noise_tx_free(tx_full);
1455 
1456     svt_aom_flat_block_finder_free(&block_finder_full);
1457     if (chroma_sub[0] != 0) {
1458         svt_aom_flat_block_finder_free(&block_finder_chroma);
1459         free(window_chroma);
1460         svt_aom_noise_tx_free(tx_chroma);
1461     }
1462     return init_success;
1463 }
1464 
svt_aom_denoise_and_model_alloc(AomDenoiseAndModel * ctx,int32_t bit_depth,int32_t block_size,float noise_level)1465 EbErrorType svt_aom_denoise_and_model_alloc(AomDenoiseAndModel *ctx, int32_t bit_depth,
1466                                             int32_t block_size, float noise_level) {
1467     ctx->block_size  = block_size;
1468     ctx->noise_level = noise_level;
1469     ctx->bit_depth   = bit_depth;
1470 
1471     EB_MALLOC_ARRAY(ctx->noise_psd[0], block_size * block_size);
1472     EB_MALLOC_ARRAY(ctx->noise_psd[1], block_size * block_size);
1473     EB_MALLOC_ARRAY(ctx->noise_psd[2], block_size * block_size);
1474     return EB_ErrorNone;
1475 }
1476 
denoise_and_model_dctor(EbPtr p)1477 static void denoise_and_model_dctor(EbPtr p) {
1478     AomDenoiseAndModel *obj = (AomDenoiseAndModel *)p;
1479 
1480     free(obj->flat_blocks);
1481     for (int32_t i = 0; i < 3; ++i) {
1482         EB_FREE_ARRAY(obj->denoised[i]);
1483         EB_FREE_ARRAY(obj->noise_psd[i]);
1484         EB_FREE_ARRAY(obj->packed[i]);
1485     }
1486     svt_aom_noise_model_free(&obj->noise_model);
1487     svt_aom_flat_block_finder_free(&obj->flat_block_finder);
1488 }
1489 
denoise_and_model_ctor(AomDenoiseAndModel * object_ptr,EbPtr object_init_data_ptr)1490 EbErrorType denoise_and_model_ctor(AomDenoiseAndModel *object_ptr, EbPtr object_init_data_ptr) {
1491     DenoiseAndModelInitData *init_data_ptr = (DenoiseAndModelInitData *)object_init_data_ptr;
1492     EbErrorType              return_error  = EB_ErrorNone;
1493     uint32_t                 use_highbd    = init_data_ptr->encoder_bit_depth > EB_8BIT ? 1 : 0;
1494 
1495     int32_t chroma_sub_log2[2] = {1, 1}; //todo: send chroma subsampling
1496     chroma_sub_log2[0]         = (init_data_ptr->encoder_color_format == EB_YUV444 ? 1 : 2) - 1;
1497     chroma_sub_log2[1]         = (init_data_ptr->encoder_color_format >= EB_YUV422 ? 1 : 2) - 1;
1498 
1499     object_ptr->dctor = denoise_and_model_dctor;
1500 
1501     return_error = svt_aom_denoise_and_model_alloc(
1502         object_ptr,
1503         init_data_ptr->encoder_bit_depth > EB_8BIT ? 10 : 8,
1504         DENOISING_BlockSize,
1505         (float)(init_data_ptr->noise_level / 10.0));
1506     if (return_error != EB_ErrorNone)
1507         return return_error;
1508     object_ptr->width     = init_data_ptr->width;
1509     object_ptr->height    = init_data_ptr->height;
1510     object_ptr->y_stride  = init_data_ptr->stride_y;
1511     object_ptr->uv_stride = init_data_ptr->stride_cb;
1512 
1513     //todo: consider replacing with EbPictureBuffersDesc
1514 
1515     EB_MALLOC_ARRAY(object_ptr->denoised[0],
1516                     (object_ptr->y_stride * object_ptr->height) << use_highbd);
1517     EB_MALLOC_ARRAY(object_ptr->denoised[1],
1518                     (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0]))
1519                         << use_highbd);
1520     EB_MALLOC_ARRAY(object_ptr->denoised[2],
1521                     (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0]))
1522                         << use_highbd);
1523 
1524     if (use_highbd) {
1525         EB_MALLOC_ARRAY(object_ptr->packed[0], (object_ptr->y_stride * object_ptr->height));
1526         EB_MALLOC_ARRAY(object_ptr->packed[1],
1527                         (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])));
1528         EB_MALLOC_ARRAY(object_ptr->packed[2],
1529                         (object_ptr->uv_stride * (object_ptr->height >> chroma_sub_log2[0])));
1530     }
1531 
1532     return return_error;
1533 }
1534 
denoise_and_model_realloc_if_necessary(struct AomDenoiseAndModel * ctx,EbPictureBufferDesc * sd,int32_t use_highbd)1535 static int32_t denoise_and_model_realloc_if_necessary(struct AomDenoiseAndModel *ctx,
1536                                                       EbPictureBufferDesc *sd, int32_t use_highbd) {
1537     int32_t chroma_sub_log2[2] = {1, 1}; //todo: send chroma subsampling
1538 
1539     const int32_t block_size = ctx->block_size;
1540 
1541     free(ctx->flat_blocks);
1542     ctx->flat_blocks = NULL;
1543 
1544     ctx->num_blocks_w = (sd->width + ctx->block_size - 1) / ctx->block_size;
1545     ctx->num_blocks_h = (sd->height + ctx->block_size - 1) / ctx->block_size;
1546     ctx->flat_blocks  = malloc(ctx->num_blocks_w * ctx->num_blocks_h);
1547 
1548     svt_aom_flat_block_finder_free(&ctx->flat_block_finder);
1549     if (!svt_aom_flat_block_finder_init(
1550             &ctx->flat_block_finder, ctx->block_size, ctx->bit_depth, use_highbd)) {
1551         SVT_ERROR("Unable to init flat block finder\n");
1552         return 0;
1553     }
1554 
1555     const AomNoiseModelParams params = {AOM_NOISE_SHAPE_SQUARE, 3, ctx->bit_depth, use_highbd};
1556     //  svt_aom_noise_model_free(&ctx->noise_model);
1557     if (!svt_aom_noise_model_init(&ctx->noise_model, params)) {
1558         SVT_ERROR("Unable to init noise model\n");
1559         return 0;
1560     }
1561 
1562     // Simply use a flat PSD (although we could use the flat blocks to estimate
1563     // PSD) those to estimate an actual noise PSD)
1564     const float y_noise_level  = svt_aom_noise_psd_get_default_value(ctx->block_size,
1565                                                                     ctx->noise_level);
1566     const float uv_noise_level = svt_aom_noise_psd_get_default_value(
1567         ctx->block_size >> chroma_sub_log2[1], ctx->noise_level);
1568     for (int32_t i = 0; i < block_size * block_size; ++i) {
1569         ctx->noise_psd[0][i] = y_noise_level;
1570         ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
1571     }
1572     return 1;
1573 }
1574 
pack_2d_pic(EbPictureBufferDesc * input_picture,uint16_t * packed[3])1575 static void pack_2d_pic(EbPictureBufferDesc *input_picture, uint16_t *packed[3]) {
1576     const uint32_t input_luma_offset = ((input_picture->origin_y) * input_picture->stride_y) +
1577         (input_picture->origin_x);
1578     const uint32_t input_bit_inc_luma_offset = ((input_picture->origin_y) *
1579                                                 input_picture->stride_bit_inc_y) +
1580         (input_picture->origin_x);
1581     const uint32_t input_cb_offset = (((input_picture->origin_y) >> 1) * input_picture->stride_cb) +
1582         ((input_picture->origin_x) >> 1);
1583     const uint32_t input_bit_inc_cb_offset = (((input_picture->origin_y) >> 1) *
1584                                               input_picture->stride_bit_inc_cb) +
1585         ((input_picture->origin_x) >> 1);
1586     const uint32_t input_cr_offset = (((input_picture->origin_y) >> 1) * input_picture->stride_cr) +
1587         ((input_picture->origin_x) >> 1);
1588     const uint32_t input_bit_inc_cr_offset = (((input_picture->origin_y) >> 1) *
1589                                               input_picture->stride_bit_inc_cr) +
1590         ((input_picture->origin_x) >> 1);
1591 
1592     pack2d_src(input_picture->buffer_y + input_luma_offset,
1593                input_picture->stride_y,
1594                input_picture->buffer_bit_inc_y + input_bit_inc_luma_offset,
1595                input_picture->stride_bit_inc_y,
1596                (uint16_t *)packed[0],
1597                input_picture->stride_y,
1598                input_picture->width,
1599                input_picture->height);
1600 
1601     pack2d_src(input_picture->buffer_cb + input_cb_offset,
1602                input_picture->stride_cr,
1603                input_picture->buffer_bit_inc_cb + input_bit_inc_cb_offset,
1604                input_picture->stride_bit_inc_cr,
1605                (uint16_t *)packed[1],
1606                input_picture->stride_cr,
1607                input_picture->width >> 1,
1608                input_picture->height >> 1);
1609 
1610     pack2d_src(input_picture->buffer_cr + input_cr_offset,
1611                input_picture->stride_cr,
1612                input_picture->buffer_bit_inc_cr + input_bit_inc_cr_offset,
1613                input_picture->stride_bit_inc_cr,
1614                (uint16_t *)packed[2],
1615                input_picture->stride_cr,
1616                input_picture->width >> 1,
1617                input_picture->height >> 1);
1618 }
1619 
unpack_2d_pic(uint8_t * packed[3],EbPictureBufferDesc * outputPicturePtr)1620 static void unpack_2d_pic(uint8_t *packed[3], EbPictureBufferDesc *outputPicturePtr) {
1621     uint32_t luma_buffer_offset = ((outputPicturePtr->origin_y) * outputPicturePtr->stride_y) +
1622         (outputPicturePtr->origin_x);
1623     uint32_t chroma_buffer_offset = (((outputPicturePtr->origin_y) >> 1) *
1624                                      outputPicturePtr->stride_cb) +
1625         ((outputPicturePtr->origin_x) >> 1);
1626     uint16_t luma_width    = (uint16_t)(outputPicturePtr->width);
1627     uint16_t chroma_width  = luma_width >> 1;
1628     uint16_t luma_height   = (uint16_t)(outputPicturePtr->height);
1629     uint16_t chroma_height = luma_height >> 1;
1630 
1631     un_pack2d((uint16_t *)(packed[0]),
1632               outputPicturePtr->stride_y,
1633               outputPicturePtr->buffer_y + luma_buffer_offset,
1634               outputPicturePtr->stride_y,
1635               outputPicturePtr->buffer_bit_inc_y + luma_buffer_offset,
1636               outputPicturePtr->stride_bit_inc_y,
1637               luma_width,
1638               luma_height);
1639 
1640     un_pack2d((uint16_t *)(packed[1]),
1641               outputPicturePtr->stride_cb,
1642               outputPicturePtr->buffer_cb + chroma_buffer_offset,
1643               outputPicturePtr->stride_cb,
1644               outputPicturePtr->buffer_bit_inc_cb + chroma_buffer_offset,
1645               outputPicturePtr->stride_bit_inc_cb,
1646               chroma_width,
1647               chroma_height);
1648 
1649     un_pack2d((uint16_t *)(packed[2]),
1650               outputPicturePtr->stride_cr,
1651               outputPicturePtr->buffer_cr + chroma_buffer_offset,
1652               outputPicturePtr->stride_cr,
1653               outputPicturePtr->buffer_bit_inc_cr + chroma_buffer_offset,
1654               outputPicturePtr->stride_bit_inc_cr,
1655               chroma_width,
1656               chroma_height);
1657 }
1658 
svt_aom_denoise_and_model_run(struct AomDenoiseAndModel * ctx,EbPictureBufferDesc * sd,AomFilmGrain * film_grain,int32_t use_highbd)1659 int32_t svt_aom_denoise_and_model_run(struct AomDenoiseAndModel *ctx, EbPictureBufferDesc *sd,
1660                                       AomFilmGrain *film_grain, int32_t use_highbd) {
1661     const int32_t block_size = ctx->block_size;
1662     uint8_t *     raw_data[3];
1663     int32_t       chroma_sub_log2[2] = {1, 1}; //todo: send chroma subsampling
1664     int32_t       strides[3]         = {sd->stride_y, sd->stride_cb, sd->stride_cr};
1665 
1666     if (!denoise_and_model_realloc_if_necessary(ctx, sd, use_highbd)) {
1667         SVT_ERROR("Unable to realloc buffers\n");
1668         return 0;
1669     }
1670 
1671     if (!use_highbd) { // 8 bits input
1672         raw_data[0] = sd->buffer_y + sd->origin_y * sd->stride_y + sd->origin_x;
1673         raw_data[1] = sd->buffer_cb + sd->stride_cb * (sd->origin_y >> chroma_sub_log2[0]) +
1674             (sd->origin_x >> chroma_sub_log2[1]);
1675         raw_data[2] = sd->buffer_cr + sd->stride_cr * (sd->origin_y >> chroma_sub_log2[0]) +
1676             (sd->origin_x >> chroma_sub_log2[1]);
1677     } else { // 10 bits input
1678         pack_2d_pic(sd, ctx->packed);
1679 
1680         raw_data[0] = (uint8_t *)(ctx->packed[0]);
1681         raw_data[1] = (uint8_t *)(ctx->packed[1]);
1682         raw_data[2] = (uint8_t *)(ctx->packed[2]);
1683     }
1684 
1685     const uint8_t *const data[3] = {raw_data[0], raw_data[1], raw_data[2]};
1686 
1687     svt_aom_flat_block_finder_run(
1688         &ctx->flat_block_finder, data[0], sd->width, sd->height, strides[0], ctx->flat_blocks);
1689 
1690     if (!svt_aom_wiener_denoise_2d(data,
1691                                    ctx->denoised,
1692                                    sd->width,
1693                                    sd->height,
1694                                    strides,
1695                                    chroma_sub_log2,
1696                                    ctx->noise_psd,
1697                                    block_size,
1698                                    ctx->bit_depth,
1699                                    use_highbd)) {
1700         SVT_ERROR("Unable to denoise image\n");
1701         return 0;
1702     }
1703 
1704     const AomNoiseStatus status = svt_aom_noise_model_update(&ctx->noise_model,
1705                                                              data,
1706                                                              (const uint8_t *const *)ctx->denoised,
1707                                                              sd->width,
1708                                                              sd->height,
1709                                                              strides,
1710                                                              chroma_sub_log2,
1711                                                              ctx->flat_blocks,
1712                                                              block_size);
1713 
1714     int32_t have_noise_estimate = 0;
1715     if (status == AOM_NOISE_STATUS_OK || status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
1716         svt_aom_noise_model_save_latest(&ctx->noise_model);
1717         have_noise_estimate = 1;
1718     }
1719 
1720     film_grain->apply_grain = 0;
1721     if (have_noise_estimate) {
1722         if (!svt_aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
1723             SVT_ERROR("Unable to get grain parameters.\n");
1724             return 0;
1725         }
1726         film_grain->apply_grain = 1;
1727 
1728         if (!use_highbd) {
1729             if (svt_memcpy != NULL) {
1730                 svt_memcpy(raw_data[0], ctx->denoised[0], (strides[0] * sd->height) << use_highbd);
1731                 svt_memcpy(raw_data[1],
1732                            ctx->denoised[1],
1733                            (strides[1] * (sd->height >> chroma_sub_log2[0])) << use_highbd);
1734                 svt_memcpy(raw_data[2],
1735                            ctx->denoised[2],
1736                            (strides[2] * (sd->height >> chroma_sub_log2[0])) << use_highbd);
1737             } else {
1738                 svt_memcpy_c(
1739                     raw_data[0], ctx->denoised[0], (strides[0] * sd->height) << use_highbd);
1740                 svt_memcpy_c(raw_data[1],
1741                              ctx->denoised[1],
1742                              (strides[1] * (sd->height >> chroma_sub_log2[0])) << use_highbd);
1743                 svt_memcpy_c(raw_data[2],
1744                              ctx->denoised[2],
1745                              (strides[2] * (sd->height >> chroma_sub_log2[0])) << use_highbd);
1746             }
1747         } else
1748             unpack_2d_pic(ctx->denoised, sd);
1749     }
1750     svt_aom_flat_block_finder_free(&ctx->flat_block_finder);
1751     svt_aom_noise_model_free(&ctx->noise_model);
1752     free(ctx->flat_blocks);
1753     ctx->flat_blocks = NULL;
1754 
1755     return 1;
1756 }
1757