1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10  */
11 
12 /*!\file
13   * \brief Describes film grain parameters and film grain synthesis
14   *
15   */
16 
17 #include <stdio.h>
18 #include <string.h>
19 #include <stdlib.h>
20 #include "grainSynthesis.h"
21 #include "EbLog.h"
22 
// Samples with a Gaussian distribution in the range [-2048, 2047] (12 bits),
// with zero mean and a standard deviation of about 512.
// Values should be divided by 4 for the 10-bit range and by 16 for the 8-bit range.
26 static const int32_t gaussian_sequence[2048] = {
27     56,    568,   -180, 172,   124,   -84,   172,   -64,   -900,  24,    820,   224,   1248,
28     996,   272,   -8,   -916,  -388,  -732,  -104,  -188,  800,   112,   -652,  -320,  -376,
29     140,   -252,  492,  -168,  44,    -788,  588,   -584,  500,   -228,  12,    680,   272,
30     -476,  972,   -100, 652,   368,   432,   -196,  -720,  -192,  1000,  -332,  652,   -136,
31     -552,  -604,  -4,   192,   -220,  -136,  1000,  -52,   372,   -96,   -624,  124,   -24,
32     396,   540,   -12,  -104,  640,   464,   244,   -208,  -84,   368,   -528,  -740,  248,
33     -968,  -848,  608,  376,   -60,   -292,  -40,   -156,  252,   -292,  248,   224,   -280,
34     400,   -244,  244,  -60,   76,    -80,   212,   532,   340,   128,   -36,   824,   -352,
35     -60,   -264,  -96,  -612,  416,   -704,  220,   -204,  640,   -160,  1220,  -408,  900,
36     336,   20,    -336, -96,   -792,  304,   48,    -28,   -1232, -1172, -448,  104,   -292,
37     -520,  244,   60,   -948,  0,     -708,  268,   108,   356,   -548,  488,   -344,  -136,
38     488,   -196,  -224, 656,   -236,  -1128, 60,    4,     140,   276,   -676,  -376,  168,
39     -108,  464,   8,    564,   64,    240,   308,   -300,  -400,  -456,  -136,  56,    120,
40     -408,  -116,  436,  504,   -232,  328,   844,   -164,  -84,   784,   -168,  232,   -224,
41     348,   -376,  128,  568,   96,    -1244, -288,  276,   848,   832,   -360,  656,   464,
42     -384,  -332,  -356, 728,   -388,  160,   -192,  468,   296,   224,   140,   -776,  -100,
43     280,   4,     196,  44,    -36,   -648,  932,   16,    1428,  28,    528,   808,   772,
44     20,    268,   88,   -332,  -284,  124,   -384,  -448,  208,   -228,  -1044, -328,  660,
45     380,   -148,  -300, 588,   240,   540,   28,    136,   -88,   -436,  256,   296,   -1000,
46     1400,  0,     -48,  1056,  -136,  264,   -528,  -1108, 632,   -484,  -592,  -344,  796,
47     124,   -668,  -768, 388,   1296,  -232,  -188,  -200,  -288,  -4,    308,   100,   -168,
48     256,   -500,  204,  -508,  648,   -136,  372,   -272,  -120,  -1004, -552,  -548,  -384,
49     548,   -296,  428,  -108,  -8,    -912,  -324,  -224,  -88,   -112,  -220,  -100,  996,
50     -796,  548,   360,  -216,  180,   428,   -200,  -212,  148,   96,    148,   284,   216,
51     -412,  -320,  120,  -300,  -384,  -604,  -572,  -332,  -8,    -180,  -176,  696,   116,
52     -88,   628,   76,   44,    -516,  240,   -208,  -40,   100,   -592,  344,   -308,  -452,
53     -228,  20,    916,  -1752, -136,  -340,  -804,  140,   40,    512,   340,   248,   184,
54     -492,  896,   -156, 932,   -628,  328,   -688,  -448,  -616,  -752,  -100,  560,   -1020,
55     180,   -800,  -64,  76,    576,   1068,  396,   660,   552,   -108,  -28,   320,   -628,
56     312,   -92,   -92,  -472,  268,   16,    560,   516,   -672,  -52,   492,   -100,  260,
57     384,   284,   292,  304,   -148,  88,    -152,  1012,  1064,  -228,  164,   -376,  -684,
58     592,   -392,  156,  196,   -524,  -64,   -884,  160,   -176,  636,   648,   404,   -396,
59     -436,  864,   424,  -728,  988,   -604,  904,   -592,  296,   -224,  536,   -176,  -920,
60     436,   -48,   1176, -884,  416,   -776,  -824,  -884,  524,   -548,  -564,  -68,   -164,
61     -96,   692,   364,  -692,  -1012, -68,   260,   -480,  876,   -1116, 452,   -332,  -352,
62     892,   -1088, 1220, -676,  12,    -292,  244,   496,   372,   -32,   280,   200,   112,
63     -440,  -96,   24,   -644,  -184,  56,    -432,  224,   -980,  272,   -260,  144,   -436,
64     420,   356,   364,  -528,  76,    172,   -744,  -368,  404,   -752,  -416,  684,   -688,
65     72,    540,   416,  92,    444,   480,   -72,   -1416, 164,   -1172, -68,   24,    424,
66     264,   1040,  128,  -912,  -524,  -356,  64,    876,   -12,   4,     -88,   532,   272,
67     -524,  320,   276,  -508,  940,   24,    -400,  -120,  756,   60,    236,   -412,  100,
68     376,   -484,  400,  -100,  -740,  -108,  -260,  328,   -268,  224,   -200,  -416,  184,
69     -604,  -564,  -20,  296,   60,    892,   -888,  60,    164,   68,    -760,  216,   -296,
70     904,   -336,  -28,  404,   -356,  -568,  -208,  -1480, -512,  296,   328,   -360,  -164,
71     -1560, -776,  1156, -428,  164,   -504,  -112,  120,   -216,  -148,  -264,  308,   32,
72     64,    -72,   72,   116,   176,   -64,   -272,  460,   -536,  -784,  -280,  348,   108,
73     -752,  -132,  524,  -540,  -776,  116,   -296,  -1196, -288,  -560,  1040,  -472,  116,
74     -848,  -1116, 116,  636,   696,   284,   -176,  1016,  204,   -864,  -648,  -248,  356,
75     972,   -584,  -204, 264,   880,   528,   -24,   -184,  116,   448,   -144,  828,   524,
76     212,   -212,  52,   12,    200,   268,   -488,  -404,  -880,  824,   -672,  -40,   908,
77     -248,  500,   716,  -576,  492,   -576,  16,    720,   -108,  384,   124,   344,   280,
78     576,   -500,  252,  104,   -308,  196,   -188,  -8,    1268,  296,   1032,  -1196, 436,
79     316,   372,   -432, -200,  -660,  704,   -224,  596,   -132,  268,   32,    -452,  884,
80     104,   -1008, 424,  -1348, -280,  4,     -1168, 368,   476,   696,   300,   -8,    24,
81     180,   -592,  -196, 388,   304,   500,   724,   -160,  244,   -84,   272,   -256,  -420,
82     320,   208,   -144, -156,  156,   364,   452,   28,    540,   316,   220,   -644,  -248,
83     464,   72,    360,  32,    -388,  496,   -680,  -48,   208,   -116,  -408,  60,    -604,
84     -392,  548,   -840, 784,   -460,  656,   -544,  -388,  -264,  908,   -800,  -628,  -612,
85     -568,  572,   -220, 164,   288,   -16,   -308,  308,   -112,  -636,  -760,  280,   -668,
86     432,   364,   240,  -196,  604,   340,   384,   196,   592,   -44,   -500,  432,   -580,
87     -132,  636,   -76,  392,   4,     -412,  540,   508,   328,   -356,  -36,   16,    -220,
88     -64,   -248,  -60,  24,    -192,  368,   1040,  92,    -24,   -1044, -32,   40,    104,
89     148,   192,   -136, -520,  56,    -816,  -224,  732,   392,   356,   212,   -80,   -424,
90     -1008, -324,  588,  -1496, 576,   460,   -816,  -848,  56,    -580,  -92,   -1372, -112,
91     -496,  200,   364,  52,    -140,  48,    -48,   -60,   84,    72,    40,    132,   -356,
92     -268,  -104,  -284, -404,  732,   -520,  164,   -304,  -540,  120,   328,   -76,   -460,
93     756,   388,   588,  236,   -436,  -72,   -176,  -404,  -316,  -148,  716,   -604,  404,
94     -72,   -88,   -888, -68,   944,   88,    -220,  -344,  960,   472,   460,   -232,  704,
95     120,   832,   -228, 692,   -508,  132,   -476,  844,   -748,  -364,  -44,   1116,  -1104,
96     -1056, 76,    428,  552,   -692,  60,    356,   96,    -384,  -188,  -612,  -576,  736,
97     508,   892,   352,  -1132, 504,   -24,   -352,  324,   332,   -600,  -312,  292,   508,
98     -144,  -8,    484,  48,    284,   -260,  -240,  256,   -100,  -292,  -204,  -44,   472,
99     -204,  908,   -188, -1000, -256,  92,    1164,  -392,  564,   356,   652,   -28,   -884,
100     256,   484,   -192, 760,   -176,  376,   -524,  -452,  -436,  860,   -736,  212,   124,
101     504,   -476,  468,  76,    -472,  552,   -692,  -944,  -620,  740,   -240,  400,   132,
102     20,    192,   -196, 264,   -668,  -1012, -60,   296,   -316,  -828,  76,    -156,  284,
103     -768,  -448,  -832, 148,   248,   652,   616,   1236,  288,   -328,  -400,  -124,  588,
104     220,   520,   -696, 1032,  768,   -740,  -92,   -272,  296,   448,   -464,  412,   -200,
105     392,   440,   -200, 264,   -152,  -260,  320,   1032,  216,   320,   -8,    -64,   156,
106     -1016, 1084,  1172, 536,   484,   -432,  132,   372,   -52,   -256,  84,    116,   -352,
107     48,    116,   304,  -384,  412,   924,   -300,  528,   628,   180,   648,   44,    -980,
108     -220,  1320,  48,   332,   748,   524,   -268,  -720,  540,   -276,  564,   -344,  -208,
109     -196,  436,   896,  88,    -392,  132,   80,    -964,  -288,  568,   56,    -48,   -456,
110     888,   8,     552,  -156,  -292,  948,   288,   128,   -716,  -292,  1192,  -152,  876,
111     352,   -600,  -260, -812,  -468,  -28,   -120,  -32,   -44,   1284,  496,   192,   464,
112     312,   -76,   -516, -380,  -456,  -1012, -48,   308,   -156,  36,    492,   -156,  -808,
113     188,   1652,  68,   -120,  -116,  316,   160,   -140,  352,   808,   -416,  592,   316,
114     -480,  56,    528,  -204,  -568,  372,   -232,  752,   -344,  744,   -4,    324,   -416,
115     -600,  768,   268,  -248,  -88,   -132,  -420,  -432,  80,    -288,  404,   -316,  -1216,
116     -588,  520,   -108, 92,    -320,  368,   -480,  -216,  -92,   1688,  -300,  180,   1020,
117     -176,  820,   -68,  -228,  -260,  436,   -904,  20,    40,    -508,  440,   -736,  312,
118     332,   204,   760,  -372,  728,   96,    -20,   -632,  -520,  -560,  336,   1076,  -64,
119     -532,  776,   584,  192,   396,   -728,  -520,  276,   -188,  80,    -52,   -612,  -252,
120     -48,   648,   212,  -688,  228,   -52,   -260,  428,   -412,  -272,  -404,  180,   816,
121     -796,  48,    152,  484,   -88,   -216,  988,   696,   188,   -528,  648,   -116,  -180,
122     316,   476,   12,   -564,  96,    476,   -252,  -364,  -376,  -392,  556,   -256,  -576,
123     260,   -352,  120,  -16,   -136,  -260,  -492,  72,    556,   660,   580,   616,   772,
124     436,   424,   -32,  -324,  -1268, 416,   -324,  -80,   920,   160,   228,   724,   32,
125     -516,  64,    384,  68,    -128,  136,   240,   248,   -204,  -68,   252,   -932,  -120,
126     -480,  -628,  -84,  192,   852,   -404,  -288,  -132,  204,   100,   168,   -68,   -196,
127     -868,  460,   1080, 380,   -80,   244,   0,     484,   -888,  64,    184,   352,   600,
128     460,   164,   604,  -196,  320,   -64,   588,   -184,  228,   12,    372,   48,    -848,
129     -344,  224,   208,  -200,  484,   128,   -20,   272,   -468,  -840,  384,   256,   -720,
130     -520,  -464,  -580, 112,   -120,  644,   -356,  -208,  -608,  -528,  704,   560,   -424,
131     392,   828,   40,   84,    200,   -152,  0,     -144,  584,   280,   -120,  80,    -556,
132     -972,  -196,  -472, 724,   80,    168,   -32,   88,    160,   -688,  0,     160,   356,
133     372,   -776,  740,  -128,  676,   -248,  -480,  4,     -364,  96,    544,   232,   -1032,
134     956,   236,   356,  20,    -40,   300,   24,    -676,  -596,  132,   1120,  -104,  532,
135     -1096, 568,   648,  444,   508,   380,   188,   -376,  -604,  1488,  424,   24,    756,
136     -220,  -192,  716,  120,   920,   688,   168,   44,    -460,  568,   284,   1144,  1160,
137     600,   424,   888,  656,   -356,  -320,  220,   316,   -176,  -724,  -188,  -816,  -628,
138     -348,  -228,  -380, 1012,  -452,  -660,  736,   928,   404,   -696,  -72,   -268,  -892,
139     128,   184,   -344, -780,  360,   336,   400,   344,   428,   548,   -112,  136,   -228,
140     -216,  -820,  -516, 340,   92,    -136,  116,   -300,  376,   -244,  100,   -316,  -520,
141     -284,  -12,   824,  164,   -548,  -180,  -128,  116,   -924,  -828,  268,   -368,  -580,
142     620,   192,   160,  0,     -1676, 1068,  424,   -56,   -360,  468,   -156,  720,   288,
143     -528,  556,   -364, 548,   -148,  504,   316,   152,   -648,  -620,  -684,  -24,   -376,
144     -384,  -108,  -920, -1032, 768,   180,   -264,  -508,  -1268, -260,  -60,   300,   -240,
145     988,   724,   -376, -576,  -212,  -736,  556,   192,   1092,  -620,  -880,  376,   -56,
146     -4,    -216,  -32,  836,   268,   396,   1332,  864,   -600,  100,   56,    -412,  -92,
147     356,   180,   884,  -468,  -436,  292,   -388,  -804,  -704,  -840,  368,   -348,  140,
148     -724,  1536,  940,  372,   112,   -372,  436,   -480,  1136,  296,   -32,   -228,  132,
149     -48,   -220,  868,  -1016, -60,   -1044, -464,  328,   916,   244,   12,    -736,  -296,
150     360,   468,   -376, -108,  -92,   788,   368,   -56,   544,   400,   -672,  -420,  728,
151     16,    320,   44,   -284,  -380,  -796,  488,   132,   204,   -596,  -372,  88,    -152,
152     -908,  -636,  -572, -624,  -116,  -692,  -200,  -56,   276,   -88,   484,   -324,  948,
153     864,   1000,  -456, -184,  -276,  292,   -296,  156,   676,   320,   160,   908,   -84,
154     -1236, -288,  -116, 260,   -372,  -644,  732,   -756,  -96,   84,    344,   -520,  348,
155     -688,  240,   -84,  216,   -1044, -136,  -676,  -396,  -1500, 960,   -40,   176,   168,
156     1516,  420,   -504, -344,  -364,  -360,  1216,  -940,  -380,  -212,  252,   -660,  -708,
157     484,   -444,  -152, 928,   -120,  1112,  476,   -260,  560,   -148,  -344,  108,   -196,
158     228,   -288,  504,  560,   -328,  -88,   288,   -1008, 460,   -228,  468,   -836,  -196,
159     76,    388,   232,  412,   -1168, -716,  -644,  756,   -172,  -356,  -504,  116,   432,
160     528,   48,    476,  -168,  -608,  448,   160,   -532,  -272,  28,    -676,  -12,   828,
161     980,   456,   520,  104,   -104,  256,   -344,  -4,    -28,   -368,  -52,   -524,  -572,
162     -556,  -200,  768,  1124,  -208,  -512,  176,   232,   248,   -148,  -888,  604,   -600,
163     -304,  804,   -156, -212,  488,   -192,  -804,  -256,  368,   -360,  -916,  -328,  228,
164     -240,  -448,  -472, 856,   -556,  -364,  572,   -12,   -156,  -368,  -340,  432,   252,
165     -752,  -152,  288,  268,   -580,  -848,  -592,  108,   -76,   244,   312,   -716,  592,
166     -80,   436,   360,  4,     -248,  160,   516,   584,   732,   44,    -468,  -280,  -292,
167     -156,  -588,  28,   308,   912,   24,    124,   156,   180,   -252,  944,   -924,  -772,
168     -520,  -428,  -624, 300,   -212,  -1144, 32,    -724,  800,   -1128, -212,  -1288, -848,
169     180,   -416,  440,  192,   -576,  -792,  -76,   -1080, 80,    -532,  -352,  -132,  380,
170     -820,  148,   1112, 128,   164,   456,   700,   -924,  144,   -668,  -384,  648,   -832,
171     508,   552,   -52,  -100,  -656,  208,   -568,  748,   -88,   680,   232,   300,   192,
172     -408,  -1012, -152, -252,  -268,  272,   -876,  -664,  -648,  -332,  -136,  16,    12,
173     1152,  -28,   332,  -536,  320,   -672,  -460,  -316,  532,   -260,  228,   -40,   1052,
174     -816,  180,   88,   -496,  -556,  -672,  -368,  428,   92,    356,   404,   -408,  252,
175     196,   -176,  -556, 792,   268,   32,    372,   40,    96,    -332,  328,   120,   372,
176     -900,  -40,   472,  -264,  -592,  952,   128,   656,   112,   664,   -232,  420,   4,
177     -344,  -464,  556,  244,   -416,  -32,   252,   0,     -412,  188,   -696,  508,   -476,
178     324,   -1096, 656,  -312,  560,   264,   -136,  304,   160,   -64,   -580,  248,   336,
179     -720,  560,   -348, -288,  -276,  -196,  -500,  852,   -544,  -236,  -1128, -992,  -776,
180     116,   56,    52,   860,   884,   212,   -12,   168,   1020,  512,   -552,  924,   -148,
181     716,   188,   164,  -340,  -520,  -184,  880,   -152,  -680,  -208,  -1156, -300,  -528,
182     -472,  364,   100,  -744,  -1056, -32,   540,   280,   144,   -676,  -32,   -232,  -280,
183     -224,  96,    568,  -76,   172,   148,   148,   104,   32,    -296,  -32,   788,   -80,
184     32,    -16,   280,  288,   944,   428,   -484};
185 
// Number of random bits requested per Gaussian sample. gaussian_sequence has
// 2048 (= 2^11) entries, so an 11-bit value can index the whole table.
static const int32_t gauss_bits = 11;

// Luma sub-block height and width. The height is also used by init_arrays()
// to size the luma column buffer.
static int32_t luma_subblock_size_y = 32;
static int32_t luma_subblock_size_x = 32;

// Chroma sub-block height and width. The height is also used by init_arrays()
// to size the chroma column buffers.
static int32_t chroma_subblock_size_y = 16;
static int32_t chroma_subblock_size_x = 16;

// Bounds of the luma "legal range".
// NOTE(review): 16/235 look like 8-bit limited-range levels; how they are
// scaled for higher bit depths is not visible here - confirm at the use site.
static const int32_t min_luma_legal_range = 16;
static const int32_t max_luma_legal_range = 235;

// Bounds of the chroma "legal range".
// NOTE(review): 16/240 look like 8-bit limited-range levels - confirm at the
// use site.
static const int32_t min_chroma_legal_range = 16;
static const int32_t max_chroma_legal_range = 240;

// Per-component scaling lookup tables with 256 entries each. init_arrays()
// resets all three to zero; they are filled elsewhere in the file.
static int32_t scaling_lut_y[256];
static int32_t scaling_lut_cb[256];
static int32_t scaling_lut_cr[256];

// Grain value parameters. grain_min and grain_max are the clamp bounds applied
// to generated grain samples; none of the three is assigned in this part of
// the file, so they must be set before the grain generators run.
static int32_t grain_center;
static int32_t grain_min;
static int32_t grain_max;

// 16-bit state of the pseudo-random generator: seeded by
// init_random_generator() and advanced by get_random_number().
static uint16_t random_register = 0; // random number generator register
209 
210 //----------------------------------------------------------------------
211 // todo: aomlib memory functions (to be replaced by Eb functions)
212 /*
213 #define ADDRESS_STORAGE_SIZE sizeof(size_t)
214 #define DEFAULT_ALIGNMENT (2 * sizeof(void *))
215 #define AOM_MAX_ALLOCABLE_MEMORY 8589934592  // 8 GB
216 //returns an addr aligned to the byte boundary specified by align
217 #define align_addr(addr, align) \
218   (void *)(((size_t)(addr) + ((align)-1)) & ~(size_t)((align)-1))
219 
220 // Returns 0 in case of overflow of nmemb * size.
221 static int32_t check_size_argument_overflow(uint64_t nmemb, uint64_t size) {
222     const uint64_t total_size = nmemb * size;
223     if (nmemb == 0) return 1;
224     if (size > AOM_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
225     if (total_size != (size_t)total_size) return 0;
226     return 1;
227 }
228 
229 static size_t get_aligned_malloc_size(size_t size, size_t align) {
230     return size + align - 1 + ADDRESS_STORAGE_SIZE;
231 }
232 
233 static size_t *get_malloc_address_location(void *const mem) {
234     return ((size_t *)mem) - 1;
235 }
236 
237 static void set_actual_malloc_address(void *const mem,
238     const void *const malloc_addr) {
239     size_t *const malloc_addr_location = get_malloc_address_location(mem);
240     *malloc_addr_location = (size_t)malloc_addr;
241 }
242 
243 static void *get_actual_malloc_address(void *const mem) {
244     const size_t *const malloc_addr_location = get_malloc_address_location(mem);
245     return (void *)(*malloc_addr_location);
246 }
247 
248 void *svt_aom_memalign(size_t align, size_t size) {
249     void *x = NULL;
250     const size_t aligned_size = get_aligned_malloc_size(size, align);
251 #if defined(AOM_MAX_ALLOCABLE_MEMORY)
252     if (!check_size_argument_overflow(1, aligned_size)) return NULL;
253 #endif
254     void *const addr = malloc(aligned_size);
255     if (addr) {
256         x = align_addr((uint8_t *)addr + ADDRESS_STORAGE_SIZE, align);
257         set_actual_malloc_address(x, addr);
258     }
259     return x;
260 }
261 
262 void *svt_aom_malloc(size_t size) { return svt_aom_memalign(DEFAULT_ALIGNMENT, size); }
263 
264 void svt_aom_free(void *memblk) {
265     if (memblk) {
266         void *addr = get_actual_malloc_address(memblk);
267         free(addr);
268     }
269 }
270 */
271 //--------------------------------------------------------------------
272 
// Allocates `count` int32_t elements with malloc(). A NULL result is tolerated
// only for a zero-element request, because malloc(0) is allowed to return NULL.
// NOTE(review): ASSERT is a project macro; if it compiles out in release
// builds, an allocation failure still goes undetected there.
static int32_t *grain_alloc_int32(size_t count) {
    int32_t *buf = (int32_t *)malloc(sizeof(*buf) * count);
    ASSERT(buf != NULL || count == 0);
    return buf;
}

// Allocates a table of `rows` prediction positions; every row is an
// int32_t[3] triple. A zero-row table may legitimately be NULL.
static int32_t **grain_alloc_pred_pos(int32_t rows) {
    int32_t **table = (int32_t **)malloc(sizeof(*table) * rows);
    ASSERT(table != NULL || rows == 0);
    for (int32_t row = 0; row < rows; row++) table[row] = grain_alloc_int32(3);
    return table;
}

// Resets the scaling lookup tables and allocates every working buffer used by
// grain synthesis.
//
// params                - film grain parameters; ar_coeff_lag and num_y_points
//                         determine the number of prediction positions.
// luma_stride           - number of int32_t per luma line-buffer row (2 rows).
// chroma_stride         - number of int32_t per chroma line-buffer row.
// pred_pos_luma_p       - out: table of {row offset, column offset, 0} triples
//                         covering the causal neighbourhood of the current
//                         sample for the given lag.
// pred_pos_chroma_p     - out: same triples as the luma table, plus one final
//                         {0, 0, 1} entry when num_y_points > 0.
// *_grain_block         - out: grain templates of luma_grain_samples /
//                         chroma_grain_samples elements.
// *_line_buf, *_col_buf - out: line and column buffers.
// chroma_subsamp_y/x    - chroma subsampling shifts used to size the chroma
//                         line and column buffers.
//
// Everything allocated here is owned by the caller and is released by
// dealloc_arrays() called with the same params.
static void init_arrays(AomFilmGrain *params, int32_t luma_stride, int32_t chroma_stride,
                        int32_t ***pred_pos_luma_p, int32_t ***pred_pos_chroma_p,
                        int32_t **luma_grain_block, int32_t **cb_grain_block,
                        int32_t **cr_grain_block, int32_t **y_line_buf, int32_t **cb_line_buf,
                        int32_t **cr_line_buf, int32_t **y_col_buf, int32_t **cb_col_buf,
                        int32_t **cr_col_buf, int32_t luma_grain_samples,
                        int32_t chroma_grain_samples, int32_t chroma_subsamp_y,
                        int32_t chroma_subsamp_x) {
    memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
    memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
    memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);

    // A lag of L gives L full rows of (2L + 1) positions above the current
    // sample plus L positions to its left: 2 * L * (L + 1) in total. This is
    // zero for L == 0, which is why zero-sized tables must be accepted.
    int32_t num_pos_luma   = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    int32_t num_pos_chroma = num_pos_luma;
    if (params->num_y_points > 0)
        ++num_pos_chroma; // extra position referring to the co-located luma grain

    int32_t **pred_pos_luma   = grain_alloc_pred_pos(num_pos_luma);
    int32_t **pred_pos_chroma = grain_alloc_pred_pos(num_pos_chroma);

    int32_t pos_ar_index = 0;

    // Rows strictly above the current sample.
    for (int32_t row = -params->ar_coeff_lag; row < 0; row++) {
        for (int32_t col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1; col++) {
            pred_pos_luma[pos_ar_index][0] = row;
            pred_pos_luma[pos_ar_index][1] = col;
            pred_pos_luma[pos_ar_index][2] = 0;

            pred_pos_chroma[pos_ar_index][0] = row;
            pred_pos_chroma[pos_ar_index][1] = col;
            pred_pos_chroma[pos_ar_index][2] = 0;
            ++pos_ar_index;
        }
    }

    // Samples to the left of the current one on the same row.
    for (int32_t col = -params->ar_coeff_lag; col < 0; col++) {
        pred_pos_luma[pos_ar_index][0] = 0;
        pred_pos_luma[pos_ar_index][1] = col;
        pred_pos_luma[pos_ar_index][2] = 0;

        pred_pos_chroma[pos_ar_index][0] = 0;
        pred_pos_chroma[pos_ar_index][1] = col;
        pred_pos_chroma[pos_ar_index][2] = 0;

        ++pos_ar_index;
    }

    // Final chroma entry: third component 1 marks a luma-based prediction.
    if (params->num_y_points > 0) {
        pred_pos_chroma[pos_ar_index][0] = 0;
        pred_pos_chroma[pos_ar_index][1] = 0;
        pred_pos_chroma[pos_ar_index][2] = 1;
    }

    *pred_pos_luma_p   = pred_pos_luma;
    *pred_pos_chroma_p = pred_pos_chroma;

    // Element counts are computed in size_t, matching the original
    // sizeof-first arithmetic.
    const size_t chroma_line_elems = (size_t)chroma_stride * (2 >> chroma_subsamp_y);
    const size_t chroma_col_elems =
        (size_t)(chroma_subblock_size_y + (2 >> chroma_subsamp_y)) * (2 >> chroma_subsamp_x);

    *y_line_buf  = grain_alloc_int32((size_t)luma_stride * 2);
    *cb_line_buf = grain_alloc_int32(chroma_line_elems);
    *cr_line_buf = grain_alloc_int32(chroma_line_elems);

    *y_col_buf  = grain_alloc_int32((size_t)(luma_subblock_size_y + 2) * 2);
    *cb_col_buf = grain_alloc_int32(chroma_col_elems);
    *cr_col_buf = grain_alloc_int32(chroma_col_elems);

    *luma_grain_block = grain_alloc_int32((size_t)luma_grain_samples);
    *cb_grain_block   = grain_alloc_int32((size_t)chroma_grain_samples);
    *cr_grain_block   = grain_alloc_int32((size_t)chroma_grain_samples);
}
361 
// Frees a prediction-position table of `rows` rows and clears the caller's
// pointer. A NULL table is accepted and left NULL.
static void grain_free_pred_pos(int32_t ***table_p, int32_t rows) {
    int32_t **table = *table_p;
    if (table != NULL) {
        for (int32_t row = 0; row < rows; row++) free(table[row]);
        free(table);
    }
    *table_p = NULL;
}

// Frees one int32_t buffer and clears the caller's pointer.
static void grain_free_int32(int32_t **buf_p) {
    free(*buf_p);
    *buf_p = NULL;
}

// Releases every buffer allocated by init_arrays().
//
// params must describe the same ar_coeff_lag / num_y_points that were used for
// the allocation, because the row counts of the two prediction-position tables
// are recomputed from it. Each pointer argument addresses the caller's
// variable; after the call all of them are NULL, so a stale pointer cannot be
// reused or freed twice.
static void dealloc_arrays(AomFilmGrain *params, int32_t ***pred_pos_luma,
                           int32_t ***pred_pos_chroma, int32_t **luma_grain_block,
                           int32_t **cb_grain_block, int32_t **cr_grain_block, int32_t **y_line_buf,
                           int32_t **cb_line_buf, int32_t **cr_line_buf, int32_t **y_col_buf,
                           int32_t **cb_col_buf, int32_t **cr_col_buf) {
    // Same position counts as in init_arrays().
    int32_t num_pos_luma   = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    int32_t num_pos_chroma = num_pos_luma;
    if (params->num_y_points > 0)
        ++num_pos_chroma;

    grain_free_pred_pos(pred_pos_luma, num_pos_luma);
    grain_free_pred_pos(pred_pos_chroma, num_pos_chroma);

    grain_free_int32(y_line_buf);
    grain_free_int32(cb_line_buf);
    grain_free_int32(cr_line_buf);

    grain_free_int32(y_col_buf);
    grain_free_int32(cb_col_buf);
    grain_free_int32(cr_col_buf);

    grain_free_int32(luma_grain_block);
    grain_free_int32(cb_grain_block);
    grain_free_int32(cr_grain_block);
}
396 
397 // get a number between 0 and 2^bits - 1
get_random_number(int32_t bits)398 static INLINE int32_t get_random_number(int32_t bits) {
399     uint16_t bit;
400     bit = ((random_register >> 0) ^ (random_register >> 1) ^ (random_register >> 3) ^
401            (random_register >> 12)) &
402         1;
403     random_register = (random_register >> 1) | (bit << 15);
404     return (random_register >> (16 - bits)) & ((1 << bits) - 1);
405 }
406 
init_random_generator(int32_t luma_line,uint16_t seed)407 static void init_random_generator(int32_t luma_line, uint16_t seed) {
408     // same for the picture
409 
410     uint16_t msb = (seed >> 8) & 255;
411     uint16_t lsb = seed & 255;
412 
413     random_register = (msb << 8) + lsb;
414 
415     //  changes for each row
416     int32_t luma_num = luma_line >> 5;
417 
418     random_register ^= ((luma_num * 37 + 178) & 255) << 8;
419     random_register ^= ((luma_num * 173 + 105) & 255);
420 }
421 
// Fills luma_grain_block with the luma grain template.
//
// Step 1: every sample of the luma_block_size_y x luma_block_size_x block is
//         drawn from gaussian_sequence (indexed by the random generator) and
//         scaled down with rounding by 12 - bit_depth + grain_scale_shift bits.
// Step 2: inside the padded area, each sample gets the rounded, weighted sum of
//         its neighbours listed in pred_pos_luma (weights: params->ar_coeffs_y)
//         added to it, and the result is clamped to [grain_min, grain_max].
//         Samples are processed in raster order, so already-filtered
//         neighbours feed into later samples.
//
// Does nothing when params->num_y_points is 0. The random generator state is
// not re-seeded here; it continues from wherever the caller left it.
static void generate_luma_grain_block(AomFilmGrain *params, int32_t **pred_pos_luma,
                                      int32_t *luma_grain_block, int32_t luma_block_size_y,
                                      int32_t luma_block_size_x, int32_t luma_grain_stride,
                                      int32_t left_pad, int32_t top_pad, int32_t right_pad,
                                      int32_t bottom_pad) {
    if (params->num_y_points == 0)
        return;

    const int32_t bit_depth   = params->bit_depth;
    const int32_t gauss_shift = 12 - bit_depth + params->grain_scale_shift;
    const int32_t gauss_round = (1 << gauss_shift) >> 1;

    const int32_t tap_count = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    const int32_t ar_round  = (1 << (params->ar_coeff_shift - 1));

    // Step 1: white Gaussian noise over the whole block, in raster order.
    for (int32_t y = 0; y < luma_block_size_y; y++) {
        const int32_t row_base = y * luma_grain_stride;
        for (int32_t x = 0; x < luma_block_size_x; x++) {
            const int32_t sample = gaussian_sequence[get_random_number(gauss_bits)];
            luma_grain_block[row_base + x] = (sample + gauss_round) >> gauss_shift;
        }
    }

    // Step 2: auto-regressive filtering of the area inside the padding.
    const int32_t y_end = luma_block_size_y - bottom_pad;
    const int32_t x_end = luma_block_size_x - right_pad;
    for (int32_t y = top_pad; y < y_end; y++) {
        for (int32_t x = left_pad; x < x_end; x++) {
            int32_t weighted_sum = 0;
            for (int32_t tap = 0; tap < tap_count; tap++) {
                const int32_t dy = pred_pos_luma[tap][0];
                const int32_t dx = pred_pos_luma[tap][1];
                weighted_sum += params->ar_coeffs_y[tap] *
                    luma_grain_block[(y + dy) * luma_grain_stride + x + dx];
            }

            const int32_t idx = y * luma_grain_stride + x;
            const int32_t filtered =
                luma_grain_block[idx] + ((weighted_sum + ar_round) >> params->ar_coeff_shift);
            luma_grain_block[idx] = clamp(filtered, grain_min, grain_max);
        }
    }
}
459 
generate_chroma_grain_blocks(AomFilmGrain * params,int32_t ** pred_pos_chroma,int32_t * luma_grain_block,int32_t * cb_grain_block,int32_t * cr_grain_block,int32_t luma_grain_stride,int32_t chroma_block_size_y,int32_t chroma_block_size_x,int32_t chroma_grain_stride,int32_t left_pad,int32_t top_pad,int32_t right_pad,int32_t bottom_pad,int32_t chroma_subsamp_y,int32_t chroma_subsamp_x)460 static void generate_chroma_grain_blocks(
461     AomFilmGrain *params,
462     //                                  int32_t** pred_pos_luma,
463     int32_t **pred_pos_chroma, int32_t *luma_grain_block, int32_t *cb_grain_block,
464     int32_t *cr_grain_block, int32_t luma_grain_stride, int32_t chroma_block_size_y,
465     int32_t chroma_block_size_x, int32_t chroma_grain_stride, int32_t left_pad, int32_t top_pad,
466     int32_t right_pad, int32_t bottom_pad, int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) {
467     int32_t bit_depth       = params->bit_depth;
468     int32_t gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
469 
470     int32_t num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
471     if (params->num_y_points > 0)
472         ++num_pos_chroma;
473     int32_t rounding_offset = (1 << (params->ar_coeff_shift - 1));
474 
475     int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
476 
477     if (params->num_cb_points || params->chroma_scaling_from_luma) {
478         init_random_generator(7 << 5, params->random_seed);
479 
480         for (int32_t i = 0; i < chroma_block_size_y; i++)
481             for (int32_t j = 0; j < chroma_block_size_x; j++)
482                 cb_grain_block[i * chroma_grain_stride + j] =
483                     (gaussian_sequence[get_random_number(gauss_bits)] +
484                      ((1 << gauss_sec_shift) >> 1)) >>
485                     gauss_sec_shift;
486     } else {
487         memset(cb_grain_block, 0, sizeof(*cb_grain_block) * chroma_grain_block_size);
488     }
489     if (params->num_cr_points || params->chroma_scaling_from_luma) {
490         init_random_generator(11 << 5, params->random_seed);
491 
492         for (int32_t i = 0; i < chroma_block_size_y; i++)
493             for (int32_t j = 0; j < chroma_block_size_x; j++)
494                 cr_grain_block[i * chroma_grain_stride + j] =
495                     (gaussian_sequence[get_random_number(gauss_bits)] +
496                      ((1 << gauss_sec_shift) >> 1)) >>
497                     gauss_sec_shift;
498     } else {
499         memset(cr_grain_block, 0, sizeof(*cr_grain_block) * chroma_grain_block_size);
500     }
501 
502     for (int32_t i = top_pad; i < chroma_block_size_y - bottom_pad; i++)
503         for (int32_t j = left_pad; j < chroma_block_size_x - right_pad; j++) {
504             int32_t wsum_cb = 0;
505             int32_t wsum_cr = 0;
506             for (int32_t pos = 0; pos < num_pos_chroma; pos++) {
507                 if (pred_pos_chroma[pos][2] == 0) {
508                     wsum_cb = wsum_cb +
509                         params->ar_coeffs_cb[pos] *
510                             cb_grain_block[(i + pred_pos_chroma[pos][0]) * chroma_grain_stride + j +
511                                            pred_pos_chroma[pos][1]];
512                     wsum_cr = wsum_cr +
513                         params->ar_coeffs_cr[pos] *
514                             cr_grain_block[(i + pred_pos_chroma[pos][0]) * chroma_grain_stride + j +
515                                            pred_pos_chroma[pos][1]];
516                 } else if (pred_pos_chroma[pos][2] == 1) {
517                     int32_t av_luma      = 0;
518                     int32_t luma_coord_y = ((i - top_pad) << chroma_subsamp_y) + top_pad;
519                     int32_t luma_coord_x = ((j - left_pad) << chroma_subsamp_x) + left_pad;
520 
521                     for (int32_t k = luma_coord_y; k < luma_coord_y + chroma_subsamp_y + 1; k++)
522                         for (int32_t l = luma_coord_x; l < luma_coord_x + chroma_subsamp_x + 1; l++)
523                             av_luma += luma_grain_block[k * luma_grain_stride + l];
524 
525                     av_luma = (av_luma + ((1 << (chroma_subsamp_y + chroma_subsamp_x)) >> 1)) >>
526                         (chroma_subsamp_y + chroma_subsamp_x);
527 
528                     wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] * av_luma;
529                     wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] * av_luma;
530                 } else {
531                     SVT_LOG(
532                         "Grain synthesis: prediction between two chroma components is "
533                         "not supported!");
534                     exit(1);
535                 }
536             }
537             if (params->num_cb_points || params->chroma_scaling_from_luma)
538                 cb_grain_block[i * chroma_grain_stride + j] = clamp(
539                     cb_grain_block[i * chroma_grain_stride + j] +
540                         ((wsum_cb + rounding_offset) >> params->ar_coeff_shift),
541                     grain_min,
542                     grain_max);
543             if (params->num_cr_points || params->chroma_scaling_from_luma)
544                 cr_grain_block[i * chroma_grain_stride + j] = clamp(
545                     cr_grain_block[i * chroma_grain_stride + j] +
546                         ((wsum_cr + rounding_offset) >> params->ar_coeff_shift),
547                     grain_min,
548                     grain_max);
549         }
550 }
551 
/*
 * Fills a 256-entry scaling look-up table from a list of control points.
 *
 * scaling_points  array of {x, y} pairs; x is used directly as an index into
 *                 scaling_lut, so it is expected to stay within [0, 255] and to
 *                 increase from one point to the next.
 * num_points      number of valid entries in scaling_points. When it is zero
 *                 (or negative) the table is left untouched.
 * scaling_lut     output table; entries [0, 255] are written.
 *
 * Entries below the first point take the first point's y, entries from the last
 * point's x up to 255 take the last point's y, and entries between two
 * neighbouring points are linearly interpolated in 16.16 fixed point.
 */
static void init_scaling_function(int32_t scaling_points[][2], int32_t num_points,
                                  int32_t scaling_lut[]) {
    // Nothing to build; a negative count would also index before the array below.
    if (num_points <= 0)
        return;

    // Flat region in front of the first control point.
    for (int32_t i = 0; i < scaling_points[0][0]; i++) scaling_lut[i] = scaling_points[0][1];

    for (int32_t point = 0; point < num_points - 1; point++) {
        int32_t delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
        int32_t delta_x = scaling_points[point + 1][0] - scaling_points[point][0];

        // A segment with no width (or a reversed one) has no entries to fill.
        // Skipping it also avoids dividing by a zero delta_x below.
        if (delta_x <= 0)
            continue;

        // Slope of the segment as a 16.16 fixed-point value (rounded reciprocal of delta_x).
        int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

        for (int32_t x = 0; x < delta_x; x++) {
            scaling_lut[scaling_points[point][0] + x] = scaling_points[point][1] +
                (int32_t)((x * delta + 32768) >> 16);
        }
    }

    // Flat region from the last control point to the end of the table.
    for (int32_t i = scaling_points[num_points - 1][0]; i < 256; i++)
        scaling_lut[i] = scaling_points[num_points - 1][1];
}
574 
// Fetches a scaling value from a 256-entry lut for a sample of the given bit depth.
// The top 8 bits of `index` select the entry; for bit depths above 8 the remaining
// low bits are used to interpolate linearly (with rounding) towards the next entry.
// The last entry (255) has no successor and is returned without interpolation.
static int32_t scale_lut(int32_t *scaling_lut, int32_t index, int32_t bit_depth) {
    const int32_t frac_bits = bit_depth - 8;
    const int32_t entry     = index >> frac_bits;

    if (frac_bits == 0 || entry == 255)
        return scaling_lut[entry];

    const int32_t frac  = index & ((1 << frac_bits) - 1);
    const int32_t step  = scaling_lut[entry + 1] - scaling_lut[entry];
    const int32_t round = 1 << (frac_bits - 1);

    return scaling_lut[entry] + ((step * frac + round) >> frac_bits);
}
588 
/*
 * Blends film grain into one block of 8-bit samples, in place.
 *
 * params                 film grain parameters (multipliers, offsets, scaling shift,
 *                        point counts and clipping mode are read here)
 * luma, cb, cr           top-left sample of the block in each plane
 * luma_stride,
 * chroma_stride          distance between rows of the picture planes, in samples
 * luma_grain, cb_grain,
 * cr_grain               grain values aligned with the block's top-left sample
 * luma_grain_stride,
 * chroma_grain_stride    distance between rows of the grain buffers, in entries
 * half_luma_height,
 * half_luma_width        half of the block size in luma samples
 * bit_depth              only used to bound the chroma scaling index; the lut is
 *                        always read as an 8-bit table in this variant
 * chroma_subsamp_y,
 * chroma_subsamp_x       chroma subsampling shifts
 *
 * Chroma is processed before luma, so the chroma scaling index is derived from
 * luma samples that have not received grain yet.
 */
static void add_noise_to_block(AomFilmGrain *params, uint8_t *luma, uint8_t *cb, uint8_t *cr,
                               int32_t luma_stride, int32_t chroma_stride, int32_t *luma_grain,
                               int32_t *cb_grain, int32_t *cr_grain, int32_t luma_grain_stride,
                               int32_t chroma_grain_stride, int32_t half_luma_height,
                               int32_t half_luma_width, int32_t bit_depth, int32_t chroma_subsamp_y,
                               int32_t chroma_subsamp_x) {
    const int32_t from_luma = params->chroma_scaling_from_luma;

    // Weights (in 1/64 units) and offset that form the chroma scaling index. When
    // chroma scaling is taken from luma the index is the averaged luma value itself.
    const int32_t cb_mult      = from_luma ? 0 : params->cb_mult - 128;
    const int32_t cb_luma_mult = from_luma ? 64 : params->cb_luma_mult - 128;
    const int32_t cb_offset    = from_luma ? 0 : params->cb_offset - 256;

    const int32_t cr_mult      = from_luma ? 0 : params->cr_mult - 128;
    const int32_t cr_luma_mult = from_luma ? 64 : params->cr_luma_mult - 128;
    const int32_t cr_offset    = from_luma ? 0 : params->cr_offset - 256;

    const int32_t rounding_offset = (1 << (params->scaling_shift - 1));

    const int32_t apply_y  = params->num_y_points > 0;
    const int32_t apply_cb = params->num_cb_points > 0 || from_luma;
    const int32_t apply_cr = params->num_cr_points > 0 || from_luma;

    // Output sample range: restricted ("legal") range or full 8-bit range.
    const int32_t restricted = params->clip_to_restricted_range;
    const int32_t min_luma   = restricted ? min_luma_legal_range : 0;
    const int32_t max_luma   = restricted ? max_luma_legal_range : 255;
    const int32_t min_chroma = restricted ? min_chroma_legal_range : 0;
    const int32_t max_chroma = restricted ? max_chroma_legal_range : 255;

    const int32_t max_scaling_index = (256 << (bit_depth - 8)) - 1;

    const int32_t chroma_rows = half_luma_height << (1 - chroma_subsamp_y);
    const int32_t chroma_cols = half_luma_width << (1 - chroma_subsamp_x);

    for (int32_t row = 0; row < chroma_rows; row++) {
        for (int32_t col = 0; col < chroma_cols; col++) {
            // Co-located luma: with horizontal subsampling, the rounded mean of the
            // two luma samples on the top row of the chroma sample's footprint.
            const int32_t luma_pos = (row << chroma_subsamp_y) * luma_stride +
                (col << chroma_subsamp_x);
            int32_t average_luma;
            if (chroma_subsamp_x)
                average_luma = (luma[luma_pos] + luma[luma_pos + 1] + 1) >> 1;
            else
                average_luma = luma[luma_pos];

            const int32_t chroma_pos = row * chroma_stride + col;
            const int32_t grain_pos  = row * chroma_grain_stride + col;

            if (apply_cb) {
                const int32_t sample  = cb[chroma_pos];
                const int32_t lut_idx = clamp(
                    ((average_luma * cb_luma_mult + cb_mult * sample) >> 6) + cb_offset,
                    0,
                    max_scaling_index);
                const int32_t noise =
                    (scale_lut(scaling_lut_cb, lut_idx, 8) * cb_grain[grain_pos] +
                     rounding_offset) >>
                    params->scaling_shift;
                cb[chroma_pos] = clamp(sample + noise, min_chroma, max_chroma);
            }

            if (apply_cr) {
                const int32_t sample  = cr[chroma_pos];
                const int32_t lut_idx = clamp(
                    ((average_luma * cr_luma_mult + cr_mult * sample) >> 6) + cr_offset,
                    0,
                    max_scaling_index);
                const int32_t noise =
                    (scale_lut(scaling_lut_cr, lut_idx, 8) * cr_grain[grain_pos] +
                     rounding_offset) >>
                    params->scaling_shift;
                cr[chroma_pos] = clamp(sample + noise, min_chroma, max_chroma);
            }
        }
    }

    if (!apply_y)
        return;

    const int32_t luma_rows = half_luma_height << 1;
    const int32_t luma_cols = half_luma_width << 1;

    for (int32_t row = 0; row < luma_rows; row++) {
        for (int32_t col = 0; col < luma_cols; col++) {
            const int32_t pos    = row * luma_stride + col;
            const int32_t sample = luma[pos];
            const int32_t noise =
                (scale_lut(scaling_lut_y, sample, 8) * luma_grain[row * luma_grain_stride + col] +
                 rounding_offset) >>
                params->scaling_shift;
            luma[pos] = clamp(sample + noise, min_luma, max_luma);
        }
    }
}
696 
/*
 * Blends film grain into one block of high-bit-depth samples (stored as uint16_t),
 * in place. This is the counterpart of add_noise_to_block() for bit depths above 8.
 *
 * params                 film grain parameters (multipliers, offsets, scaling shift,
 *                        point counts and clipping mode are read here)
 * luma, cb, cr           top-left sample of the block in each plane
 * luma_stride,
 * chroma_stride          distance between rows of the picture planes, in samples
 * luma_grain, cb_grain,
 * cr_grain               grain values aligned with the block's top-left sample
 * luma_grain_stride,
 * chroma_grain_stride    distance between rows of the grain buffers, in entries
 * half_luma_height,
 * half_luma_width        half of the block size in luma samples
 * bit_depth              sample bit depth; scales the chroma offsets, the clipping
 *                        range and the scaling lut interpolation
 * chroma_subsamp_y,
 * chroma_subsamp_x       chroma subsampling shifts
 *
 * Chroma is processed before luma, so the chroma scaling index is derived from
 * luma samples that have not received grain yet.
 */
static void add_noise_to_block_hbd(AomFilmGrain *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
                                   int32_t luma_stride, int32_t chroma_stride, int32_t *luma_grain,
                                   int32_t *cb_grain, int32_t *cr_grain, int32_t luma_grain_stride,
                                   int32_t chroma_grain_stride, int32_t half_luma_height,
                                   int32_t half_luma_width, int32_t bit_depth,
                                   int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) {
    int32_t cb_mult      = params->cb_mult - 128; // fixed scale
    int32_t cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
    // offset value depends on the bit depth
    int32_t cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth);

    int32_t cr_mult      = params->cr_mult - 128; // fixed scale
    int32_t cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
    // offset value depends on the bit depth
    int32_t cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth);

    int32_t rounding_offset = (1 << (params->scaling_shift - 1));

    // Chroma grain is applied when the plane has its own scaling points OR when its
    // scaling is derived from luma. The latter case must be included here, exactly as
    // in the 8-bit path: with chroma_scaling_from_luma the chroma point counts can be
    // zero while chroma grain and the chroma luts are still populated.
    int32_t apply_y  = params->num_y_points > 0 ? 1 : 0;
    int32_t apply_cb = (params->num_cb_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
    int32_t apply_cr = (params->num_cr_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;

    if (params->chroma_scaling_from_luma) {
        // The scaling index becomes the averaged luma value itself (64/64 weight).
        cb_mult      = 0; // fixed scale
        cb_luma_mult = 64; // fixed scale
        cb_offset    = 0;

        cr_mult      = 0; // fixed scale
        cr_luma_mult = 64; // fixed scale
        cr_offset    = 0;
    }

    // Largest sample value representable at this bit depth.
    const int32_t max_sample = (256 << (bit_depth - 8)) - 1;

    int32_t min_luma, max_luma, min_chroma, max_chroma;

    if (params->clip_to_restricted_range) {
        // 8-bit legal-range limits scaled up to the current bit depth.
        min_luma = min_luma_legal_range << (bit_depth - 8);
        max_luma = max_luma_legal_range << (bit_depth - 8);

        min_chroma = min_chroma_legal_range << (bit_depth - 8);
        max_chroma = max_chroma_legal_range << (bit_depth - 8);
    } else {
        min_luma = min_chroma = 0;
        max_luma = max_chroma = max_sample;
    }

    for (int32_t i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
        for (int32_t j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
            // Co-located luma: with horizontal subsampling, the rounded mean of the
            // two luma samples on the top row of the chroma sample's footprint.
            const int32_t luma_pos = (i << chroma_subsamp_y) * luma_stride +
                (j << chroma_subsamp_x);
            int32_t average_luma = 0;
            if (chroma_subsamp_x)
                average_luma = (luma[luma_pos] + luma[luma_pos + 1] + 1) >> 1;
            else
                average_luma = luma[luma_pos];

            const int32_t chroma_pos = i * chroma_stride + j;
            const int32_t grain_pos  = i * chroma_grain_stride + j;

            if (apply_cb) {
                const int32_t sample  = cb[chroma_pos];
                const int32_t lut_idx = clamp(
                    ((average_luma * cb_luma_mult + cb_mult * sample) >> 6) + cb_offset,
                    0,
                    max_sample);
                cb[chroma_pos] = clamp(
                    sample +
                        ((scale_lut(scaling_lut_cb, lut_idx, bit_depth) * cb_grain[grain_pos] +
                          rounding_offset) >>
                         params->scaling_shift),
                    min_chroma,
                    max_chroma);
            }
            if (apply_cr) {
                const int32_t sample  = cr[chroma_pos];
                const int32_t lut_idx = clamp(
                    ((average_luma * cr_luma_mult + cr_mult * sample) >> 6) + cr_offset,
                    0,
                    max_sample);
                cr[chroma_pos] = clamp(
                    sample +
                        ((scale_lut(scaling_lut_cr, lut_idx, bit_depth) * cr_grain[grain_pos] +
                          rounding_offset) >>
                         params->scaling_shift),
                    min_chroma,
                    max_chroma);
            }
        }
    }

    if (apply_y) {
        for (int32_t i = 0; i < (half_luma_height << 1); i++) {
            for (int32_t j = 0; j < (half_luma_width << 1); j++) {
                const int32_t pos    = i * luma_stride + j;
                const int32_t sample = luma[pos];
                luma[pos]            = clamp(
                    sample +
                        ((scale_lut(scaling_lut_y, sample, bit_depth) *
                              luma_grain[i * luma_grain_stride + j] +
                          rounding_offset) >>
                         params->scaling_shift),
                    min_luma,
                    max_luma);
            }
        }
    }
}
805 
/*
 * Compares two film grain parameter sets.
 *
 * Returns 1 when every compared field matches and 0 otherwise. The scalar
 * settings are checked first, then the scaling point and AR coefficient arrays
 * are compared over their full storage size. random_seed and bit_depth are not
 * part of the comparison.
 */
int32_t film_grain_params_equal(AomFilmGrain *pars_a, AomFilmGrain *pars_b) {
    // Evaluation stops at the first field that differs.
    const int32_t same_settings = pars_a->apply_grain == pars_b->apply_grain &&
        pars_a->overlap_flag == pars_b->overlap_flag &&
        pars_a->clip_to_restricted_range == pars_b->clip_to_restricted_range &&
        pars_a->chroma_scaling_from_luma == pars_b->chroma_scaling_from_luma &&
        pars_a->grain_scale_shift == pars_b->grain_scale_shift &&
        pars_a->ar_coeff_shift == pars_b->ar_coeff_shift &&
        pars_a->cb_mult == pars_b->cb_mult &&
        pars_a->cb_luma_mult == pars_b->cb_luma_mult &&
        pars_a->cb_offset == pars_b->cb_offset &&
        pars_a->cr_mult == pars_b->cr_mult &&
        pars_a->cr_luma_mult == pars_b->cr_luma_mult &&
        pars_a->cr_offset == pars_b->cr_offset &&
        pars_a->scaling_shift == pars_b->scaling_shift &&
        pars_a->ar_coeff_lag == pars_b->ar_coeff_lag &&
        pars_a->num_y_points == pars_b->num_y_points &&
        pars_a->num_cb_points == pars_b->num_cb_points &&
        pars_a->num_cr_points == pars_b->num_cr_points;

    if (!same_settings)
        return 0;

    const int32_t same_tables =
        memcmp(pars_a->scaling_points_y,
               pars_b->scaling_points_y,
               sizeof(pars_b->scaling_points_y)) == 0 &&
        memcmp(pars_a->scaling_points_cb,
               pars_b->scaling_points_cb,
               sizeof(pars_b->scaling_points_cb)) == 0 &&
        memcmp(pars_a->scaling_points_cr,
               pars_b->scaling_points_cr,
               sizeof(pars_b->scaling_points_cr)) == 0 &&
        memcmp(pars_a->ar_coeffs_y, pars_b->ar_coeffs_y, sizeof(pars_b->ar_coeffs_y)) == 0 &&
        memcmp(pars_a->ar_coeffs_cb, pars_b->ar_coeffs_cb, sizeof(pars_b->ar_coeffs_cb)) == 0 &&
        memcmp(pars_a->ar_coeffs_cr, pars_b->ar_coeffs_cr, sizeof(pars_b->ar_coeffs_cr)) == 0;

    return same_tables ? 1 : 0;
}
871 
/*
 * Copies a width x height rectangle of samples from src to dst, row by row.
 *
 * Both buffers are addressed as bytes. width and the two strides are given in
 * samples and are doubled when use_high_bit_depth is non-zero (two bytes per
 * sample).
 */
void fgn_copy_rect(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
                   int32_t width, int32_t height, int32_t use_high_bit_depth) {
    const int32_t bytes_per_sample = use_high_bit_depth ? 2 : 1;

    for (; height != 0; --height) {
        svt_memcpy(dst, src, width * sizeof(uint8_t) * bytes_per_sample);
        dst += dst_stride * bytes_per_sample;
        src += src_stride * bytes_per_sample;
    }
}
883 
copy_area(int32_t * src,int32_t src_stride,int32_t * dst,int32_t dst_stride,int32_t width,int32_t height)884 static void copy_area(int32_t *src, int32_t src_stride, int32_t *dst, int32_t dst_stride,
885                       int32_t width, int32_t height) {
886     while (height) {
887         if (svt_memcpy != NULL)
888             svt_memcpy(dst, src, width * sizeof(*src));
889         else
890             svt_memcpy_c(dst, src, width * sizeof(*src));
891         src += src_stride;
892         dst += dst_stride;
893         --height;
894     }
895     return;
896 }
897 
ver_boundary_overlap(int32_t * left_block,int32_t left_stride,int32_t * right_block,int32_t right_stride,int32_t * dst_block,int32_t dst_stride,int32_t width,int32_t height)898 static void ver_boundary_overlap(int32_t *left_block, int32_t left_stride, int32_t *right_block,
899                                  int32_t right_stride, int32_t *dst_block, int32_t dst_stride,
900                                  int32_t width, int32_t height) {
901     if (width == 1) {
902         while (height) {
903             *dst_block = clamp(
904                 (*left_block * 23 + *right_block * 22 + 16) >> 5, grain_min, grain_max);
905             left_block += left_stride;
906             right_block += right_stride;
907             dst_block += dst_stride;
908             --height;
909         }
910         return;
911     } else if (width == 2) {
912         while (height) {
913             dst_block[0] = clamp(
914                 (27 * left_block[0] + 17 * right_block[0] + 16) >> 5, grain_min, grain_max);
915             dst_block[1] = clamp(
916                 (17 * left_block[1] + 27 * right_block[1] + 16) >> 5, grain_min, grain_max);
917             left_block += left_stride;
918             right_block += right_stride;
919             dst_block += dst_stride;
920             --height;
921         }
922         return;
923     }
924 }
925 
hor_boundary_overlap(int32_t * top_block,int32_t top_stride,int32_t * bottom_block,int32_t bottom_stride,int32_t * dst_block,int32_t dst_stride,int32_t width,int32_t height)926 static void hor_boundary_overlap(int32_t *top_block, int32_t top_stride, int32_t *bottom_block,
927                                  int32_t bottom_stride, int32_t *dst_block, int32_t dst_stride,
928                                  int32_t width, int32_t height) {
929     if (height == 1) {
930         while (width) {
931             *dst_block = clamp(
932                 (*top_block * 23 + *bottom_block * 22 + 16) >> 5, grain_min, grain_max);
933             ++top_block;
934             ++bottom_block;
935             ++dst_block;
936             --width;
937         }
938         return;
939     } else if (height == 2) {
940         while (width) {
941             dst_block[0] = clamp(
942                 (27 * top_block[0] + 17 * bottom_block[0] + 16) >> 5, grain_min, grain_max);
943             dst_block[dst_stride] = clamp(
944                 (17 * top_block[top_stride] + 27 * bottom_block[bottom_stride] + 16) >> 5,
945                 grain_min,
946                 grain_max);
947             ++top_block;
948             ++bottom_block;
949             ++dst_block;
950             --width;
951         }
952         return;
953     }
954 }
955 
svt_av1_add_film_grain_run(AomFilmGrain * params,uint8_t * luma,uint8_t * cb,uint8_t * cr,int32_t height,int32_t width,int32_t luma_stride,int32_t chroma_stride,int32_t use_high_bit_depth,int32_t chroma_subsamp_y,int32_t chroma_subsamp_x)956 void svt_av1_add_film_grain_run(AomFilmGrain *params, uint8_t *luma, uint8_t *cb, uint8_t *cr,
957                                 int32_t height, int32_t width, int32_t luma_stride,
958                                 int32_t chroma_stride, int32_t use_high_bit_depth,
959                                 int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) {
960     int32_t **pred_pos_luma;
961     int32_t **pred_pos_chroma;
962     int32_t * luma_grain_block;
963     int32_t * cb_grain_block;
964     int32_t * cr_grain_block;
965 
966     int32_t *y_line_buf;
967     int32_t *cb_line_buf;
968     int32_t *cr_line_buf;
969 
970     int32_t *y_col_buf;
971     int32_t *cb_col_buf;
972     int32_t *cr_col_buf;
973 
974     random_register = params->random_seed;
975 
976     int32_t left_pad   = 3;
977     int32_t right_pad  = 3; // padding to offset for AR coefficients
978     int32_t top_pad    = 3;
979     int32_t bottom_pad = 0;
980 
981     int32_t ar_padding = 3; // maximum lag used for stabilization of AR coefficients
982 
983     luma_subblock_size_y = 32;
984     luma_subblock_size_x = 32;
985 
986     chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
987     chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
988 
989     // Initial padding is only needed for generation of
990     // film grain templates (to stabilize the AR process)
991     // Only a 64x64 luma and 32x32 chroma part of a template
992     // is used later for adding grain, padding can be discarded
993 
994     int32_t luma_block_size_y = top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad;
995     int32_t luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 +
996         2 * ar_padding + right_pad;
997 
998     int32_t chroma_block_size_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
999         chroma_subblock_size_y * 2 + bottom_pad;
1000     int32_t chroma_block_size_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
1001         chroma_subblock_size_x * 2 + (2 >> chroma_subsamp_x) * ar_padding + right_pad;
1002 
1003     int32_t luma_grain_stride   = luma_block_size_x;
1004     int32_t chroma_grain_stride = chroma_block_size_x;
1005 
1006     int32_t overlap   = params->overlap_flag;
1007     int32_t bit_depth = params->bit_depth;
1008 
1009     grain_center = 128 << (bit_depth - 8);
1010     grain_min    = 0 - grain_center;
1011     grain_max    = (256 << (bit_depth - 8)) - 1 - grain_center;
1012 
1013     init_arrays(params,
1014                 luma_stride,
1015                 chroma_stride,
1016                 &pred_pos_luma,
1017                 &pred_pos_chroma,
1018                 &luma_grain_block,
1019                 &cb_grain_block,
1020                 &cr_grain_block,
1021                 &y_line_buf,
1022                 &cb_line_buf,
1023                 &cr_line_buf,
1024                 &y_col_buf,
1025                 &cb_col_buf,
1026                 &cr_col_buf,
1027                 luma_block_size_y * luma_block_size_x,
1028                 chroma_block_size_y * chroma_block_size_x,
1029                 chroma_subsamp_y,
1030                 chroma_subsamp_x);
1031 
1032     generate_luma_grain_block(params,
1033                               pred_pos_luma,
1034                               luma_grain_block,
1035                               luma_block_size_y,
1036                               luma_block_size_x,
1037                               luma_grain_stride,
1038                               left_pad,
1039                               top_pad,
1040                               right_pad,
1041                               bottom_pad);
1042 
1043     generate_chroma_grain_blocks(params,
1044                                  //                               pred_pos_luma,
1045                                  pred_pos_chroma,
1046                                  luma_grain_block,
1047                                  cb_grain_block,
1048                                  cr_grain_block,
1049                                  luma_grain_stride,
1050                                  chroma_block_size_y,
1051                                  chroma_block_size_x,
1052                                  chroma_grain_stride,
1053                                  left_pad,
1054                                  top_pad,
1055                                  right_pad,
1056                                  bottom_pad,
1057                                  chroma_subsamp_y,
1058                                  chroma_subsamp_x);
1059 
1060     init_scaling_function(params->scaling_points_y, params->num_y_points, scaling_lut_y);
1061 
1062     if (params->chroma_scaling_from_luma) {
1063         svt_memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
1064         svt_memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
1065     } else {
1066         init_scaling_function(params->scaling_points_cb, params->num_cb_points, scaling_lut_cb);
1067         init_scaling_function(params->scaling_points_cr, params->num_cr_points, scaling_lut_cr);
1068     }
1069     for (int32_t y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
1070         init_random_generator(y * 2, params->random_seed);
1071 
1072         for (int32_t x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
1073             int32_t offset_y = get_random_number(8);
1074             int32_t offset_x = (offset_y >> 4) & 15;
1075             offset_y &= 15;
1076 
1077             int32_t luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1);
1078             int32_t luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1);
1079 
1080             int32_t chroma_offset_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
1081                 offset_y * (2 >> chroma_subsamp_y);
1082             int32_t chroma_offset_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
1083                 offset_x * (2 >> chroma_subsamp_x);
1084 
1085             if (overlap && x) {
1086                 ver_boundary_overlap(
1087                     y_col_buf,
1088                     2,
1089                     luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x,
1090                     luma_grain_stride,
1091                     y_col_buf,
1092                     2,
1093                     2,
1094                     AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
1095 
1096                 ver_boundary_overlap(
1097                     cb_col_buf,
1098                     2 >> chroma_subsamp_x,
1099                     cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x,
1100                     chroma_grain_stride,
1101                     cb_col_buf,
1102                     2 >> chroma_subsamp_x,
1103                     2 >> chroma_subsamp_x,
1104                     AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1105                            (height - (y << 1)) >> chroma_subsamp_y));
1106 
1107                 ver_boundary_overlap(
1108                     cr_col_buf,
1109                     2 >> chroma_subsamp_x,
1110                     cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x,
1111                     chroma_grain_stride,
1112                     cr_col_buf,
1113                     2 >> chroma_subsamp_x,
1114                     2 >> chroma_subsamp_x,
1115                     AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1116                            (height - (y << 1)) >> chroma_subsamp_y));
1117 
1118                 int32_t i = y ? 1 : 0;
1119 
1120                 if (use_high_bit_depth) {
1121                     add_noise_to_block_hbd(
1122                         params,
1123                         (uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
1124                         (uint16_t *)cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1125                             (x << (1 - chroma_subsamp_x)),
1126                         (uint16_t *)cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1127                             (x << (1 - chroma_subsamp_x)),
1128                         luma_stride,
1129                         chroma_stride,
1130                         y_col_buf + i * 4,
1131                         cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1132                         cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1133                         2,
1134                         (2 - chroma_subsamp_x),
1135                         AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1136                         1,
1137                         bit_depth,
1138                         chroma_subsamp_y,
1139                         chroma_subsamp_x);
1140                 } else {
1141                     add_noise_to_block(
1142                         params,
1143                         luma + ((y + i) << 1) * luma_stride + (x << 1),
1144                         cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1145                             (x << (1 - chroma_subsamp_x)),
1146                         cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1147                             (x << (1 - chroma_subsamp_x)),
1148                         luma_stride,
1149                         chroma_stride,
1150                         y_col_buf + i * 4,
1151                         cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1152                         cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
1153                         2,
1154                         (2 - chroma_subsamp_x),
1155                         AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1156                         1,
1157                         bit_depth,
1158                         chroma_subsamp_y,
1159                         chroma_subsamp_x);
1160                 }
1161             }
1162 
1163             if (overlap && y) {
1164                 if (x) {
1165                     ASSERT(y_col_buf != NULL);
1166                     hor_boundary_overlap(y_line_buf + (x << 1),
1167                                          luma_stride,
1168                                          y_col_buf,
1169                                          2,
1170                                          y_line_buf + (x << 1),
1171                                          luma_stride,
1172                                          2,
1173                                          2);
1174 
1175                     hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
1176                                          chroma_stride,
1177                                          cb_col_buf,
1178                                          2 >> chroma_subsamp_x,
1179                                          cb_line_buf + x * (2 >> chroma_subsamp_x),
1180                                          chroma_stride,
1181                                          2 >> chroma_subsamp_x,
1182                                          2 >> chroma_subsamp_y);
1183 
1184                     hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
1185                                          chroma_stride,
1186                                          cr_col_buf,
1187                                          2 >> chroma_subsamp_x,
1188                                          cr_line_buf + x * (2 >> chroma_subsamp_x),
1189                                          chroma_stride,
1190                                          2 >> chroma_subsamp_x,
1191                                          2 >> chroma_subsamp_y);
1192                 }
1193 
1194                 hor_boundary_overlap(y_line_buf + ((x ? x + 1 : 0) << 1),
1195                                      luma_stride,
1196                                      luma_grain_block + luma_offset_y * luma_grain_stride +
1197                                          luma_offset_x + (x ? 2 : 0),
1198                                      luma_grain_stride,
1199                                      y_line_buf + ((x ? x + 1 : 0) << 1),
1200                                      luma_stride,
1201                                      AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1),
1202                                             width - ((x ? x + 1 : 0) << 1)),
1203                                      2);
1204 
1205                 hor_boundary_overlap(
1206                     cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1207                     chroma_stride,
1208                     cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
1209                         ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1210                     chroma_grain_stride,
1211                     cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1212                     chroma_stride,
1213                     AOMMIN(chroma_subblock_size_x - ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1214                            (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1215                     2 >> chroma_subsamp_y);
1216 
1217                 hor_boundary_overlap(
1218                     cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1219                     chroma_stride,
1220                     cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
1221                         ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1222                     chroma_grain_stride,
1223                     cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1224                     chroma_stride,
1225                     AOMMIN(chroma_subblock_size_x - ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
1226                            (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
1227                     2 >> chroma_subsamp_y);
1228 
1229                 if (use_high_bit_depth) {
1230                     add_noise_to_block_hbd(
1231                         params,
1232                         (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
1233                         (uint16_t *)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1234                             (x << ((1 - chroma_subsamp_x))),
1235                         (uint16_t *)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1236                             (x << ((1 - chroma_subsamp_x))),
1237                         luma_stride,
1238                         chroma_stride,
1239                         y_line_buf + (x << 1),
1240                         cb_line_buf + (x << (1 - chroma_subsamp_x)),
1241                         cr_line_buf + (x << (1 - chroma_subsamp_x)),
1242                         luma_stride,
1243                         chroma_stride,
1244                         1,
1245                         AOMMIN(luma_subblock_size_x >> 1, width / 2 - x),
1246                         bit_depth,
1247                         chroma_subsamp_y,
1248                         chroma_subsamp_x);
1249                 } else {
1250                     add_noise_to_block(params,
1251                                        luma + (y << 1) * luma_stride + (x << 1),
1252                                        cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1253                                            (x << ((1 - chroma_subsamp_x))),
1254                                        cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
1255                                            (x << ((1 - chroma_subsamp_x))),
1256                                        luma_stride,
1257                                        chroma_stride,
1258                                        y_line_buf + (x << 1),
1259                                        cb_line_buf + (x << (1 - chroma_subsamp_x)),
1260                                        cr_line_buf + (x << (1 - chroma_subsamp_x)),
1261                                        luma_stride,
1262                                        chroma_stride,
1263                                        1,
1264                                        AOMMIN(luma_subblock_size_x >> 1, width / 2 - x),
1265                                        bit_depth,
1266                                        chroma_subsamp_y,
1267                                        chroma_subsamp_x);
1268                 }
1269             }
1270 
1271             int32_t i = overlap && y ? 1 : 0;
1272             int32_t j = overlap && x ? 1 : 0;
1273 
1274             if (use_high_bit_depth) {
1275                 add_noise_to_block_hbd(
1276                     params,
1277                     (uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1278                     (uint16_t *)cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1279                         ((x + j) << (1 - chroma_subsamp_x)),
1280                     (uint16_t *)cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1281                         ((x + j) << (1 - chroma_subsamp_x)),
1282                     luma_stride,
1283                     chroma_stride,
1284                     luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
1285                         luma_offset_x + (j << 1),
1286                     cb_grain_block +
1287                         (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
1288                         chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1289                     cr_grain_block +
1290                         (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
1291                         chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1292                     luma_grain_stride,
1293                     chroma_grain_stride,
1294                     AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1295                     AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j,
1296                     bit_depth,
1297                     chroma_subsamp_y,
1298                     chroma_subsamp_x);
1299             } else {
1300                 add_noise_to_block(
1301                     params,
1302                     luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
1303                     cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1304                         ((x + j) << (1 - chroma_subsamp_x)),
1305                     cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
1306                         ((x + j) << (1 - chroma_subsamp_x)),
1307                     luma_stride,
1308                     chroma_stride,
1309                     luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
1310                         luma_offset_x + (j << 1),
1311                     cb_grain_block +
1312                         (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
1313                         chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1314                     cr_grain_block +
1315                         (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
1316                         chroma_offset_x + (j << (1 - chroma_subsamp_x)),
1317                     luma_grain_stride,
1318                     chroma_grain_stride,
1319                     AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
1320                     AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j,
1321                     bit_depth,
1322                     chroma_subsamp_y,
1323                     chroma_subsamp_x);
1324             }
1325 
1326             if (overlap) {
1327                 if (x) {
1328                     // Copy overlapped column buffer to line buffer
1329                     copy_area(y_col_buf + (luma_subblock_size_y << 1),
1330                               2,
1331                               y_line_buf + (x << 1),
1332                               luma_stride,
1333                               2,
1334                               2);
1335 
1336                     copy_area(cb_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1337                               2 >> chroma_subsamp_x,
1338                               cb_line_buf + (x << (1 - chroma_subsamp_x)),
1339                               chroma_stride,
1340                               2 >> chroma_subsamp_x,
1341                               2 >> chroma_subsamp_y);
1342 
1343                     copy_area(cr_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
1344                               2 >> chroma_subsamp_x,
1345                               cr_line_buf + (x << (1 - chroma_subsamp_x)),
1346                               chroma_stride,
1347                               2 >> chroma_subsamp_x,
1348                               2 >> chroma_subsamp_y);
1349                 }
1350 
1351                 // Copy grain to the line buffer for overlap with a bottom block
1352                 copy_area(luma_grain_block +
1353                               (luma_offset_y + luma_subblock_size_y) * luma_grain_stride +
1354                               luma_offset_x + ((x ? 2 : 0)),
1355                           luma_grain_stride,
1356                           y_line_buf + ((x ? x + 1 : 0) << 1),
1357                           luma_stride,
1358                           AOMMIN(luma_subblock_size_x, width - (x << 1)) - (x ? 2 : 0),
1359                           2);
1360 
1361                 copy_area(cb_grain_block +
1362                               (chroma_offset_y + chroma_subblock_size_y) * chroma_grain_stride +
1363                               chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1364                           chroma_grain_stride,
1365                           cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1366                           chroma_stride,
1367                           AOMMIN(chroma_subblock_size_x, ((width - (x << 1)) >> chroma_subsamp_x)) -
1368                               (x ? 2 >> chroma_subsamp_x : 0),
1369                           2 >> chroma_subsamp_y);
1370 
1371                 copy_area(cr_grain_block +
1372                               (chroma_offset_y + chroma_subblock_size_y) * chroma_grain_stride +
1373                               chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
1374                           chroma_grain_stride,
1375                           cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
1376                           chroma_stride,
1377                           AOMMIN(chroma_subblock_size_x, ((width - (x << 1)) >> chroma_subsamp_x)) -
1378                               (x ? 2 >> chroma_subsamp_x : 0),
1379                           2 >> chroma_subsamp_y);
1380 
1381                 // Copy grain to the column buffer for overlap with the next block to
1382                 // the right
1383 
1384                 copy_area(luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x +
1385                               luma_subblock_size_x,
1386                           luma_grain_stride,
1387                           y_col_buf,
1388                           2,
1389                           2,
1390                           AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
1391 
1392                 copy_area(cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
1393                               chroma_subblock_size_x,
1394                           chroma_grain_stride,
1395                           cb_col_buf,
1396                           2 >> chroma_subsamp_x,
1397                           2 >> chroma_subsamp_x,
1398                           AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1399                                  (height - (y << 1)) >> chroma_subsamp_y));
1400 
1401                 copy_area(cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
1402                               chroma_subblock_size_x,
1403                           chroma_grain_stride,
1404                           cr_col_buf,
1405                           2 >> chroma_subsamp_x,
1406                           2 >> chroma_subsamp_x,
1407                           AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
1408                                  (height - (y << 1)) >> chroma_subsamp_y));
1409             }
1410         }
1411     }
1412 
1413     dealloc_arrays(params,
1414                    &pred_pos_luma,
1415                    &pred_pos_chroma,
1416                    &luma_grain_block,
1417                    &cb_grain_block,
1418                    &cr_grain_block,
1419                    &y_line_buf,
1420                    &cb_line_buf,
1421                    &cr_line_buf,
1422                    &y_col_buf,
1423                    &cb_col_buf,
1424                    &cr_col_buf);
1425 }
1426 
1427 /*
1428 void av1_film_grain_write_updated(const AomFilmGrain *pars,
1429                                   int32_t monochrome,
1430                                   struct AomWriteBitBuffer *wb) {
1431   svt_aom_wb_write_literal(wb, pars->num_y_points, 4);  // max 14
1432   for (int32_t i = 0; i < pars->num_y_points; i++) {
1433     svt_aom_wb_write_literal(wb, pars->scaling_points_y[i][0], 8);
1434     svt_aom_wb_write_literal(wb, pars->scaling_points_y[i][1], 8);
1435   }
1436 
1437   if (!monochrome)
1438     svt_aom_wb_write_bit(wb, pars->chroma_scaling_from_luma);
1439 
1440   if (!(monochrome || pars->chroma_scaling_from_luma)) {
1441     svt_aom_wb_write_literal(wb, pars->num_cb_points, 4);  // max 10
1442     for (int32_t i = 0; i < pars->num_cb_points; i++) {
1443       svt_aom_wb_write_literal(wb, pars->scaling_points_cb[i][0], 8);
1444       svt_aom_wb_write_literal(wb, pars->scaling_points_cb[i][1], 8);
1445     }
1446 
1447     svt_aom_wb_write_literal(wb, pars->num_cr_points, 4);  // max 10
1448     for (int32_t i = 0; i < pars->num_cr_points; i++) {
1449       svt_aom_wb_write_literal(wb, pars->scaling_points_cr[i][0], 8);
1450       svt_aom_wb_write_literal(wb, pars->scaling_points_cr[i][1], 8);
1451     }
1452   }
1453 
1454   svt_aom_wb_write_literal(wb, pars->scaling_shift - 8, 2);  // 8 + value
1455 
1456   // AR coefficients
1457   // Only sent if the corresponding scaling function has
1458   // more than 0 points
1459   svt_aom_wb_write_literal(wb, pars->ar_coeff_lag, 2);
1460 
1461   int32_t num_pos_luma = 2 * pars->ar_coeff_lag * (pars->ar_coeff_lag + 1);
1462   int32_t num_pos_chroma = num_pos_luma;
1463   if (pars->num_y_points > 0) ++num_pos_chroma;
1464 
1465   if (pars->num_y_points)
1466     for (int32_t i = 0; i < num_pos_luma; i++)
1467       svt_aom_wb_write_literal(wb, pars->ar_coeffs_y[i] + 128, 8);
1468 
1469   if (pars->num_cb_points || pars->chroma_scaling_from_luma)
1470     for (int32_t i = 0; i < num_pos_chroma; i++)
1471       svt_aom_wb_write_literal(wb, pars->ar_coeffs_cb[i] + 128, 8);
1472 
1473   if (pars->num_cr_points || pars->chroma_scaling_from_luma)
1474     for (int32_t i = 0; i < num_pos_chroma; i++)
1475       svt_aom_wb_write_literal(wb, pars->ar_coeffs_cr[i] + 128, 8);
1476 
1477   svt_aom_wb_write_literal(wb, pars->ar_coeff_shift - 6, 2);  // 6 + value
1478 
1479   svt_aom_wb_write_literal(wb, pars->grain_scale_shift, 2);
1480 
1481   if (pars->num_cb_points) {
1482     svt_aom_wb_write_literal(wb, pars->cb_mult, 8);
1483     svt_aom_wb_write_literal(wb, pars->cb_luma_mult, 8);
1484     svt_aom_wb_write_literal(wb, pars->cb_offset, 9);
1485   }
1486 
1487   if (pars->num_cr_points) {
1488     svt_aom_wb_write_literal(wb, pars->cr_mult, 8);
1489     svt_aom_wb_write_literal(wb, pars->cr_luma_mult, 8);
1490     svt_aom_wb_write_literal(wb, pars->cr_offset, 9);
1491   }
1492 
1493   svt_aom_wb_write_bit(wb, pars->overlap_flag);
1494 
1495   svt_aom_wb_write_bit(wb, pars->clip_to_restricted_range);
1496 }
1497 */
1498 /*
1499 void av1_film_grain_read_updated(AomFilmGrain *pars,
1500                                  int32_t monochrome,
1501                                  struct aom_read_bit_buffer *rb,
1502                                  struct aom_internal_error_info *error) {
1503   // Scaling functions parameters
1504   pars->num_y_points = aom_rb_read_literal(rb, 4);  // max 14
1505   if (pars->num_y_points > 14)
1506     aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1507                        "Number of points for film grain luma scaling function "
1508                        "exceeds the maximum value.");
1509   for (int32_t i = 0; i < pars->num_y_points; i++) {
1510     pars->scaling_points_y[i][0] = aom_rb_read_literal(rb, 8);
1511     if (i && pars->scaling_points_y[i - 1][0] >= pars->scaling_points_y[i][0])
1512       aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1513                          "First coordinate of the scaling function points "
1514                          "shall be increasing.");
1515     pars->scaling_points_y[i][1] = aom_rb_read_literal(rb, 8);
1516   }
1517 
1518   if (!monochrome)
1519     pars->chroma_scaling_from_luma = aom_rb_read_bit(rb);
1520 
1521   if (monochrome || pars->chroma_scaling_from_luma) {
1522     pars->num_cb_points = 0;
1523     pars->num_cr_points = 0;
1524   } else {
1525     pars->num_cb_points = aom_rb_read_literal(rb, 4);  // max 10
1526     if (pars->num_cb_points > 10)
1527       aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1528                          "Number of points for film grain cb scaling function "
1529                          "exceeds the maximum value.");
1530     for (int32_t i = 0; i < pars->num_cb_points; i++) {
1531       pars->scaling_points_cb[i][0] = aom_rb_read_literal(rb, 8);
1532       if (i &&
1533           pars->scaling_points_cb[i - 1][0] >= pars->scaling_points_cb[i][0])
1534         aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1535                            "First coordinate of the scaling function points "
1536                            "shall be increasing.");
1537       pars->scaling_points_cb[i][1] = aom_rb_read_literal(rb, 8);
1538     }
1539 
1540     pars->num_cr_points = aom_rb_read_literal(rb, 4);  // max 10
1541     if (pars->num_cr_points > 10)
1542       aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1543                          "Number of points for film grain cr scaling function "
1544                          "exceeds the maximum value.");
1545     for (int32_t i = 0; i < pars->num_cr_points; i++) {
1546       pars->scaling_points_cr[i][0] = aom_rb_read_literal(rb, 8);
1547       if (i &&
1548           pars->scaling_points_cr[i - 1][0] >= pars->scaling_points_cr[i][0])
1549         aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1550                            "First coordinate of the scaling function points "
1551                            "shall be increasing.");
1552       pars->scaling_points_cr[i][1] = aom_rb_read_literal(rb, 8);
1553     }
1554   }
1555 
1556   pars->scaling_shift = aom_rb_read_literal(rb, 2) + 8;  // 8 + value
1557 
1558   // AR coefficients
1559   // Only sent if the corresponding scaling function has
1560   // more than 0 points
1561   pars->ar_coeff_lag = aom_rb_read_literal(rb, 2);
1562 
1563   int32_t num_pos_luma = 2 * pars->ar_coeff_lag * (pars->ar_coeff_lag + 1);
1564   int32_t num_pos_chroma = num_pos_luma;
1565   if (pars->num_y_points > 0) ++num_pos_chroma;
1566 
1567   if (pars->num_y_points)
1568     for (int32_t i = 0; i < num_pos_luma; i++)
1569       pars->ar_coeffs_y[i] = aom_rb_read_literal(rb, 8) - 128;
1570 
1571   if (pars->num_cb_points || pars->chroma_scaling_from_luma)
1572     for (int32_t i = 0; i < num_pos_chroma; i++)
1573       pars->ar_coeffs_cb[i] = aom_rb_read_literal(rb, 8) - 128;
1574 
1575   if (pars->num_cr_points || pars->chroma_scaling_from_luma)
1576     for (int32_t i = 0; i < num_pos_chroma; i++)
1577       pars->ar_coeffs_cr[i] = aom_rb_read_literal(rb, 8) - 128;
1578 
1579   pars->ar_coeff_shift = aom_rb_read_literal(rb, 2) + 6;  // 6 + value
1580 
1581   pars->grain_scale_shift = aom_rb_read_literal(rb, 2);
1582 
1583   if (pars->num_cb_points) {
1584     pars->cb_mult = aom_rb_read_literal(rb, 8);
1585     pars->cb_luma_mult = aom_rb_read_literal(rb, 8);
1586     pars->cb_offset = aom_rb_read_literal(rb, 9);
1587   }
1588 
1589   if (pars->num_cr_points) {
1590     pars->cr_mult = aom_rb_read_literal(rb, 8);
1591     pars->cr_luma_mult = aom_rb_read_literal(rb, 8);
1592     pars->cr_offset = aom_rb_read_literal(rb, 9);
1593   }
1594 
1595   pars->overlap_flag = aom_rb_read_bit(rb);
1596 
1597   pars->clip_to_restricted_range = aom_rb_read_bit(rb);
1598 }
1599 */
1600