1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10 */
11
12 /*!\file
13 * \brief Describes film grain parameters and film grain synthesis
14 *
15 */
16
17 #include <stdio.h>
18 #include <string.h>
19 #include <stdlib.h>
20 #include "grainSynthesis.h"
21 #include "EbLog.h"
22
// Samples with Gaussian distribution in the range of [-2048, 2047] (12 bits)
// with zero mean and standard deviation of about 512.
// The values should be divided by 4 for the 10-bit range and by 16 for the 8-bit range.
26 static const int32_t gaussian_sequence[2048] = {
27 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820, 224, 1248,
28 996, 272, -8, -916, -388, -732, -104, -188, 800, 112, -652, -320, -376,
29 140, -252, 492, -168, 44, -788, 588, -584, 500, -228, 12, 680, 272,
30 -476, 972, -100, 652, 368, 432, -196, -720, -192, 1000, -332, 652, -136,
31 -552, -604, -4, 192, -220, -136, 1000, -52, 372, -96, -624, 124, -24,
32 396, 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740, 248,
33 -968, -848, 608, 376, -60, -292, -40, -156, 252, -292, 248, 224, -280,
34 400, -244, 244, -60, 76, -80, 212, 532, 340, 128, -36, 824, -352,
35 -60, -264, -96, -612, 416, -704, 220, -204, 640, -160, 1220, -408, 900,
36 336, 20, -336, -96, -792, 304, 48, -28, -1232, -1172, -448, 104, -292,
37 -520, 244, 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136,
38 488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676, -376, 168,
39 -108, 464, 8, 564, 64, 240, 308, -300, -400, -456, -136, 56, 120,
40 -408, -116, 436, 504, -232, 328, 844, -164, -84, 784, -168, 232, -224,
41 348, -376, 128, 568, 96, -1244, -288, 276, 848, 832, -360, 656, 464,
42 -384, -332, -356, 728, -388, 160, -192, 468, 296, 224, 140, -776, -100,
43 280, 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808, 772,
44 20, 268, 88, -332, -284, 124, -384, -448, 208, -228, -1044, -328, 660,
45 380, -148, -300, 588, 240, 540, 28, 136, -88, -436, 256, 296, -1000,
46 1400, 0, -48, 1056, -136, 264, -528, -1108, 632, -484, -592, -344, 796,
47 124, -668, -768, 388, 1296, -232, -188, -200, -288, -4, 308, 100, -168,
48 256, -500, 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384,
49 548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220, -100, 996,
50 -796, 548, 360, -216, 180, 428, -200, -212, 148, 96, 148, 284, 216,
51 -412, -320, 120, -300, -384, -604, -572, -332, -8, -180, -176, 696, 116,
52 -88, 628, 76, 44, -516, 240, -208, -40, 100, -592, 344, -308, -452,
53 -228, 20, 916, -1752, -136, -340, -804, 140, 40, 512, 340, 248, 184,
54 -492, 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560, -1020,
55 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108, -28, 320, -628,
56 312, -92, -92, -472, 268, 16, 560, 516, -672, -52, 492, -100, 260,
57 384, 284, 292, 304, -148, 88, -152, 1012, 1064, -228, 164, -376, -684,
58 592, -392, 156, 196, -524, -64, -884, 160, -176, 636, 648, 404, -396,
59 -436, 864, 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920,
60 436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564, -68, -164,
61 -96, 692, 364, -692, -1012, -68, 260, -480, 876, -1116, 452, -332, -352,
62 892, -1088, 1220, -676, 12, -292, 244, 496, 372, -32, 280, 200, 112,
63 -440, -96, 24, -644, -184, 56, -432, 224, -980, 272, -260, 144, -436,
64 420, 356, 364, -528, 76, 172, -744, -368, 404, -752, -416, 684, -688,
65 72, 540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24, 424,
66 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4, -88, 532, 272,
67 -524, 320, 276, -508, 940, 24, -400, -120, 756, 60, 236, -412, 100,
68 376, -484, 400, -100, -740, -108, -260, 328, -268, 224, -200, -416, 184,
69 -604, -564, -20, 296, 60, 892, -888, 60, 164, 68, -760, 216, -296,
70 904, -336, -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164,
71 -1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264, 308, 32,
72 64, -72, 72, 116, 176, -64, -272, 460, -536, -784, -280, 348, 108,
73 -752, -132, 524, -540, -776, 116, -296, -1196, -288, -560, 1040, -472, 116,
74 -848, -1116, 116, 636, 696, 284, -176, 1016, 204, -864, -648, -248, 356,
75 972, -584, -204, 264, 880, 528, -24, -184, 116, 448, -144, 828, 524,
76 212, -212, 52, 12, 200, 268, -488, -404, -880, 824, -672, -40, 908,
77 -248, 500, 716, -576, 492, -576, 16, 720, -108, 384, 124, 344, 280,
78 576, -500, 252, 104, -308, 196, -188, -8, 1268, 296, 1032, -1196, 436,
79 316, 372, -432, -200, -660, 704, -224, 596, -132, 268, 32, -452, 884,
80 104, -1008, 424, -1348, -280, 4, -1168, 368, 476, 696, 300, -8, 24,
81 180, -592, -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420,
82 320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220, -644, -248,
83 464, 72, 360, 32, -388, 496, -680, -48, 208, -116, -408, 60, -604,
84 -392, 548, -840, 784, -460, 656, -544, -388, -264, 908, -800, -628, -612,
85 -568, 572, -220, 164, 288, -16, -308, 308, -112, -636, -760, 280, -668,
86 432, 364, 240, -196, 604, 340, 384, 196, 592, -44, -500, 432, -580,
87 -132, 636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16, -220,
88 -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044, -32, 40, 104,
89 148, 192, -136, -520, 56, -816, -224, 732, 392, 356, 212, -80, -424,
90 -1008, -324, 588, -1496, 576, 460, -816, -848, 56, -580, -92, -1372, -112,
91 -496, 200, 364, 52, -140, 48, -48, -60, 84, 72, 40, 132, -356,
92 -268, -104, -284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460,
93 756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716, -604, 404,
94 -72, -88, -888, -68, 944, 88, -220, -344, 960, 472, 460, -232, 704,
95 120, 832, -228, 692, -508, 132, -476, 844, -748, -364, -44, 1116, -1104,
96 -1056, 76, 428, 552, -692, 60, 356, 96, -384, -188, -612, -576, 736,
97 508, 892, 352, -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508,
98 -144, -8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44, 472,
99 -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356, 652, -28, -884,
100 256, 484, -192, 760, -176, 376, -524, -452, -436, 860, -736, 212, 124,
101 504, -476, 468, 76, -472, 552, -692, -944, -620, 740, -240, 400, 132,
102 20, 192, -196, 264, -668, -1012, -60, 296, -316, -828, 76, -156, 284,
103 -768, -448, -832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588,
104 220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464, 412, -200,
105 392, 440, -200, 264, -152, -260, 320, 1032, 216, 320, -8, -64, 156,
106 -1016, 1084, 1172, 536, 484, -432, 132, 372, -52, -256, 84, 116, -352,
107 48, 116, 304, -384, 412, 924, -300, 528, 628, 180, 648, 44, -980,
108 -220, 1320, 48, 332, 748, 524, -268, -720, 540, -276, 564, -344, -208,
109 -196, 436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48, -456,
110 888, 8, 552, -156, -292, 948, 288, 128, -716, -292, 1192, -152, 876,
111 352, -600, -260, -812, -468, -28, -120, -32, -44, 1284, 496, 192, 464,
112 312, -76, -516, -380, -456, -1012, -48, 308, -156, 36, 492, -156, -808,
113 188, 1652, 68, -120, -116, 316, 160, -140, 352, 808, -416, 592, 316,
114 -480, 56, 528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416,
115 -600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404, -316, -1216,
116 -588, 520, -108, 92, -320, 368, -480, -216, -92, 1688, -300, 180, 1020,
117 -176, 820, -68, -228, -260, 436, -904, 20, 40, -508, 440, -736, 312,
118 332, 204, 760, -372, 728, 96, -20, -632, -520, -560, 336, 1076, -64,
119 -532, 776, 584, 192, 396, -728, -520, 276, -188, 80, -52, -612, -252,
120 -48, 648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180, 816,
121 -796, 48, 152, 484, -88, -216, 988, 696, 188, -528, 648, -116, -180,
122 316, 476, 12, -564, 96, 476, -252, -364, -376, -392, 556, -256, -576,
123 260, -352, 120, -16, -136, -260, -492, 72, 556, 660, 580, 616, 772,
124 436, 424, -32, -324, -1268, 416, -324, -80, 920, 160, 228, 724, 32,
125 -516, 64, 384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120,
126 -480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168, -68, -196,
127 -868, 460, 1080, 380, -80, 244, 0, 484, -888, 64, 184, 352, 600,
128 460, 164, 604, -196, 320, -64, 588, -184, 228, 12, 372, 48, -848,
129 -344, 224, 208, -200, 484, 128, -20, 272, -468, -840, 384, 256, -720,
130 -520, -464, -580, 112, -120, 644, -356, -208, -608, -528, 704, 560, -424,
131 392, 828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80, -556,
132 -972, -196, -472, 724, 80, 168, -32, 88, 160, -688, 0, 160, 356,
133 372, -776, 740, -128, 676, -248, -480, 4, -364, 96, 544, 232, -1032,
134 956, 236, 356, 20, -40, 300, 24, -676, -596, 132, 1120, -104, 532,
135 -1096, 568, 648, 444, 508, 380, 188, -376, -604, 1488, 424, 24, 756,
136 -220, -192, 716, 120, 920, 688, 168, 44, -460, 568, 284, 1144, 1160,
137 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188, -816, -628,
138 -348, -228, -380, 1012, -452, -660, 736, 928, 404, -696, -72, -268, -892,
139 128, 184, -344, -780, 360, 336, 400, 344, 428, 548, -112, 136, -228,
140 -216, -820, -516, 340, 92, -136, 116, -300, 376, -244, 100, -316, -520,
141 -284, -12, 824, 164, -548, -180, -128, 116, -924, -828, 268, -368, -580,
142 620, 192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720, 288,
143 -528, 556, -364, 548, -148, 504, 316, 152, -648, -620, -684, -24, -376,
144 -384, -108, -920, -1032, 768, 180, -264, -508, -1268, -260, -60, 300, -240,
145 988, 724, -376, -576, -212, -736, 556, 192, 1092, -620, -880, 376, -56,
146 -4, -216, -32, 836, 268, 396, 1332, 864, -600, 100, 56, -412, -92,
147 356, 180, 884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140,
148 -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32, -228, 132,
149 -48, -220, 868, -1016, -60, -1044, -464, 328, 916, 244, 12, -736, -296,
150 360, 468, -376, -108, -92, 788, 368, -56, 544, 400, -672, -420, 728,
151 16, 320, 44, -284, -380, -796, 488, 132, 204, -596, -372, 88, -152,
152 -908, -636, -572, -624, -116, -692, -200, -56, 276, -88, 484, -324, 948,
153 864, 1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908, -84,
154 -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84, 344, -520, 348,
155 -688, 240, -84, 216, -1044, -136, -676, -396, -1500, 960, -40, 176, 168,
156 1516, 420, -504, -344, -364, -360, 1216, -940, -380, -212, 252, -660, -708,
157 484, -444, -152, 928, -120, 1112, 476, -260, 560, -148, -344, 108, -196,
158 228, -288, 504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196,
159 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504, 116, 432,
160 528, 48, 476, -168, -608, 448, 160, -532, -272, 28, -676, -12, 828,
161 980, 456, 520, 104, -104, 256, -344, -4, -28, -368, -52, -524, -572,
162 -556, -200, 768, 1124, -208, -512, 176, 232, 248, -148, -888, 604, -600,
163 -304, 804, -156, -212, 488, -192, -804, -256, 368, -360, -916, -328, 228,
164 -240, -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432, 252,
165 -752, -152, 288, 268, -580, -848, -592, 108, -76, 244, 312, -716, 592,
166 -80, 436, 360, 4, -248, 160, 516, 584, 732, 44, -468, -280, -292,
167 -156, -588, 28, 308, 912, 24, 124, 156, 180, -252, 944, -924, -772,
168 -520, -428, -624, 300, -212, -1144, 32, -724, 800, -1128, -212, -1288, -848,
169 180, -416, 440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380,
170 -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384, 648, -832,
171 508, 552, -52, -100, -656, 208, -568, 748, -88, 680, 232, 300, 192,
172 -408, -1012, -152, -252, -268, 272, -876, -664, -648, -332, -136, 16, 12,
173 1152, -28, 332, -536, 320, -672, -460, -316, 532, -260, 228, -40, 1052,
174 -816, 180, 88, -496, -556, -672, -368, 428, 92, 356, 404, -408, 252,
175 196, -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120, 372,
176 -900, -40, 472, -264, -592, 952, 128, 656, 112, 664, -232, 420, 4,
177 -344, -464, 556, 244, -416, -32, 252, 0, -412, 188, -696, 508, -476,
178 324, -1096, 656, -312, 560, 264, -136, 304, 160, -64, -580, 248, 336,
179 -720, 560, -348, -288, -276, -196, -500, 852, -544, -236, -1128, -992, -776,
180 116, 56, 52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148,
181 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156, -300, -528,
182 -472, 364, 100, -744, -1056, -32, 540, 280, 144, -676, -32, -232, -280,
183 -224, 96, 568, -76, 172, 148, 148, 104, 32, -296, -32, 788, -80,
184 32, -16, 280, 288, 944, 428, -484};
185
// Number of random bits drawn per lookup into gaussian_sequence
// (the table holds 2048 = 2^11 entries).
static const int32_t gauss_bits = 11;

// Luma sub-block dimensions; luma_subblock_size_y sizes the luma column
// buffer allocated in init_arrays().
// NOTE(review): not const - presumably adjusted elsewhere in the file.
static int32_t luma_subblock_size_y = 32;
static int32_t luma_subblock_size_x = 32;

// Chroma sub-block dimensions; chroma_subblock_size_y sizes the chroma column
// buffers allocated in init_arrays().
// NOTE(review): not const - presumably adjusted elsewhere in the file.
static int32_t chroma_subblock_size_y = 16;
static int32_t chroma_subblock_size_x = 16;

// Luma clipping bounds applied by add_noise_to_block() when
// params->clip_to_restricted_range is set.
static const int32_t min_luma_legal_range = 16;
static const int32_t max_luma_legal_range = 235;

// Chroma clipping bounds applied by add_noise_to_block() when
// params->clip_to_restricted_range is set.
static const int32_t min_chroma_legal_range = 16;
static const int32_t max_chroma_legal_range = 240;

// 256-entry scaling look-up tables, one per plane. They are zeroed by
// init_arrays() and read through scale_lut() when noise is added.
static int32_t scaling_lut_y[256];
static int32_t scaling_lut_cb[256];
static int32_t scaling_lut_cr[256];

// Grain value limits: grain_min / grain_max are the clamp bounds used by the
// grain block generators. grain_center is not referenced in this part of the
// file.
static int32_t grain_center;
static int32_t grain_min;
static int32_t grain_max;

// 16-bit state of the pseudo-random generator: seeded by
// init_random_generator() and advanced by get_random_number().
static uint16_t random_register = 0; // random number generator register
209
210 //----------------------------------------------------------------------
211 // todo: aomlib memory functions (to be replaced by Eb functions)
212 /*
213 #define ADDRESS_STORAGE_SIZE sizeof(size_t)
214 #define DEFAULT_ALIGNMENT (2 * sizeof(void *))
215 #define AOM_MAX_ALLOCABLE_MEMORY 8589934592 // 8 GB
216 //returns an addr aligned to the byte boundary specified by align
217 #define align_addr(addr, align) \
218 (void *)(((size_t)(addr) + ((align)-1)) & ~(size_t)((align)-1))
219
220 // Returns 0 in case of overflow of nmemb * size.
221 static int32_t check_size_argument_overflow(uint64_t nmemb, uint64_t size) {
222 const uint64_t total_size = nmemb * size;
223 if (nmemb == 0) return 1;
224 if (size > AOM_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
225 if (total_size != (size_t)total_size) return 0;
226 return 1;
227 }
228
229 static size_t get_aligned_malloc_size(size_t size, size_t align) {
230 return size + align - 1 + ADDRESS_STORAGE_SIZE;
231 }
232
233 static size_t *get_malloc_address_location(void *const mem) {
234 return ((size_t *)mem) - 1;
235 }
236
237 static void set_actual_malloc_address(void *const mem,
238 const void *const malloc_addr) {
239 size_t *const malloc_addr_location = get_malloc_address_location(mem);
240 *malloc_addr_location = (size_t)malloc_addr;
241 }
242
243 static void *get_actual_malloc_address(void *const mem) {
244 const size_t *const malloc_addr_location = get_malloc_address_location(mem);
245 return (void *)(*malloc_addr_location);
246 }
247
248 void *svt_aom_memalign(size_t align, size_t size) {
249 void *x = NULL;
250 const size_t aligned_size = get_aligned_malloc_size(size, align);
251 #if defined(AOM_MAX_ALLOCABLE_MEMORY)
252 if (!check_size_argument_overflow(1, aligned_size)) return NULL;
253 #endif
254 void *const addr = malloc(aligned_size);
255 if (addr) {
256 x = align_addr((uint8_t *)addr + ADDRESS_STORAGE_SIZE, align);
257 set_actual_malloc_address(x, addr);
258 }
259 return x;
260 }
261
262 void *svt_aom_malloc(size_t size) { return svt_aom_memalign(DEFAULT_ALIGNMENT, size); }
263
264 void svt_aom_free(void *memblk) {
265 if (memblk) {
266 void *addr = get_actual_malloc_address(memblk);
267 free(addr);
268 }
269 }
270 */
271 //--------------------------------------------------------------------
272
/*
 * Clears the three scaling look-up tables, builds the auto-regressive
 * prediction position tables and allocates every working buffer used by the
 * grain synthesis.
 *
 * params               film grain parameters (ar_coeff_lag and num_y_points
 *                      determine the number of prediction positions)
 * luma_stride          stride used to size the luma line buffer
 * chroma_stride        stride used to size the chroma line buffers
 * pred_pos_luma_p      receives the luma position table; each entry is
 *                      {row offset, column offset, 0}
 * pred_pos_chroma_p    receives the chroma position table; same layout, plus
 *                      a trailing {0, 0, 1} entry when num_y_points > 0
 * *_grain_block        receive buffers of luma_grain_samples /
 *                      chroma_grain_samples elements
 * *_line_buf, *_col_buf receive the line and column buffers
 * chroma_subsamp_y/x   chroma subsampling shifts (used to size chroma buffers)
 *
 * All memory handed out here is released by dealloc_arrays().
 *
 * NOTE(review): the line/column/grain buffer allocations are not checked for
 * failure here; callers presumably rely on them succeeding.
 */
static void init_arrays(AomFilmGrain *params, int32_t luma_stride, int32_t chroma_stride,
                        int32_t ***pred_pos_luma_p, int32_t ***pred_pos_chroma_p,
                        int32_t **luma_grain_block, int32_t **cb_grain_block,
                        int32_t **cr_grain_block, int32_t **y_line_buf, int32_t **cb_line_buf,
                        int32_t **cr_line_buf, int32_t **y_col_buf, int32_t **cb_col_buf,
                        int32_t **cr_col_buf, int32_t luma_grain_samples,
                        int32_t chroma_grain_samples, int32_t chroma_subsamp_y,
                        int32_t chroma_subsamp_x) {
    memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
    memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
    memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);

    int32_t num_pos_luma   = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    int32_t num_pos_chroma = num_pos_luma;
    if (params->num_y_points > 0)
        ++num_pos_chroma;

    // malloc(0) is allowed to return NULL, which would trip the ASSERTs below
    // even though nothing went wrong (ar_coeff_lag == 0 gives an empty table).
    // Always request at least one slot so that NULL means a real failure.
    const size_t luma_slots   = num_pos_luma > 0 ? (size_t)num_pos_luma : 1;
    const size_t chroma_slots = num_pos_chroma > 0 ? (size_t)num_pos_chroma : 1;

    int32_t **pred_pos_luma;
    int32_t **pred_pos_chroma;

    pred_pos_luma = (int32_t **)malloc(sizeof(*pred_pos_luma) * luma_slots);
    ASSERT(pred_pos_luma != NULL);
    for (int32_t row = 0; row < num_pos_luma; row++) {
        pred_pos_luma[row] = (int32_t *)malloc(sizeof(**pred_pos_luma) * 3);
        ASSERT(pred_pos_luma[row]);
    }

    pred_pos_chroma = (int32_t **)malloc(sizeof(*pred_pos_chroma) * chroma_slots);
    ASSERT(pred_pos_chroma != NULL);
    for (int32_t row = 0; row < num_pos_chroma; row++) {
        pred_pos_chroma[row] = (int32_t *)malloc(sizeof(**pred_pos_chroma) * 3);
        ASSERT(pred_pos_chroma[row]);
    }

    int32_t pos_ar_index = 0;

    // Positions in the rows above the current sample: full width of the window.
    for (int32_t row = -params->ar_coeff_lag; row < 0; row++) {
        for (int32_t col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1; col++) {
            pred_pos_luma[pos_ar_index][0] = row;
            pred_pos_luma[pos_ar_index][1] = col;
            pred_pos_luma[pos_ar_index][2] = 0;

            pred_pos_chroma[pos_ar_index][0] = row;
            pred_pos_chroma[pos_ar_index][1] = col;
            pred_pos_chroma[pos_ar_index][2] = 0;
            ++pos_ar_index;
        }
    }

    // Positions in the current row: only the samples to the left.
    for (int32_t col = -params->ar_coeff_lag; col < 0; col++) {
        pred_pos_luma[pos_ar_index][0] = 0;
        pred_pos_luma[pos_ar_index][1] = col;
        pred_pos_luma[pos_ar_index][2] = 0;

        pred_pos_chroma[pos_ar_index][0] = 0;
        pred_pos_chroma[pos_ar_index][1] = col;
        pred_pos_chroma[pos_ar_index][2] = 0;

        ++pos_ar_index;
    }

    // Extra chroma entry flagged with 1: the co-located luma grain.
    if (params->num_y_points > 0) {
        pred_pos_chroma[pos_ar_index][0] = 0;
        pred_pos_chroma[pos_ar_index][1] = 0;
        pred_pos_chroma[pos_ar_index][2] = 1;
    }

    *pred_pos_luma_p   = pred_pos_luma;
    *pred_pos_chroma_p = pred_pos_chroma;

    *y_line_buf  = (int32_t *)malloc(sizeof(**y_line_buf) * luma_stride * 2);
    *cb_line_buf = (int32_t *)malloc(sizeof(**cb_line_buf) * chroma_stride *
                                     (2 >> chroma_subsamp_y));
    *cr_line_buf = (int32_t *)malloc(sizeof(**cr_line_buf) * chroma_stride *
                                     (2 >> chroma_subsamp_y));

    *y_col_buf  = (int32_t *)malloc(sizeof(**y_col_buf) * (luma_subblock_size_y + 2) * 2);
    *cb_col_buf = (int32_t *)malloc(sizeof(**cb_col_buf) *
                                    (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
                                    (2 >> chroma_subsamp_x));
    *cr_col_buf = (int32_t *)malloc(sizeof(**cr_col_buf) *
                                    (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
                                    (2 >> chroma_subsamp_x));

    *luma_grain_block = (int32_t *)malloc(sizeof(**luma_grain_block) * luma_grain_samples);
    *cb_grain_block   = (int32_t *)malloc(sizeof(**cb_grain_block) * chroma_grain_samples);
    *cr_grain_block   = (int32_t *)malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
}
361
/*
 * Releases everything allocated by init_arrays().
 *
 * params            film grain parameters; ar_coeff_lag and num_y_points must
 *                   be unchanged since init_arrays() so the same number of
 *                   position-table rows is freed
 * pred_pos_luma,
 * pred_pos_chroma   addresses of the position tables
 * remaining args    addresses of the grain, line and column buffers
 *
 * Every pointer is reset to NULL after being freed so the caller is not left
 * holding dangling pointers and a repeated call is harmless. A position
 * table that is already NULL is skipped instead of being dereferenced.
 */
static void dealloc_arrays(AomFilmGrain *params, int32_t ***pred_pos_luma,
                           int32_t ***pred_pos_chroma, int32_t **luma_grain_block,
                           int32_t **cb_grain_block, int32_t **cr_grain_block, int32_t **y_line_buf,
                           int32_t **cb_line_buf, int32_t **cr_line_buf, int32_t **y_col_buf,
                           int32_t **cb_col_buf, int32_t **cr_col_buf) {
    int32_t num_pos_luma   = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    int32_t num_pos_chroma = num_pos_luma;
    if (params->num_y_points > 0)
        ++num_pos_chroma;

    if (*pred_pos_luma != NULL) {
        for (int32_t row = 0; row < num_pos_luma; row++) free((*pred_pos_luma)[row]);
        free(*pred_pos_luma);
        *pred_pos_luma = NULL;
    }

    if (*pred_pos_chroma != NULL) {
        for (int32_t row = 0; row < num_pos_chroma; row++) free((*pred_pos_chroma)[row]);
        free(*pred_pos_chroma);
        *pred_pos_chroma = NULL;
    }

    // free(NULL) is a no-op, so the flat buffers need no guard.
    free(*y_line_buf);
    *y_line_buf = NULL;

    free(*cb_line_buf);
    *cb_line_buf = NULL;

    free(*cr_line_buf);
    *cr_line_buf = NULL;

    free(*y_col_buf);
    *y_col_buf = NULL;

    free(*cb_col_buf);
    *cb_col_buf = NULL;

    free(*cr_col_buf);
    *cr_col_buf = NULL;

    free(*luma_grain_block);
    *luma_grain_block = NULL;

    free(*cb_grain_block);
    *cb_grain_block = NULL;

    free(*cr_grain_block);
    *cr_grain_block = NULL;
}
396
397 // get a number between 0 and 2^bits - 1
get_random_number(int32_t bits)398 static INLINE int32_t get_random_number(int32_t bits) {
399 uint16_t bit;
400 bit = ((random_register >> 0) ^ (random_register >> 1) ^ (random_register >> 3) ^
401 (random_register >> 12)) &
402 1;
403 random_register = (random_register >> 1) | (bit << 15);
404 return (random_register >> (16 - bits)) & ((1 << bits) - 1);
405 }
406
init_random_generator(int32_t luma_line,uint16_t seed)407 static void init_random_generator(int32_t luma_line, uint16_t seed) {
408 // same for the picture
409
410 uint16_t msb = (seed >> 8) & 255;
411 uint16_t lsb = seed & 255;
412
413 random_register = (msb << 8) + lsb;
414
415 // changes for each row
416 int32_t luma_num = luma_line >> 5;
417
418 random_register ^= ((luma_num * 37 + 178) & 255) << 8;
419 random_register ^= ((luma_num * 173 + 105) & 255);
420 }
421
// Fills luma_grain_block with grain for the luma plane.
// Does nothing when params->num_y_points is 0. Otherwise:
//   1. every sample of the luma_block_size_y x luma_block_size_x block is set
//      to a rounded, down-shifted entry of gaussian_sequence picked by the
//      random generator;
//   2. the samples inside the padding (top/left/right/bottom) are filtered in
//      raster order: the weighted sum of the neighbours listed in
//      pred_pos_luma (weights params->ar_coeffs_y) is rounded, shifted by
//      params->ar_coeff_shift, added to the sample and clamped to
//      [grain_min, grain_max].
// The filter reads samples it has already updated, so the order matters.
static void generate_luma_grain_block(AomFilmGrain *params, int32_t **pred_pos_luma,
                                      int32_t *luma_grain_block, int32_t luma_block_size_y,
                                      int32_t luma_block_size_x, int32_t luma_grain_stride,
                                      int32_t left_pad, int32_t top_pad, int32_t right_pad,
                                      int32_t bottom_pad) {
    if (params->num_y_points == 0)
        return;

    const int32_t noise_shift = 12 - params->bit_depth + params->grain_scale_shift;
    const int32_t num_taps    = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    const int32_t ar_round    = 1 << (params->ar_coeff_shift - 1);

    // Step 1: white Gaussian noise over the whole block.
    for (int32_t y = 0; y < luma_block_size_y; y++) {
        for (int32_t x = 0; x < luma_block_size_x; x++) {
            const int32_t sample = gaussian_sequence[get_random_number(gauss_bits)];
            luma_grain_block[y * luma_grain_stride + x] = (sample + ((1 << noise_shift) >> 1)) >>
                noise_shift;
        }
    }

    // Step 2: auto-regressive filtering of the interior.
    const int32_t y_end = luma_block_size_y - bottom_pad;
    const int32_t x_end = luma_block_size_x - right_pad;

    for (int32_t y = top_pad; y < y_end; y++) {
        for (int32_t x = left_pad; x < x_end; x++) {
            const int32_t here = y * luma_grain_stride + x;
            int32_t       acc  = 0;

            for (int32_t tap = 0; tap < num_taps; tap++) {
                const int32_t neighbour = (y + pred_pos_luma[tap][0]) * luma_grain_stride + x +
                    pred_pos_luma[tap][1];
                acc += params->ar_coeffs_y[tap] * luma_grain_block[neighbour];
            }

            luma_grain_block[here] = clamp(
                luma_grain_block[here] + ((acc + ar_round) >> params->ar_coeff_shift),
                grain_min,
                grain_max);
        }
    }
}
459
// Fills cb_grain_block and cr_grain_block with grain for the chroma planes.
// A plane is generated when it has scaling points (num_cb_points /
// num_cr_points) or when chroma_scaling_from_luma is set; otherwise its
// block is zeroed. Generation has two steps:
//   1. the random generator is re-seeded (line 7 << 5 for Cb, 11 << 5 for Cr)
//      and the block is filled with rounded, down-shifted entries of
//      gaussian_sequence;
//   2. the samples inside the padding are filtered in raster order using the
//      positions in pred_pos_chroma. An entry whose third field is 0 refers
//      to a neighbouring chroma grain sample; a third field of 1 refers to
//      the rounded average of the luma grain samples covering the chroma
//      sample. Any other value is reported and terminates the process.
// The filtered value is clamped to [grain_min, grain_max].
static void generate_chroma_grain_blocks(
    AomFilmGrain *params, int32_t **pred_pos_chroma, int32_t *luma_grain_block,
    int32_t *cb_grain_block, int32_t *cr_grain_block, int32_t luma_grain_stride,
    int32_t chroma_block_size_y, int32_t chroma_block_size_x, int32_t chroma_grain_stride,
    int32_t left_pad, int32_t top_pad, int32_t right_pad, int32_t bottom_pad,
    int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) {
    const int32_t noise_shift = 12 - params->bit_depth + params->grain_scale_shift;

    int32_t num_taps = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    if (params->num_y_points > 0)
        ++num_taps; // extra tap for the co-located luma grain

    const int32_t ar_round      = 1 << (params->ar_coeff_shift - 1);
    const int     total_samples = chroma_block_size_y * chroma_grain_stride;

    const int32_t use_cb = params->num_cb_points || params->chroma_scaling_from_luma;
    const int32_t use_cr = params->num_cr_points || params->chroma_scaling_from_luma;

    // Step 1: white noise (or zeros) for Cb, then for Cr.
    if (!use_cb) {
        memset(cb_grain_block, 0, sizeof(*cb_grain_block) * total_samples);
    } else {
        init_random_generator(7 << 5, params->random_seed);

        for (int32_t y = 0; y < chroma_block_size_y; y++) {
            for (int32_t x = 0; x < chroma_block_size_x; x++) {
                const int32_t sample = gaussian_sequence[get_random_number(gauss_bits)];
                cb_grain_block[y * chroma_grain_stride + x] =
                    (sample + ((1 << noise_shift) >> 1)) >> noise_shift;
            }
        }
    }

    if (!use_cr) {
        memset(cr_grain_block, 0, sizeof(*cr_grain_block) * total_samples);
    } else {
        init_random_generator(11 << 5, params->random_seed);

        for (int32_t y = 0; y < chroma_block_size_y; y++) {
            for (int32_t x = 0; x < chroma_block_size_x; x++) {
                const int32_t sample = gaussian_sequence[get_random_number(gauss_bits)];
                cr_grain_block[y * chroma_grain_stride + x] =
                    (sample + ((1 << noise_shift) >> 1)) >> noise_shift;
            }
        }
    }

    // Step 2: auto-regressive filtering of the interior of both planes.
    const int32_t y_end = chroma_block_size_y - bottom_pad;
    const int32_t x_end = chroma_block_size_x - right_pad;

    for (int32_t y = top_pad; y < y_end; y++) {
        for (int32_t x = left_pad; x < x_end; x++) {
            const int32_t here   = y * chroma_grain_stride + x;
            int32_t       acc_cb = 0;
            int32_t       acc_cr = 0;

            for (int32_t tap = 0; tap < num_taps; tap++) {
                const int32_t *entry = pred_pos_chroma[tap];

                switch (entry[2]) {
                case 0: {
                    // neighbouring chroma grain sample
                    const int32_t neighbour = (y + entry[0]) * chroma_grain_stride + x + entry[1];
                    acc_cb += params->ar_coeffs_cb[tap] * cb_grain_block[neighbour];
                    acc_cr += params->ar_coeffs_cr[tap] * cr_grain_block[neighbour];
                    break;
                }
                case 1: {
                    // average of the luma grain samples covering this chroma sample
                    const int32_t luma_y0  = ((y - top_pad) << chroma_subsamp_y) + top_pad;
                    const int32_t luma_x0  = ((x - left_pad) << chroma_subsamp_x) + left_pad;
                    int32_t       luma_avg = 0;

                    for (int32_t ly = luma_y0; ly < luma_y0 + chroma_subsamp_y + 1; ly++) {
                        for (int32_t lx = luma_x0; lx < luma_x0 + chroma_subsamp_x + 1; lx++) {
                            luma_avg += luma_grain_block[ly * luma_grain_stride + lx];
                        }
                    }

                    luma_avg = (luma_avg + ((1 << (chroma_subsamp_y + chroma_subsamp_x)) >> 1)) >>
                        (chroma_subsamp_y + chroma_subsamp_x);

                    acc_cb += params->ar_coeffs_cb[tap] * luma_avg;
                    acc_cr += params->ar_coeffs_cr[tap] * luma_avg;
                    break;
                }
                default:
                    SVT_LOG(
                        "Grain synthesis: prediction between two chroma components is "
                        "not supported!");
                    exit(1);
                }
            }

            if (use_cb) {
                cb_grain_block[here] = clamp(
                    cb_grain_block[here] + ((acc_cb + ar_round) >> params->ar_coeff_shift),
                    grain_min,
                    grain_max);
            }
            if (use_cr) {
                cr_grain_block[here] = clamp(
                    cr_grain_block[here] + ((acc_cr + ar_round) >> params->ar_coeff_shift),
                    grain_min,
                    grain_max);
            }
        }
    }
}
551
// Builds a 256-entry scaling look-up table from a piecewise-linear function.
//
// scaling_points  array of {x, y} pairs, expected in increasing x order
// num_points      number of valid pairs; when <= 0 the table is left untouched
// scaling_lut     output table (at least 256 entries)
//
// Entries below the first point take the first point's y, entries from the
// last point's x up to 255 take the last point's y, and the segments in
// between are interpolated in 16.16 fixed point.
// A segment whose x span is not positive is skipped: it covers no table
// entries, and skipping it avoids the division by zero that two points with
// the same x would otherwise cause.
static void init_scaling_function(int32_t scaling_points[][2], int32_t num_points,
                                  int32_t scaling_lut[]) {
    if (num_points <= 0)
        return;

    for (int32_t i = 0; i < scaling_points[0][0]; i++) scaling_lut[i] = scaling_points[0][1];

    for (int32_t point = 0; point < num_points - 1; point++) {
        int32_t delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
        int32_t delta_x = scaling_points[point + 1][0] - scaling_points[point][0];

        if (delta_x <= 0)
            continue; // empty segment: nothing to write, and no safe slope to compute

        // slope in 16.16 fixed point (1/delta_x is rounded to nearest first)
        int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);

        for (int32_t x = 0; x < delta_x; x++) {
            scaling_lut[scaling_points[point][0] + x] = scaling_points[point][1] +
                (int32_t)((x * delta + 32768) >> 16);
        }
    }

    for (int32_t i = scaling_points[num_points - 1][0]; i < 256; i++)
        scaling_lut[i] = scaling_points[num_points - 1][1];
}
574
// Reads a scaling value from a 256-entry look-up table.
// For 8-bit input the table is indexed directly. For deeper input the table
// position is index >> (bit_depth - 8) and the remaining low bits are used to
// interpolate linearly (with rounding) between that entry and the next one.
// The last entry (255) is returned as-is because it has no successor.
static int32_t scale_lut(int32_t *scaling_lut, int32_t index, int32_t bit_depth) {
    const int32_t extra_bits = bit_depth - 8;
    const int32_t pos        = index >> extra_bits;

    if (extra_bits == 0 || pos == 255)
        return scaling_lut[pos];

    const int32_t fraction = index & ((1 << extra_bits) - 1);
    const int32_t step     = scaling_lut[pos + 1] - scaling_lut[pos];
    const int32_t rounding = 1 << (extra_bits - 1);

    return scaling_lut[pos] + ((step * fraction + rounding) >> extra_bits);
}
588
// Adds grain to one block of 8-bit samples, in place.
//
// Chroma is processed first, so that the luma values feeding the chroma
// scaling are still the un-noised ones. For every chroma sample a luma value
// is taken (the rounded mean of two horizontally adjacent luma samples when
// chroma_subsamp_x is set, otherwise the single luma sample), combined with
// the chroma sample through the *_luma_mult / *_mult / *_offset parameters,
// clamped, and used to look up the scaling value in scaling_lut_cb /
// scaling_lut_cr. The scaled grain is rounded, shifted by
// params->scaling_shift, added to the sample and clamped.
// Luma is then handled the same way using scaling_lut_y indexed by the luma
// sample itself.
//
// A plane is skipped when it has no scaling points (chroma planes are still
// processed when chroma_scaling_from_luma is set, with fixed multipliers).
// The output range is [16, 235] / [16, 240] for luma / chroma when
// params->clip_to_restricted_range is set, and [0, 255] otherwise.
// The block covers 2 * half_luma_height rows by 2 * half_luma_width columns
// of luma.
static void add_noise_to_block(AomFilmGrain *params, uint8_t *luma, uint8_t *cb, uint8_t *cr,
                               int32_t luma_stride, int32_t chroma_stride, int32_t *luma_grain,
                               int32_t *cb_grain, int32_t *cr_grain, int32_t luma_grain_stride,
                               int32_t chroma_grain_stride, int32_t half_luma_height,
                               int32_t half_luma_width, int32_t bit_depth, int32_t chroma_subsamp_y,
                               int32_t chroma_subsamp_x) {
    const int from_luma = params->chroma_scaling_from_luma != 0;

    // fixed-scale multipliers and offsets; replaced by constants when the
    // chroma scaling is derived from luma
    const int32_t cb_mult      = from_luma ? 0 : params->cb_mult - 128;
    const int32_t cb_luma_mult = from_luma ? 64 : params->cb_luma_mult - 128;
    const int32_t cb_offset    = from_luma ? 0 : params->cb_offset - 256;

    const int32_t cr_mult      = from_luma ? 0 : params->cr_mult - 128;
    const int32_t cr_luma_mult = from_luma ? 64 : params->cr_luma_mult - 128;
    const int32_t cr_offset    = from_luma ? 0 : params->cr_offset - 256;

    const int32_t rounding_offset = 1 << (params->scaling_shift - 1);

    const int apply_y  = params->num_y_points > 0;
    const int apply_cb = params->num_cb_points > 0 || from_luma;
    const int apply_cr = params->num_cr_points > 0 || from_luma;

    const int     restricted = params->clip_to_restricted_range != 0;
    const int32_t min_luma   = restricted ? min_luma_legal_range : 0;
    const int32_t max_luma   = restricted ? max_luma_legal_range : 255;
    const int32_t min_chroma = restricted ? min_chroma_legal_range : 0;
    const int32_t max_chroma = restricted ? max_chroma_legal_range : 255;

    const int32_t chroma_rows = half_luma_height << (1 - chroma_subsamp_y);
    const int32_t chroma_cols = half_luma_width << (1 - chroma_subsamp_x);

    for (int32_t row = 0; row < chroma_rows; row++) {
        const int32_t luma_row_offset = (row << chroma_subsamp_y) * luma_stride;

        for (int32_t col = 0; col < chroma_cols; col++) {
            int32_t average_luma;
            if (chroma_subsamp_x) {
                const int32_t luma_pos = luma_row_offset + (col << chroma_subsamp_x);
                average_luma           = (luma[luma_pos] + luma[luma_pos + 1] + 1) >> 1;
            } else {
                average_luma = luma[luma_row_offset + col];
            }

            if (apply_cb) {
                const int32_t pos       = row * chroma_stride + col;
                const int32_t sample    = cb[pos];
                const int32_t lut_index = clamp(
                    ((average_luma * cb_luma_mult + cb_mult * sample) >> 6) + cb_offset,
                    0,
                    (256 << (bit_depth - 8)) - 1);
                const int32_t noise = (scale_lut(scaling_lut_cb, lut_index, 8) *
                                           cb_grain[row * chroma_grain_stride + col] +
                                       rounding_offset) >>
                    params->scaling_shift;

                cb[pos] = (uint8_t)clamp(sample + noise, min_chroma, max_chroma);
            }

            if (apply_cr) {
                const int32_t pos       = row * chroma_stride + col;
                const int32_t sample    = cr[pos];
                const int32_t lut_index = clamp(
                    ((average_luma * cr_luma_mult + cr_mult * sample) >> 6) + cr_offset,
                    0,
                    (256 << (bit_depth - 8)) - 1);
                const int32_t noise = (scale_lut(scaling_lut_cr, lut_index, 8) *
                                           cr_grain[row * chroma_grain_stride + col] +
                                       rounding_offset) >>
                    params->scaling_shift;

                cr[pos] = (uint8_t)clamp(sample + noise, min_chroma, max_chroma);
            }
        }
    }

    if (!apply_y)
        return;

    const int32_t luma_rows = half_luma_height << 1;
    const int32_t luma_cols = half_luma_width << 1;

    for (int32_t row = 0; row < luma_rows; row++) {
        for (int32_t col = 0; col < luma_cols; col++) {
            const int32_t pos    = row * luma_stride + col;
            const int32_t sample = luma[pos];
            const int32_t noise  = (scale_lut(scaling_lut_y, sample, 8) *
                                       luma_grain[row * luma_grain_stride + col] +
                                   rounding_offset) >>
                params->scaling_shift;

            luma[pos] = (uint8_t)clamp(sample + noise, min_luma, max_luma);
        }
    }
}
696
/*
 * Adds film grain to one block of high bit-depth (uint16_t) samples, in place.
 *
 * Chroma is processed before luma, so the luma samples used to drive the chroma
 * scaling lookup are read before any luma grain has been added to this block.
 *
 *   params               film grain parameters; multipliers, offsets, point counts,
 *                        scaling_shift, chroma_scaling_from_luma and
 *                        clip_to_restricted_range are read here
 *   luma, cb, cr         first sample of the block in each plane (modified in place)
 *   luma_stride,
 *   chroma_stride        plane strides, in samples
 *   luma_grain,
 *   cb_grain, cr_grain   grain values to add, addressed with the grain strides below
 *   luma_grain_stride,
 *   chroma_grain_stride  grain buffer strides, in elements
 *   half_luma_height,
 *   half_luma_width      block size in units of two luma samples
 *   bit_depth            sample bit depth; scales offsets, clip limits and the
 *                        scaling lookup
 *   chroma_subsamp_y,
 *   chroma_subsamp_x     chroma subsampling shifts; used as `1 - subsamp` shift
 *                        counts, so they are expected to be 0 or 1
 */
static void add_noise_to_block_hbd(AomFilmGrain *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
                                   int32_t luma_stride, int32_t chroma_stride, int32_t *luma_grain,
                                   int32_t *cb_grain, int32_t *cr_grain, int32_t luma_grain_stride,
                                   int32_t chroma_grain_stride, int32_t half_luma_height,
                                   int32_t half_luma_width, int32_t bit_depth,
                                   int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) {
    int32_t cb_mult      = params->cb_mult - 128; // fixed scale
    int32_t cb_luma_mult = params->cb_luma_mult - 128; // fixed scale
    // offset value depends on the bit depth
    int32_t cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth);

    int32_t cr_mult      = params->cr_mult - 128; // fixed scale
    int32_t cr_luma_mult = params->cr_luma_mult - 128; // fixed scale
    // offset value depends on the bit depth
    int32_t cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth);

    const int32_t rounding_offset = (1 << (params->scaling_shift - 1));
    const int32_t max_pixel       = (256 << (bit_depth - 8)) - 1;

    const int32_t apply_y = params->num_y_points > 0 ? 1 : 0;
    // Chroma grain must also be applied when the chroma scaling is derived from
    // luma, even though no chroma scaling points are present in that mode. This
    // matches the 8-bit add_noise_to_block().
    const int32_t apply_cb =
        (params->num_cb_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
    const int32_t apply_cr =
        (params->num_cr_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;

    if (params->chroma_scaling_from_luma) {
        // The lookup index becomes the (averaged) luma value alone: 64 >> 6 == 1.
        cb_mult      = 0; // fixed scale
        cb_luma_mult = 64; // fixed scale
        cb_offset    = 0;

        cr_mult      = 0; // fixed scale
        cr_luma_mult = 64; // fixed scale
        cr_offset    = 0;
    }

    int32_t min_luma, max_luma, min_chroma, max_chroma;

    if (params->clip_to_restricted_range) {
        // Legal-range limits are defined for 8 bits and scaled up to bit_depth.
        min_luma = min_luma_legal_range << (bit_depth - 8);
        max_luma = max_luma_legal_range << (bit_depth - 8);

        min_chroma = min_chroma_legal_range << (bit_depth - 8);
        max_chroma = max_chroma_legal_range << (bit_depth - 8);
    } else {
        min_luma = min_chroma = 0;
        max_luma = max_chroma = max_pixel;
    }

    const int32_t chroma_height = half_luma_height << (1 - chroma_subsamp_y);
    const int32_t chroma_width  = half_luma_width << (1 - chroma_subsamp_x);

    for (int32_t i = 0; i < chroma_height; i++) {
        const int32_t luma_row = (i << chroma_subsamp_y) * luma_stride;

        for (int32_t j = 0; j < chroma_width; j++) {
            // Luma value co-located with this chroma sample; with horizontal
            // subsampling it is the rounded mean of the two covered luma samples.
            int32_t average_luma;
            if (chroma_subsamp_x) {
                const int32_t luma_col = j << chroma_subsamp_x;
                average_luma = (luma[luma_row + luma_col] + luma[luma_row + luma_col + 1] + 1) >> 1;
            } else {
                average_luma = luma[luma_row + j];
            }

            const int32_t chroma_idx = i * chroma_stride + j;
            const int32_t grain_idx  = i * chroma_grain_stride + j;

            if (apply_cb) {
                const int32_t lut_index = clamp(
                    ((average_luma * cb_luma_mult + cb_mult * cb[chroma_idx]) >> 6) + cb_offset,
                    0,
                    max_pixel);
                const int32_t noise =
                    (scale_lut(scaling_lut_cb, lut_index, bit_depth) * cb_grain[grain_idx] +
                     rounding_offset) >>
                    params->scaling_shift;
                cb[chroma_idx] = clamp(cb[chroma_idx] + noise, min_chroma, max_chroma);
            }
            if (apply_cr) {
                const int32_t lut_index = clamp(
                    ((average_luma * cr_luma_mult + cr_mult * cr[chroma_idx]) >> 6) + cr_offset,
                    0,
                    max_pixel);
                const int32_t noise =
                    (scale_lut(scaling_lut_cr, lut_index, bit_depth) * cr_grain[grain_idx] +
                     rounding_offset) >>
                    params->scaling_shift;
                cr[chroma_idx] = clamp(cr[chroma_idx] + noise, min_chroma, max_chroma);
            }
        }
    }

    if (apply_y) {
        const int32_t luma_height = half_luma_height << 1;
        const int32_t luma_width  = half_luma_width << 1;

        for (int32_t i = 0; i < luma_height; i++) {
            for (int32_t j = 0; j < luma_width; j++) {
                const int32_t luma_idx = i * luma_stride + j;
                const int32_t noise =
                    (scale_lut(scaling_lut_y, luma[luma_idx], bit_depth) *
                         luma_grain[i * luma_grain_stride + j] +
                     rounding_offset) >>
                    params->scaling_shift;
                luma[luma_idx] = clamp(luma[luma_idx] + noise, min_luma, max_luma);
            }
        }
    }
}
805
/*
 * Compares two film grain parameter sets.
 *
 * Returns 1 when every field checked below is equal in pars_a and pars_b, and 0
 * otherwise. The scaling-point and AR-coefficient arrays are compared over their
 * full storage size, not just over the number of points in use.
 */
int32_t film_grain_params_equal(AomFilmGrain *pars_a, AomFilmGrain *pars_b) {
    // Scalar flags, shifts, multipliers, offsets and point counts.
    const int scalars_match = pars_a->apply_grain == pars_b->apply_grain &&
        pars_a->overlap_flag == pars_b->overlap_flag &&
        pars_a->clip_to_restricted_range == pars_b->clip_to_restricted_range &&
        pars_a->chroma_scaling_from_luma == pars_b->chroma_scaling_from_luma &&
        pars_a->grain_scale_shift == pars_b->grain_scale_shift &&
        pars_a->ar_coeff_shift == pars_b->ar_coeff_shift &&
        pars_a->cb_mult == pars_b->cb_mult &&
        pars_a->cb_luma_mult == pars_b->cb_luma_mult &&
        pars_a->cb_offset == pars_b->cb_offset &&
        pars_a->cr_mult == pars_b->cr_mult &&
        pars_a->cr_luma_mult == pars_b->cr_luma_mult &&
        pars_a->cr_offset == pars_b->cr_offset &&
        pars_a->scaling_shift == pars_b->scaling_shift &&
        pars_a->ar_coeff_lag == pars_b->ar_coeff_lag &&
        pars_a->num_y_points == pars_b->num_y_points &&
        pars_a->num_cb_points == pars_b->num_cb_points &&
        pars_a->num_cr_points == pars_b->num_cr_points;
    if (!scalars_match)
        return 0;

    // Scaling function points for each plane.
    const int points_match =
        memcmp(pars_a->scaling_points_y,
               pars_b->scaling_points_y,
               sizeof(pars_b->scaling_points_y)) == 0 &&
        memcmp(pars_a->scaling_points_cb,
               pars_b->scaling_points_cb,
               sizeof(pars_b->scaling_points_cb)) == 0 &&
        memcmp(pars_a->scaling_points_cr,
               pars_b->scaling_points_cr,
               sizeof(pars_b->scaling_points_cr)) == 0;
    if (!points_match)
        return 0;

    // Auto-regressive coefficients for each plane.
    const int coeffs_match =
        memcmp(pars_a->ar_coeffs_y, pars_b->ar_coeffs_y, sizeof(pars_b->ar_coeffs_y)) == 0 &&
        memcmp(pars_a->ar_coeffs_cb, pars_b->ar_coeffs_cb, sizeof(pars_b->ar_coeffs_cb)) == 0 &&
        memcmp(pars_a->ar_coeffs_cr, pars_b->ar_coeffs_cr, sizeof(pars_b->ar_coeffs_cr)) == 0;

    return coeffs_match ? 1 : 0;
}
871
/*
 * Copies a width x height rectangle of samples from src to dst, row by row.
 *
 * src_stride, dst_stride and width are given in samples. When use_high_bit_depth
 * is non-zero each sample occupies two bytes, so the byte count per row and the
 * byte step between rows are doubled.
 */
void fgn_copy_rect(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride,
                   int32_t width, int32_t height, int32_t use_high_bit_depth) {
    int32_t bytes_per_sample = 1;
    if (use_high_bit_depth)
        bytes_per_sample = 2;

    for (; height != 0; --height) {
        svt_memcpy(dst, src, width * sizeof(uint8_t) * bytes_per_sample);
        src += src_stride * bytes_per_sample;
        dst += dst_stride * bytes_per_sample;
    }
}
883
copy_area(int32_t * src,int32_t src_stride,int32_t * dst,int32_t dst_stride,int32_t width,int32_t height)884 static void copy_area(int32_t *src, int32_t src_stride, int32_t *dst, int32_t dst_stride,
885 int32_t width, int32_t height) {
886 while (height) {
887 if (svt_memcpy != NULL)
888 svt_memcpy(dst, src, width * sizeof(*src));
889 else
890 svt_memcpy_c(dst, src, width * sizeof(*src));
891 src += src_stride;
892 dst += dst_stride;
893 --height;
894 }
895 return;
896 }
897
ver_boundary_overlap(int32_t * left_block,int32_t left_stride,int32_t * right_block,int32_t right_stride,int32_t * dst_block,int32_t dst_stride,int32_t width,int32_t height)898 static void ver_boundary_overlap(int32_t *left_block, int32_t left_stride, int32_t *right_block,
899 int32_t right_stride, int32_t *dst_block, int32_t dst_stride,
900 int32_t width, int32_t height) {
901 if (width == 1) {
902 while (height) {
903 *dst_block = clamp(
904 (*left_block * 23 + *right_block * 22 + 16) >> 5, grain_min, grain_max);
905 left_block += left_stride;
906 right_block += right_stride;
907 dst_block += dst_stride;
908 --height;
909 }
910 return;
911 } else if (width == 2) {
912 while (height) {
913 dst_block[0] = clamp(
914 (27 * left_block[0] + 17 * right_block[0] + 16) >> 5, grain_min, grain_max);
915 dst_block[1] = clamp(
916 (17 * left_block[1] + 27 * right_block[1] + 16) >> 5, grain_min, grain_max);
917 left_block += left_stride;
918 right_block += right_stride;
919 dst_block += dst_stride;
920 --height;
921 }
922 return;
923 }
924 }
925
hor_boundary_overlap(int32_t * top_block,int32_t top_stride,int32_t * bottom_block,int32_t bottom_stride,int32_t * dst_block,int32_t dst_stride,int32_t width,int32_t height)926 static void hor_boundary_overlap(int32_t *top_block, int32_t top_stride, int32_t *bottom_block,
927 int32_t bottom_stride, int32_t *dst_block, int32_t dst_stride,
928 int32_t width, int32_t height) {
929 if (height == 1) {
930 while (width) {
931 *dst_block = clamp(
932 (*top_block * 23 + *bottom_block * 22 + 16) >> 5, grain_min, grain_max);
933 ++top_block;
934 ++bottom_block;
935 ++dst_block;
936 --width;
937 }
938 return;
939 } else if (height == 2) {
940 while (width) {
941 dst_block[0] = clamp(
942 (27 * top_block[0] + 17 * bottom_block[0] + 16) >> 5, grain_min, grain_max);
943 dst_block[dst_stride] = clamp(
944 (17 * top_block[top_stride] + 27 * bottom_block[bottom_stride] + 16) >> 5,
945 grain_min,
946 grain_max);
947 ++top_block;
948 ++bottom_block;
949 ++dst_block;
950 --width;
951 }
952 return;
953 }
954 }
955
/*
 * Synthesizes film grain from `params` and adds it, in place, to a whole picture.
 *
 * Steps performed here:
 *   1. Seed the random register, derive the grain template sizes and the grain
 *      value range for the given bit depth.
 *   2. Set up the work buffers (init_arrays) and generate one luma and two
 *      chroma grain templates.
 *   3. Build the scaling lookup tables (chroma reuses the luma table when
 *      chroma_scaling_from_luma is set).
 *   4. Walk the picture in 32x32 luma blocks; for each block pick a pseudo-random
 *      offset into the templates and add the grain. When params->overlap_flag is
 *      set, the first two luma columns/rows of a block are blended with grain kept
 *      from the left/top neighbor (column and line buffers) before being applied.
 *   5. Release the work buffers (dealloc_arrays).
 *
 *   params              film grain parameters
 *   luma, cb, cr        plane base pointers; reinterpreted as uint16_t * when
 *                       use_high_bit_depth is non-zero
 *   height, width       picture size in luma samples
 *   luma_stride,
 *   chroma_stride       plane strides, in samples (they are applied to the
 *                       uint16_t pointers directly in the high bit-depth path)
 *   use_high_bit_depth  selects add_noise_to_block_hbd over add_noise_to_block
 *   chroma_subsamp_y,
 *   chroma_subsamp_x    chroma subsampling shifts; used as `1 - subsamp` and
 *                       `2 >> subsamp`, so they are expected to be 0 or 1
 */
void svt_av1_add_film_grain_run(AomFilmGrain *params, uint8_t *luma, uint8_t *cb, uint8_t *cr,
                                int32_t height, int32_t width, int32_t luma_stride,
                                int32_t chroma_stride, int32_t use_high_bit_depth,
                                int32_t chroma_subsamp_y, int32_t chroma_subsamp_x) {
    int32_t **pred_pos_luma;
    int32_t **pred_pos_chroma;
    int32_t * luma_grain_block;
    int32_t * cb_grain_block;
    int32_t * cr_grain_block;

    // Two-row buffers (addressed with the picture strides) holding grain for the
    // overlap with the block row below.
    int32_t *y_line_buf;
    int32_t *cb_line_buf;
    int32_t *cr_line_buf;

    // Narrow column buffers holding grain for the overlap with the next block to
    // the right.
    int32_t *y_col_buf;
    int32_t *cb_col_buf;
    int32_t *cr_col_buf;

    random_register = params->random_seed;

    int32_t left_pad   = 3;
    int32_t right_pad  = 3; // padding to offset for AR coefficients
    int32_t top_pad    = 3;
    int32_t bottom_pad = 0;

    int32_t ar_padding = 3; // maximum lag used for stabilization of AR coefficients

    luma_subblock_size_y = 32;
    luma_subblock_size_x = 32;

    chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
    chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;

    // Initial padding is only needed for generation of
    // film grain templates (to stabilize the AR process)
    // Only a 64x64 luma and 32x32 chroma part of a template
    // is used later for adding grain, padding can be discarded

    int32_t luma_block_size_y = top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad;
    int32_t luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 +
        2 * ar_padding + right_pad;

    int32_t chroma_block_size_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
        chroma_subblock_size_y * 2 + bottom_pad;
    int32_t chroma_block_size_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
        chroma_subblock_size_x * 2 + (2 >> chroma_subsamp_x) * ar_padding + right_pad;

    // The grain templates are stored densely: stride equals template width.
    int32_t luma_grain_stride   = luma_block_size_x;
    int32_t chroma_grain_stride = chroma_block_size_x;

    int32_t overlap   = params->overlap_flag;
    int32_t bit_depth = params->bit_depth;

    // Grain values are signed around zero; these limits are what the overlap
    // helpers clamp blended grain to.
    grain_center = 128 << (bit_depth - 8);
    grain_min    = 0 - grain_center;
    grain_max    = (256 << (bit_depth - 8)) - 1 - grain_center;

    // NOTE(review): init_arrays is defined elsewhere; presumably it allocates every
    // buffer whose address is passed here, and dealloc_arrays at the end releases
    // them - confirm, including how allocation failure is reported.
    init_arrays(params,
                luma_stride,
                chroma_stride,
                &pred_pos_luma,
                &pred_pos_chroma,
                &luma_grain_block,
                &cb_grain_block,
                &cr_grain_block,
                &y_line_buf,
                &cb_line_buf,
                &cr_line_buf,
                &y_col_buf,
                &cb_col_buf,
                &cr_col_buf,
                luma_block_size_y * luma_block_size_x,
                chroma_block_size_y * chroma_block_size_x,
                chroma_subsamp_y,
                chroma_subsamp_x);

    // Grain templates are generated once per call and then sampled at a
    // pseudo-random offset for every block.
    generate_luma_grain_block(params,
                              pred_pos_luma,
                              luma_grain_block,
                              luma_block_size_y,
                              luma_block_size_x,
                              luma_grain_stride,
                              left_pad,
                              top_pad,
                              right_pad,
                              bottom_pad);

    generate_chroma_grain_blocks(params,
                                 // pred_pos_luma,
                                 pred_pos_chroma,
                                 luma_grain_block,
                                 cb_grain_block,
                                 cr_grain_block,
                                 luma_grain_stride,
                                 chroma_block_size_y,
                                 chroma_block_size_x,
                                 chroma_grain_stride,
                                 left_pad,
                                 top_pad,
                                 right_pad,
                                 bottom_pad,
                                 chroma_subsamp_y,
                                 chroma_subsamp_x);

    init_scaling_function(params->scaling_points_y, params->num_y_points, scaling_lut_y);

    if (params->chroma_scaling_from_luma) {
        // Chroma uses a copy of the luma scaling table (256 entries).
        svt_memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
        svt_memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
    } else {
        init_scaling_function(params->scaling_points_cb, params->num_cb_points, scaling_lut_cb);
        init_scaling_function(params->scaling_points_cr, params->num_cr_points, scaling_lut_cr);
    }
    // x and y count in units of two luma samples; each step of 16 is one 32-sample
    // luma block.
    for (int32_t y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
        // The generator is re-initialized for every block row from the row's luma
        // position and the seed.
        init_random_generator(y * 2, params->random_seed);

        for (int32_t x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
            // One 8-bit random value per block: high nibble -> x offset,
            // low nibble -> y offset into the grain templates.
            int32_t offset_y = get_random_number(8);
            int32_t offset_x = (offset_y >> 4) & 15;
            offset_y &= 15;

            // NOTE(review): the y offset uses left_pad and the x offset uses top_pad,
            // unlike the chroma offsets below. Both pads are 3 here so the result is
            // the same; confirm whether the names are simply swapped.
            int32_t luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1);
            int32_t luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1);

            int32_t chroma_offset_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
                offset_y * (2 >> chroma_subsamp_y);
            int32_t chroma_offset_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
                offset_x * (2 >> chroma_subsamp_x);

            if (overlap && x) {
                // Blend the grain saved from the left neighbor (column buffers) with
                // the leftmost columns of this block's grain; the result is written
                // back into the column buffers.
                ver_boundary_overlap(
                    y_col_buf,
                    2,
                    luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x,
                    luma_grain_stride,
                    y_col_buf,
                    2,
                    2,
                    AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));

                ver_boundary_overlap(
                    cb_col_buf,
                    2 >> chroma_subsamp_x,
                    cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x,
                    chroma_grain_stride,
                    cb_col_buf,
                    2 >> chroma_subsamp_x,
                    2 >> chroma_subsamp_x,
                    AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
                           (height - (y << 1)) >> chroma_subsamp_y));

                ver_boundary_overlap(
                    cr_col_buf,
                    2 >> chroma_subsamp_x,
                    cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x,
                    chroma_grain_stride,
                    cr_col_buf,
                    2 >> chroma_subsamp_x,
                    2 >> chroma_subsamp_x,
                    AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
                           (height - (y << 1)) >> chroma_subsamp_y));

                // For every block row but the first, skip the top two luma rows of
                // the strip: they are covered by the top-overlap pass below, which
                // starts at column x.
                int32_t i = y ? 1 : 0;

                // Apply the blended column strip (two luma columns wide).
                if (use_high_bit_depth) {
                    add_noise_to_block_hbd(
                        params,
                        (uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
                        (uint16_t *)cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                            (x << (1 - chroma_subsamp_x)),
                        (uint16_t *)cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                            (x << (1 - chroma_subsamp_x)),
                        luma_stride,
                        chroma_stride,
                        y_col_buf + i * 4,
                        cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
                        cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
                        2,
                        (2 - chroma_subsamp_x),
                        AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
                        1,
                        bit_depth,
                        chroma_subsamp_y,
                        chroma_subsamp_x);
                } else {
                    add_noise_to_block(
                        params,
                        luma + ((y + i) << 1) * luma_stride + (x << 1),
                        cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                            (x << (1 - chroma_subsamp_x)),
                        cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                            (x << (1 - chroma_subsamp_x)),
                        luma_stride,
                        chroma_stride,
                        y_col_buf + i * 4,
                        cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
                        cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
                        2,
                        (2 - chroma_subsamp_x),
                        AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
                        1,
                        bit_depth,
                        chroma_subsamp_y,
                        chroma_subsamp_x);
                }
            }

            if (overlap && y) {
                if (x) {
                    // Corner: blend the line buffer with the already column-blended
                    // grain held in the column buffers.
                    ASSERT(y_col_buf != NULL);
                    hor_boundary_overlap(y_line_buf + (x << 1),
                                         luma_stride,
                                         y_col_buf,
                                         2,
                                         y_line_buf + (x << 1),
                                         luma_stride,
                                         2,
                                         2);

                    hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
                                         chroma_stride,
                                         cb_col_buf,
                                         2 >> chroma_subsamp_x,
                                         cb_line_buf + x * (2 >> chroma_subsamp_x),
                                         chroma_stride,
                                         2 >> chroma_subsamp_x,
                                         2 >> chroma_subsamp_y);

                    hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
                                         chroma_stride,
                                         cr_col_buf,
                                         2 >> chroma_subsamp_x,
                                         cr_line_buf + x * (2 >> chroma_subsamp_x),
                                         chroma_stride,
                                         2 >> chroma_subsamp_x,
                                         2 >> chroma_subsamp_y);
                }

                // Blend the rest of the top rows: grain saved from the block above
                // (line buffers) with the top rows of this block's grain. When x is
                // non-zero the first two luma columns are skipped because the corner
                // was handled above.
                hor_boundary_overlap(y_line_buf + ((x ? x + 1 : 0) << 1),
                                     luma_stride,
                                     luma_grain_block + luma_offset_y * luma_grain_stride +
                                         luma_offset_x + (x ? 2 : 0),
                                     luma_grain_stride,
                                     y_line_buf + ((x ? x + 1 : 0) << 1),
                                     luma_stride,
                                     AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1),
                                            width - ((x ? x + 1 : 0) << 1)),
                                     2);

                hor_boundary_overlap(
                    cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
                    chroma_stride,
                    cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
                        ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
                    chroma_grain_stride,
                    cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
                    chroma_stride,
                    AOMMIN(chroma_subblock_size_x - ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
                           (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
                    2 >> chroma_subsamp_y);

                hor_boundary_overlap(
                    cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
                    chroma_stride,
                    cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
                        ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
                    chroma_grain_stride,
                    cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
                    chroma_stride,
                    AOMMIN(chroma_subblock_size_x - ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
                           (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
                    2 >> chroma_subsamp_y);

                // Apply the blended top strip (two luma rows high) from the line
                // buffers, across the full width of this block.
                if (use_high_bit_depth) {
                    add_noise_to_block_hbd(
                        params,
                        (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
                        (uint16_t *)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
                            (x << ((1 - chroma_subsamp_x))),
                        (uint16_t *)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
                            (x << ((1 - chroma_subsamp_x))),
                        luma_stride,
                        chroma_stride,
                        y_line_buf + (x << 1),
                        cb_line_buf + (x << (1 - chroma_subsamp_x)),
                        cr_line_buf + (x << (1 - chroma_subsamp_x)),
                        luma_stride,
                        chroma_stride,
                        1,
                        AOMMIN(luma_subblock_size_x >> 1, width / 2 - x),
                        bit_depth,
                        chroma_subsamp_y,
                        chroma_subsamp_x);
                } else {
                    add_noise_to_block(params,
                                       luma + (y << 1) * luma_stride + (x << 1),
                                       cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
                                           (x << ((1 - chroma_subsamp_x))),
                                       cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
                                           (x << ((1 - chroma_subsamp_x))),
                                       luma_stride,
                                       chroma_stride,
                                       y_line_buf + (x << 1),
                                       cb_line_buf + (x << (1 - chroma_subsamp_x)),
                                       cr_line_buf + (x << (1 - chroma_subsamp_x)),
                                       luma_stride,
                                       chroma_stride,
                                       1,
                                       AOMMIN(luma_subblock_size_x >> 1, width / 2 - x),
                                       bit_depth,
                                       chroma_subsamp_y,
                                       chroma_subsamp_x);
                }
            }

            // Interior of the block: skip the strips already handled by the overlap
            // passes above (i / j are 1 when a top / left strip was applied).
            int32_t i = overlap && y ? 1 : 0;
            int32_t j = overlap && x ? 1 : 0;

            if (use_high_bit_depth) {
                add_noise_to_block_hbd(
                    params,
                    (uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
                    (uint16_t *)cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                        ((x + j) << (1 - chroma_subsamp_x)),
                    (uint16_t *)cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                        ((x + j) << (1 - chroma_subsamp_x)),
                    luma_stride,
                    chroma_stride,
                    luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
                        luma_offset_x + (j << 1),
                    cb_grain_block +
                        (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
                        chroma_offset_x + (j << (1 - chroma_subsamp_x)),
                    cr_grain_block +
                        (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
                        chroma_offset_x + (j << (1 - chroma_subsamp_x)),
                    luma_grain_stride,
                    chroma_grain_stride,
                    AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
                    AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j,
                    bit_depth,
                    chroma_subsamp_y,
                    chroma_subsamp_x);
            } else {
                add_noise_to_block(
                    params,
                    luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
                    cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                        ((x + j) << (1 - chroma_subsamp_x)),
                    cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
                        ((x + j) << (1 - chroma_subsamp_x)),
                    luma_stride,
                    chroma_stride,
                    luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
                        luma_offset_x + (j << 1),
                    cb_grain_block +
                        (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
                        chroma_offset_x + (j << (1 - chroma_subsamp_x)),
                    cr_grain_block +
                        (chroma_offset_y + (i << (1 - chroma_subsamp_y))) * chroma_grain_stride +
                        chroma_offset_x + (j << (1 - chroma_subsamp_x)),
                    luma_grain_stride,
                    chroma_grain_stride,
                    AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
                    AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j,
                    bit_depth,
                    chroma_subsamp_y,
                    chroma_subsamp_x);
            }

            // Save grain for the neighbors that will be processed later. The order
            // matters: the column buffers are read into the line buffers before they
            // are overwritten with this block's right-edge grain.
            if (overlap) {
                if (x) {
                    // Copy overlapped column buffer to line buffer
                    copy_area(y_col_buf + (luma_subblock_size_y << 1),
                              2,
                              y_line_buf + (x << 1),
                              luma_stride,
                              2,
                              2);

                    copy_area(cb_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
                              2 >> chroma_subsamp_x,
                              cb_line_buf + (x << (1 - chroma_subsamp_x)),
                              chroma_stride,
                              2 >> chroma_subsamp_x,
                              2 >> chroma_subsamp_y);

                    copy_area(cr_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
                              2 >> chroma_subsamp_x,
                              cr_line_buf + (x << (1 - chroma_subsamp_x)),
                              chroma_stride,
                              2 >> chroma_subsamp_x,
                              2 >> chroma_subsamp_y);
                }

                // Copy grain to the line buffer for overlap with a bottom block
                copy_area(luma_grain_block +
                              (luma_offset_y + luma_subblock_size_y) * luma_grain_stride +
                              luma_offset_x + ((x ? 2 : 0)),
                          luma_grain_stride,
                          y_line_buf + ((x ? x + 1 : 0) << 1),
                          luma_stride,
                          AOMMIN(luma_subblock_size_x, width - (x << 1)) - (x ? 2 : 0),
                          2);

                copy_area(cb_grain_block +
                              (chroma_offset_y + chroma_subblock_size_y) * chroma_grain_stride +
                              chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
                          chroma_grain_stride,
                          cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
                          chroma_stride,
                          AOMMIN(chroma_subblock_size_x, ((width - (x << 1)) >> chroma_subsamp_x)) -
                              (x ? 2 >> chroma_subsamp_x : 0),
                          2 >> chroma_subsamp_y);

                copy_area(cr_grain_block +
                              (chroma_offset_y + chroma_subblock_size_y) * chroma_grain_stride +
                              chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
                          chroma_grain_stride,
                          cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
                          chroma_stride,
                          AOMMIN(chroma_subblock_size_x, ((width - (x << 1)) >> chroma_subsamp_x)) -
                              (x ? 2 >> chroma_subsamp_x : 0),
                          2 >> chroma_subsamp_y);

                // Copy grain to the column buffer for overlap with the next block to
                // the right

                copy_area(luma_grain_block + luma_offset_y * luma_grain_stride + luma_offset_x +
                              luma_subblock_size_x,
                          luma_grain_stride,
                          y_col_buf,
                          2,
                          2,
                          AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));

                copy_area(cb_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
                              chroma_subblock_size_x,
                          chroma_grain_stride,
                          cb_col_buf,
                          2 >> chroma_subsamp_x,
                          2 >> chroma_subsamp_x,
                          AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
                                 (height - (y << 1)) >> chroma_subsamp_y));

                copy_area(cr_grain_block + chroma_offset_y * chroma_grain_stride + chroma_offset_x +
                              chroma_subblock_size_x,
                          chroma_grain_stride,
                          cr_col_buf,
                          2 >> chroma_subsamp_x,
                          2 >> chroma_subsamp_x,
                          AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
                                 (height - (y << 1)) >> chroma_subsamp_y));
            }
        }
    }

    dealloc_arrays(params,
                   &pred_pos_luma,
                   &pred_pos_chroma,
                   &luma_grain_block,
                   &cb_grain_block,
                   &cr_grain_block,
                   &y_line_buf,
                   &cb_line_buf,
                   &cr_line_buf,
                   &y_col_buf,
                   &cb_col_buf,
                   &cr_col_buf);
}
1426
1427 /*
1428 void av1_film_grain_write_updated(const AomFilmGrain *pars,
1429 int32_t monochrome,
1430 struct AomWriteBitBuffer *wb) {
1431 svt_aom_wb_write_literal(wb, pars->num_y_points, 4); // max 14
1432 for (int32_t i = 0; i < pars->num_y_points; i++) {
1433 svt_aom_wb_write_literal(wb, pars->scaling_points_y[i][0], 8);
1434 svt_aom_wb_write_literal(wb, pars->scaling_points_y[i][1], 8);
1435 }
1436
1437 if (!monochrome)
1438 svt_aom_wb_write_bit(wb, pars->chroma_scaling_from_luma);
1439
1440 if (!(monochrome || pars->chroma_scaling_from_luma)) {
1441 svt_aom_wb_write_literal(wb, pars->num_cb_points, 4); // max 10
1442 for (int32_t i = 0; i < pars->num_cb_points; i++) {
1443 svt_aom_wb_write_literal(wb, pars->scaling_points_cb[i][0], 8);
1444 svt_aom_wb_write_literal(wb, pars->scaling_points_cb[i][1], 8);
1445 }
1446
1447 svt_aom_wb_write_literal(wb, pars->num_cr_points, 4); // max 10
1448 for (int32_t i = 0; i < pars->num_cr_points; i++) {
1449 svt_aom_wb_write_literal(wb, pars->scaling_points_cr[i][0], 8);
1450 svt_aom_wb_write_literal(wb, pars->scaling_points_cr[i][1], 8);
1451 }
1452 }
1453
1454 svt_aom_wb_write_literal(wb, pars->scaling_shift - 8, 2); // 8 + value
1455
1456 // AR coefficients
    // Only sent if the corresponding scaling function has
1458 // more than 0 points
1459 svt_aom_wb_write_literal(wb, pars->ar_coeff_lag, 2);
1460
1461 int32_t num_pos_luma = 2 * pars->ar_coeff_lag * (pars->ar_coeff_lag + 1);
1462 int32_t num_pos_chroma = num_pos_luma;
1463 if (pars->num_y_points > 0) ++num_pos_chroma;
1464
1465 if (pars->num_y_points)
1466 for (int32_t i = 0; i < num_pos_luma; i++)
1467 svt_aom_wb_write_literal(wb, pars->ar_coeffs_y[i] + 128, 8);
1468
1469 if (pars->num_cb_points || pars->chroma_scaling_from_luma)
1470 for (int32_t i = 0; i < num_pos_chroma; i++)
1471 svt_aom_wb_write_literal(wb, pars->ar_coeffs_cb[i] + 128, 8);
1472
1473 if (pars->num_cr_points || pars->chroma_scaling_from_luma)
1474 for (int32_t i = 0; i < num_pos_chroma; i++)
1475 svt_aom_wb_write_literal(wb, pars->ar_coeffs_cr[i] + 128, 8);
1476
    svt_aom_wb_write_literal(wb, pars->ar_coeff_shift - 6, 2); // 6 + value
1478
1479 svt_aom_wb_write_literal(wb, pars->grain_scale_shift, 2);
1480
1481 if (pars->num_cb_points) {
1482 svt_aom_wb_write_literal(wb, pars->cb_mult, 8);
1483 svt_aom_wb_write_literal(wb, pars->cb_luma_mult, 8);
1484 svt_aom_wb_write_literal(wb, pars->cb_offset, 9);
1485 }
1486
1487 if (pars->num_cr_points) {
1488 svt_aom_wb_write_literal(wb, pars->cr_mult, 8);
1489 svt_aom_wb_write_literal(wb, pars->cr_luma_mult, 8);
1490 svt_aom_wb_write_literal(wb, pars->cr_offset, 9);
1491 }
1492
1493 svt_aom_wb_write_bit(wb, pars->overlap_flag);
1494
1495 svt_aom_wb_write_bit(wb, pars->clip_to_restricted_range);
1496 }
1497 */
1498 /*
1499 void av1_film_grain_read_updated(AomFilmGrain *pars,
1500 int32_t monochrome,
1501 struct aom_read_bit_buffer *rb,
1502 struct aom_internal_error_info *error) {
1503 // Scaling functions parameters
1504 pars->num_y_points = aom_rb_read_literal(rb, 4); // max 14
1505 if (pars->num_y_points > 14)
1506 aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1507 "Number of points for film grain luma scaling function "
1508 "exceeds the maximum value.");
1509 for (int32_t i = 0; i < pars->num_y_points; i++) {
1510 pars->scaling_points_y[i][0] = aom_rb_read_literal(rb, 8);
1511 if (i && pars->scaling_points_y[i - 1][0] >= pars->scaling_points_y[i][0])
1512 aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
                "First coordinate of the scaling function points "
1514 "shall be increasing.");
1515 pars->scaling_points_y[i][1] = aom_rb_read_literal(rb, 8);
1516 }
1517
1518 if (!monochrome)
1519 pars->chroma_scaling_from_luma = aom_rb_read_bit(rb);
1520
1521 if (monochrome || pars->chroma_scaling_from_luma) {
1522 pars->num_cb_points = 0;
1523 pars->num_cr_points = 0;
1524 } else {
1525 pars->num_cb_points = aom_rb_read_literal(rb, 4); // max 10
1526 if (pars->num_cb_points > 10)
1527 aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1528 "Number of points for film grain cb scaling function "
1529 "exceeds the maximum value.");
1530 for (int32_t i = 0; i < pars->num_cb_points; i++) {
1531 pars->scaling_points_cb[i][0] = aom_rb_read_literal(rb, 8);
1532 if (i &&
1533 pars->scaling_points_cb[i - 1][0] >= pars->scaling_points_cb[i][0])
1534 aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1535 "First coordinate of the scaling function points "
1536 "shall be increasing.");
1537 pars->scaling_points_cb[i][1] = aom_rb_read_literal(rb, 8);
1538 }
1539
1540 pars->num_cr_points = aom_rb_read_literal(rb, 4); // max 10
1541 if (pars->num_cr_points > 10)
1542 aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1543 "Number of points for film grain cr scaling function "
1544 "exceeds the maximum value.");
1545 for (int32_t i = 0; i < pars->num_cr_points; i++) {
1546 pars->scaling_points_cr[i][0] = aom_rb_read_literal(rb, 8);
1547 if (i &&
1548 pars->scaling_points_cr[i - 1][0] >= pars->scaling_points_cr[i][0])
1549 aom_internal_error(error, AOM_CODEC_UNSUP_BITSTREAM,
1550 "First coordinate of the scaling function points "
1551 "shall be increasing.");
1552 pars->scaling_points_cr[i][1] = aom_rb_read_literal(rb, 8);
1553 }
1554 }
1555
1556 pars->scaling_shift = aom_rb_read_literal(rb, 2) + 8; // 8 + value
1557
1558 // AR coefficients
    // Only sent if the corresponding scaling function has
1560 // more than 0 points
1561 pars->ar_coeff_lag = aom_rb_read_literal(rb, 2);
1562
1563 int32_t num_pos_luma = 2 * pars->ar_coeff_lag * (pars->ar_coeff_lag + 1);
1564 int32_t num_pos_chroma = num_pos_luma;
1565 if (pars->num_y_points > 0) ++num_pos_chroma;
1566
1567 if (pars->num_y_points)
1568 for (int32_t i = 0; i < num_pos_luma; i++)
1569 pars->ar_coeffs_y[i] = aom_rb_read_literal(rb, 8) - 128;
1570
1571 if (pars->num_cb_points || pars->chroma_scaling_from_luma)
1572 for (int32_t i = 0; i < num_pos_chroma; i++)
1573 pars->ar_coeffs_cb[i] = aom_rb_read_literal(rb, 8) - 128;
1574
1575 if (pars->num_cr_points || pars->chroma_scaling_from_luma)
1576 for (int32_t i = 0; i < num_pos_chroma; i++)
1577 pars->ar_coeffs_cr[i] = aom_rb_read_literal(rb, 8) - 128;
1578
1579 pars->ar_coeff_shift = aom_rb_read_literal(rb, 2) + 6; // 6 + value
1580
1581 pars->grain_scale_shift = aom_rb_read_literal(rb, 2);
1582
1583 if (pars->num_cb_points) {
1584 pars->cb_mult = aom_rb_read_literal(rb, 8);
1585 pars->cb_luma_mult = aom_rb_read_literal(rb, 8);
1586 pars->cb_offset = aom_rb_read_literal(rb, 9);
1587 }
1588
1589 if (pars->num_cr_points) {
1590 pars->cr_mult = aom_rb_read_literal(rb, 8);
1591 pars->cr_luma_mult = aom_rb_read_literal(rb, 8);
1592 pars->cr_offset = aom_rb_read_literal(rb, 9);
1593 }
1594
1595 pars->overlap_flag = aom_rb_read_bit(rb);
1596
1597 pars->clip_to_restricted_range = aom_rb_read_bit(rb);
1598 }
1599 */
1600