1 /*
2 * Copyright (c) 2020-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file decode_av1_filmgrain_feature_g12.cpp
//! \brief    Implements the AV1 decode film grain feature
25 //!
26
27 #include "decode_av1_filmgrain_feature_g12.h"
28 #include "decode_av1_feature_manager_g12.h"
29 #include "codechal_utilities.h"
30 #include "decode_av1_feature_defs_g12.h"
31 #include "mhw_render_g12_X.h"
32 #include "decode_utils.h"
33
34 namespace decode
35 {
36
37 // Constant values
// Samples with a Gaussian distribution in the range [-2048, 2047] (12 bits),
// with zero mean and a standard deviation of about 512.
// The values should be divided by 4 for the 10-bit range and by 16 for the 8-bit range.
41 static const int16_t defaultGaussianSequence[2048] = {
42 56, 568, -180, 172, 124, -84, 172, -64, -900, 24, 820,
43 224, 1248, 996, 272, -8, -916, -388, -732, -104, -188, 800,
44 112, -652, -320, -376, 140, -252, 492, -168, 44, -788, 588,
45 -584, 500, -228, 12, 680, 272, -476, 972, -100, 652, 368,
46 432, -196, -720, -192, 1000, -332, 652, -136, -552, -604, -4,
47 192, -220, -136, 1000, -52, 372, -96, -624, 124, -24, 396,
48 540, -12, -104, 640, 464, 244, -208, -84, 368, -528, -740,
49 248, -968, -848, 608, 376, -60, -292, -40, -156, 252, -292,
50 248, 224, -280, 400, -244, 244, -60, 76, -80, 212, 532,
51 340, 128, -36, 824, -352, -60, -264, -96, -612, 416, -704,
52 220, -204, 640, -160, 1220, -408, 900, 336, 20, -336, -96,
53 -792, 304, 48, -28, -1232, -1172, -448, 104, -292, -520, 244,
54 60, -948, 0, -708, 268, 108, 356, -548, 488, -344, -136,
55 488, -196, -224, 656, -236, -1128, 60, 4, 140, 276, -676,
56 -376, 168, -108, 464, 8, 564, 64, 240, 308, -300, -400,
57 -456, -136, 56, 120, -408, -116, 436, 504, -232, 328, 844,
58 -164, -84, 784, -168, 232, -224, 348, -376, 128, 568, 96,
59 -1244, -288, 276, 848, 832, -360, 656, 464, -384, -332, -356,
60 728, -388, 160, -192, 468, 296, 224, 140, -776, -100, 280,
61 4, 196, 44, -36, -648, 932, 16, 1428, 28, 528, 808,
62 772, 20, 268, 88, -332, -284, 124, -384, -448, 208, -228,
63 -1044, -328, 660, 380, -148, -300, 588, 240, 540, 28, 136,
64 -88, -436, 256, 296, -1000, 1400, 0, -48, 1056, -136, 264,
65 -528, -1108, 632, -484, -592, -344, 796, 124, -668, -768, 388,
66 1296, -232, -188, -200, -288, -4, 308, 100, -168, 256, -500,
67 204, -508, 648, -136, 372, -272, -120, -1004, -552, -548, -384,
68 548, -296, 428, -108, -8, -912, -324, -224, -88, -112, -220,
69 -100, 996, -796, 548, 360, -216, 180, 428, -200, -212, 148,
70 96, 148, 284, 216, -412, -320, 120, -300, -384, -604, -572,
71 -332, -8, -180, -176, 696, 116, -88, 628, 76, 44, -516,
72 240, -208, -40, 100, -592, 344, -308, -452, -228, 20, 916,
73 -1752, -136, -340, -804, 140, 40, 512, 340, 248, 184, -492,
74 896, -156, 932, -628, 328, -688, -448, -616, -752, -100, 560,
75 -1020, 180, -800, -64, 76, 576, 1068, 396, 660, 552, -108,
76 -28, 320, -628, 312, -92, -92, -472, 268, 16, 560, 516,
77 -672, -52, 492, -100, 260, 384, 284, 292, 304, -148, 88,
78 -152, 1012, 1064, -228, 164, -376, -684, 592, -392, 156, 196,
79 -524, -64, -884, 160, -176, 636, 648, 404, -396, -436, 864,
80 424, -728, 988, -604, 904, -592, 296, -224, 536, -176, -920,
81 436, -48, 1176, -884, 416, -776, -824, -884, 524, -548, -564,
82 -68, -164, -96, 692, 364, -692, -1012, -68, 260, -480, 876,
83 -1116, 452, -332, -352, 892, -1088, 1220, -676, 12, -292, 244,
84 496, 372, -32, 280, 200, 112, -440, -96, 24, -644, -184,
85 56, -432, 224, -980, 272, -260, 144, -436, 420, 356, 364,
86 -528, 76, 172, -744, -368, 404, -752, -416, 684, -688, 72,
87 540, 416, 92, 444, 480, -72, -1416, 164, -1172, -68, 24,
88 424, 264, 1040, 128, -912, -524, -356, 64, 876, -12, 4,
89 -88, 532, 272, -524, 320, 276, -508, 940, 24, -400, -120,
90 756, 60, 236, -412, 100, 376, -484, 400, -100, -740, -108,
91 -260, 328, -268, 224, -200, -416, 184, -604, -564, -20, 296,
92 60, 892, -888, 60, 164, 68, -760, 216, -296, 904, -336,
93 -28, 404, -356, -568, -208, -1480, -512, 296, 328, -360, -164,
94 -1560, -776, 1156, -428, 164, -504, -112, 120, -216, -148, -264,
95 308, 32, 64, -72, 72, 116, 176, -64, -272, 460, -536,
96 -784, -280, 348, 108, -752, -132, 524, -540, -776, 116, -296,
97 -1196, -288, -560, 1040, -472, 116, -848, -1116, 116, 636, 696,
98 284, -176, 1016, 204, -864, -648, -248, 356, 972, -584, -204,
99 264, 880, 528, -24, -184, 116, 448, -144, 828, 524, 212,
100 -212, 52, 12, 200, 268, -488, -404, -880, 824, -672, -40,
101 908, -248, 500, 716, -576, 492, -576, 16, 720, -108, 384,
102 124, 344, 280, 576, -500, 252, 104, -308, 196, -188, -8,
103 1268, 296, 1032, -1196, 436, 316, 372, -432, -200, -660, 704,
104 -224, 596, -132, 268, 32, -452, 884, 104, -1008, 424, -1348,
105 -280, 4, -1168, 368, 476, 696, 300, -8, 24, 180, -592,
106 -196, 388, 304, 500, 724, -160, 244, -84, 272, -256, -420,
107 320, 208, -144, -156, 156, 364, 452, 28, 540, 316, 220,
108 -644, -248, 464, 72, 360, 32, -388, 496, -680, -48, 208,
109 -116, -408, 60, -604, -392, 548, -840, 784, -460, 656, -544,
110 -388, -264, 908, -800, -628, -612, -568, 572, -220, 164, 288,
111 -16, -308, 308, -112, -636, -760, 280, -668, 432, 364, 240,
112 -196, 604, 340, 384, 196, 592, -44, -500, 432, -580, -132,
113 636, -76, 392, 4, -412, 540, 508, 328, -356, -36, 16,
114 -220, -64, -248, -60, 24, -192, 368, 1040, 92, -24, -1044,
115 -32, 40, 104, 148, 192, -136, -520, 56, -816, -224, 732,
116 392, 356, 212, -80, -424, -1008, -324, 588, -1496, 576, 460,
117 -816, -848, 56, -580, -92, -1372, -112, -496, 200, 364, 52,
118 -140, 48, -48, -60, 84, 72, 40, 132, -356, -268, -104,
119 -284, -404, 732, -520, 164, -304, -540, 120, 328, -76, -460,
120 756, 388, 588, 236, -436, -72, -176, -404, -316, -148, 716,
121 -604, 404, -72, -88, -888, -68, 944, 88, -220, -344, 960,
122 472, 460, -232, 704, 120, 832, -228, 692, -508, 132, -476,
123 844, -748, -364, -44, 1116, -1104, -1056, 76, 428, 552, -692,
124 60, 356, 96, -384, -188, -612, -576, 736, 508, 892, 352,
125 -1132, 504, -24, -352, 324, 332, -600, -312, 292, 508, -144,
126 -8, 484, 48, 284, -260, -240, 256, -100, -292, -204, -44,
127 472, -204, 908, -188, -1000, -256, 92, 1164, -392, 564, 356,
128 652, -28, -884, 256, 484, -192, 760, -176, 376, -524, -452,
129 -436, 860, -736, 212, 124, 504, -476, 468, 76, -472, 552,
130 -692, -944, -620, 740, -240, 400, 132, 20, 192, -196, 264,
131 -668, -1012, -60, 296, -316, -828, 76, -156, 284, -768, -448,
132 -832, 148, 248, 652, 616, 1236, 288, -328, -400, -124, 588,
133 220, 520, -696, 1032, 768, -740, -92, -272, 296, 448, -464,
134 412, -200, 392, 440, -200, 264, -152, -260, 320, 1032, 216,
135 320, -8, -64, 156, -1016, 1084, 1172, 536, 484, -432, 132,
136 372, -52, -256, 84, 116, -352, 48, 116, 304, -384, 412,
137 924, -300, 528, 628, 180, 648, 44, -980, -220, 1320, 48,
138 332, 748, 524, -268, -720, 540, -276, 564, -344, -208, -196,
139 436, 896, 88, -392, 132, 80, -964, -288, 568, 56, -48,
140 -456, 888, 8, 552, -156, -292, 948, 288, 128, -716, -292,
141 1192, -152, 876, 352, -600, -260, -812, -468, -28, -120, -32,
142 -44, 1284, 496, 192, 464, 312, -76, -516, -380, -456, -1012,
143 -48, 308, -156, 36, 492, -156, -808, 188, 1652, 68, -120,
144 -116, 316, 160, -140, 352, 808, -416, 592, 316, -480, 56,
145 528, -204, -568, 372, -232, 752, -344, 744, -4, 324, -416,
146 -600, 768, 268, -248, -88, -132, -420, -432, 80, -288, 404,
147 -316, -1216, -588, 520, -108, 92, -320, 368, -480, -216, -92,
148 1688, -300, 180, 1020, -176, 820, -68, -228, -260, 436, -904,
149 20, 40, -508, 440, -736, 312, 332, 204, 760, -372, 728,
150 96, -20, -632, -520, -560, 336, 1076, -64, -532, 776, 584,
151 192, 396, -728, -520, 276, -188, 80, -52, -612, -252, -48,
152 648, 212, -688, 228, -52, -260, 428, -412, -272, -404, 180,
153 816, -796, 48, 152, 484, -88, -216, 988, 696, 188, -528,
154 648, -116, -180, 316, 476, 12, -564, 96, 476, -252, -364,
155 -376, -392, 556, -256, -576, 260, -352, 120, -16, -136, -260,
156 -492, 72, 556, 660, 580, 616, 772, 436, 424, -32, -324,
157 -1268, 416, -324, -80, 920, 160, 228, 724, 32, -516, 64,
158 384, 68, -128, 136, 240, 248, -204, -68, 252, -932, -120,
159 -480, -628, -84, 192, 852, -404, -288, -132, 204, 100, 168,
160 -68, -196, -868, 460, 1080, 380, -80, 244, 0, 484, -888,
161 64, 184, 352, 600, 460, 164, 604, -196, 320, -64, 588,
162 -184, 228, 12, 372, 48, -848, -344, 224, 208, -200, 484,
163 128, -20, 272, -468, -840, 384, 256, -720, -520, -464, -580,
164 112, -120, 644, -356, -208, -608, -528, 704, 560, -424, 392,
165 828, 40, 84, 200, -152, 0, -144, 584, 280, -120, 80,
166 -556, -972, -196, -472, 724, 80, 168, -32, 88, 160, -688,
167 0, 160, 356, 372, -776, 740, -128, 676, -248, -480, 4,
168 -364, 96, 544, 232, -1032, 956, 236, 356, 20, -40, 300,
169 24, -676, -596, 132, 1120, -104, 532, -1096, 568, 648, 444,
170 508, 380, 188, -376, -604, 1488, 424, 24, 756, -220, -192,
171 716, 120, 920, 688, 168, 44, -460, 568, 284, 1144, 1160,
172 600, 424, 888, 656, -356, -320, 220, 316, -176, -724, -188,
173 -816, -628, -348, -228, -380, 1012, -452, -660, 736, 928, 404,
174 -696, -72, -268, -892, 128, 184, -344, -780, 360, 336, 400,
175 344, 428, 548, -112, 136, -228, -216, -820, -516, 340, 92,
176 -136, 116, -300, 376, -244, 100, -316, -520, -284, -12, 824,
177 164, -548, -180, -128, 116, -924, -828, 268, -368, -580, 620,
178 192, 160, 0, -1676, 1068, 424, -56, -360, 468, -156, 720,
179 288, -528, 556, -364, 548, -148, 504, 316, 152, -648, -620,
180 -684, -24, -376, -384, -108, -920, -1032, 768, 180, -264, -508,
181 -1268, -260, -60, 300, -240, 988, 724, -376, -576, -212, -736,
182 556, 192, 1092, -620, -880, 376, -56, -4, -216, -32, 836,
183 268, 396, 1332, 864, -600, 100, 56, -412, -92, 356, 180,
184 884, -468, -436, 292, -388, -804, -704, -840, 368, -348, 140,
185 -724, 1536, 940, 372, 112, -372, 436, -480, 1136, 296, -32,
186 -228, 132, -48, -220, 868, -1016, -60, -1044, -464, 328, 916,
187 244, 12, -736, -296, 360, 468, -376, -108, -92, 788, 368,
188 -56, 544, 400, -672, -420, 728, 16, 320, 44, -284, -380,
189 -796, 488, 132, 204, -596, -372, 88, -152, -908, -636, -572,
190 -624, -116, -692, -200, -56, 276, -88, 484, -324, 948, 864,
191 1000, -456, -184, -276, 292, -296, 156, 676, 320, 160, 908,
192 -84, -1236, -288, -116, 260, -372, -644, 732, -756, -96, 84,
193 344, -520, 348, -688, 240, -84, 216, -1044, -136, -676, -396,
194 -1500, 960, -40, 176, 168, 1516, 420, -504, -344, -364, -360,
195 1216, -940, -380, -212, 252, -660, -708, 484, -444, -152, 928,
196 -120, 1112, 476, -260, 560, -148, -344, 108, -196, 228, -288,
197 504, 560, -328, -88, 288, -1008, 460, -228, 468, -836, -196,
198 76, 388, 232, 412, -1168, -716, -644, 756, -172, -356, -504,
199 116, 432, 528, 48, 476, -168, -608, 448, 160, -532, -272,
200 28, -676, -12, 828, 980, 456, 520, 104, -104, 256, -344,
201 -4, -28, -368, -52, -524, -572, -556, -200, 768, 1124, -208,
202 -512, 176, 232, 248, -148, -888, 604, -600, -304, 804, -156,
203 -212, 488, -192, -804, -256, 368, -360, -916, -328, 228, -240,
204 -448, -472, 856, -556, -364, 572, -12, -156, -368, -340, 432,
205 252, -752, -152, 288, 268, -580, -848, -592, 108, -76, 244,
206 312, -716, 592, -80, 436, 360, 4, -248, 160, 516, 584,
207 732, 44, -468, -280, -292, -156, -588, 28, 308, 912, 24,
208 124, 156, 180, -252, 944, -924, -772, -520, -428, -624, 300,
209 -212, -1144, 32, -724, 800, -1128, -212, -1288, -848, 180, -416,
210 440, 192, -576, -792, -76, -1080, 80, -532, -352, -132, 380,
211 -820, 148, 1112, 128, 164, 456, 700, -924, 144, -668, -384,
212 648, -832, 508, 552, -52, -100, -656, 208, -568, 748, -88,
213 680, 232, 300, 192, -408, -1012, -152, -252, -268, 272, -876,
214 -664, -648, -332, -136, 16, 12, 1152, -28, 332, -536, 320,
215 -672, -460, -316, 532, -260, 228, -40, 1052, -816, 180, 88,
216 -496, -556, -672, -368, 428, 92, 356, 404, -408, 252, 196,
217 -176, -556, 792, 268, 32, 372, 40, 96, -332, 328, 120,
218 372, -900, -40, 472, -264, -592, 952, 128, 656, 112, 664,
219 -232, 420, 4, -344, -464, 556, 244, -416, -32, 252, 0,
220 -412, 188, -696, 508, -476, 324, -1096, 656, -312, 560, 264,
221 -136, 304, 160, -64, -580, 248, 336, -720, 560, -348, -288,
222 -276, -196, -500, 852, -544, -236, -1128, -992, -776, 116, 56,
223 52, 860, 884, 212, -12, 168, 1020, 512, -552, 924, -148,
224 716, 188, 164, -340, -520, -184, 880, -152, -680, -208, -1156,
225 -300, -528, -472, 364, 100, -744, -1056, -32, 540, 280, 144,
226 -676, -32, -232, -280, -224, 96, 568, -76, 172, 148, 148,
227 104, 32, -296, -32, 788, -80, 32, -16, 280, 288, 944,
228 428, -484
229 };
230
231 // Binding Table Definitions
//!
//! \enum     FilmGrainGetRandomValuesBindingTableOffset
//! \brief    Binding table slots of the GetRandomValues kernel.
//!           The trailing grvNumSurfaces enumerator is the slot count.
//!
enum FilmGrainGetRandomValuesBindingTableOffset
{
    grvInputGaussianSeq   = 0,
    grvOutputYRandomValue = 1,
    grvOutputURandomValue = 2,
    grvOutputVRandomValue = 3,
    grvOutputCoordinates  = 4,
    grvNumSurfaces        = 5
};
245
//!
//! \enum     FilmGrainRegressPhase1BindingTableOffset
//! \brief    Binding table slots of the RegressPhase1 kernel.
//!           The trailing rp1NumSurfaces enumerator is the slot count.
//!
enum FilmGrainRegressPhase1BindingTableOffset
{
    rp1InputYRandomValue       = 0,
    rp1OutputYDitheringSurface = 1,
    rp1InputYCoeff             = 2,
    rp1NumSurfaces             = 3
};
257
//!
//! \enum     FilmGrainRegressPhase2BindingTableOffset
//! \brief    Binding table slots of the RegressPhase2 kernel.
//!           The trailing rp2NumSurfaces enumerator is the slot count.
//!
enum FilmGrainRegressPhase2BindingTableOffset
{
    rp2InputYRandomValue = 0,
    rp2InputURandomValue = 1,
    rp2InputVRandomValue = 2,
    rp2InputYDithering   = 3,
    rp2OutputYDithering  = 4,
    rp2OutputUDithering  = 5,
    rp2OutputVDithering  = 6,
    rp2InputYCoeff       = 7,
    rp2InputUCoeff       = 8,
    rp2InputVCoeff       = 9,
    rp2NumSurfaces       = 10
};
276
//!
//! \enum     FilmGrainApplyNoiseBindingTableOffset
//! \brief    Binding table slots of the ApplyNoise kernel.
//!           The trailing anNumSurfaces enumerator is the slot count.
//!
enum FilmGrainApplyNoiseBindingTableOffset
{
    anInputYuv                     = 0,
    anOutputY                      = 1,
    anOutputUv                     = 2,
    anInputYDithering              = 3,
    anInputUDithering              = 4,
    anInputVDithering              = 5,
    anInputRandomValuesCoordinates = 6,
    anInputYGammaLut               = 7,
    anInputUGammaLut               = 8,
    anInputVGammaLut               = 9,
    anNumSurfaces                  = 10
};
295
296 const int32_t Av1DecodeFilmGrainG12::m_filmGrainBindingTableCount[kernelNum] = {
297 grvNumSurfaces,
298 rp1NumSurfaces,
299 rp2NumSurfaces,
300 anNumSurfaces
301 };
302
303 //Curbe definitions
304 struct FilmGrainGetRandomValuesCurbe
305 {
306 // uint32_t 0
307 union
308 {
309 struct
310 {
311 uint32_t GaussianSeqSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
312 };
313 struct
314 {
315 uint32_t Value;
316 };
317 } DW0;
318
319 // uint32_t 1
320 union
321 {
322 struct
323 {
324 uint32_t YRandomValuesSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
325 };
326 struct
327 {
328 uint32_t Value;
329 };
330 } DW1;
331
332 // uint32_t 2
333 union
334 {
335 struct
336 {
337 uint32_t URandomValuesSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
338 };
339 struct
340 {
341 uint32_t Value;
342 };
343 } DW2;
344
345 // uint32_t 3
346 union
347 {
348 struct
349 {
350 uint32_t VRandomValuesSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
351 };
352 struct
353 {
354 uint32_t Value;
355 };
356 } DW3;
357
358 // uint32_t 4
359 union
360 {
361 struct
362 {
363 uint32_t CoordinatesSurfaceIndex : MOS_BITFIELD_RANGE(0, 31); //Random values for coordinates surface index
364 };
365 struct
366 {
367 uint32_t Value;
368 };
369 } DW4;
370
371 // uint32_t 5
372 union
373 {
374 struct
375 {
376 uint32_t NoiseShiftAmount : MOS_BITFIELD_RANGE(0, 15);
377 uint32_t Reserved : MOS_BITFIELD_RANGE(16, 31);
378 };
379 struct
380 {
381 uint32_t Value;
382 };
383 } DW5;
384
385 // uint32_t 6
386 union
387 {
388 struct
389 {
390 uint32_t GrainSeed : MOS_BITFIELD_RANGE(0, 31); //Random number generation initializer
391 };
392 struct
393 {
394 uint32_t Value;
395 };
396 } DW6;
397
398 // uint32_t 7
399 union
400 {
401 struct
402 {
403 uint32_t CoordinatesWidth : MOS_BITFIELD_RANGE(0, 15); //RoundUp(ImageHeight/64)
404 uint32_t CoordinatesHeight : MOS_BITFIELD_RANGE(16, 31); //RoundUp(ImageWidth/64)
405 };
406 struct
407 {
408 uint32_t Value;
409 };
410 } DW7;
411
412 static const size_t m_size = 8;
413 static const size_t m_byteSize = 32;
414 };
415 C_ASSERT(sizeof(FilmGrainGetRandomValuesCurbe) == 32);
416
417 struct FilmGrainRegressPhase1Curbe
418 {
419 // uint32_t 0
420 union
421 {
422 struct
423 {
424 uint32_t YRandomValuesSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
425 };
426 struct
427 {
428 uint32_t Value;
429 };
430 } DW0;
431
432 // uint32_t 1
433 union
434 {
435 struct
436 {
437 uint32_t YDitheringSurface : MOS_BITFIELD_RANGE(0, 31);
438 };
439 struct
440 {
441 uint32_t Value;
442 };
443 } DW1;
444
445 // uint32_t 2
446 union
447 {
448 struct
449 {
450 uint32_t YCoeffSurface : MOS_BITFIELD_RANGE(0, 31);
451 };
452 struct
453 {
454 uint32_t Value;
455 };
456 } DW2;
457
458 static const size_t m_size = 3;
459 static const size_t m_byteSize = 12;
460 };
461 C_ASSERT(sizeof(FilmGrainRegressPhase1Curbe) == 12);
462
463 struct FilmGrainRegressPhase2Curbe
464 {
465 // uint32_t 0
466 union
467 {
468 struct
469 {
470 uint32_t YRandomValuesSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
471 };
472 struct
473 {
474 uint32_t Value;
475 };
476 } DW0;
477
478 // uint32_t 1
479 union
480 {
481 struct
482 {
483 uint32_t URandomValuesSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
484 };
485 struct
486 {
487 uint32_t Value;
488 };
489 } DW1;
490
491 // uint32_t 2
492 union
493 {
494 struct
495 {
496 uint32_t VRandomValuesSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
497 };
498 struct
499 {
500 uint32_t Value;
501 };
502 } DW2;
503
504 // uint32_t 3
505 union
506 {
507 struct
508 {
509 uint32_t YDitheringInputSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
510 };
511 struct
512 {
513 uint32_t Value;
514 };
515 } DW3;
516
517 // uint32_t 4
518 union
519 {
520 struct
521 {
522 uint32_t YDitheringSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
523 };
524 struct
525 {
526 uint32_t Value;
527 };
528 } DW4;
529
530 // uint32_t 5
531 union
532 {
533 struct
534 {
535 uint32_t UDitheringSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
536 };
537 struct
538 {
539 uint32_t Value;
540 };
541 } DW5;
542
543 // uint32_t 6
544 union
545 {
546 struct
547 {
548 uint32_t VDitheringSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
549 };
550 struct
551 {
552 uint32_t Value;
553 };
554 } DW6;
555
556 // uint32_t 7
557 union
558 {
559 struct
560 {
561 uint32_t YCoeffSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
562 };
563 struct
564 {
565 uint32_t Value;
566 };
567 } DW7;
568
569 // uint32_t 8
570 union
571 {
572 struct
573 {
574 uint32_t UCoeffSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
575 };
576 struct
577 {
578 uint32_t Value;
579 };
580 } DW8;
581
582 // uint32_t 9
583 union
584 {
585 struct
586 {
587 uint32_t VCoeffSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
588 };
589 struct
590 {
591 uint32_t Value;
592 };
593 } DW9;
594
595 // uint32_t 10
596 union
597 {
598 struct
599 {
600 uint32_t RegressionCoefficientShift : MOS_BITFIELD_RANGE(0, 15);
601 uint32_t Reserved : MOS_BITFIELD_RANGE(16, 31);
602 };
603 struct
604 {
605 uint32_t Value;
606 };
607 } DW10;
608
609 static const size_t m_size = 11;
610 static const size_t m_byteSize = 44;
611 };
612 C_ASSERT(sizeof(FilmGrainRegressPhase2Curbe) == 44);
613
614 struct FilmGrainApplyNoiseCurbe
615 {
616 // uint32_t 0
617 union
618 {
619 struct
620 {
621 uint32_t InputYuvSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
622 };
623 struct
624 {
625 uint32_t Value;
626 };
627 } DW0;
628
629 // uint32_t 1
630 union
631 {
632 struct
633 {
634 uint32_t OutputYSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
635 };
636 struct
637 {
638 uint32_t Value;
639 };
640 } DW1;
641
642 // uint32_t 2
643 union
644 {
645 struct
646 {
647 uint32_t OutputUvSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
648 };
649 struct
650 {
651 uint32_t Value;
652 };
653 } DW2;
654
655 // uint32_t 3
656 union
657 {
658 struct
659 {
660 uint32_t YDitheringSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
661 };
662 struct
663 {
664 uint32_t Value;
665 };
666 } DW3;
667
668 // uint32_t 4
669 union
670 {
671 struct
672 {
673 uint32_t UDitheringSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
674 };
675 struct
676 {
677 uint32_t Value;
678 };
679 } DW4;
680
681 // uint32_t 5
682 union
683 {
684 struct
685 {
686 uint32_t VDitheringSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
687 };
688 struct
689 {
690 uint32_t Value;
691 };
692 } DW5;
693
694 // uint32_t 6
695 union
696 {
697 struct
698 {
699 uint32_t RandomValuesForCoordinatesSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
700 };
701 struct
702 {
703 uint32_t Value;
704 };
705 } DW6;
706
707 // uint32_t 7
708 union
709 {
710 struct
711 {
712 uint32_t YGammaCorrectionLutSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
713 };
714 struct
715 {
716 uint32_t Value;
717 };
718 } DW7;
719
720 // uint32_t 8
721 union
722 {
723 struct
724 {
725 uint32_t UGammaCorrectionLutSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
726 };
727 struct
728 {
729 uint32_t Value;
730 };
731 } DW8;
732
733 // uint32_t 9
734 union
735 {
736 struct
737 {
738 uint32_t VGammaCorrectionLutSurfaceIndex : MOS_BITFIELD_RANGE(0, 31);
739 };
740 struct
741 {
742 uint32_t Value;
743 };
744 } DW9;
745
746 // uint32_t 10
747 union
748 {
749 struct
750 {
751 uint32_t EnableYFilmGrain : MOS_BITFIELD_RANGE(0, 15);
752 uint32_t EnableUFilmGrain : MOS_BITFIELD_RANGE(16, 31);
753 };
754 struct
755 {
756 uint32_t Value;
757 };
758 } DW10;
759
760 // uint32_t 11
761 union
762 {
763 struct
764 {
765 uint32_t EnableVFilmGrain : MOS_BITFIELD_RANGE(0, 15);
766 uint32_t RandomValuesForCoordinatesTableWidth : MOS_BITFIELD_RANGE(16, 31);
767 };
768 struct
769 {
770 uint32_t Value;
771 };
772 } DW11;
773
774 // uint32_t 12
775 union
776 {
777 struct
778 {
779 uint32_t ImageHeight : MOS_BITFIELD_RANGE(0, 15);
780 uint32_t ScalingShiftValue : MOS_BITFIELD_RANGE(16, 31);
781 };
782 struct
783 {
784 uint32_t Value;
785 };
786 } DW12;
787
788 // uint32_t 13
789 union
790 {
791 struct
792 {
793 uint32_t MinimumYClippingValue : MOS_BITFIELD_RANGE(0, 15);
794 uint32_t MaximumYClippingValue : MOS_BITFIELD_RANGE(16, 31);
795 };
796 struct
797 {
798 uint32_t Value;
799 };
800 } DW13;
801
802 // uint32_t 14
803 union
804 {
805 struct
806 {
807 uint32_t MinimumUvClippingValue : MOS_BITFIELD_RANGE(0, 15);
808 uint32_t MaximumUvClippingValue : MOS_BITFIELD_RANGE(16, 31);
809 };
810 struct
811 {
812 uint32_t Value;
813 };
814 } DW14;
815
816 // uint32_t 15
817 union
818 {
819 struct
820 {
821 uint32_t CbLumaMultiplier : MOS_BITFIELD_RANGE(0, 15);
822 uint32_t CbMultiplier : MOS_BITFIELD_RANGE(16, 31);
823 };
824 struct
825 {
826 uint32_t Value;
827 };
828 } DW15;
829
830 // uint32_t 16
831 union
832 {
833 struct
834 {
835 uint32_t CbOffset : MOS_BITFIELD_RANGE(0, 15);
836 uint32_t CrLumaMultiplier : MOS_BITFIELD_RANGE(16, 31);
837 };
838 struct
839 {
840 uint32_t Value;
841 };
842 } DW16;
843
844 // uint32_t 17
845 union
846 {
847 struct
848 {
849 uint32_t CrMultiplier : MOS_BITFIELD_RANGE(0, 15);
850 uint32_t CrOffset : MOS_BITFIELD_RANGE(16, 31);
851 };
852 struct
853 {
854 uint32_t Value;
855 };
856 } DW17;
857
858 static const size_t m_size = 18;
859 static const size_t m_byteSize = 72;
860 };
861 C_ASSERT(sizeof(FilmGrainApplyNoiseCurbe) == 72);
862
863 const int32_t Av1DecodeFilmGrainG12::m_filmGrainCurbeSize[kernelNum] = {
864 (sizeof(FilmGrainGetRandomValuesCurbe)),
865 (sizeof(FilmGrainRegressPhase1Curbe)),
866 (sizeof(FilmGrainRegressPhase2Curbe)),
867 (sizeof(FilmGrainApplyNoiseCurbe))
868 };
869
870 // Initialize the static const float variables in class Av1DecodeFilmGrainG12.
871 const float Av1DecodeFilmGrainG12::m_maxScaleRatio = 1.0f;
872 const float Av1DecodeFilmGrainG12::m_minScaleRatio = 0.125f;
873
Av1DecodeFilmGrainG12(MediaFeatureManager * featureManager,DecodeAllocator * allocator,CodechalHwInterface * hwInterface)874 Av1DecodeFilmGrainG12::Av1DecodeFilmGrainG12(
875 MediaFeatureManager *featureManager,
876 DecodeAllocator *allocator,
877 CodechalHwInterface *hwInterface) :
878 m_allocator(allocator)
879 {
880 m_featureManager = featureManager;
881
882 auto decFeatureManager = dynamic_cast<DecodeAv1FeatureManagerG12 *>(featureManager);
883 DECODE_CHK_NULL_NO_STATUS_RETURN(decFeatureManager);
884
885 m_basicFeature = dynamic_cast<Av1BasicFeatureG12 *>(m_featureManager->GetFeature(Av1FeatureIDs::basicFeature));
886 DECODE_CHK_NULL_NO_STATUS_RETURN(m_basicFeature);
887
888 m_hwInterface = hwInterface;
889 }
890
~Av1DecodeFilmGrainG12()891 Av1DecodeFilmGrainG12::~Av1DecodeFilmGrainG12()
892 {
893 m_allocator->Destroy(m_gaussianSequenceSurface);
894 m_allocator->Destroy(m_yRandomValuesSurface);
895 m_allocator->Destroy(m_uRandomValuesSurface);
896 m_allocator->Destroy(m_vRandomValuesSurface);
897 m_allocator->Destroy(m_yDitheringTempSurface);
898
899 m_allocator->Destroy(m_coordinatesRandomValuesSurfaceArray);
900 m_allocator->Destroy(m_yCoefficientsSurfaceArray);
901 m_allocator->Destroy(m_yDitheringSurfaceArray);
902 m_allocator->Destroy(m_uDitheringSurfaceArray);
903 m_allocator->Destroy(m_vDitheringSurfaceArray);
904 m_allocator->Destroy(m_yCoeffSurfaceArray);
905 m_allocator->Destroy(m_uCoeffSurfaceArray);
906 m_allocator->Destroy(m_vCoeffSurfaceArray);
907 m_allocator->Destroy(m_yGammaLUTSurfaceArray);
908 m_allocator->Destroy(m_uGammaLUTSurfaceArray);
909 m_allocator->Destroy(m_vGammaLUTSurfaceArray);
910 m_allocator->Destroy(m_coordinatesRandomValuesSurfaceArray);
911 }
912
Init(void * settings)913 MOS_STATUS Av1DecodeFilmGrainG12::Init(void *settings)
914 {
915 DECODE_FUNC_CALL();
916
917 memset(&m_kernelSize, 0, sizeof(m_kernelSize));
918 memset(&m_dshSize, 0, sizeof(m_dshSize));
919 memset(&m_syncObject, 0, sizeof(m_syncObject));
920
921 for (uint8_t i = getRandomValues; i < kernelNum; i++)
922 {
923 m_kernelBinary[i] = nullptr;
924 m_kernelStates[i] = MHW_KERNEL_STATE();
925 }
926
927 return MOS_STATUS_SUCCESS;
928 }
929
Update(void * params)930 MOS_STATUS Av1DecodeFilmGrainG12::Update(void *params)
931 {
932 DECODE_FUNC_CALL();
933 DECODE_CHK_NULL(params);
934
935 CodechalDecodeParams *decodeParams = (CodechalDecodeParams *)params;
936 m_picParams = static_cast<CodecAv1PicParams *>(decodeParams->m_picParams);
937 DECODE_CHK_NULL(m_picParams);
938
939 m_bitDepthIndicator = m_basicFeature->m_av1DepthIndicator;
940
941 if (!m_resourceAllocated)
942 {
943 DECODE_CHK_STATUS(InitializeKernelState());
944 DECODE_CHK_STATUS(AllocateFixedSizeSurfaces());
945 m_resourceAllocated = true;
946 }
947
948 bool applyY = (m_picParams->m_filmGrainParams.m_numYPoints > 0) ? 1 : 0;
949 bool applyCb = (m_picParams->m_filmGrainParams.m_numCbPoints > 0 || m_picParams->m_filmGrainParams.m_filmGrainInfoFlags.m_fields.m_chromaScalingFromLuma) ? 1 : 0;
950 bool applyCr = (m_picParams->m_filmGrainParams.m_numCrPoints > 0 || m_picParams->m_filmGrainParams.m_filmGrainInfoFlags.m_fields.m_chromaScalingFromLuma) ? 1 : 0;
951 m_filmGrainEnabled = m_picParams->m_filmGrainParams.m_filmGrainInfoFlags.m_fields.m_applyGrain && (applyY | applyCb | applyCr);
952
953 if (m_picParams->m_filmGrainParams.m_filmGrainInfoFlags.m_fields.m_applyGrain)
954 {
955 m_av1TileParams = static_cast<CodecAv1TileParams*>(decodeParams->m_sliceParams);
956 DECODE_CHK_NULL(m_av1TileParams);
957
958 m_segmentParams = &m_picParams->m_av1SegData;
959 DECODE_CHK_NULL(m_segmentParams);
960
961 DECODE_CHK_STATUS(SetFrameStates(m_picParams));
962 DECODE_CHK_STATUS(AllocateVariableSizeSurfaces());
963 }
964
965 #if (_DEBUG || _RELEASE_INTERNAL)
966 m_fgOutputSurfList[m_basicFeature->m_curRenderPic.FrameIdx] = m_basicFeature->m_fgOutputSurf;
967 #endif
968
969 return MOS_STATUS_SUCCESS;
970 }
971
InitInterfaceStateHeapSetting()972 MOS_STATUS Av1DecodeFilmGrainG12::InitInterfaceStateHeapSetting()
973 {
974 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
975
976 DECODE_FUNC_CALL();
977
978 DECODE_CHK_STATUS(m_hwInterface->GetFilmGrainKernelInfo(
979 m_kernelBaseCommon,
980 m_combinedKernelSize));
981 DECODE_CHK_NULL(m_kernelBaseCommon);
982
983 return eStatus;
984 }
985
AllocateStateHeap(CodechalHwInterface * hwInterface)986 MOS_STATUS Av1DecodeFilmGrainG12::AllocateStateHeap(
987 CodechalHwInterface *hwInterface)
988 {
989 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
990
991 DECODE_FUNC_CALL();
992
993 MhwRenderInterface *renderInterface = m_hwInterface->GetRenderInterface();
994 DECODE_CHK_NULL(renderInterface);
995
996 MHW_STATE_HEAP_SETTINGS *stateHeapSettings = m_hwInterface->GetStateHeapSettings();
997 DECODE_CHK_NULL(stateHeapSettings);
998
999 stateHeapSettings->m_ishBehavior = HeapManager::Behavior::clientControlled;
1000 stateHeapSettings->m_dshBehavior = HeapManager::Behavior::destructiveExtend;
1001 // As a performance optimization keep the DSH locked always,
1002 // the ISH is only accessed at device creation and thus does not need to be locked
1003 stateHeapSettings->m_keepDshLocked = true;
1004 stateHeapSettings->dwDshIncrement = 2 * MOS_PAGE_SIZE;
1005
1006 if (stateHeapSettings->dwIshSize > 0 &&
1007 stateHeapSettings->dwDshSize > 0 &&
1008 stateHeapSettings->dwNumSyncTags > 0)
1009 {
1010 DECODE_CHK_STATUS(renderInterface->AllocateHeaps(
1011 *stateHeapSettings));
1012 }
1013
1014 return eStatus;
1015 }
1016
GetCommonKernelHeaderAndSize(void * binary,FilmGrainKernelStateIdx index,uint8_t bitDepthIndicator,void * krnHeader,uint32_t * krnSize)1017 MOS_STATUS Av1DecodeFilmGrainG12::GetCommonKernelHeaderAndSize(
1018 void *binary,
1019 FilmGrainKernelStateIdx index,
1020 uint8_t bitDepthIndicator,
1021 void *krnHeader,
1022 uint32_t *krnSize)
1023 {
1024 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1025
1026 DECODE_FUNC_CALL();
1027
1028 DECODE_CHK_NULL(binary);
1029 DECODE_CHK_NULL(krnHeader);
1030 DECODE_CHK_NULL(krnSize);
1031
1032 FilmGrainCombinedKernelHeader *kernelHeaderTable;
1033 kernelHeaderTable = (FilmGrainCombinedKernelHeader*)binary;
1034 CodecKernelHeader *invalidEntry;
1035 invalidEntry = &(kernelHeaderTable->applyNoise10b) + 1;
1036
1037 CodecKernelHeader *currKrnHeader;
1038 switch (index)
1039 {
1040 case getRandomValues:
1041 currKrnHeader = &kernelHeaderTable->getRandomValues8b;
1042 break;
1043 case regressPhase1:
1044 currKrnHeader = &kernelHeaderTable->regressPhase1;
1045 break;
1046 case regressPhase2:
1047 currKrnHeader = &kernelHeaderTable->regressPhase2For8b;
1048 break;
1049 case applyNoise:
1050 currKrnHeader = &kernelHeaderTable->applyNoise8b;
1051 break;
1052 default:
1053 DECODE_VERBOSEMESSAGE("Unsupported film grain stage requested");
1054 eStatus = MOS_STATUS_INVALID_PARAMETER;
1055 return eStatus;
1056 }
1057
1058 if (index != regressPhase1)
1059 {
1060 currKrnHeader += bitDepthIndicator;
1061 }
1062
1063 *((CodecKernelHeader *)krnHeader) = *currKrnHeader;
1064
1065 CodecKernelHeader *nextKrnHeader;
1066 nextKrnHeader = (currKrnHeader + 1);
1067 uint32_t nextKrnOffset;
1068 nextKrnOffset = *krnSize;
1069
1070 if (nextKrnHeader < invalidEntry)
1071 {
1072 nextKrnOffset = nextKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT;
1073 }
1074 *krnSize = nextKrnOffset - (currKrnHeader->KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1075
1076 return eStatus;
1077 }
1078
InitializeKernelState()1079 MOS_STATUS Av1DecodeFilmGrainG12::InitializeKernelState()
1080 {
1081 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1082
1083 DECODE_FUNC_CALL();
1084
1085 if (m_bitDepthIndicator > 1)
1086 {
1087 DECODE_VERBOSEMESSAGE("Bit depth not supported!\n");
1088 return MOS_STATUS_INVALID_PARAMETER;
1089 }
1090
1091 PMOS_INTERFACE osInterface = m_hwInterface->GetOsInterface();
1092 m_osInterface = osInterface;
1093
1094 // Init State Heap
1095 DECODE_CHK_STATUS(InitInterfaceStateHeapSetting());
1096
1097 CODECHAL_KERNEL_HEADER currKrnHeader;
1098 MHW_KERNEL_STATE *kernelState;
1099 for (auto krnStateIdx = 0; krnStateIdx < kernelNum; krnStateIdx++)
1100 {
1101 uint32_t kernelSize = m_combinedKernelSize;
1102 kernelState = &m_kernelStates[krnStateIdx];
1103
1104 DECODE_CHK_STATUS(GetCommonKernelHeaderAndSize(
1105 m_kernelBaseCommon,
1106 (FilmGrainKernelStateIdx)krnStateIdx,
1107 m_bitDepthIndicator,
1108 &currKrnHeader,
1109 &kernelSize))
1110
1111 kernelState->KernelParams.iBTCount = m_filmGrainBindingTableCount[krnStateIdx];
1112 kernelState->KernelParams.iCurbeLength = m_filmGrainCurbeSize[krnStateIdx];
1113 kernelState->KernelParams.iBlockWidth = CODECHAL_MACROBLOCK_WIDTH;
1114 kernelState->KernelParams.iBlockHeight = CODECHAL_MACROBLOCK_HEIGHT;
1115 kernelState->KernelParams.iIdCount = 1;
1116 kernelState->dwKernelBinaryOffset = 0;
1117
1118 kernelState->KernelParams.pBinary = m_kernelBaseCommon + (currKrnHeader.KernelStartPointer << MHW_KERNEL_OFFSET_SHIFT);
1119 kernelState->KernelParams.iSize = kernelSize;
1120 }
1121
1122 DECODE_CHK_STATUS(m_osInterface->pfnCreateSyncResource(
1123 m_osInterface,
1124 &m_syncObject));
1125
1126 return eStatus;
1127 }
1128
SetupMediaVfe(PMOS_COMMAND_BUFFER cmdBuffer,MHW_KERNEL_STATE * kernelState)1129 MOS_STATUS Av1DecodeFilmGrainG12::SetupMediaVfe(
1130 PMOS_COMMAND_BUFFER cmdBuffer,
1131 MHW_KERNEL_STATE *kernelState)
1132 {
1133 MHW_VFE_PARAMS_G12 vfeParams = {};
1134 vfeParams.pKernelState = kernelState;
1135
1136 DECODE_CHK_STATUS(m_renderInterface->AddMediaVfeCmd(cmdBuffer, &vfeParams));
1137
1138 return MOS_STATUS_SUCCESS;
1139 }
1140
AllocateFixedSizeSurfaces()1141 MOS_STATUS Av1DecodeFilmGrainG12::AllocateFixedSizeSurfaces()
1142 {
1143 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1144
1145 DECODE_FUNC_CALL();
1146
1147 //Gaussian sequence surface
1148 m_gaussianSequenceSurface = m_allocator->AllocateBuffer(
1149 MOS_ALIGN_CEIL(2048 * sizeof(int16_t), CODECHAL_PAGE_SIZE), "GaussianSequenceSurface",
1150 resourceInternalReadWriteCache, lockableVideoMem);
1151 DECODE_CHK_NULL(m_gaussianSequenceSurface);
1152
1153 auto data = (int16_t *)m_allocator->LockResourceForWrite(&m_gaussianSequenceSurface->OsResource);
1154 DECODE_CHK_NULL(data);
1155 MOS_SecureMemcpy(data, 2048 * sizeof(int16_t), defaultGaussianSequence, 2048 * sizeof(int16_t));
1156
1157 // Surfaces/buffers for GetRandomValues kernel
1158 //Y random values surface
1159 PMOS_SURFACE surface = nullptr;
1160 m_yRandomValuesSurface = m_allocator->AllocateSurface(
1161 70 * sizeof(int16_t),
1162 70,
1163 "Film Grain GRV [out] YRandomValuesSurface",
1164 Format_R8UN,
1165 false,
1166 resourceInternalReadWriteCache,
1167 notLockableVideoMem);
1168 DECODE_CHK_NULL(m_yRandomValuesSurface);
1169
1170 //U random values surface
1171 m_uRandomValuesSurface = m_allocator->AllocateSurface(
1172 38 * sizeof(int16_t),
1173 38,
1174 "Film Grain GRV [out] URandomValuesSurface",
1175 Format_R8UN,
1176 false,
1177 resourceInternalReadWriteCache,
1178 notLockableVideoMem);
1179 DECODE_CHK_NULL(m_uRandomValuesSurface);
1180
1181 //V random values surface
1182 m_vRandomValuesSurface = m_allocator->AllocateSurface(
1183 38 * sizeof(int16_t),
1184 38,
1185 "Film Grain GRV [out] VRandomValuesSurface",
1186 Format_R8UN,
1187 false,
1188 resourceInternalReadWriteCache,
1189 notLockableVideoMem);
1190 DECODE_CHK_NULL(m_vRandomValuesSurface);
1191
1192 //Y Dithering Temp LUT Surface
1193 m_yDitheringTempSurface = m_allocator->AllocateSurface(
1194 70 * sizeof(int32_t),
1195 70,
1196 "Film Grain RP1 [out] YDitheringTempSurface",
1197 Format_R8UN,
1198 false,
1199 resourceInternalReadWriteCache,
1200 notLockableVideoMem);
1201 DECODE_CHK_NULL(m_yDitheringTempSurface);
1202
1203 // Surfaces/buffers for RegressPhase1 kernel
1204 //Y Coefficients Surface
1205 m_yCoefficientsSurfaceArray = m_allocator->AllocateBufferArray(
1206 24 * sizeof(int16_t),
1207 "YCoeffSurface",
1208 m_bufferPoolDepth,
1209 resourceInternalReadWriteCache,
1210 lockableVideoMem);
1211 DECODE_CHK_NULL(m_yCoefficientsSurfaceArray);
1212
1213 //Y dithering Surface
1214 m_yDitheringSurfaceArray = m_allocator->AllocateSurfaceArray(
1215 128 * ((m_bitDepthIndicator == 1) ? sizeof(int16_t) : sizeof(int8_t)),
1216 128,
1217 "Film Grain RP2 [out] YDitheringSurface",
1218 m_bufferPoolDepth,
1219 Format_R8UN,
1220 false,
1221 resourceInternalReadWriteCache,
1222 notLockableVideoMem);
1223 DECODE_CHK_NULL(m_yDitheringSurfaceArray);
1224
1225 //U dithering surface
1226 m_uDitheringSurfaceArray = m_allocator->AllocateSurfaceArray(
1227 64 * ((m_bitDepthIndicator == 1) ? sizeof(int16_t) : sizeof(int8_t)),
1228 64,
1229 "Film Grain RP2 [out] UDitheringSurface",
1230 m_bufferPoolDepth,
1231 Format_R8UN,
1232 false,
1233 resourceInternalReadWriteCache,
1234 notLockableVideoMem);
1235 DECODE_CHK_NULL(m_uDitheringSurfaceArray);
1236
1237 //V Dithering surface
1238 m_vDitheringSurfaceArray = m_allocator->AllocateSurfaceArray(
1239 64 * ((m_bitDepthIndicator == 1) ? sizeof(int16_t) : sizeof(int8_t)),
1240 64,
1241 "Film Grain RP2 [out] VDitheringSurface",
1242 m_bufferPoolDepth,
1243 Format_R8UN,
1244 false,
1245 resourceInternalReadWriteCache,
1246 notLockableVideoMem);
1247 DECODE_CHK_NULL(m_vDitheringSurfaceArray);
1248
1249 //Y Coefficients Surface, for input of RegressPhase2
1250 m_yCoeffSurfaceArray = m_allocator->AllocateBufferArray(
1251 MOS_ALIGN_CEIL(32 * sizeof(int16_t), CODECHAL_PAGE_SIZE),
1252 "YCoeffSurface",
1253 m_bufferPoolDepth,
1254 resourceInternalReadWriteCache,
1255 lockableVideoMem);
1256 DECODE_CHK_NULL(m_yCoeffSurfaceArray);
1257
1258 //U Coefficients Surface, for input of RegressPhase2
1259 m_uCoeffSurfaceArray = m_allocator->AllocateBufferArray(
1260 MOS_ALIGN_CEIL(32 * sizeof(int16_t), CODECHAL_PAGE_SIZE),
1261 "UCoeffSurface",
1262 m_bufferPoolDepth,
1263 resourceInternalReadWriteCache,
1264 lockableVideoMem);
1265 DECODE_CHK_NULL(m_uCoeffSurfaceArray);
1266
1267 //V Coefficients Surface, for input of RegressPhase2
1268 m_vCoeffSurfaceArray = m_allocator->AllocateBufferArray(
1269 MOS_ALIGN_CEIL(32 * sizeof(int16_t), CODECHAL_PAGE_SIZE),
1270 "VCoeffSurface",
1271 m_bufferPoolDepth,
1272 resourceInternalReadWriteCache,
1273 lockableVideoMem);
1274 DECODE_CHK_NULL(m_vCoeffSurfaceArray);
1275
1276 //Y Gamma LUT Surface, for input of ApplyNoise
1277 m_yGammaLUTSurfaceArray = m_allocator->AllocateBufferArray(
1278 MOS_ALIGN_CEIL(257 * sizeof(int16_t), CODECHAL_PAGE_SIZE),
1279 "YGammaLUTSurface",
1280 m_bufferPoolDepth,
1281 resourceInternalReadWriteCache,
1282 lockableVideoMem);
1283 DECODE_CHK_NULL(m_yGammaLUTSurfaceArray);
1284
1285 //U Gamma LUT Surface, for input of ApplyNoise
1286 m_uGammaLUTSurfaceArray = m_allocator->AllocateBufferArray(
1287 MOS_ALIGN_CEIL(257 * sizeof(int16_t), CODECHAL_PAGE_SIZE),
1288 "UGammaLUTSurface",
1289 m_bufferPoolDepth,
1290 resourceInternalReadWriteCache,
1291 lockableVideoMem);
1292 DECODE_CHK_NULL(m_uGammaLUTSurfaceArray);
1293
1294 //V Gamma LUT Surface, for input of ApplyNoise
1295 m_vGammaLUTSurfaceArray = m_allocator->AllocateBufferArray(
1296 MOS_ALIGN_CEIL(257 * sizeof(int16_t), CODECHAL_PAGE_SIZE),
1297 "VGammaLUTSurface",
1298 m_bufferPoolDepth,
1299 resourceInternalReadWriteCache,
1300 lockableVideoMem);
1301 DECODE_CHK_NULL(m_vGammaLUTSurfaceArray);
1302
1303 return eStatus;
1304 }
1305
AllocateVariableSizeSurfaces()1306 MOS_STATUS Av1DecodeFilmGrainG12::AllocateVariableSizeSurfaces()
1307 {
1308 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1309
1310 DECODE_FUNC_CALL();
1311
1312 //Random values for coordinates
1313 uint32_t coordsWidth = MOS_ROUNDUP_SHIFT(m_picParams->m_superResUpscaledWidthMinus1 + 1, 6);
1314 uint32_t coordsHeight = MOS_ROUNDUP_SHIFT(m_picParams->m_superResUpscaledHeightMinus1 + 1, 6);
1315 uint32_t allocSize = MOS_ALIGN_CEIL(coordsWidth * coordsHeight * sizeof(int32_t), CODECHAL_PAGE_SIZE);
1316
1317 if (m_coordinatesRandomValuesSurfaceArray == nullptr)
1318 {
1319 m_coordinatesRandomValuesSurfaceArray= m_allocator->AllocateBufferArray(
1320 allocSize,
1321 "FilmGrainGRVCoordinateSurface",
1322 m_bufferPoolDepth,
1323 resourceInternalReadWriteCache,
1324 notLockableVideoMem);
1325 DECODE_CHK_NULL(m_coordinatesRandomValuesSurfaceArray);
1326 m_coordinatesRandomValuesSurface = m_coordinatesRandomValuesSurfaceArray->Fetch();
1327 DECODE_CHK_NULL(m_coordinatesRandomValuesSurface);
1328 }
1329 else
1330 {
1331 auto &buffer = m_coordinatesRandomValuesSurfaceArray->Fetch();
1332 DECODE_CHK_NULL(buffer);
1333 DECODE_CHK_STATUS(m_allocator->Resize(
1334 buffer, allocSize, notLockableVideoMem));
1335 m_coordinatesRandomValuesSurface = buffer;
1336 }
1337 m_coordinateSurfaceSize = allocSize;
1338
1339 return eStatus;
1340 }
1341
InitScalingFunction(uint8_t * pointValue,uint8_t * pointScaling,uint8_t numPoints,int16_t * scalingLUT)1342 MOS_STATUS Av1DecodeFilmGrainG12::InitScalingFunction(
1343 uint8_t *pointValue, //corresponds to scaling_points[][0]
1344 uint8_t *pointScaling, //corresponds to scaling_points[][1]
1345 uint8_t numPoints,
1346 int16_t *scalingLUT)
1347 {
1348 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1349
1350 DECODE_FUNC_CALL();
1351
1352 DECODE_CHK_NULL(pointValue);
1353 DECODE_CHK_NULL(pointScaling);
1354 DECODE_CHK_NULL(scalingLUT);
1355
1356 if (numPoints == 0)
1357 {
1358 return MOS_STATUS_SUCCESS;
1359 }
1360
1361 for (auto i = 0; i < pointValue[0]; i++)
1362 {
1363 scalingLUT[i] = pointScaling[0];
1364 }
1365
1366 for (auto point = 0; point < numPoints - 1; point++)
1367 {
1368 int32_t delta_y = pointScaling[point + 1] - pointScaling[point];
1369 int32_t delta_x = pointValue[point + 1] - pointValue[point];
1370
1371 int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
1372
1373 for (auto x = 0; x < delta_x; x++)
1374 {
1375 scalingLUT[pointValue[point] + x] = pointScaling[point] + (int32_t)((x * delta + 32768) >> 16);
1376 }
1377 }
1378
1379 for (uint32_t i = pointValue[numPoints - 1]; i < 256; i++)
1380 {
1381 scalingLUT[i] = pointScaling[numPoints - 1];
1382 }
1383
1384 return eStatus;
1385 }
1386
PreProcScalingPointsAndLUTs()1387 MOS_STATUS Av1DecodeFilmGrainG12::PreProcScalingPointsAndLUTs()
1388 {
1389 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1390
1391 DECODE_FUNC_CALL();
1392
1393 memset(m_scalingLutY, 0, sizeof(*m_scalingLutY) * 256);
1394 memset(m_scalingLutCb, 0, sizeof(*m_scalingLutCb) * 256);
1395 memset(m_scalingLutCr, 0, sizeof(*m_scalingLutCr) * 256);
1396
1397 // Check film grain parameter of the luma component
1398 if (m_picParams->m_filmGrainParams.m_numYPoints > 14)
1399 {
1400 DECODE_ASSERTMESSAGE("Invalid film grain num_y_points (should be in [0, 14]) in pic parameter!");
1401 return MOS_STATUS_INVALID_PARAMETER;
1402 }
1403 for (auto i = 1; i < m_picParams->m_filmGrainParams.m_numYPoints; i++)
1404 {
1405 if (m_picParams->m_filmGrainParams.m_pointYValue[i] <= m_picParams->m_filmGrainParams.m_pointYValue[i - 1])
1406 {
1407 DECODE_ASSERTMESSAGE("Invalid film grain point_y_value (point_y_value[%d] should be greater than point_y_value[%d]) in pic parameter!", i, i - 1);
1408 return MOS_STATUS_INVALID_PARAMETER;
1409 }
1410 }
1411 DECODE_CHK_STATUS(InitScalingFunction(
1412 m_picParams->m_filmGrainParams.m_pointYValue,
1413 m_picParams->m_filmGrainParams.m_pointYScaling,
1414 m_picParams->m_filmGrainParams.m_numYPoints,
1415 m_scalingLutY));
1416
1417 if (m_picParams->m_filmGrainParams.m_filmGrainInfoFlags.m_fields.m_chromaScalingFromLuma)
1418 {
1419 MOS_SecureMemcpy(m_scalingLutCb, sizeof(int16_t) * 256, m_scalingLutY, sizeof(int16_t) * 256);
1420 MOS_SecureMemcpy(m_scalingLutCr, sizeof(int16_t) * 256, m_scalingLutY, sizeof(int16_t) * 256);
1421 }
1422 else
1423 {
1424 // Check film grain parameter of the cb component
1425 if (m_picParams->m_filmGrainParams.m_numCbPoints > 10)
1426 {
1427 DECODE_ASSERTMESSAGE("Invalid film grain num_cb_points (should be in [0, 10]) in pic parameter!");
1428 return MOS_STATUS_INVALID_PARAMETER;
1429 }
1430 for (auto i = 1; i < m_picParams->m_filmGrainParams.m_numCbPoints; i++)
1431 {
1432 if (m_picParams->m_filmGrainParams.m_pointCbValue[i] <= m_picParams->m_filmGrainParams.m_pointCbValue[i - 1])
1433 {
1434 DECODE_ASSERTMESSAGE("Invalid film grain point_cb_value (point_cb_value[%d] should be greater than point_cb_value[%d]) in pic parameter!", i, i - 1);
1435 return MOS_STATUS_INVALID_PARAMETER;
1436 }
1437 }
1438 DECODE_CHK_STATUS(InitScalingFunction(
1439 m_picParams->m_filmGrainParams.m_pointCbValue,
1440 m_picParams->m_filmGrainParams.m_pointCbScaling,
1441 m_picParams->m_filmGrainParams.m_numCbPoints,
1442 m_scalingLutCb));
1443
1444 // Check film grain parameter of the cr component
1445 if (m_picParams->m_filmGrainParams.m_numCrPoints > 10)
1446 {
1447 DECODE_ASSERTMESSAGE("Invalid film grain num_cr_points (should be in [0, 10]) in pic parameter!");
1448 return MOS_STATUS_INVALID_PARAMETER;
1449 }
1450 for (auto i = 1; i < m_picParams->m_filmGrainParams.m_numCrPoints; i++)
1451 {
1452 if (m_picParams->m_filmGrainParams.m_pointCrValue[i] <= m_picParams->m_filmGrainParams.m_pointCrValue[i - 1])
1453 {
1454 DECODE_ASSERTMESSAGE("Invalid film grain point_cr_value (point_cr_value[%d] should be greater than point_cr_value[%d]) in pic parameter!", i, i - 1);
1455 return MOS_STATUS_INVALID_PARAMETER;
1456 }
1457 }
1458 DECODE_CHK_STATUS(InitScalingFunction(
1459 m_picParams->m_filmGrainParams.m_pointCrValue,
1460 m_picParams->m_filmGrainParams.m_pointCrScaling,
1461 m_picParams->m_filmGrainParams.m_numCrPoints,
1462 m_scalingLutCr));
1463 }
1464
1465 return eStatus;
1466 }
1467
PreProcArCoeffs(int16_t * yCoeff,int16_t * uCoeff,int16_t * vCoeff)1468 MOS_STATUS Av1DecodeFilmGrainG12::PreProcArCoeffs(
1469 int16_t *yCoeff,
1470 int16_t *uCoeff,
1471 int16_t *vCoeff)
1472 {
1473
1474 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1475
1476 DECODE_FUNC_CALL();
1477
1478 uint32_t arCoeffLag = m_picParams->m_filmGrainParams.m_filmGrainInfoFlags.m_fields.m_arCoeffLag;
1479 if (arCoeffLag == 3)
1480 {
1481 for (auto i = 0; i < 24; i++)
1482 {
1483 yCoeff[i] = m_picParams->m_filmGrainParams.m_arCoeffsY[i];
1484 }
1485 for (auto i = 0; i < 25; i++)
1486 {
1487 uCoeff[i] = m_picParams->m_filmGrainParams.m_arCoeffsCb[i];
1488 }
1489 for (auto i = 0; i < 25; i++)
1490 {
1491 vCoeff[i] = m_picParams->m_filmGrainParams.m_arCoeffsCr[i];
1492 }
1493
1494 return MOS_STATUS_SUCCESS;
1495 }
1496
1497 memset(yCoeff, 0, 24 * sizeof(int16_t));
1498 memset(uCoeff, 0, 25 * sizeof(int16_t));
1499 memset(vCoeff, 0, 25 * sizeof(int16_t));
1500
1501 uint32_t mappedIdxLag2[13] = { 8, 9, 10, 11, 12, 15, 16, 17, 18, 19, 22, 23, 24 };
1502 uint32_t mappedIdxLag1[5] = { 16, 17, 18, 23, 24 };
1503 uint32_t mappedIdxLag0[1] = { 24 };
1504
1505 uint32_t numPosLuma = 2 * arCoeffLag * (arCoeffLag + 1);
1506 uint32_t numPosChroma = numPosLuma;
1507 if (m_picParams->m_filmGrainParams.m_numYPoints > 0)
1508 {
1509 ++numPosChroma;
1510 }
1511
1512 uint32_t *mappedIdx = nullptr; mappedIdxLag2;
1513 if (arCoeffLag == 2)
1514 {
1515 mappedIdx = mappedIdxLag2;
1516 }
1517 else if (arCoeffLag == 1)
1518 {
1519 mappedIdx = mappedIdxLag1;
1520 }
1521 else if (arCoeffLag == 0)
1522 {
1523 mappedIdx = mappedIdxLag0;
1524 }
1525 else
1526 {
1527 return MOS_STATUS_INVALID_PARAMETER;
1528 }
1529
1530 for (uint32_t i = 0; i < numPosLuma; i++)
1531 {
1532 yCoeff[mappedIdx[i]] = m_picParams->m_filmGrainParams.m_arCoeffsY[i];
1533 }
1534 for (uint32_t i = 0; i < numPosChroma; i++)
1535 {
1536 uCoeff[mappedIdx[i]] = m_picParams->m_filmGrainParams.m_arCoeffsCb[i];
1537 vCoeff[mappedIdx[i]] = m_picParams->m_filmGrainParams.m_arCoeffsCr[i];
1538 }
1539
1540 return eStatus;
1541 }
1542
SetFrameStates(CodecAv1PicParams * picParams)1543 MOS_STATUS Av1DecodeFilmGrainG12::SetFrameStates(
1544 CodecAv1PicParams *picParams)
1545 {
1546 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1547
1548 DECODE_CHK_NULL(picParams);
1549
1550 DECODE_FUNC_CALL();
1551
1552 // Picture parameters from decoder
1553 m_picParams = picParams;
1554
1555 //Pre-process Scaling Points related params and calculate scaling LUTs
1556 DECODE_CHK_STATUS(PreProcScalingPointsAndLUTs());
1557
1558 // Initialize surfaces
1559 int16_t coeffY[24], coeffU[25], coeffV[25];
1560 DECODE_CHK_STATUS(PreProcArCoeffs(coeffY,coeffU, coeffV));
1561
1562 // Y coefficients surface as input of RegressPhase1
1563 m_yCoefficientsSurface = m_yCoefficientsSurfaceArray->Fetch();
1564 DECODE_CHK_NULL(m_yCoefficientsSurface);
1565 auto data = (int16_t *)m_allocator->LockResourceForWrite(&m_yCoefficientsSurface->OsResource);
1566 DECODE_CHK_NULL(data);
1567 MOS_SecureMemcpy(data, 24 * sizeof(int16_t), coeffY, 24 * sizeof(int16_t));
1568
1569 //Y/U/V dithering surfaces as out of RegressPhase2
1570 m_yDitheringSurface = m_yDitheringSurfaceArray->Fetch();
1571 DECODE_CHK_NULL(m_yDitheringSurface);
1572 m_uDitheringSurface = m_uDitheringSurfaceArray->Fetch();
1573 DECODE_CHK_NULL(m_uDitheringSurface);
1574 m_vDitheringSurface = m_vDitheringSurfaceArray->Fetch();
1575 DECODE_CHK_NULL(m_vDitheringSurface);
1576
1577 //Y/U/V coefficients surfaces as input of RegressPhase2
1578 m_yCoeffSurface = m_yCoeffSurfaceArray->Fetch();
1579 DECODE_CHK_NULL(m_yCoeffSurface);
1580 data = (int16_t *)m_allocator->LockResourceForWrite(&m_yCoeffSurface->OsResource);
1581 DECODE_CHK_NULL(data);
1582 MOS_SecureMemcpy(data, 24 * sizeof(int16_t), coeffY, 24 * sizeof(int16_t));
1583
1584 m_uCoeffSurface = m_uCoeffSurfaceArray->Fetch();
1585 DECODE_CHK_NULL(m_uCoeffSurface);
1586 data = (int16_t *)m_allocator->LockResourceForWrite(&m_uCoeffSurface->OsResource);
1587 DECODE_CHK_NULL(data);
1588 MOS_SecureMemcpy(data, 25 * sizeof(int16_t), coeffU, 25 * sizeof(int16_t));
1589
1590 m_vCoeffSurface = m_vCoeffSurfaceArray->Fetch();
1591 DECODE_CHK_NULL(m_vCoeffSurface);
1592 data = (int16_t *)m_allocator->LockResourceForWrite(&m_vCoeffSurface->OsResource);
1593 DECODE_CHK_NULL(data);
1594 MOS_SecureMemcpy(data, 25 * sizeof(int16_t), coeffV, 25 * sizeof(int16_t));
1595
1596 // Scaling LUTs surfaces
1597 m_yGammaLUTSurface = m_yGammaLUTSurfaceArray->Fetch();
1598 DECODE_CHK_NULL(m_yGammaLUTSurface);
1599 data = (int16_t *)m_allocator->LockResourceForWrite(&m_yGammaLUTSurface->OsResource);
1600 DECODE_CHK_NULL(data);
1601 MOS_SecureMemcpy(data, 256 * sizeof(int16_t), m_scalingLutY, 256 * sizeof(int16_t));
1602 data[256] = m_scalingLutY[255];
1603
1604 m_uGammaLUTSurface = m_uGammaLUTSurfaceArray->Fetch();
1605 DECODE_CHK_NULL(m_uGammaLUTSurface);
1606 data = (int16_t *)m_allocator->LockResourceForWrite(&m_uGammaLUTSurface->OsResource);
1607 DECODE_CHK_NULL(data);
1608 MOS_SecureMemcpy(data, 256 * sizeof(int16_t), m_scalingLutCb, 256 * sizeof(int16_t));
1609 data[256] = m_scalingLutCb[255];
1610
1611 m_vGammaLUTSurface = m_vGammaLUTSurfaceArray->Fetch();
1612 DECODE_CHK_NULL(m_vGammaLUTSurface);
1613 data = (int16_t *)m_allocator->LockResourceForWrite(&m_vGammaLUTSurface->OsResource);
1614 DECODE_CHK_NULL(data);
1615 MOS_SecureMemcpy(data, 256 * sizeof(int16_t), m_scalingLutCr, 256 * sizeof(int16_t));
1616 data[256] = m_scalingLutCr[255];
1617
1618 return eStatus;
1619 }
1620
1621 } // namespace Decode
1622