1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
14 * of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Zhao, Yakui <yakui.zhao@intel.com>
26 * Chen, Peng <peng.c.chen@intel.com>
27 *
28 */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdbool.h>
33 #include <string.h>
34 #include <math.h>
35 #include <assert.h>
36 #include <va/va.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40 #include "intel_media.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_encoder.h"
44 #include "i965_encoder_api.h"
45 #include "i965_encoder_utils.h"
46 #include "gen10_hcp_common.h"
47 #include "gen10_hevc_enc_common.h"
48 #include "gen10_hevc_enc_kernel.h"
49 #include "gen10_hevc_enc_const_def.h"
50 #include "gen10_hevc_enc_kernels_binary.h"
51 #include "gen10_hevc_encoder.h"
52
53 static bool
gen10_hevc_get_kernel_header_and_size(void * pvbinary,int binary_size,GEN10_HEVC_ENC_OPERATION operation,int krnstate_idx,struct i965_kernel * ret_kernel)54 gen10_hevc_get_kernel_header_and_size(void *pvbinary,
55 int binary_size,
56 GEN10_HEVC_ENC_OPERATION operation,
57 int krnstate_idx,
58 struct i965_kernel *ret_kernel)
59 {
60 typedef uint32_t BIN_PTR[4];
61
62 gen10_hevc_kernel_header *pkh_table;
63 gen10_intel_kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
64 char *bin_start;
65 int next_krnoffset;
66 int not_found = 0;
67
68 if (!pvbinary || !ret_kernel)
69 return false;
70
71 bin_start = (char *)pvbinary;
72 pkh_table = (gen10_hevc_kernel_header *)pvbinary;
73 pinvalid_entry = &(pkh_table->hevc_last) + 1;
74 next_krnoffset = binary_size;
75
76 switch (operation) {
77 case GEN10_HEVC_ENC_SCALING_CONVERSION:
78 pcurr_header = &pkh_table->hevc_ds_convert;
79 break;
80 case GEN10_HEVC_ENC_ME:
81 pcurr_header = &pkh_table->hevc_hme;
82 break;
83 case GEN10_HEVC_ENC_BRC:
84 switch (krnstate_idx) {
85 case 0:
86 pcurr_header = &pkh_table->hevc_brc_init;
87 break;
88 case 1:
89 pcurr_header = &pkh_table->hevc_brc_init;
90 break;
91 case 2:
92 pcurr_header = &pkh_table->hevc_brc_update;
93 break;
94 case 3:
95 pcurr_header = &pkh_table->hevc_brc_lcuqp;
96 break;
97 default:
98 not_found = 1;
99 break;
100 }
101 break;
102
103 case GEN10_HEVC_ENC_MBENC:
104 switch (krnstate_idx) {
105 case 0:
106 pcurr_header = &pkh_table->hevc_intra;
107 break;
108 case 1:
109 pcurr_header = &pkh_table->hevc_enc;
110 break;
111 case 2:
112 pcurr_header = &pkh_table->hevc_enc_lcu64;
113 break;
114 default:
115 not_found = 1;
116 break;
117 }
118
119 break;
120 default:
121 not_found = 1;
122 break;
123 }
124
125 if (not_found) {
126 return false;
127 }
128
129 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
130
131 pnext_header = (pcurr_header + 1);
132 if (pnext_header < pinvalid_entry)
133 next_krnoffset = pnext_header->kernel_start_pointer << 6;
134
135 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
136
137 return true;
138 }
139
140 #define MAX_HEVC_ENCODER_SURFACES 64
141 #define MAX_URB_SIZE 4096
142 #define NUM_KERNELS_PER_GPE_CONTEXT 1
143
144 static void
gen10_hevc_init_gpe_context(VADriverContextP ctx,struct i965_gpe_context * gpe_context,struct gen10_hevc_enc_kernel_parameter * kernel_param)145 gen10_hevc_init_gpe_context(VADriverContextP ctx,
146 struct i965_gpe_context *gpe_context,
147 struct gen10_hevc_enc_kernel_parameter *kernel_param)
148 {
149 struct i965_driver_data *i965 = i965_driver_data(ctx);
150
151 gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
152
153 gpe_context->sampler.entry_size = 0;
154 gpe_context->sampler.max_entries = 0;
155 if (kernel_param->sampler_size) {
156 gpe_context->sampler.entry_size = kernel_param->sampler_size;
157 gpe_context->sampler.max_entries = 1;
158 }
159
160 gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
161 gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
162
163 gpe_context->surface_state_binding_table.max_entries = MAX_HEVC_ENCODER_SURFACES;
164 gpe_context->surface_state_binding_table.binding_table_offset = 0;
165 gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_HEVC_ENCODER_SURFACES * 4, 64);
166 gpe_context->surface_state_binding_table.length = ALIGN(MAX_HEVC_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_HEVC_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
167
168 if (i965->intel.eu_total > 0)
169 gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
170 else
171 gpe_context->vfe_state.max_num_threads = 112;
172
173 gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
174 gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
175 gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
176 gpe_context->vfe_state.curbe_allocation_size -
177 ((gpe_context->idrt.entry_size >> 5) *
178 gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
179 gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
180 gpe_context->vfe_state.gpgpu_mode = 0;
181 }
182
183 static void
gen10_hevc_init_vfe_scoreboard(struct i965_gpe_context * gpe_context,struct gen10_hevc_enc_scoreboard_parameter * scoreboard_param)184 gen10_hevc_init_vfe_scoreboard(struct i965_gpe_context *gpe_context,
185 struct gen10_hevc_enc_scoreboard_parameter *scoreboard_param)
186 {
187 if (!gpe_context || !scoreboard_param)
188 return;
189
190 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
191 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
192 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
193
194 if (scoreboard_param->no_dependency) {
195 gpe_context->vfe_desc5.scoreboard0.mask = 0x0;
196 gpe_context->vfe_desc5.scoreboard0.enable = 0;
197 gpe_context->vfe_desc5.scoreboard0.type = 0;
198
199 gpe_context->vfe_desc6.dword = 0;
200 gpe_context->vfe_desc7.dword = 0;
201 } else {
202 gpe_context->vfe_desc5.scoreboard0.mask = 0x7F;
203 gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
204 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
205
206 gpe_context->vfe_desc6.scoreboard1.delta_x1 = -1;
207 gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
208
209 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 0;
210 gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
211
212 gpe_context->vfe_desc6.scoreboard1.delta_x3 = 1;
213 gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
214
215 gpe_context->vfe_desc7.scoreboard2.delta_x4 = 0;
216 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 0;
217 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
218 gpe_context->vfe_desc7.scoreboard2.delta_y5 = 0;
219 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 0;
220 gpe_context->vfe_desc7.scoreboard2.delta_y6 = 0;
221 gpe_context->vfe_desc7.scoreboard2.delta_x7 = 0;
222 gpe_context->vfe_desc7.scoreboard2.delta_y7 = 0;
223 }
224 }
225
226 static void
gen10_hevc_vme_init_scaling_context(VADriverContextP ctx,struct gen10_hevc_enc_context * vme_context,struct gen10_scaling_context * scaling_context)227 gen10_hevc_vme_init_scaling_context(VADriverContextP ctx,
228 struct gen10_hevc_enc_context *vme_context,
229 struct gen10_scaling_context *scaling_context)
230 {
231 struct gen10_hevc_enc_state *hevc_state;
232 struct i965_gpe_context *gpe_context = NULL;
233 struct gen10_hevc_enc_kernel_parameter kernel_param;
234 struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
235 struct i965_kernel scale_kernel;
236
237 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
238
239 kernel_param.curbe_size = sizeof(gen10_hevc_scaling_curbe_data);
240 kernel_param.inline_data_size = sizeof(gen10_hevc_scaling_curbe_data);
241 kernel_param.sampler_size = 0;
242
243 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
244 scoreboard_param.mask = 0xFF;
245 scoreboard_param.enable = hevc_state->use_hw_scoreboard;
246 scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
247 scoreboard_param.no_dependency = true;
248
249 gpe_context = &scaling_context->gpe_context;
250 gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
251 gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
252
253 memset(&scale_kernel, 0, sizeof(scale_kernel));
254
255 gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
256 sizeof(gen10_media_hevc_kernels),
257 GEN10_HEVC_ENC_SCALING_CONVERSION,
258 0,
259 &scale_kernel);
260
261 gen8_gpe_load_kernels(ctx,
262 gpe_context,
263 &scale_kernel,
264 1);
265 }
266
267 static void
gen10_hevc_vme_init_me_context(VADriverContextP ctx,struct gen10_hevc_enc_context * vme_context,struct gen10_me_context * me_context)268 gen10_hevc_vme_init_me_context(VADriverContextP ctx,
269 struct gen10_hevc_enc_context *vme_context,
270 struct gen10_me_context *me_context)
271 {
272 struct gen10_hevc_enc_state *hevc_state;
273 struct i965_gpe_context *gpe_context = NULL;
274 struct gen10_hevc_enc_kernel_parameter kernel_param;
275 struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
276 struct i965_kernel me_kernel;
277
278 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
279
280 kernel_param.curbe_size = sizeof(gen10_hevc_me_curbe_data);
281 kernel_param.inline_data_size = sizeof(gen10_hevc_me_curbe_data);
282 kernel_param.sampler_size = 0;
283
284 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
285 scoreboard_param.mask = 0xFF;
286 scoreboard_param.enable = hevc_state->use_hw_scoreboard;
287 scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
288 scoreboard_param.no_dependency = true;
289
290 gpe_context = &me_context->gpe_context;
291 gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
292 gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
293
294 memset(&me_kernel, 0, sizeof(me_kernel));
295
296 gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
297 sizeof(gen10_media_hevc_kernels),
298 GEN10_HEVC_ENC_ME,
299 0,
300 &me_kernel);
301
302 gen8_gpe_load_kernels(ctx,
303 gpe_context,
304 &me_kernel,
305 1);
306 }
307
308 static void
gen10_hevc_vme_init_mbenc_context(VADriverContextP ctx,struct gen10_hevc_enc_context * vme_context,struct gen10_mbenc_context * mbenc_context)309 gen10_hevc_vme_init_mbenc_context(VADriverContextP ctx,
310 struct gen10_hevc_enc_context *vme_context,
311 struct gen10_mbenc_context *mbenc_context)
312 {
313 struct gen10_hevc_enc_state *hevc_state;
314 struct i965_gpe_context *gpe_context = NULL;
315 struct gen10_hevc_enc_kernel_parameter kernel_param;
316 struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
317 struct i965_kernel mbenc_kernel;
318
319 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
320
321 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
322 scoreboard_param.mask = 0xFF;
323 scoreboard_param.enable = hevc_state->use_hw_scoreboard;
324 scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
325
326 gpe_context = &mbenc_context->gpe_contexts[GEN10_HEVC_MBENC_I_KRNIDX_G10];
327 kernel_param.curbe_size = sizeof(gen10_hevc_mbenc_intra_curbe_data);
328 kernel_param.inline_data_size = sizeof(gen10_hevc_mbenc_intra_curbe_data);
329 kernel_param.sampler_size = 0;
330 scoreboard_param.no_dependency = false;
331 gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
332
333 gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
334
335 memset(&mbenc_kernel, 0, sizeof(mbenc_kernel));
336
337 gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
338 sizeof(gen10_media_hevc_kernels),
339 GEN10_HEVC_ENC_MBENC,
340 GEN10_HEVC_MBENC_I_KRNIDX_G10,
341 & mbenc_kernel);
342
343 gen8_gpe_load_kernels(ctx,
344 gpe_context,
345 &mbenc_kernel,
346 1);
347
348 gpe_context = &mbenc_context->gpe_contexts[GEN10_HEVC_MBENC_INTER_LCU32_KRNIDX_G10];
349 kernel_param.curbe_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
350 kernel_param.inline_data_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
351 kernel_param.sampler_size = 0;
352 scoreboard_param.no_dependency = false;
353 gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
354 gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
355
356 memset(&mbenc_kernel, 0, sizeof(mbenc_kernel));
357
358 gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
359 sizeof(gen10_media_hevc_kernels),
360 GEN10_HEVC_ENC_MBENC,
361 GEN10_HEVC_MBENC_INTER_LCU32_KRNIDX_G10,
362 &mbenc_kernel);
363 gen8_gpe_load_kernels(ctx,
364 gpe_context,
365 &mbenc_kernel,
366 1);
367
368 gpe_context = &mbenc_context->gpe_contexts[GEN10_HEVC_MBENC_INTER_LCU64_KRNIDX_G10];
369 kernel_param.curbe_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
370 kernel_param.inline_data_size = sizeof(gen10_hevc_mbenc_inter_curbe_data);
371 kernel_param.sampler_size = 0;
372 scoreboard_param.no_dependency = false;
373 gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
374 gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
375
376 memset(&mbenc_kernel, 0, sizeof(mbenc_kernel));
377
378 gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
379 sizeof(gen10_media_hevc_kernels),
380 GEN10_HEVC_ENC_MBENC,
381 GEN10_HEVC_MBENC_INTER_LCU64_KRNIDX_G10,
382 &mbenc_kernel);
383
384 gen8_gpe_load_kernels(ctx,
385 gpe_context,
386 &mbenc_kernel,
387 1);
388 }
389
390 static void
gen10_hevc_vme_init_brc_context(VADriverContextP ctx,struct gen10_hevc_enc_context * vme_context,struct gen10_brc_context * brc_context)391 gen10_hevc_vme_init_brc_context(VADriverContextP ctx,
392 struct gen10_hevc_enc_context *vme_context,
393 struct gen10_brc_context *brc_context)
394 {
395 struct gen10_hevc_enc_state *hevc_state;
396 struct i965_gpe_context *gpe_context = NULL;
397 struct gen10_hevc_enc_kernel_parameter kernel_param;
398 struct gen10_hevc_enc_scoreboard_parameter scoreboard_param;
399 struct i965_kernel brc_kernel;
400
401 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
402
403 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
404 scoreboard_param.mask = 0xFF;
405 scoreboard_param.enable = hevc_state->use_hw_scoreboard;
406 scoreboard_param.type = hevc_state->use_hw_non_stalling_scoreboard;
407
408 gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_INIT];
409 kernel_param.curbe_size = sizeof(gen10_hevc_brc_init_curbe_data);
410 kernel_param.inline_data_size = sizeof(gen10_hevc_brc_init_curbe_data);
411 kernel_param.sampler_size = 0;
412 scoreboard_param.no_dependency = true;
413 gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
414 gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
415
416 memset(&brc_kernel, 0, sizeof(brc_kernel));
417
418 gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
419 sizeof(gen10_media_hevc_kernels),
420 GEN10_HEVC_ENC_BRC,
421 GEN10_HEVC_BRC_INIT,
422 &brc_kernel);
423
424 gen8_gpe_load_kernels(ctx,
425 gpe_context,
426 &brc_kernel,
427 1);
428
429 gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_RESET];
430 kernel_param.curbe_size = sizeof(gen10_hevc_brc_init_curbe_data);
431 kernel_param.inline_data_size = sizeof(gen10_hevc_brc_init_curbe_data);
432 kernel_param.sampler_size = 0;
433 scoreboard_param.no_dependency = true;
434 gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
435 gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
436
437 memset(&brc_kernel, 0, sizeof(brc_kernel));
438
439 gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
440 sizeof(gen10_media_hevc_kernels),
441 GEN10_HEVC_ENC_BRC,
442 GEN10_HEVC_BRC_RESET,
443 &brc_kernel);
444
445 gen8_gpe_load_kernels(ctx,
446 gpe_context,
447 &brc_kernel,
448 1);
449
450 gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_FRAME_UPDATE];
451 kernel_param.curbe_size = sizeof(gen10_hevc_brc_update_curbe_data);
452 kernel_param.inline_data_size = sizeof(gen10_hevc_brc_update_curbe_data);
453 kernel_param.sampler_size = 0;
454 scoreboard_param.no_dependency = true;
455 gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
456 gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
457
458 memset(&brc_kernel, 0, sizeof(brc_kernel));
459
460 gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
461 sizeof(gen10_media_hevc_kernels),
462 GEN10_HEVC_ENC_BRC,
463 GEN10_HEVC_BRC_FRAME_UPDATE,
464 &brc_kernel);
465
466 gen8_gpe_load_kernels(ctx,
467 gpe_context,
468 &brc_kernel,
469 1);
470
471 gpe_context = &brc_context->gpe_contexts[GEN10_HEVC_BRC_LCU_UPDATE];
472 kernel_param.curbe_size = sizeof(gen10_hevc_brc_update_curbe_data);
473 kernel_param.inline_data_size = sizeof(gen10_hevc_brc_update_curbe_data);
474 kernel_param.sampler_size = 0;
475 scoreboard_param.no_dependency = true;
476 gen10_hevc_init_gpe_context(ctx, gpe_context, &kernel_param);
477 gen10_hevc_init_vfe_scoreboard(gpe_context, &scoreboard_param);
478
479 memset(&brc_kernel, 0, sizeof(brc_kernel));
480
481 gen10_hevc_get_kernel_header_and_size((void *)gen10_media_hevc_kernels,
482 sizeof(gen10_media_hevc_kernels),
483 GEN10_HEVC_ENC_BRC,
484 GEN10_HEVC_BRC_LCU_UPDATE,
485 &brc_kernel);
486
487 gen8_gpe_load_kernels(ctx,
488 gpe_context,
489 &brc_kernel,
490 1);
491 }
492
493 static void
gen10_hevc_vme_init_kernels_context(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct gen10_hevc_enc_context * vme_context)494 gen10_hevc_vme_init_kernels_context(VADriverContextP ctx,
495 struct intel_encoder_context *encoder_context,
496 struct gen10_hevc_enc_context *vme_context)
497 {
498 gen10_hevc_vme_init_scaling_context(ctx, vme_context, &vme_context->scaling_context);
499 gen10_hevc_vme_init_me_context(ctx, vme_context, &vme_context->me_context);
500 gen10_hevc_vme_init_mbenc_context(ctx, vme_context, &vme_context->mbenc_context);
501 gen10_hevc_vme_init_brc_context(ctx, vme_context, &vme_context->brc_context);
502 }
503
504 static void
gen10_hevc_free_surface(void ** data)505 gen10_hevc_free_surface(void **data)
506 {
507 struct gen10_hevc_surface_priv *surface_priv;
508
509 if (!data || !*data)
510 return;
511
512 surface_priv = *data;
513
514 if (surface_priv->scaled_4x_surface) {
515 i965_free_gpe_resource(&surface_priv->gpe_scaled_4x_surface);
516
517 i965_DestroySurfaces(surface_priv->ctx, &surface_priv->scaled_4x_surface_id, 1);
518 surface_priv->scaled_4x_surface_id = VA_INVALID_SURFACE;
519 surface_priv->scaled_4x_surface = NULL;
520 }
521
522 if (surface_priv->scaled_16x_surface) {
523 i965_free_gpe_resource(&surface_priv->gpe_scaled_16x_surface);
524
525 i965_DestroySurfaces(surface_priv->ctx, &surface_priv->scaled_16x_surface_id, 1);
526 surface_priv->scaled_16x_surface_id = VA_INVALID_SURFACE;
527 surface_priv->scaled_16x_surface = NULL;
528 }
529
530 if (surface_priv->scaled_2x_surface) {
531 i965_free_gpe_resource(&surface_priv->gpe_scaled_2x_surface);
532
533 i965_DestroySurfaces(surface_priv->ctx, &surface_priv->scaled_2x_surface_id, 1);
534 surface_priv->scaled_2x_surface_id = VA_INVALID_SURFACE;
535 surface_priv->scaled_2x_surface = NULL;
536 }
537
538 if (surface_priv->converted_surface) {
539 i965_free_gpe_resource(&surface_priv->gpe_converted_surface);
540
541 i965_DestroySurfaces(surface_priv->ctx, &surface_priv->converted_surface_id, 1);
542 surface_priv->converted_surface_id = VA_INVALID_SURFACE;
543 surface_priv->converted_surface = NULL;
544 }
545
546 i965_free_gpe_resource(&surface_priv->motion_vector_temporal);
547
548 free(surface_priv);
549
550 *data = NULL;
551
552 return;
553 }
554
/* Create (or reuse) the per-surface private data needed by the gen10 HEVC
 * encoder: optional 2x-scaled surface (64x64 LCU path), optional 8-bit
 * converted surface (10-bit input), optional 4x/16x downscaled surfaces
 * (HME / 16x ME), and the temporal motion-vector buffer.
 *
 * Returns VA_STATUS_SUCCESS on success. On allocation failure, partially
 * created resources are left attached to obj_surface->private_data and will
 * be reclaimed later through gen10_hevc_free_surface.
 */
static VAStatus
gen10_hevc_init_surface_priv(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context,
                             struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
    struct gen10_hevc_enc_frame_info *frame_info;
    struct gen10_hevc_enc_state *hevc_state;
    struct gen10_hevc_surface_priv *surface_priv;
    int downscaled_width_4x = 0, downscaled_height_4x = 0;
    int downscaled_width_16x = 0, downscaled_height_16x = 0;
    int frame_width = 0, frame_height = 0, size;

    if (!obj_surface || !obj_surface->bo)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* Private data owned by some other component (different destructor):
     * release it so this encoder can attach its own. */
    if (obj_surface->private_data &&
        obj_surface->free_private_data != gen10_hevc_free_surface) {
        obj_surface->free_private_data(&obj_surface->private_data);
        obj_surface->private_data = NULL;
    }

    hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    frame_info = &vme_context->frame_info;

    if (obj_surface->private_data) {
        surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);

        /* Reuse the existing private data if every geometry/format parameter
         * still matches the current encode state. */
        if ((surface_priv->frame_width == frame_info->frame_width) &&
            (surface_priv->frame_height == frame_info->frame_height) &&
            (surface_priv->width_ctb == frame_info->width_in_lcu) &&
            (surface_priv->height_ctb == frame_info->height_in_lcu) &&
            (surface_priv->is_10bit == hevc_state->is_10bit) &&
            (surface_priv->is_64lcu == hevc_state->is_64lcu))
            return VA_STATUS_SUCCESS;

        /* Parameters changed: drop the stale private data and rebuild. */
        obj_surface->free_private_data(&obj_surface->private_data);
        obj_surface->private_data = NULL;
        surface_priv = NULL;
    }

    /* calloc keeps all ids/pointers zeroed, which the free path relies on
     * to skip surfaces that were never created. */
    surface_priv = calloc(1, sizeof(struct gen10_hevc_surface_priv));

    if (!surface_priv)
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    surface_priv->ctx = ctx;

    /* Attach the destructor before any allocation below so early-return
     * failures are still cleaned up by the surface teardown path. */
    obj_surface->private_data = surface_priv;
    obj_surface->free_private_data = gen10_hevc_free_surface;

    if (hevc_state->is_64lcu) {
        /* 2x-downscaled surface: dimensions aligned to the 64-pixel LCU,
         * then halved. */
        frame_width = ALIGN(frame_info->frame_width, 64) >> 1;
        frame_height = ALIGN(frame_info->frame_height, 64) >> 1;

        if (i965_CreateSurfaces(ctx,
                                frame_width,
                                frame_height,
                                VA_RT_FORMAT_YUV420,
                                1,
                                &surface_priv->scaled_2x_surface_id) != VA_STATUS_SUCCESS)
            return VA_STATUS_ERROR_ALLOCATION_FAILED;

        surface_priv->scaled_2x_surface = SURFACE(surface_priv->scaled_2x_surface_id);

        if (!surface_priv->scaled_2x_surface)
            return VA_STATUS_ERROR_ALLOCATION_FAILED;

        i965_check_alloc_surface_bo(ctx, surface_priv->scaled_2x_surface, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

        i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_scaled_2x_surface,
                                               surface_priv->scaled_2x_surface);
    }

    if (hevc_state->is_10bit) {
        /* 8-bit NV12 companion surface used as the converted source for
         * 10-bit input. */
        if (i965_CreateSurfaces(ctx,
                                frame_info->frame_width,
                                frame_info->frame_height,
                                VA_RT_FORMAT_YUV420,
                                1,
                                &surface_priv->converted_surface_id) != VA_STATUS_SUCCESS)
            return VA_STATUS_ERROR_ALLOCATION_FAILED;

        surface_priv->converted_surface = SURFACE(surface_priv->converted_surface_id);

        if (!surface_priv->converted_surface)
            return VA_STATUS_ERROR_ALLOCATION_FAILED;

        i965_check_alloc_surface_bo(ctx, surface_priv->converted_surface, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

        i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_converted_surface,
                                               surface_priv->converted_surface);
    }

    if (hevc_state->hme_supported) {
        /* 4x-downscaled surface for HME, dimensions aligned to 32. */
        downscaled_width_4x = ALIGN(frame_info->frame_width / 4, 32);
        downscaled_height_4x = ALIGN(frame_info->frame_height / 4, 32);

        if (i965_CreateSurfaces(ctx,
                                downscaled_width_4x,
                                downscaled_height_4x,
                                VA_RT_FORMAT_YUV420,
                                1,
                                &surface_priv->scaled_4x_surface_id) != VA_STATUS_SUCCESS)
            return VA_STATUS_ERROR_ALLOCATION_FAILED;

        surface_priv->scaled_4x_surface = SURFACE(surface_priv->scaled_4x_surface_id);

        if (!surface_priv->scaled_4x_surface)
            return VA_STATUS_ERROR_ALLOCATION_FAILED;

        i965_check_alloc_surface_bo(ctx, surface_priv->scaled_4x_surface, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

        i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_scaled_4x_surface,
                                               surface_priv->scaled_4x_surface);
    }

    if (hevc_state->hme_supported &&
        hevc_state->b16xme_supported) {
        /* 16x-downscaled surface, derived from the 4x dimensions. */
        downscaled_width_16x = ALIGN(downscaled_width_4x / 4, 32);
        downscaled_height_16x = ALIGN(downscaled_height_4x / 4, 32);

        if (i965_CreateSurfaces(ctx,
                                downscaled_width_16x,
                                downscaled_height_16x,
                                VA_RT_FORMAT_YUV420,
                                1,
                                &surface_priv->scaled_16x_surface_id) != VA_STATUS_SUCCESS)
            return VA_STATUS_ERROR_ALLOCATION_FAILED;

        surface_priv->scaled_16x_surface = SURFACE(surface_priv->scaled_16x_surface_id);

        if (!surface_priv->scaled_16x_surface)
            return VA_STATUS_ERROR_ALLOCATION_FAILED;

        i965_check_alloc_surface_bo(ctx, surface_priv->scaled_16x_surface, 1,
                                    VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

        i965_object_surface_to_2d_gpe_resource(&surface_priv->gpe_scaled_16x_surface,
                                               surface_priv->scaled_16x_surface);
    }

    frame_width = frame_info->frame_width;
    frame_height = frame_info->frame_height;

    /* Temporal MV buffer sized for the larger of the two block layouts,
     * in 64-byte units; entry count padded to an even number. */
    size = MAX(((frame_width + 63) >> 6) * ((frame_height + 15) >> 4),
               ((frame_width + 31) >> 5) * ((frame_height + 31) >> 5));
    size = ALIGN(size, 2) * 64;
    if (!i965_allocate_gpe_resource(i965->intel.bufmgr,
                                    &surface_priv->motion_vector_temporal,
                                    size,
                                    "Motion vector temporal buffer"))
        return VA_STATUS_ERROR_ALLOCATION_FAILED;

    /* Record the parameters this private data was built for, so the reuse
     * check above can detect any change. */
    surface_priv->is_10bit = hevc_state->is_10bit;
    surface_priv->is_64lcu = hevc_state->is_64lcu;
    surface_priv->frame_width = frame_info->frame_width;
    surface_priv->frame_height = frame_info->frame_height;
    surface_priv->width_ctb = frame_info->width_in_lcu;
    surface_priv->height_ctb = frame_info->height_in_lcu;

    return VA_STATUS_SUCCESS;
}
723
724 static void
gen10_hevc_free_enc_resources(void * context)725 gen10_hevc_free_enc_resources(void *context)
726 {
727 struct gen10_hevc_enc_context *vme_context = context;
728
729 if (!vme_context)
730 return;
731
732 i965_free_gpe_resource(&vme_context->res_mb_code_surface);
733
734 i965_free_gpe_resource(&vme_context->res_temp_curecord_lcu32_surface);
735 i965_free_gpe_resource(&vme_context->res_16x16_qp_data_surface);
736 i965_free_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
737 i965_free_gpe_resource(&vme_context->res_concurrent_tg_data);
738 i965_free_gpe_resource(&vme_context->res_cu_split_surface);
739 i965_free_gpe_resource(&vme_context->res_kernel_trace_data);
740 i965_free_gpe_resource(&vme_context->res_enc_const_table_intra);
741 i965_free_gpe_resource(&vme_context->res_enc_const_table_inter);
742 i965_free_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
743 i965_free_gpe_resource(&vme_context->res_scratch_surface);
744
745 i965_free_gpe_resource(&vme_context->res_temp2_curecord_lcu32_surface);
746 i965_free_gpe_resource(&vme_context->res_temp_curecord_surface_lcu64);
747 i965_free_gpe_resource(&vme_context->res_enc_scratch_buffer);
748 i965_free_gpe_resource(&vme_context->res_enc_scratch_lcu64_buffer);
749 i965_free_gpe_resource(&vme_context->res_64x64_dist_buffer);
750
751 i965_free_gpe_resource(&vme_context->res_jbq_header_buffer);
752 i965_free_gpe_resource(&vme_context->res_jbq_header_lcu64_buffer);
753 i965_free_gpe_resource(&vme_context->res_jbq_data_lcu32_surface);
754 i965_free_gpe_resource(&vme_context->res_jbq_data_lcu64_surface);
755 i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu32_surface);
756
757 i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu64_surface);
758 i965_free_gpe_resource(&vme_context->res_mb_stat_surface);
759 i965_free_gpe_resource(&vme_context->res_mb_split_surface);
760
761 i965_free_gpe_resource(&vme_context->res_s4x_memv_data_surface);
762 i965_free_gpe_resource(&vme_context->res_s4x_me_dist_surface);
763
764 i965_free_gpe_resource(&vme_context->res_s16x_memv_data_surface);
765 i965_free_gpe_resource(&vme_context->res_mv_dist_sum_buffer);
766
767 i965_free_gpe_resource(&vme_context->res_brc_me_dist_surface);
768 i965_free_gpe_resource(&vme_context->res_brc_input_enc_kernel_buffer);
769 i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
770 i965_free_gpe_resource(&vme_context->res_brc_intra_dist_surface);
771 i965_free_gpe_resource(&vme_context->res_brc_pak_statistics_buffer[0]);
772 i965_free_gpe_resource(&vme_context->res_brc_pak_statistics_buffer[1]);
773 i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_write_buffer);
774 i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_read_buffer);
775 i965_free_gpe_resource(&vme_context->res_brc_const_data_surface);
776 i965_free_gpe_resource(&vme_context->res_brc_lcu_const_data_buffer);
777 i965_free_gpe_resource(&vme_context->res_brc_mb_qp_surface);
778 }
779
/*
 * (Re)allocate every GPE buffer and 2D surface used by the gen10 HEVC
 * VME/BRC kernels.  Each resource is freed before it is reallocated, so
 * the function is safe to call again after a resolution change (the
 * frame_info->reallocate_flag path in gen10_hevc_enc_init_parameters).
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_ALLOCATION_FAILED on the
 * first failed allocation.  Resources allocated earlier in the call are
 * left in place on failure; presumably they are released by the context
 * destroy path -- NOTE(review): confirm against the teardown code.
 */
static VAStatus
gen10_hevc_allocate_enc_resources(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen10_hevc_enc_context *vme_context;
    struct gen10_hevc_enc_state *hevc_state;
    struct gen10_hevc_enc_frame_info *frame_info;
    int dw_width, dw_height;
    int allocate_flag;       /* non-zero on successful allocation */
    int res_size;            /* linear buffer size in bytes */
    int i;

    vme_context = (struct gen10_hevc_enc_context *)encoder_context->vme_context;
    hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    frame_info = &vme_context->frame_info;

    /* MB code surface: CU records start at cu_records_offset; per LCU,
     * 64 (LCU64) or 16 (LCU32) records of 32 bytes each. */
    i965_free_gpe_resource(&vme_context->res_mb_code_surface);
    res_size = vme_context->frame_info.width_in_lcu * vme_context->frame_info.height_in_lcu;
    if (hevc_state->is_64lcu)
        res_size = res_size * 64 * 32;
    else
        res_size = res_size * 16 * 32;

    res_size = res_size + hevc_state->cu_records_offset;
    res_size = ALIGN(res_size, 4096);
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_mb_code_surface,
                                               res_size,
                                               "Mb Code_Surface");
    if (!allocate_flag)
        goto FAIL;

    /* Temporary CU record surface for the LCU32 kernels. */
    i965_free_gpe_resource(&vme_context->res_temp_curecord_lcu32_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64);
    dw_height = ALIGN(hevc_state->frame_height, 64);
    dw_width = ALIGN(dw_width, 64);   /* NOTE(review): redundant, already 64-aligned */
    res_size = dw_width * dw_height * 64 + 1024;   /* NOTE(review): unused by the 2D allocation below */
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_temp_curecord_lcu32_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "Temp CURecord surfaces");
    if (!allocate_flag)
        goto FAIL;

    /* 16x16 QP input surface: one column/row per 16 pixels of the
     * 64-aligned frame. */
    i965_free_gpe_resource(&vme_context->res_16x16_qp_data_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 4;
    dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
    dw_width = ALIGN(dw_width, 64);
    dw_height = ALIGN(dw_height, 64);
    dw_width = ALIGN(dw_width, 64);   /* NOTE(review): redundant, repeated from two lines above */
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_16x16_qp_data_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "CU 16x16 input surface");
    if (!allocate_flag)
        goto FAIL;

    /* LCU-level input data: 16 bytes per LCU. */
    i965_free_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
    res_size = vme_context->frame_info.width_in_lcu * vme_context->frame_info.height_in_lcu * 16;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_lculevel_input_data_buffer,
                                               res_size,
                                               "LCU Input data buffer");
    if (!allocate_flag)
        goto FAIL;

    /* Concurrent thread-group data: fixed 16 groups x 256 bytes. */
    i965_free_gpe_resource(&vme_context->res_concurrent_tg_data);
    res_size = 16 * 256;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_concurrent_tg_data,
                                               res_size,
                                               "Concurrent Thread_group data");
    if (!allocate_flag)
        goto FAIL;

    /* CU split decision surface, one entry per 16x16 block. */
    i965_free_gpe_resource(&vme_context->res_cu_split_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 4;
    dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
    dw_width = ALIGN(dw_width, 64);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_cu_split_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "CU split surface");
    if (!allocate_flag)
        goto FAIL;

    /* Fixed-size kernel trace/debug buffer. */
    i965_free_gpe_resource(&vme_context->res_kernel_trace_data);
    res_size = 4096;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_kernel_trace_data,
                                               res_size,
                                               "Kernel trace");
    if (!allocate_flag)
        goto FAIL;

    /* Constant LUT buffers; filled later by
     * gen10_hevc_enc_init_const_resources(). */
    i965_free_gpe_resource(&vme_context->res_enc_const_table_intra);
    res_size = GEN10_HEVC_ENC_INTRA_CONST_LUT_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_enc_const_table_intra,
                                               res_size,
                                               "Constant data for Intra");
    if (!allocate_flag)
        goto FAIL;

    i965_free_gpe_resource(&vme_context->res_enc_const_table_inter);
    res_size = GEN10_HEVC_ENC_INTER_CONST_LUT32_SIZE;

    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_enc_const_table_inter,
                                               res_size,
                                               "Constant data for Inter");
    if (!allocate_flag)
        goto FAIL;

    /* The LCU64 inter LUT is only needed when the stream uses 64x64 LCUs. */
    i965_free_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
    if (hevc_state->is_64lcu) {
        res_size = GEN10_HEVC_ENC_INTER_CONST_LUT64_SIZE;

        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &vme_context->res_enc_const_table_inter_lcu64,
                                                   res_size,
                                                   "Constant data for LCU64_Inter");
        if (!allocate_flag)
            goto FAIL;
    }

    /* Generic per-CU scratch surface. */
    i965_free_gpe_resource(&vme_context->res_scratch_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 3;
    dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
    dw_width = ALIGN(dw_width, 64);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_scratch_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "CU scratch surface");
    if (!allocate_flag)
        goto FAIL;

    /* Second temporary CU record surface (same geometry as the first). */
    i965_free_gpe_resource(&vme_context->res_temp2_curecord_lcu32_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64);
    dw_height = ALIGN(hevc_state->frame_height, 64);
    dw_width = ALIGN(dw_width, 64);   /* NOTE(review): redundant */
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_temp2_curecord_lcu32_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "second temp CURecord surfaces");
    if (!allocate_flag)
        goto FAIL;

    if (hevc_state->is_64lcu) {
        i965_free_gpe_resource(&vme_context->res_temp_curecord_surface_lcu64);
        /* the max number of CU based on 8x8. */
        dw_width = ALIGN(hevc_state->frame_width, 64);
        dw_height = ALIGN(hevc_state->frame_height, 64) / 2;
        dw_width = ALIGN(dw_width, 64);
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &vme_context->res_temp_curecord_surface_lcu64,
                                                      dw_width, dw_height, dw_width,
                                                      "temp CURecord LCU64 surfaces");
        if (!allocate_flag)
            goto FAIL;
    }

    /* Encoder scratch buffer: 13312 bytes per 32x32 block plus one page. */
    i965_free_gpe_resource(&vme_context->res_enc_scratch_buffer);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 5;
    dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
    res_size = dw_width * dw_height * 13312 + 4096;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_enc_scratch_buffer,
                                               res_size,
                                               "Enc Scratch data");
    if (!allocate_flag)
        goto FAIL;

    /* LCU64 variant of the scratch buffer: 13312 bytes per LCU. */
    i965_free_gpe_resource(&vme_context->res_enc_scratch_lcu64_buffer);
    dw_width = vme_context->frame_info.width_in_lcu;
    dw_height = vme_context->frame_info.height_in_lcu;
    res_size = dw_width * dw_height * 13312;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_enc_scratch_lcu64_buffer,
                                               res_size,
                                               "Enc Scratch data");
    if (!allocate_flag)
        goto FAIL;

    /* 64x64 distortion: 32 bytes per 64x64 block. */
    i965_free_gpe_resource(&vme_context->res_64x64_dist_buffer);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 6;
    dw_height = ALIGN(hevc_state->frame_height, 64) >> 6;
    res_size = dw_width * dw_height * 32;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_64x64_dist_buffer,
                                               res_size,
                                               "Res 64x64 Distortion");
    if (!allocate_flag)
        goto FAIL;

    /* Job-queue headers: 2656 bytes per 32x32 block. */
    i965_free_gpe_resource(&vme_context->res_jbq_header_buffer);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 5;
    dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
    res_size = dw_width * dw_height * 2656;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_jbq_header_buffer,
                                               res_size,
                                               "Job queue_header");
    if (!allocate_flag)
        goto FAIL;

    /* LCU64 job-queue headers: 32 bytes per 32x32 block. */
    i965_free_gpe_resource(&vme_context->res_jbq_header_lcu64_buffer);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 5;
    dw_height = ALIGN(hevc_state->frame_height, 64) >> 5;
    res_size = dw_width * dw_height * 32;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_jbq_header_lcu64_buffer,
                                               res_size,
                                               "Job queue_header for Multi-thread LCU");
    if (!allocate_flag)
        goto FAIL;

    /* Job-queue data surfaces: 58 rows per 32-pixel band (LCU32),
     * 66 rows per 64-pixel band (LCU64). */
    i965_free_gpe_resource(&vme_context->res_jbq_data_lcu32_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64);
    dw_height = (ALIGN(hevc_state->frame_height, 64) >> 5) * 58;
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_jbq_data_lcu32_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "Job queue data surface for Multi-thread LCU32");
    if (!allocate_flag)
        goto FAIL;

    i965_free_gpe_resource(&vme_context->res_jbq_data_lcu64_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 1;
    dw_height = (ALIGN(hevc_state->frame_height, 64) >> 6) * 66;
    dw_width = ALIGN(dw_width, 64);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_jbq_data_lcu64_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "Job queue data surface for Multi-thread LCU64");
    if (!allocate_flag)
        goto FAIL;

    /* Residual scratch surfaces: 2x width, 4x height of the aligned frame. */
    i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu32_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64) << 1;
    dw_height = ALIGN(hevc_state->frame_height, 64) << 2;
    dw_width = ALIGN(dw_width, 64);   /* NOTE(review): redundant */
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_residual_scratch_lcu32_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "Resiudal scratch for LCU32");
    if (!allocate_flag)
        goto FAIL;

    i965_free_gpe_resource(&vme_context->res_residual_scratch_lcu64_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64) << 1;
    dw_height = ALIGN(hevc_state->frame_height, 64) << 2;
    dw_width = ALIGN(dw_width, 64);   /* NOTE(review): redundant */
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_residual_scratch_lcu64_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "Resiudal scratch for LCU64");
    if (!allocate_flag)
        goto FAIL;

    /* Per-MB statistics, sized from the MB (16x16) grid. */
    i965_free_gpe_resource(&vme_context->res_mb_stat_surface);
    dw_width = ALIGN(frame_info->width_in_mb * 4, 64);
    dw_height = ALIGN(frame_info->height_in_mb, 8) * 2;
    dw_width = ALIGN(dw_width, 64);   /* NOTE(review): redundant */
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_mb_stat_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "MB 16x16 stat");
    if (!allocate_flag)
        goto FAIL;

    /* MB split decision surface. */
    i965_free_gpe_resource(&vme_context->res_mb_split_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 2;
    dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
    dw_width = ALIGN(dw_width, 64);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_mb_split_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "MB split surface");
    if (!allocate_flag)
        goto FAIL;

    /* 4x-downscaled HME motion-vector and distortion surfaces, only when
     * HME is supported for this stream. */
    if (hevc_state->hme_supported) {
        i965_free_gpe_resource(&vme_context->res_s4x_memv_data_surface);
        dw_width = hevc_state->frame_width_4x * 4;
        dw_height = hevc_state->frame_height_4x >> 3;
        dw_width = ALIGN(dw_width, 64);
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &vme_context->res_s4x_memv_data_surface,
                                                      dw_width, dw_height, dw_width,
                                                      "HME MEMV Data");
        if (!allocate_flag)
            goto FAIL;

        i965_free_gpe_resource(&vme_context->res_s4x_me_dist_surface);
        dw_width = hevc_state->frame_width_4x;
        dw_height = hevc_state->frame_height_4x >> 1;
        dw_width = ALIGN(dw_width, 64);
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &vme_context->res_s4x_me_dist_surface,
                                                      dw_width, dw_height, dw_width,
                                                      "HME Distorion");
        if (!allocate_flag)
            goto FAIL;
    }

    /* 16x-downscaled HME MV surface, only when both HME and 16xME apply. */
    if (hevc_state->hme_supported &&
        hevc_state->b16xme_supported) {
        i965_free_gpe_resource(&vme_context->res_s16x_memv_data_surface);
        dw_width = hevc_state->frame_width_16x * 4;
        dw_height = hevc_state->frame_height_16x >> 3;
        dw_width = ALIGN(dw_width, 64);
        allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                      &vme_context->res_s16x_memv_data_surface,
                                                      dw_width, dw_height, dw_width,
                                                      "16xME MEMV Data");
        if (!allocate_flag)
            goto FAIL;
    }

    /* Small accumulator for MV/distortion sums. */
    i965_free_gpe_resource(&vme_context->res_mv_dist_sum_buffer);
    res_size = 64;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_mv_dist_sum_buffer,
                                               res_size,
                                               "MV_DIST_sum");
    if (!allocate_flag)
        goto FAIL;

    /* BRC resources below: distortion, history, statistics, image states
     * and constant data consumed by the BRC kernels. */
    i965_free_gpe_resource(&vme_context->res_brc_me_dist_surface);
    dw_width = ALIGN(hevc_state->frame_width, 64) >> 4;
    dw_width = ALIGN(dw_width, 64);
    dw_height = ALIGN(hevc_state->frame_height, 64) >> 4;
    dw_height = ALIGN(dw_height, 64);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_brc_me_dist_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "ME BRC distortion");
    if (!allocate_flag)
        goto FAIL;

    i965_free_gpe_resource(&vme_context->res_brc_input_enc_kernel_buffer);
    res_size = 1024;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_input_enc_kernel_buffer,
                                               res_size,
                                               "Brc Input for Enc Kernel");
    if (!allocate_flag)
        goto FAIL;

    /* BRC history carries state across frames; start from a clean slate. */
    i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
    res_size = GEN10_HEVC_BRC_HISTORY_BUFFER_SIZE;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_history_buffer,
                                               res_size,
                                               "Brc History buffer");
    if (!allocate_flag)
        goto FAIL;

    i965_zero_gpe_resource(&vme_context->res_brc_history_buffer);

    i965_free_gpe_resource(&vme_context->res_brc_intra_dist_surface);
    dw_width = ALIGN(hevc_state->frame_width_4x / 2, 64);
    dw_height = ALIGN(hevc_state->frame_height_4x / 4, 8) * 2;
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_brc_intra_dist_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "Brc Intra distortion buffer");
    if (!allocate_flag)
        goto FAIL;

    i965_zero_gpe_resource(&vme_context->res_brc_intra_dist_surface);

    /* Double-buffered PAK statistics. */
    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_brc_pak_statistics_buffer[i]);
        res_size = 64;
        allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                                   &vme_context->res_brc_pak_statistics_buffer[i],
                                                   res_size,
                                                   "Brc Pak statistics buffer");
        if (!allocate_flag)
            goto FAIL;
    }

    /* Image-state read/write buffers: one slot per BRC pass, up to 8. */
    i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_write_buffer);
    res_size = GEN10_HEVC_BRC_IMG_STATE_SIZE_PER_PASS * 8;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_pic_image_state_write_buffer,
                                               res_size,
                                               "Brc Pic State Write buffer");
    if (!allocate_flag)
        goto FAIL;

    i965_free_gpe_resource(&vme_context->res_brc_pic_image_state_read_buffer);
    res_size = GEN10_HEVC_BRC_IMG_STATE_SIZE_PER_PASS * 8;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_pic_image_state_read_buffer,
                                               res_size,
                                               "Brc Pic State Read buffer");
    if (!allocate_flag)
        goto FAIL;

    /* BRC constant surface; filled by gen10_hevc_enc_init_const_resources(). */
    i965_free_gpe_resource(&vme_context->res_brc_const_data_surface);
    dw_width = ALIGN(GEN10_HEVC_BRC_CONST_SURFACE_WIDTH, 64);
    dw_height = ALIGN(GEN10_HEVC_BRC_CONST_SURFACE_HEIGHT, 32);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_brc_const_data_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "Brc Const data buffer");
    if (!allocate_flag)
        goto FAIL;

    i965_free_gpe_resource(&vme_context->res_brc_lcu_const_data_buffer);
    res_size = 4096;
    allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
                                               &vme_context->res_brc_lcu_const_data_buffer,
                                               res_size,
                                               "BRC LCU Const_data buffer");
    if (!allocate_flag)
        goto FAIL;

    i965_zero_gpe_resource(&vme_context->res_brc_lcu_const_data_buffer);

    /* Per-LCU QP surface written by the BRC kernel. */
    i965_free_gpe_resource(&vme_context->res_brc_mb_qp_surface);
    dw_width = ALIGN(hevc_state->frame_width_4x * 4, 64) >> 4;
    dw_height = ALIGN(hevc_state->frame_height_4x * 4, 64) >> 5;

    dw_width = ALIGN(dw_width, 64);
    dw_height = ALIGN(dw_height, 8);
    allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
                                                  &vme_context->res_brc_mb_qp_surface,
                                                  dw_width, dw_height, dw_width,
                                                  "Brc LCU qp data buffer");
    if (!allocate_flag)
        goto FAIL;

    i965_zero_gpe_resource(&vme_context->res_brc_mb_qp_surface);

    return VA_STATUS_SUCCESS;

FAIL:
    return VA_STATUS_ERROR_ALLOCATION_FAILED;
}
1226
1227 static VAStatus
gen10_hevc_enc_init_const_resources(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1228 gen10_hevc_enc_init_const_resources(VADriverContextP ctx,
1229 struct encode_state *encode_state,
1230 struct intel_encoder_context *encoder_context)
1231 {
1232 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1233 struct gen10_hevc_enc_state *hevc_state;
1234 char *buffer_ptr;
1235
1236 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1237
1238 buffer_ptr = i965_map_gpe_resource(&vme_context->res_enc_const_table_intra);
1239 if (!buffer_ptr)
1240 return VA_STATUS_ERROR_OPERATION_FAILED;
1241
1242 memcpy(buffer_ptr, gen10_hevc_enc_intra_const_lut,
1243 GEN10_HEVC_ENC_INTRA_CONST_LUT_SIZE);
1244
1245 i965_unmap_gpe_resource(&vme_context->res_enc_const_table_intra);
1246
1247 buffer_ptr = i965_map_gpe_resource(&vme_context->res_enc_const_table_inter);
1248 if (!buffer_ptr)
1249 return VA_STATUS_ERROR_OPERATION_FAILED;
1250
1251 memcpy(buffer_ptr, gen10_hevc_enc_inter_const_lut32,
1252 GEN10_HEVC_ENC_INTER_CONST_LUT32_SIZE);
1253
1254 i965_unmap_gpe_resource(&vme_context->res_enc_const_table_inter);
1255
1256 if (hevc_state->is_64lcu) {
1257 buffer_ptr = i965_map_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
1258 if (!buffer_ptr)
1259 return VA_STATUS_ERROR_OPERATION_FAILED;
1260
1261 memcpy(buffer_ptr, gen10_hevc_enc_inter_const_lut64,
1262 GEN10_HEVC_ENC_INTER_CONST_LUT64_SIZE);
1263
1264 i965_unmap_gpe_resource(&vme_context->res_enc_const_table_inter_lcu64);
1265 }
1266
1267 buffer_ptr = i965_map_gpe_resource(&vme_context->res_brc_const_data_surface);
1268 if (!buffer_ptr)
1269 return VA_STATUS_ERROR_OPERATION_FAILED;
1270
1271 memcpy(buffer_ptr, gen10_hevc_brc_qp_adjust_data, GEN10_HEVC_BRC_QP_ADJUST_SIZE);
1272
1273 buffer_ptr += GEN10_HEVC_BRC_QP_ADJUST_SIZE;
1274
1275 if (hevc_state->is_64lcu)
1276 memcpy(buffer_ptr, gen10_hevc_brc_lcu64_lambda_cost, GEN10_HEVC_BRC_LCU_LAMBDA_COST);
1277 else
1278 memcpy(buffer_ptr, gen10_hevc_brc_lcu32_lambda_cost, GEN10_HEVC_BRC_LCU_LAMBDA_COST);
1279
1280 i965_unmap_gpe_resource(&vme_context->res_brc_const_data_surface);
1281
1282 return VA_STATUS_SUCCESS;
1283 }
1284
1285 static VAStatus
gen10_hevc_enc_check_parameters(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1286 gen10_hevc_enc_check_parameters(VADriverContextP ctx,
1287 struct encode_state *encode_state,
1288 struct intel_encoder_context *encoder_context)
1289 {
1290 VAEncSequenceParameterBufferHEVC *seq_param;
1291 VAEncPictureParameterBufferHEVC *pic_param;
1292 VAEncSliceParameterBufferHEVC *slice_param;
1293 int i = 0, j = 0;
1294
1295 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1296 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1297 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1298
1299 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1300 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
1301
1302 if (slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag &&
1303 slice_param->slice_fields.bits.collocated_from_l0_flag &&
1304 (pic_param->collocated_ref_pic_index == 0xff ||
1305 pic_param->collocated_ref_pic_index > GEN10_MAX_REF_SURFACES))
1306 slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag = 0;
1307
1308 if (slice_param->num_ref_idx_l0_active_minus1 > GEN10_HEVC_NUM_MAX_REF_L0 - 1 ||
1309 slice_param->num_ref_idx_l1_active_minus1 > GEN10_HEVC_NUM_MAX_REF_L1 - 1)
1310 return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
1311
1312 if (slice_param->slice_type == HEVC_SLICE_P)
1313 return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
1314 }
1315
1316 i = seq_param->log2_diff_max_min_luma_coding_block_size +
1317 seq_param->log2_min_luma_coding_block_size_minus3 + 3;
1318 if (i < GEN10_HEVC_LOG2_MIN_HEVC_LCU ||
1319 i > GEN10_HEVC_LOG2_MAX_HEVC_LCU)
1320 return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
1321
1322 i = seq_param->log2_min_transform_block_size_minus2 +
1323 seq_param->log2_diff_max_min_transform_block_size + 2;
1324 j = seq_param->log2_min_luma_coding_block_size_minus3 +
1325 seq_param->log2_diff_max_min_luma_coding_block_size + 3;
1326
1327 if (i != j)
1328 return VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
1329
1330 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
1331 i = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1332 j = -seq_param->seq_fields.bits.bit_depth_luma_minus8 * 6;
1333 if (i < j || i > 51)
1334 return VA_STATUS_ERROR_INVALID_PARAMETER;
1335
1336 if (seq_param->seq_fields.bits.chroma_format_idc != 1)
1337 return VA_STATUS_ERROR_INVALID_PARAMETER;
1338
1339 return VA_STATUS_SUCCESS;
1340 }
1341
/*
 * Derive the per-frame encoder state from the parsed frame info, the VA
 * sequence/slice parameters and the encoder context's rate-control
 * settings: scaled (2x/4x/16x) resolutions, HME/16xME availability, LCU
 * size and bit-depth flags, the BRC configuration (method, bit rates,
 * VBV sizes, GOP structure) and the SAO pass count.
 *
 * Note: "paramers" in the function name is a historical typo kept so
 * existing callers keep linking.
 */
static VAStatus
gen10_hevc_enc_init_misc_paramers(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct gen10_hevc_enc_context *vme_context = NULL;
    struct gen10_hevc_enc_state *hevc_state;
    struct gen10_hevc_enc_frame_info *frame_info;
    VAEncSequenceParameterBufferHEVC *seq_param;
    VAEncSliceParameterBufferHEVC *slice_param;
    uint32_t brc_method, brc_reset;

    vme_context = (struct gen10_hevc_enc_context *) encoder_context->vme_context;
    hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    frame_info = &vme_context->frame_info;
    seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;

    hevc_state->low_delay = frame_info->low_delay;

    hevc_state->frame_width = frame_info->frame_width;
    hevc_state->frame_height = frame_info->frame_height;

    /* Downscaled resolutions for the scaling/HME kernels; each dimension
     * is rounded up to a multiple of 32. */
    hevc_state->frame_width_2x = ALIGN(frame_info->frame_width / 2, 32);
    hevc_state->frame_height_2x = ALIGN(frame_info->frame_height / 2, 32);

    hevc_state->frame_width_4x = ALIGN(frame_info->frame_width / 4, 32);
    hevc_state->frame_height_4x = ALIGN(frame_info->frame_height / 4, 32);

    hevc_state->frame_width_16x = ALIGN(hevc_state->frame_width_4x / 4, 32);
    hevc_state->frame_height_16x = ALIGN(hevc_state->frame_height_4x / 4, 32);

    /* CU records live after the per-LCU headers (32 bytes each) in the
     * MB code surface; keep the records page-aligned. */
    hevc_state->cu_records_offset = ALIGN(frame_info->width_in_lcu *
                                          frame_info->height_in_lcu *
                                          32, 4096);

    hevc_state->hme_supported = 1;
    hevc_state->b16xme_supported = 1;

    /* Disable/clamp the downscaled search when the scaled frame would be
     * smaller than the VME reference window. */
    if (hevc_state->frame_width_4x <= GEN10_HEVC_VME_REF_WIN ||
        hevc_state->frame_height_4x <= GEN10_HEVC_VME_REF_WIN) {
        hevc_state->b16xme_supported = 0;

        hevc_state->frame_width_4x = GEN10_HEVC_VME_REF_WIN;
        hevc_state->frame_height_4x = GEN10_HEVC_VME_REF_WIN;
    } else if (hevc_state->frame_width_16x <= GEN10_HEVC_VME_REF_WIN ||
               hevc_state->frame_height_16x <= GEN10_HEVC_VME_REF_WIN) {
        hevc_state->frame_width_16x = GEN10_HEVC_VME_REF_WIN;
        hevc_state->frame_height_16x = GEN10_HEVC_VME_REF_WIN;
    }

    /* HME is a motion search -- pointless on intra frames. */
    if (slice_param->slice_type == HEVC_SLICE_I) {
        hevc_state->hme_enabled = 0;
        hevc_state->b16xme_enabled = 0;
    } else {
        hevc_state->hme_enabled = hevc_state->hme_supported;
        hevc_state->b16xme_enabled = hevc_state->b16xme_supported;
    }

    if (frame_info->lcu_size == 64)
        hevc_state->is_64lcu = 1;
    else
        hevc_state->is_64lcu = 0;

    if (frame_info->bit_depth_luma_minus8 ||
        frame_info->bit_depth_chroma_minus8)
        hevc_state->is_10bit = 1;
    else
        hevc_state->is_10bit = 0;

    /* Map the VA rate-control mode onto the internal BRC method. */
    brc_method = GEN10_HEVC_BRC_CQP;
    if (encoder_context->rate_control_mode & VA_RC_CBR)
        brc_method = GEN10_HEVC_BRC_CBR;
    else if (encoder_context->rate_control_mode & VA_RC_VBR)
        brc_method = GEN10_HEVC_BRC_VBR;

    brc_reset = hevc_state->brc.brc_method != brc_method ||
                frame_info->reallocate_flag;

    /* (Re)program the BRC block on first use, on an application-requested
     * reset, on a method change or after a reallocation. */
    if (!hevc_state->brc.brc_inited ||
        encoder_context->brc.need_reset ||
        brc_reset) {
        if (brc_method == GEN10_HEVC_BRC_CQP) {
            hevc_state->brc.brc_enabled = 0;
            hevc_state->num_pak_passes = 1;
        } else {
            hevc_state->brc.brc_enabled = 1;
            hevc_state->num_pak_passes = 1;//2;

            if (brc_method == GEN10_HEVC_BRC_CBR) {
                /* CBR: target, max and min rates are all the same. */
                hevc_state->brc.target_bit_rate = encoder_context->brc.bits_per_second[0];
                hevc_state->brc.max_bit_rate = encoder_context->brc.bits_per_second[0];
                hevc_state->brc.min_bit_rate = encoder_context->brc.bits_per_second[0];
                hevc_state->brc.window_size = encoder_context->brc.window_size;
            } else {
                /* VBR: target derived from target_percentage; min chosen
                 * symmetric to max around the target (floored at 0). */
                hevc_state->brc.max_bit_rate = encoder_context->brc.bits_per_second[0];
                hevc_state->brc.target_bit_rate = encoder_context->brc.bits_per_second[0] *
                                                  encoder_context->brc.target_percentage[0] /
                                                  100;

                if (2 * hevc_state->brc.target_bit_rate < hevc_state->brc.max_bit_rate)
                    hevc_state->brc.min_bit_rate = 0;
                else
                    hevc_state->brc.min_bit_rate = 2 * hevc_state->brc.target_bit_rate -
                                                   hevc_state->brc.max_bit_rate;
            }
        }

        /* VBV buffer size: explicit HRD size, else derived from the BRC
         * window, else one second's worth of max rate. */
        if (encoder_context->brc.hrd_buffer_size)
            hevc_state->brc.vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
        else if (encoder_context->brc.window_size)
            hevc_state->brc.vbv_buffer_size_in_bit = hevc_state->brc.max_bit_rate *
                                                     encoder_context->brc.window_size /
                                                     1000;
        else
            hevc_state->brc.vbv_buffer_size_in_bit = hevc_state->brc.max_bit_rate;

        if (encoder_context->brc.hrd_initial_buffer_fullness)
            hevc_state->brc.init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
        else
            hevc_state->brc.init_vbv_buffer_fullness_in_bit = hevc_state->brc.vbv_buffer_size_in_bit / 2;

        hevc_state->brc.gop_size = encoder_context->brc.gop_size;
        hevc_state->brc.gop_p = encoder_context->brc.num_pframes_in_gop;
        hevc_state->brc.gop_b = encoder_context->brc.num_bframes_in_gop;

        hevc_state->brc.frame_rate_m = encoder_context->brc.framerate[0].num;
        hevc_state->brc.frame_rate_d = encoder_context->brc.framerate[0].den;

        hevc_state->brc.brc_method = brc_method;
        hevc_state->brc.brc_reset = brc_reset || encoder_context->brc.need_reset;

        /* CQP first-time defaults: 30 fps and a heuristic bit budget so
         * downstream consumers of these fields see sane values even
         * though no rate control runs. */
        if (brc_method == GEN10_HEVC_BRC_CQP && !hevc_state->brc.brc_inited) {
            hevc_state->brc.frame_rate_m = 30;
            hevc_state->brc.frame_rate_d = 1;

            hevc_state->brc.target_bit_rate = (hevc_state->frame_width >> 4) * (hevc_state->frame_height >> 4)
                                              * 30 * 384 / 10 * 8;
            hevc_state->brc.max_bit_rate = hevc_state->brc.target_bit_rate;
            hevc_state->brc.min_bit_rate = hevc_state->brc.target_bit_rate;
            hevc_state->brc.window_size = 1500;
            hevc_state->brc.vbv_buffer_size_in_bit = (hevc_state->brc.target_bit_rate / 1000) * 1500;
            hevc_state->brc.init_vbv_buffer_fullness_in_bit = hevc_state->brc.vbv_buffer_size_in_bit / 2;

            hevc_state->brc.gop_size = seq_param->intra_period < 2 ? 30 : seq_param->intra_period;
            hevc_state->brc.gop_p = (hevc_state->brc.gop_size - 1) /
                                    (!seq_param->ip_period ? 1 : seq_param->ip_period);
            hevc_state->brc.gop_b = hevc_state->brc.gop_size - 1 - hevc_state->brc.gop_p;
        }

        hevc_state->profile_level_max_frame =
            gen10_hevc_enc_get_profile_level_max_frame(seq_param, 0,
                                                       hevc_state->brc.frame_rate_m /
                                                       hevc_state->brc.frame_rate_d);
    }

    /* SAO needs an extra pass when enabled for this slice. */
    hevc_state->sao_2nd_needed = 0;
    hevc_state->sao_first_pass_flag = 0;
    hevc_state->num_sao_passes = hevc_state->num_pak_passes;
    if (seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag &&
        (slice_param->slice_fields.bits.slice_sao_luma_flag ||
         slice_param->slice_fields.bits.slice_sao_chroma_flag)) {
        hevc_state->sao_2nd_needed = 1;
        hevc_state->sao_first_pass_flag = 1;
        hevc_state->num_sao_passes = hevc_state->num_pak_passes + 1;
    }

    /* Thread count per CTB is tuned per target-usage bucket. */
    hevc_state->brc.target_usage = encoder_context->quality_level;
    hevc_state->thread_num_per_ctb = gen10_hevc_tu_settings[GEN10_TOTAL_THREAD_NUM_PER_LCU_TU_PARAM]
                                     [(hevc_state->brc.target_usage + 1) >> 2];

    hevc_state->is_same_ref_list = frame_info->is_same_ref_list;

    return VA_STATUS_SUCCESS;
}
1517
1518 static VAStatus
gen10_hevc_enc_init_parameters(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1519 gen10_hevc_enc_init_parameters(VADriverContextP ctx,
1520 struct encode_state *encode_state,
1521 struct intel_encoder_context *encoder_context)
1522 {
1523 struct gen10_hevc_enc_context *vme_context;
1524 struct gen10_hevc_enc_state *hevc_state;
1525 struct gen10_hevc_enc_frame_info *frame_info;
1526 struct gen10_hevc_enc_common_res *common_res;
1527 VAStatus va_status = VA_STATUS_SUCCESS;
1528
1529 va_status = gen10_hevc_enc_check_parameters(ctx, encode_state, encoder_context);
1530 if (va_status != VA_STATUS_SUCCESS)
1531 goto EXIT;
1532
1533 vme_context = (struct gen10_hevc_enc_context *) encoder_context->vme_context;
1534 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1535 frame_info = &vme_context->frame_info;
1536 common_res = &vme_context->common_res;
1537
1538 gen10_hevc_enc_init_frame_info(ctx, encode_state, encoder_context, frame_info);
1539 gen10_hevc_enc_init_status_buffer(ctx, encode_state, encoder_context,
1540 &vme_context->status_buffer);
1541
1542 if (!hevc_state->lambda_init ||
1543 frame_info->reallocate_flag) {
1544 gen10_hevc_enc_init_lambda_param(&vme_context->lambda_param, frame_info->bit_depth_luma_minus8,
1545 frame_info->bit_depth_chroma_minus8);
1546
1547 hevc_state->lambda_init = 1;
1548 }
1549
1550 if (gen10_hevc_enc_init_common_resource(ctx, encode_state, encoder_context,
1551 common_res,
1552 frame_info,
1553 frame_info->picture_coding_type != HEVC_SLICE_I,
1554 0) < 0) {
1555 va_status = VA_STATUS_ERROR_ALLOCATION_FAILED;
1556 goto EXIT;
1557 }
1558
1559 va_status = gen10_hevc_enc_init_misc_paramers(ctx, encode_state, encoder_context);
1560 if (va_status != VA_STATUS_SUCCESS)
1561 goto EXIT;
1562
1563 va_status = gen10_hevc_enc_ensure_surface(ctx,
1564 common_res->uncompressed_pic.obj_surface,
1565 frame_info->bit_depth_luma_minus8,
1566 0);
1567 if (va_status != VA_STATUS_SUCCESS)
1568 goto EXIT;
1569
1570 va_status = gen10_hevc_enc_ensure_surface(ctx,
1571 common_res->reconstructed_pic.obj_surface,
1572 frame_info->bit_depth_luma_minus8,
1573 1);
1574 if (va_status != VA_STATUS_SUCCESS)
1575 goto EXIT;
1576
1577 va_status = gen10_hevc_init_surface_priv(ctx, encode_state, encoder_context,
1578 common_res->reconstructed_pic.obj_surface);
1579 if (va_status != VA_STATUS_SUCCESS)
1580 goto EXIT;
1581
1582 if (frame_info->reallocate_flag) {
1583 va_status = gen10_hevc_allocate_enc_resources(ctx, encode_state,
1584 encoder_context);
1585 if (va_status != VA_STATUS_SUCCESS)
1586 goto EXIT;
1587
1588 hevc_state->frame_number = 0;
1589 }
1590
1591 va_status = gen10_hevc_enc_init_const_resources(ctx, encode_state, encoder_context);
1592 if (va_status != VA_STATUS_SUCCESS)
1593 goto EXIT;
1594
1595 EXIT:
1596 return va_status;
1597 }
1598
1599 #define GEN10_WALKER_26_DEGREE 0
1600 #define GEN10_WALKER_26Z_DEGREE 1
1601 #define GEN10_WALKER_26X_DEGREE 2
1602 #define GEN10_WALKER_26ZX_DEGREE 3
1603
1604 static void
gen10_init_media_object_walker_parameter(struct gen10_hevc_enc_kernel_walker_parameter * kernel_walker_param,struct gpe_media_object_walker_parameter * walker_param)1605 gen10_init_media_object_walker_parameter(struct gen10_hevc_enc_kernel_walker_parameter *kernel_walker_param,
1606 struct gpe_media_object_walker_parameter *walker_param)
1607 {
1608 memset(walker_param, 0, sizeof(*walker_param));
1609
1610 walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
1611
1612 walker_param->block_resolution.x = kernel_walker_param->resolution_x;
1613 walker_param->block_resolution.y = kernel_walker_param->resolution_y;
1614
1615 walker_param->global_resolution.x = kernel_walker_param->resolution_x;
1616 walker_param->global_resolution.y = kernel_walker_param->resolution_y;
1617
1618 walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
1619 walker_param->global_outer_loop_stride.y = 0;
1620
1621 walker_param->global_inner_loop_unit.x = 0;
1622 walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
1623
1624 walker_param->local_loop_exec_count = 0xFFFF;
1625 walker_param->global_loop_exec_count = 0xFFFF;
1626
1627 if (kernel_walker_param->no_dependency) {
1628 walker_param->scoreboard_mask = 0;
1629 walker_param->use_scoreboard = 0;
1630 walker_param->local_outer_loop_stride.x = 0;
1631 walker_param->local_outer_loop_stride.y = 1;
1632 walker_param->local_inner_loop_unit.x = 1;
1633 walker_param->local_inner_loop_unit.y = 0;
1634 walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
1635 walker_param->local_end.y = 0;
1636 } else if (kernel_walker_param->use_vertical_scan) {
1637 walker_param->scoreboard_mask = 0x1;
1638 walker_param->local_outer_loop_stride.x = 1;
1639 walker_param->local_outer_loop_stride.y = 0;
1640 walker_param->local_inner_loop_unit.x = 0;
1641 walker_param->local_inner_loop_unit.y = 1;
1642 walker_param->local_end.x = 0;
1643 walker_param->local_end.y = kernel_walker_param->resolution_y - 1;
1644 } else {
1645 walker_param->local_end.x = 0;
1646 walker_param->local_end.y = 0;
1647 }
1648 }
1649
/* Launch a single-thread kernel (MEDIA_OBJECT) on the given GPE context
 * and flush the batch to the hardware.
 *
 * Before the launch, the media_function id is written into the encoder
 * status buffer with MI_STORE_DATA_IMM, so the most recently submitted
 * media state can be identified afterwards.
 */
static void
gen10_run_kernel_media_object(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              struct i965_gpe_context *gpe_context,
                              int media_function,
                              struct gpe_media_object_parameter *param)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
    struct gen10_hevc_enc_status_buffer *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    status_buffer = &vme_context->status_buffer;

    intel_batchbuffer_start_atomic(batch, 0x1000);

    /* Record which media state is about to run into the status buffer. */
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->gpe_res.bo;
    mi_store_data_imm.offset = status_buffer->status_media_state_offset;
    mi_store_data_imm.dw0 = media_function;
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    intel_batchbuffer_emit_mi_flush(batch);

    /* Set up the media pipeline, dispatch the object, then flush media state. */
    gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
    gen8_gpe_media_object(ctx, gpe_context, batch, param);
    gen8_gpe_media_state_flush(ctx, gpe_context, batch);

    gen9_gpe_pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
1684
/* Launch a multi-thread kernel via MEDIA_OBJECT_WALKER on the given GPE
 * context and flush the batch to the hardware.
 *
 * Like gen10_run_kernel_media_object(), the media_function id is stored
 * into the status buffer so the submitted media state can be identified;
 * here the MI_FLUSH is emitted before the store rather than after.
 */
static void
gen10_run_kernel_media_object_walker(VADriverContextP ctx,
                                     struct intel_encoder_context *encoder_context,
                                     struct i965_gpe_context *gpe_context,
                                     int media_function,
                                     struct gpe_media_object_walker_parameter *param)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
    struct gen10_hevc_enc_status_buffer *status_buffer;
    struct gpe_mi_store_data_imm_parameter mi_store_data_imm;

    status_buffer = &vme_context->status_buffer;

    intel_batchbuffer_start_atomic(batch, 0x1000);

    intel_batchbuffer_emit_mi_flush(batch);

    /* Record which media state is about to run into the status buffer. */
    memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
    mi_store_data_imm.bo = status_buffer->gpe_res.bo;
    mi_store_data_imm.offset = status_buffer->status_media_state_offset;
    mi_store_data_imm.dw0 = media_function;
    gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);

    /* Set up the media pipeline, dispatch the walker, then flush media state. */
    gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
    gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
    gen8_gpe_media_state_flush(ctx, gpe_context, batch);

    gen9_gpe_pipeline_end(ctx, gpe_context, batch);

    intel_batchbuffer_end_atomic(batch);

    intel_batchbuffer_flush(batch);
}
1719
/* Clamp x (any arithmetic lvalue) into [min, max].
 *
 * The do { } while (0) wrapper makes the macro expand to exactly one
 * statement, so it composes safely with if/else bodies and a trailing
 * semicolon; the original bare { } block would break an un-braced
 * `if (c) BRC_CLIP(...); else ...`.  Each argument is parenthesized to
 * protect against operator-precedence surprises.
 */
#define BRC_CLIP(x, min, max)                                              \
    do {                                                                   \
        (x) = (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)));     \
    } while (0)

/* Maximum number of BRC PAK passes per frame. */
#define GEN10_HEVC_MAX_BRC_PASSES 4

/* Flag values packed into the BRC-init CURBE brc_flag field. */
#define GEN10_HEVC_BRCINIT_ISCBR         0x0010
#define GEN10_HEVC_BRCINIT_ISVBR         0x0020
#define GEN10_HEVC_BRCINIT_ISCQP         0x4000
#define GEN10_HEVC_BRCINIT_DISABLE_MBBRC 0x8000
1731
1732 static void
gen10_hevc_enc_brc_init_set_curbe(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)1733 gen10_hevc_enc_brc_init_set_curbe(VADriverContextP ctx,
1734 struct encode_state *encode_state,
1735 struct intel_encoder_context *encoder_context,
1736 struct i965_gpe_context *gpe_context)
1737 {
1738 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1739 struct gen10_hevc_enc_state *hevc_state;
1740 gen10_hevc_brc_init_curbe_data *brc_curbe;
1741 double input_bits_per_frame, bps_ratio;
1742
1743 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1744
1745 brc_curbe = i965_gpe_context_map_curbe(gpe_context);
1746
1747 if (!brc_curbe)
1748 return;
1749
1750 memset(brc_curbe, 0, sizeof(gen10_hevc_brc_init_curbe_data));
1751
1752 brc_curbe->dw0.profile_level_max_frame = hevc_state->profile_level_max_frame;
1753 brc_curbe->dw1.init_buf_full = hevc_state->brc.init_vbv_buffer_fullness_in_bit;
1754 brc_curbe->dw2.buf_size = hevc_state->brc.vbv_buffer_size_in_bit;
1755 brc_curbe->dw3.target_bit_rate = hevc_state->brc.target_bit_rate;
1756 brc_curbe->dw4.maximum_bit_rate = hevc_state->brc.max_bit_rate;
1757 brc_curbe->dw5.minimum_bit_rate = 0;
1758 brc_curbe->dw6.frame_ratem = hevc_state->brc.frame_rate_m;
1759 brc_curbe->dw7.frame_rated = hevc_state->brc.frame_rate_d;
1760 if (hevc_state->brc.lcu_brc_enabled)
1761 brc_curbe->dw8.brc_flag = 0;
1762 else
1763 brc_curbe->dw8.brc_flag = GEN10_HEVC_BRCINIT_DISABLE_MBBRC;
1764
1765 brc_curbe->dw25.ac_qp_buffer = 1;
1766 brc_curbe->dw25.log2_max_cu_size = hevc_state->is_64lcu ? 6 : 5;
1767 brc_curbe->dw25.sliding_wind_size = 30;
1768
1769 if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP) {
1770 brc_curbe->dw8.brc_flag = GEN10_HEVC_BRCINIT_ISCQP;
1771 } else if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CBR) {
1772 brc_curbe->dw8.brc_flag |= GEN10_HEVC_BRCINIT_ISCBR;
1773 } else if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_VBR) {
1774 brc_curbe->dw8.brc_flag |= GEN10_HEVC_BRCINIT_ISVBR;
1775 }
1776
1777 brc_curbe->dw9.frame_width = hevc_state->frame_width;
1778 brc_curbe->dw10.frame_height = hevc_state->frame_height;
1779 brc_curbe->dw10.avbr_accuracy = 30;
1780 brc_curbe->dw11.avbr_convergence = 150;
1781
1782 brc_curbe->dw14.max_brc_level = 1;
1783 brc_curbe->dw8.brc_gopp = hevc_state->brc.gop_p;
1784 brc_curbe->dw9.brc_gopb = hevc_state->brc.gop_b;
1785
1786 brc_curbe->dw11.minimum_qp = 1;
1787 brc_curbe->dw12.maximum_qp = 51;
1788
1789 brc_curbe->dw16.instant_rate_thr0_pframe = 40;
1790 brc_curbe->dw16.instant_rate_thr1_pframe = 60;
1791 brc_curbe->dw16.instant_rate_thr2_pframe = 80;
1792 brc_curbe->dw16.instant_rate_thr3_pframe = 120;
1793 brc_curbe->dw17.instant_rate_thr0_bframe = 35;
1794 brc_curbe->dw17.instant_rate_thr1_bframe = 60;
1795 brc_curbe->dw17.instant_rate_thr2_bframe = 80;
1796 brc_curbe->dw17.instant_rate_thr3_bframe = 120;
1797 brc_curbe->dw18.instant_rate_thr0_iframe = 40;
1798 brc_curbe->dw18.instant_rate_thr1_iframe = 60;
1799 brc_curbe->dw18.instant_rate_thr2_iframe = 90;
1800 brc_curbe->dw18.instant_rate_thr3_iframe = 115;
1801
1802 input_bits_per_frame = (double)(brc_curbe->dw4.maximum_bit_rate) * ((double)(hevc_state->brc.frame_rate_d)) /
1803 ((double)(hevc_state->brc.frame_rate_m));
1804
1805 if (brc_curbe->dw2.buf_size < (uint32_t)input_bits_per_frame * 4)
1806 brc_curbe->dw2.buf_size = (uint32_t)input_bits_per_frame * 4;
1807
1808 if (!brc_curbe->dw1.init_buf_full)
1809 brc_curbe->dw1.init_buf_full = 7 * brc_curbe->dw2.buf_size / 8;
1810 else if (brc_curbe->dw1.init_buf_full < (uint32_t)input_bits_per_frame * 2)
1811 brc_curbe->dw1.init_buf_full = (uint32_t)input_bits_per_frame * 2;
1812 else if (brc_curbe->dw1.init_buf_full > brc_curbe->dw2.buf_size)
1813 brc_curbe->dw1.init_buf_full = brc_curbe->dw2.buf_size;
1814
1815 bps_ratio = input_bits_per_frame / ((double)(hevc_state->brc.vbv_buffer_size_in_bit) / 30);
1816
1817 BRC_CLIP(bps_ratio, 0.1, 3.5);
1818
1819 brc_curbe->dw19.deviation_thr0_pbframe = (uint32_t)(-50 * pow(0.90, bps_ratio));
1820 brc_curbe->dw19.deviation_thr1_pbframe = (uint32_t)(-50 * pow(0.66, bps_ratio));
1821 brc_curbe->dw19.deviation_thr2_pbframe = (uint32_t)(-50 * pow(0.46, bps_ratio));
1822 brc_curbe->dw19.deviation_thr3_pbframe = (uint32_t)(-50 * pow(0.3, bps_ratio));
1823
1824 brc_curbe->dw20.deviation_thr4_pbframe = (uint32_t)(50 * pow(0.3, bps_ratio));
1825 brc_curbe->dw20.deviation_thr5_pbframe = (uint32_t)(50 * pow(0.46, bps_ratio));
1826 brc_curbe->dw20.deviation_thr6_pbframe = (uint32_t)(50 * pow(0.7, bps_ratio));
1827 brc_curbe->dw20.deviation_thr7_pbframe = (uint32_t)(50 * pow(0.9, bps_ratio));
1828
1829 brc_curbe->dw21.deviation_thr0_vbrctrl = (uint32_t)(-50 * pow(0.9, bps_ratio));
1830 brc_curbe->dw21.deviation_thr1_vbrctrl = (uint32_t)(-50 * pow(0.7, bps_ratio));
1831 brc_curbe->dw21.deviation_thr2_vbrctrl = (uint32_t)(-50 * pow(0.5, bps_ratio));
1832 brc_curbe->dw21.deviation_thr3_vbrctrl = (uint32_t)(-50 * pow(0.3, bps_ratio));
1833
1834 brc_curbe->dw22.deviation_thr4_vbrctrl = (uint32_t)(100 * pow(0.4, bps_ratio));
1835 brc_curbe->dw22.deviation_thr5_vbrctrl = (uint32_t)(100 * pow(0.5, bps_ratio));
1836 brc_curbe->dw22.deviation_thr6_vbrctrl = (uint32_t)(100 * pow(0.75, bps_ratio));
1837 brc_curbe->dw22.deviation_thr7_vbrctrl = (uint32_t)(100 * pow(0.9, bps_ratio));
1838
1839 brc_curbe->dw23.deviation_thr0_iframe = (uint32_t)(-50 * pow(0.8, bps_ratio));
1840 brc_curbe->dw23.deviation_thr1_iframe = (uint32_t)(-50 * pow(0.6, bps_ratio));
1841 brc_curbe->dw23.deviation_thr2_iframe = (uint32_t)(-50 * pow(0.34, bps_ratio));
1842 brc_curbe->dw23.deviation_thr3_iframe = (uint32_t)(-50 * pow(0.2, bps_ratio));
1843
1844 brc_curbe->dw24.deviation_thr4_iframe = (uint32_t)(50 * pow(0.2, bps_ratio));
1845 brc_curbe->dw24.deviation_thr5_iframe = (uint32_t)(50 * pow(0.4, bps_ratio));
1846 brc_curbe->dw24.deviation_thr6_iframe = (uint32_t)(50 * pow(0.66, bps_ratio));
1847 brc_curbe->dw24.deviation_thr7_iframe = (uint32_t)(50 * pow(0.9, bps_ratio));
1848
1849 if (!hevc_state->brc.brc_inited)
1850 hevc_state->brc.brc_init_current_target_buf_full_in_bits = brc_curbe->dw1.init_buf_full;
1851
1852 hevc_state->brc.brc_init_reset_buf_size_in_bits = (double)brc_curbe->dw2.buf_size;
1853 hevc_state->brc.brc_init_reset_input_bits_per_frame = input_bits_per_frame;
1854
1855 i965_gpe_context_unmap_curbe(gpe_context);
1856 }
1857
1858 static void
gen10_hevc_enc_brc_init_add_surfaces(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)1859 gen10_hevc_enc_brc_init_add_surfaces(VADriverContextP ctx,
1860 struct encode_state *encode_state,
1861 struct intel_encoder_context *encoder_context,
1862 struct i965_gpe_context *gpe_context)
1863 {
1864 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1865
1866 i965_add_buffer_gpe_surface(ctx,
1867 gpe_context,
1868 &vme_context->res_brc_history_buffer,
1869 0,
1870 BYTES2UINT32(vme_context->res_brc_history_buffer.size),
1871 0,
1872 0);
1873
1874 i965_add_buffer_2d_gpe_surface(ctx,
1875 gpe_context,
1876 &vme_context->res_brc_me_dist_surface,
1877 1,
1878 I965_SURFACEFORMAT_R8_UNORM,
1879 1);
1880 }
1881
1882 static void
gen10_hevc_enc_brc_init_reset(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1883 gen10_hevc_enc_brc_init_reset(VADriverContextP ctx,
1884 struct encode_state *encode_state,
1885 struct intel_encoder_context *encoder_context)
1886 {
1887 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
1888 struct gen10_hevc_enc_state *hevc_state;
1889 struct gpe_media_object_parameter media_object_param;
1890 struct i965_gpe_context *gpe_context;
1891 int gpe_index = GEN10_HEVC_BRC_INIT;
1892 int media_function = GEN10_HEVC_MEDIA_STATE_BRC_INIT_RESET;
1893
1894 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
1895
1896 if (hevc_state->brc.brc_inited)
1897 gpe_index = GEN10_HEVC_BRC_RESET;
1898
1899 gpe_context = &(vme_context->brc_context.gpe_contexts[gpe_index]);
1900
1901 gen8_gpe_context_init(ctx, gpe_context);
1902 gen9_gpe_reset_binding_table(ctx, gpe_context);
1903
1904 gen10_hevc_enc_brc_init_set_curbe(ctx, encode_state, encoder_context, gpe_context);
1905 gen10_hevc_enc_brc_init_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
1906
1907 gen8_gpe_setup_interface_data(ctx, gpe_context);
1908
1909 memset(&media_object_param, 0, sizeof(media_object_param));
1910 gen10_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1911 }
1912
/* Pre-build four HCP_PIC_STATE command images (32 dwords each, the last
 * being MI_BATCH_BUFFER_END) into the BRC picture-image-state read buffer,
 * presumably one per possible BRC pass (GEN10_HEVC_MAX_BRC_PASSES == 4) --
 * TODO confirm against the BRC kernel contract.  The fields are packed
 * from the VA-API sequence/picture/slice parameters.
 */
static void
gen10_hevc_brc_add_pic_img_state(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
    struct gen10_hevc_enc_state *hevc_state;
    VAEncPictureParameterBufferHEVC *pic_param;
    VAEncSequenceParameterBufferHEVC *seq_param;
    VAEncSliceParameterBufferHEVC *slice_param;
    unsigned int batch_value = 0, tmp_value, i;
    uint32_t *batch_ptr, *buffer_ptr;

    hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;

    buffer_ptr = (uint32_t *)i965_map_gpe_resource(&pak_context->res_brc_pic_image_state_read_buffer);

    if (!buffer_ptr)
        return;

    pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;

    /* Each of the four copies occupies 32 dwords. */
    for (i = 0; i < 4; i++) {
        batch_ptr = buffer_ptr + 32 * i;


        /* DW 0: command opcode; payload length is 31 dwords (len bias 2). */
        *(batch_ptr++) = HCP_PIC_STATE | (31 - 2);

        /* DW 1: frame size in CUs (minus one), plus transform-skip flag. */
        batch_value = (pak_context->frame_info.width_in_cu - 1) |
                      ((pak_context->frame_info.height_in_cu - 1) << 16);
        batch_value |= pic_param->pic_fields.bits.transform_skip_enabled_flag << 15;
        *(batch_ptr++) = batch_value;

        /* DW 2: min/max coding-, transform- and PCM-block size fields. */
        batch_value = (seq_param->log2_min_pcm_luma_coding_block_size_minus3 << 8) |
                      (seq_param->log2_max_pcm_luma_coding_block_size_minus3 << 10) |
                      (seq_param->log2_min_transform_block_size_minus2 << 4) |
                      ((seq_param->log2_min_transform_block_size_minus2 +
                        seq_param->log2_diff_max_min_transform_block_size) << 6) |
                      ((seq_param->log2_min_luma_coding_block_size_minus3 +
                        seq_param->log2_diff_max_min_luma_coding_block_size) << 2) |
                      (seq_param->log2_min_luma_coding_block_size_minus3 << 0);

        /* DW 2 */
        *(batch_ptr++) = batch_value;

        /* DW 3 */
        *(batch_ptr++) = 0;

        /* DW 4: per-picture coding flags. */
        batch_value = 0;
        /* SAO only when enabled in the slice and not encoding 10-bit. */
        if ((slice_param->slice_fields.bits.slice_sao_luma_flag ||
             slice_param->slice_fields.bits.slice_sao_chroma_flag) &&
            !hevc_state->is_10bit)
            batch_value |= (1 << 3);

        if (pic_param->pic_fields.bits.cu_qp_delta_enabled_flag) {
            tmp_value = pic_param->diff_cu_qp_delta_depth;
            batch_value |= (1 << 5) | (tmp_value << 6);
        }
        batch_value |= (0 << 4) |
                       (seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8) |
                       (0 << 9) |
                       (0 << 10) | //(pic_param->log2_parallel_merge_level_minus2
                       (0 << 13) |
                       (0 << 15) |
                       (0 << 17) | //tile is disabled.
                       (pic_param->pic_fields.bits.weighted_bipred_flag << 18) |
                       (pic_param->pic_fields.bits.weighted_pred_flag << 19) |
                       (0 << 20) | //20/21 is reserved.
                       (pic_param->pic_fields.bits.transform_skip_enabled_flag << 22) |
                       (seq_param->seq_fields.bits.amp_enabled_flag << 23) |
                       (pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25) |
                       (seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26) |
                       (0 << 27); // VME CU packet

        *(batch_ptr++) = batch_value;

        /* DW 5: chroma QP offsets, transform hierarchy depths, bit depths. */
        batch_value = (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
                      (pic_param->pps_cb_qp_offset & 0x1f);
        batch_value |= (seq_param->max_transform_hierarchy_depth_inter << 13) |
                       (seq_param->max_transform_hierarchy_depth_intra << 10) |
                       (seq_param->pcm_sample_bit_depth_luma_minus1 << 20) |
                       (seq_param->pcm_sample_bit_depth_chroma_minus1 << 16) |
                       (seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27) |
                       (seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24);
        *(batch_ptr++) = batch_value;

        /* DW6: CTU bit budget and pass control bits. */
        batch_value = pic_param->ctu_max_bitsize_allowed;
        batch_value |= (0 << 24 |
                        1 << 25 |
                        1 << 26 |
                        0 << 29); // bit 29 reload slice_pointer_flag.

        /* Bit 16 distinguishes the initial BRC pass from re-encode passes. */
        if (i == 0)
            batch_value |= (0 << 16); // Initial pass
        else
            batch_value |= (1 << 16); // subsequent pass
        *(batch_ptr++) = batch_value;

        /* DW 7. Frame_rate Max */
        *(batch_ptr++) = 0;

        /* Dw 8. Frame_rate Min */
        *(batch_ptr++) = 0;

        /* DW 9. Frame_rate Min/MAX slice_delta */
        *(batch_ptr++) = 0;

        /* DW 10..17 */
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;

        /* DW 18 */
        *(batch_ptr++) = 0;

        /* DW 19..20 */
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;

        /* DW 21..30 */
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;
        *(batch_ptr++) = 0;

        /* DW 31: terminate this copy as a second-level batch. */
        *(batch_ptr++) = MI_BATCH_BUFFER_END;
    }

    i965_unmap_gpe_resource(&pak_context->res_brc_pic_image_state_read_buffer);
}
2062
2063 static VAStatus
gen10_hevc_enc_brc_frame_update_add_surfaces(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)2064 gen10_hevc_enc_brc_frame_update_add_surfaces(VADriverContextP ctx,
2065 struct encode_state *encode_state,
2066 struct intel_encoder_context *encoder_context,
2067 struct i965_gpe_context *gpe_context)
2068 {
2069 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2070 struct gen10_hevc_enc_state *hevc_state;
2071 int pak_read_idx;
2072
2073 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2074
2075 i965_add_buffer_gpe_surface(ctx,
2076 gpe_context,
2077 &vme_context->res_brc_history_buffer,
2078 0,
2079 BYTES2UINT32(vme_context->res_brc_history_buffer.size),
2080 0,
2081 0);
2082
2083 pak_read_idx = !hevc_state->curr_pak_stat_index;
2084 i965_add_buffer_gpe_surface(ctx,
2085 gpe_context,
2086 &vme_context->res_brc_pak_statistics_buffer[pak_read_idx],
2087 0,
2088 BYTES2UINT32(vme_context->res_brc_pak_statistics_buffer[pak_read_idx].size),
2089 0,
2090 1);
2091
2092 i965_add_buffer_gpe_surface(ctx,
2093 gpe_context,
2094 &vme_context->res_brc_pic_image_state_read_buffer,
2095 0,
2096 BYTES2UINT32(vme_context->res_brc_pic_image_state_read_buffer.size),
2097 0,
2098 2);
2099
2100 i965_add_buffer_gpe_surface(ctx,
2101 gpe_context,
2102 &vme_context->res_brc_pic_image_state_write_buffer,
2103 0,
2104 BYTES2UINT32(vme_context->res_brc_pic_image_state_write_buffer.size),
2105 0,
2106 3);
2107
2108 i965_add_buffer_gpe_surface(ctx,
2109 gpe_context,
2110 &vme_context->res_brc_input_enc_kernel_buffer,
2111 0,
2112 BYTES2UINT32(vme_context->res_brc_input_enc_kernel_buffer.size),
2113 0,
2114 4);
2115
2116 i965_add_buffer_2d_gpe_surface(ctx,
2117 gpe_context,
2118 &vme_context->res_brc_me_dist_surface,
2119 1,
2120 I965_SURFACEFORMAT_R8_UNORM,
2121 5);
2122
2123 i965_add_buffer_2d_gpe_surface(ctx,
2124 gpe_context,
2125 &vme_context->res_brc_const_data_surface,
2126 1,
2127 I965_SURFACEFORMAT_R8_UNORM,
2128 6);
2129
2130 i965_add_buffer_2d_gpe_surface(ctx,
2131 gpe_context,
2132 &vme_context->res_mb_stat_surface,
2133 1,
2134 I965_SURFACEFORMAT_R8_UNORM,
2135 7);
2136
2137 i965_add_buffer_gpe_surface(ctx,
2138 gpe_context,
2139 &vme_context->res_mv_dist_sum_buffer,
2140 0,
2141 BYTES2UINT32(vme_context->res_mv_dist_sum_buffer.size),
2142 0,
2143 8);
2144
2145 return VA_STATUS_SUCCESS;
2146 }
2147
/* Fill the CURBE shared by the BRC frame-update and LCU-update kernels.
 *
 * lcu_update selects the caller: 0 for the frame-update kernel, 1 for the
 * LCU-update kernel (which additionally advances the running buffer-
 * fullness target by one frame's worth of bits).
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_OPERATION_FAILED when the
 * CURBE cannot be mapped.
 */
static VAStatus
gen10_hevc_enc_brc_update_set_curbe(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct i965_gpe_context *gpe_context,
                                    int lcu_update)
{
    struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
    struct gen10_hevc_enc_state *hevc_state;
    gen10_hevc_brc_update_curbe_data *brc_update;
    VAEncSliceParameterBufferHEVC *slice_param;
    VAEncPictureParameterBufferHEVC *pic_param;

    hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;

    slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;

    brc_update = i965_gpe_context_map_curbe(gpe_context);

    if (!brc_update)
        return VA_STATUS_ERROR_OPERATION_FAILED;

    memset(brc_update, 0, sizeof(gen10_hevc_brc_update_curbe_data));

    /* Wrap the running target when it exceeds the VBV buffer size and
     * tell the kernel a wrap happened via target_size_flag. */
    if (hevc_state->brc.brc_init_current_target_buf_full_in_bits >
        (double)hevc_state->brc.brc_init_reset_buf_size_in_bits) {
        hevc_state->brc.brc_init_current_target_buf_full_in_bits -=
            (double)hevc_state->brc.brc_init_reset_buf_size_in_bits;
        brc_update->dw5.target_size_flag = 1;
    }

    brc_update->dw0.target_size = (uint32_t)(hevc_state->brc.brc_init_current_target_buf_full_in_bits);
    brc_update->dw1.frame_num = hevc_state->frame_number;

    brc_update->dw2.picture_header_size = gen10_hevc_enc_get_pic_header_size(encode_state);

    /* BRC level: 2 = I, 0 = P or low-delay B, 1 = regular B. */
    if (slice_param->slice_type == HEVC_SLICE_I)
        brc_update->dw5.curr_frame_brclevel = 2;
    else if (slice_param->slice_type == HEVC_SLICE_P ||
             hevc_state->low_delay)
        brc_update->dw5.curr_frame_brclevel = 0;
    else
        brc_update->dw5.curr_frame_brclevel = 1;

    brc_update->dw5.max_num_paks = GEN10_HEVC_MAX_BRC_PASSES;

    /* In CQP mode pass the clamped slice QP straight through. */
    if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP) {
        int qp_value;

        qp_value = pic_param->pic_init_qp + slice_param->slice_qp_delta;
        BRC_CLIP(qp_value, 1, 51);
        brc_update->dw6.cqp_value = qp_value;
    }

    brc_update->dw14.parallel_mode = 0;

    /* Only the LCU-update path advances the target by one frame's bits. */
    if (lcu_update == 1)
        hevc_state->brc.brc_init_current_target_buf_full_in_bits +=
            hevc_state->brc.brc_init_reset_input_bits_per_frame;

    /* Fixed gain-adjustment schedule and QP/rate-ratio thresholds
     * consumed by the BRC kernel. */
    brc_update->dw3.start_gadj_frame0 = 10;
    brc_update->dw3.start_gadj_frame1 = 50;
    brc_update->dw4.start_gadj_frame2 = 100;
    brc_update->dw4.start_gadj_frame3 = 150;

    brc_update->dw8.start_gadj_mult0 = 1;
    brc_update->dw8.start_gadj_mult1 = 1;
    brc_update->dw8.start_gadj_mult2 = 3;
    brc_update->dw8.start_gadj_mult3 = 2;
    brc_update->dw9.start_gadj_mult4 = 1;

    brc_update->dw9.start_gadj_divd0 = 40;
    brc_update->dw9.start_gadj_divd1 = 5;
    brc_update->dw9.start_gadj_divd2 = 5;
    brc_update->dw10.start_gadj_divd3 = 3;
    brc_update->dw10.start_gadj_divd4 = 1;

    brc_update->dw10.qp_threshold0 = 7;
    brc_update->dw10.qp_threshold1 = 18;
    brc_update->dw11.qp_threshold2 = 25;
    brc_update->dw11.qp_threshold3 = 37;

    brc_update->dw11.grate_ratio_thr0 = 40;
    brc_update->dw11.grate_ratio_thr1 = 75;
    brc_update->dw12.grate_ratio_thr2 = 97;
    brc_update->dw12.grate_ratio_thr3 = 103;
    brc_update->dw12.grate_ratio_thr4 = 125;
    brc_update->dw12.grate_ratio_thr5 = 160;

    brc_update->dw13.grate_ratio_thr6 = -3;
    brc_update->dw13.grate_ratio_thr7 = -2;
    brc_update->dw13.grate_ratio_thr8 = -1;
    brc_update->dw13.grate_ratio_thr9 = 0;

    brc_update->dw14.grate_ratio_thr10 = 1;
    brc_update->dw14.grate_ratio_thr11 = 2;
    brc_update->dw14.grate_ratio_thr12 = 3;

    i965_gpe_context_unmap_curbe(gpe_context);
    return VA_STATUS_SUCCESS;
}
2250
2251 static void
gen10_hevc_enc_brc_frame_update_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2252 gen10_hevc_enc_brc_frame_update_kernel(VADriverContextP ctx,
2253 struct encode_state *encode_state,
2254 struct intel_encoder_context *encoder_context)
2255 {
2256 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2257 struct i965_gpe_context *gpe_context;
2258 int gpe_index = GEN10_HEVC_BRC_FRAME_UPDATE;
2259 int media_function = GEN10_HEVC_MEDIA_STATE_BRC_UPDATE;
2260 struct gpe_media_object_parameter media_object_param;
2261
2262 gpe_context = &(vme_context->brc_context.gpe_contexts[gpe_index]);
2263
2264 gen8_gpe_context_init(ctx, gpe_context);
2265 gen9_gpe_reset_binding_table(ctx, gpe_context);
2266
2267 gen10_hevc_brc_add_pic_img_state(ctx, encode_state, encoder_context);
2268 gen10_hevc_enc_brc_update_set_curbe(ctx, encode_state, encoder_context, gpe_context, 0);
2269 gen10_hevc_enc_brc_frame_update_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
2270 gen8_gpe_setup_interface_data(ctx, gpe_context);
2271
2272 memset(&media_object_param, 0, sizeof(media_object_param));
2273 gen10_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
2274 }
2275
2276 static void
gen10_hevc_enc_brc_lcu_update_add_surfaces(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)2277 gen10_hevc_enc_brc_lcu_update_add_surfaces(VADriverContextP ctx,
2278 struct encode_state *encode_state,
2279 struct intel_encoder_context *encoder_context,
2280 struct i965_gpe_context *gpe_context)
2281 {
2282 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2283
2284 i965_add_buffer_gpe_surface(ctx,
2285 gpe_context,
2286 &vme_context->res_brc_history_buffer,
2287 0,
2288 BYTES2UINT32(vme_context->res_brc_history_buffer.size),
2289 0,
2290 0);
2291
2292 i965_add_buffer_2d_gpe_surface(ctx,
2293 gpe_context,
2294 &vme_context->res_brc_me_dist_surface,
2295 1,
2296 I965_SURFACEFORMAT_R8_UNORM,
2297 1);
2298
2299 i965_add_buffer_2d_gpe_surface(ctx,
2300 gpe_context,
2301 &vme_context->res_mb_stat_surface,
2302 1,
2303 I965_SURFACEFORMAT_R8_UNORM,
2304 2);
2305
2306 i965_add_buffer_2d_gpe_surface(ctx,
2307 gpe_context,
2308 &vme_context->res_brc_mb_qp_surface,
2309 1,
2310 I965_SURFACEFORMAT_R8_UNORM,
2311 3);
2312
2313 i965_add_buffer_2d_gpe_surface(ctx,
2314 gpe_context,
2315 &vme_context->res_mb_split_surface,
2316 1,
2317 I965_SURFACEFORMAT_R8_UNORM,
2318 4);
2319
2320 i965_add_buffer_2d_gpe_surface(ctx,
2321 gpe_context,
2322 &vme_context->res_brc_intra_dist_surface,
2323 1,
2324 I965_SURFACEFORMAT_R8_UNORM,
2325 5);
2326
2327 i965_add_buffer_2d_gpe_surface(ctx,
2328 gpe_context,
2329 &vme_context->res_cu_split_surface,
2330 1,
2331 I965_SURFACEFORMAT_R8_UNORM,
2332 6);
2333 }
2334
2335 static void
gen10_hevc_enc_brc_lcu_update_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2336 gen10_hevc_enc_brc_lcu_update_kernel(VADriverContextP ctx,
2337 struct encode_state *encode_state,
2338 struct intel_encoder_context *encoder_context)
2339 {
2340 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2341 struct gen10_hevc_enc_state *hevc_state;
2342 struct i965_gpe_context *gpe_context;
2343 int gpe_index = GEN10_HEVC_BRC_LCU_UPDATE;
2344 int media_function = GEN10_HEVC_MEDIA_STATE_BRC_LCU_UPDATE;
2345 uint32_t resolution_x, resolution_y;
2346 struct gpe_media_object_walker_parameter media_object_walker_param;
2347 struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
2348
2349 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2350
2351 gpe_context = &(vme_context->brc_context.gpe_contexts[gpe_index]);
2352
2353 gen8_gpe_context_init(ctx, gpe_context);
2354 gen9_gpe_reset_binding_table(ctx, gpe_context);
2355
2356 gen10_hevc_enc_brc_update_set_curbe(ctx, encode_state, encoder_context, gpe_context, 1);
2357 gen10_hevc_enc_brc_lcu_update_add_surfaces(ctx, encode_state, encoder_context, gpe_context);
2358
2359 gen8_gpe_setup_interface_data(ctx, gpe_context);
2360
2361 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2362
2363 resolution_x = ALIGN(hevc_state->frame_width, 16) >> 4;
2364 resolution_x = ALIGN(resolution_x, 16) >> 4;
2365 resolution_y = ALIGN(hevc_state->frame_height, 16) >> 4;
2366 resolution_y = ALIGN(resolution_y, 8) >> 3;
2367 kernel_walker_param.resolution_x = resolution_x;
2368 kernel_walker_param.resolution_y = resolution_y;
2369 kernel_walker_param.no_dependency = 1;
2370
2371 gen10_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2372
2373 gen10_run_kernel_media_object_walker(ctx, encoder_context,
2374 gpe_context,
2375 media_function,
2376 &media_object_walker_param);
2377 }
2378
/* Fill the CURBE for the combined scaling/format-conversion kernel:
 * 10-bit to 8-bit conversion plus downscale-stage selection, with the
 * binding-table indices for its input/output surfaces.
 */
static void
gen10_hevc_enc_scaling_curbe(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context,
                             struct i965_gpe_context *gpe_context,
                             struct gen10_hevc_scaling_conversion_param *scale_param)
{
    struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
    struct gen10_hevc_enc_state *hevc_state;
    gen10_hevc_scaling_curbe_data *scaling_curbe;

    hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    scaling_curbe = i965_gpe_context_map_curbe(gpe_context);

    if (!scaling_curbe)
        return;

    memset(scaling_curbe, 0, sizeof(gen10_hevc_scaling_curbe_data));

    /* Conversion path is fixed: 10-bit input down to 8-bit output. */
    scaling_curbe->dw0.input_bit_depth_for_chroma = 10;
    scaling_curbe->dw0.input_bit_depth_for_luma = 10;
    scaling_curbe->dw0.output_bit_depth_for_chroma = 8;
    scaling_curbe->dw0.output_bit_depth_for_luma = 8;
    scaling_curbe->dw0.rounding_enabled = 1;

    scaling_curbe->dw1.convert_flag = scale_param->scale_flag.conv_enable;
    scaling_curbe->dw1.downscale_stage = scale_param->scale_flag.ds_type;
    scaling_curbe->dw1.mb_statistics_dump_flag = scale_param->scale_flag.dump_enable;
    /* LCU-size encoding and job-queue size differ between 64- and
     * 32-pixel LCU modes (values match the kernel's expectations --
     * TODO confirm against the kernel source). */
    if (scale_param->scale_flag.is_64lcu) {
        scaling_curbe->dw1.lcu_size = 0;
        scaling_curbe->dw1.job_queue_size = 32;
    } else {
        scaling_curbe->dw1.lcu_size = 1;
        scaling_curbe->dw1.job_queue_size = 2656;
    }

    scaling_curbe->dw2.orig_pic_width_in_pixel = hevc_state->frame_width;
    scaling_curbe->dw2.orig_pic_height_in_pixel = hevc_state->frame_height;

    /* Binding-table indices for the kernel's surfaces. */
    scaling_curbe->dw3.bti_input_conversion_surface = GEN10_HEVC_SCALING_10BIT_Y;
    scaling_curbe->dw4.bti_input_ds_surface = GEN10_HEVC_SCALING_8BIT_Y;
    scaling_curbe->dw5.bti_4x_ds_surface = GEN10_HEVC_SCALING_4xDS;
    scaling_curbe->dw6.bti_mbstat_surface = GEN10_HEVC_SCALING_MB_STATS;
    scaling_curbe->dw7.bti_2x_ds_surface = GEN10_HEVC_SCALING_2xDS;
    scaling_curbe->dw8.bti_mb_split_surface = GEN10_HEVC_SCALING_MB_SPLIT_SURFACE;
    scaling_curbe->dw9.bti_lcu32_jobqueue_buffer_surface = GEN10_HEVC_SCALING_LCU32_JOB_QUEUE_SCRATCH_SURFACE;
    scaling_curbe->dw10.bti_lcu64_lcu32_jobqueue_buffer_surface = GEN10_HEVC_SCALING_LCU64_JOB_QUEUE_SCRATCH_SURFACE;
    scaling_curbe->dw11.bti_lcu64_cu32_distortion_surface = GEN10_HEVC_SCALING_LCU64_64x64_DISTORTION_SURFACE;

    i965_gpe_context_unmap_curbe(gpe_context);
}
2430
/*
 * Bind the input/output surfaces for the combined conversion/scaling kernel.
 *
 * The binding-table indices are assigned by incrementing input_bti in lock
 * step with the BTI constants programmed into the curbe
 * (GEN10_HEVC_SCALING_10BIT_Y, ..., GEN10_HEVC_SCALING_LCU64_64x64_DISTORTION_SURFACE),
 * so the statement order here is significant: even when a surface is not
 * bound for the current mode, input_bti is still advanced to keep later
 * surfaces at their expected slots.
 */
static void
gen10_hevc_enc_scaling_surfaces(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                struct i965_gpe_context *gpe_context,
                                struct gen10_hevc_scaling_conversion_param *scale_param)
{
    struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
    int input_bti = 0;
    struct object_surface *obj_surface;

    if (scale_param->scale_flag.conv_enable) {
        /* 10-bit -> 8-bit conversion enabled: bind the 10-bit input
         * (luma plane then chroma plane) followed by the 8-bit
         * converted output (luma then chroma) at slots 0..3. */
        obj_surface = scale_param->input_surface;
        i965_add_2d_gpe_surface(ctx,
                                gpe_context,
                                obj_surface,
                                0,
                                1,
                                I965_SURFACEFORMAT_R32_UNORM,
                                input_bti);
        input_bti++;

        i965_add_2d_gpe_surface(ctx,
                                gpe_context,
                                obj_surface,
                                1,
                                1,
                                I965_SURFACEFORMAT_R16G16_UNORM,
                                input_bti);
        input_bti++;

        obj_surface = scale_param->converted_output_surface;
        i965_add_2d_gpe_surface(ctx,
                                gpe_context,
                                obj_surface,
                                0,
                                1,
                                I965_SURFACEFORMAT_R8_UNORM,
                                input_bti);
        input_bti++;
        i965_add_2d_gpe_surface(ctx,
                                gpe_context,
                                obj_surface,
                                1,
                                1,
                                I965_SURFACEFORMAT_R16_UINT,
                                input_bti);
        input_bti++;
    } else {
        /* No conversion: slots 0/1 (conversion input) are unused, the
         * 8-bit source is bound directly at the output slots 2/3. */
        input_bti = 2;
        obj_surface = scale_param->input_surface;
        i965_add_2d_gpe_surface(ctx,
                                gpe_context,
                                obj_surface,
                                0,
                                1,
                                I965_SURFACEFORMAT_R32_UNORM,
                                input_bti);
        input_bti++;

        i965_add_2d_gpe_surface(ctx,
                                gpe_context,
                                obj_surface,
                                1,
                                1,
                                I965_SURFACEFORMAT_R16_UINT,
                                input_bti);
        input_bti++;
    }

    /* 4x downscale output: bound only when a 4x (or 16x, which reuses the
     * same slot for its 4x-of-4x output) pass runs; otherwise skip the slot. */
    if (scale_param->scale_flag.ds_type == GEN10_4X_DS ||
        scale_param->scale_flag.ds_type == GEN10_16X_DS ||
        scale_param->scale_flag.ds_type == GEN10_2X_4X_DS) {
        obj_surface = scale_param->scaled_4x_surface;

        i965_add_2d_gpe_surface(ctx,
                                gpe_context,
                                obj_surface,
                                0,
                                1,
                                I965_SURFACEFORMAT_R32_UNORM,
                                input_bti);
        input_bti++;
    } else
        input_bti++;

    /* Per-MB statistics dump buffer. */
    i965_add_buffer_2d_gpe_surface(ctx,
                                   gpe_context,
                                   &vme_context->res_mb_stat_surface,
                                   1,
                                   I965_SURFACEFORMAT_R8_UNORM,
                                   input_bti);
    input_bti++;

    /* 2x downscale output (64-LCU paths); slot skipped otherwise. */
    if (scale_param->scale_flag.ds_type == GEN10_2X_DS ||
        scale_param->scale_flag.ds_type == GEN10_2X_4X_DS) {
        obj_surface = scale_param->scaled_2x_surface;

        i965_add_2d_gpe_surface(ctx,
                                gpe_context,
                                obj_surface,
                                0,
                                1,
                                I965_SURFACEFORMAT_R32_UNORM,
                                input_bti);
        input_bti++;
    } else
        input_bti++;

    /* MB-split decision surface. */
    i965_add_buffer_2d_gpe_surface(ctx,
                                   gpe_context,
                                   &vme_context->res_mb_split_surface,
                                   1,
                                   I965_SURFACEFORMAT_R8_UNORM,
                                   input_bti);
    input_bti++;

    /* Job-queue scratch buffers (LCU32 and LCU64 variants) and the LCU64
     * 64x64 distortion buffer; sizes are passed in dword units. */
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &vme_context->res_jbq_header_buffer,
                                0,
                                BYTES2UINT32(vme_context->res_jbq_header_buffer.size),
                                0,
                                input_bti);
    input_bti++;

    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &vme_context->res_jbq_header_lcu64_buffer,
                                0,
                                BYTES2UINT32(vme_context->res_jbq_header_lcu64_buffer.size),
                                0,
                                input_bti);
    input_bti++;

    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &vme_context->res_64x64_dist_buffer,
                                0,
                                BYTES2UINT32(vme_context->res_64x64_dist_buffer.size),
                                0,
                                input_bti);
    input_bti++;
}
2575
2576 static void
gen10_hevc_enc_scaling_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct gen10_hevc_scaling_conversion_param * scale_param)2577 gen10_hevc_enc_scaling_kernel(VADriverContextP ctx,
2578 struct encode_state *encode_state,
2579 struct intel_encoder_context *encoder_context,
2580 struct gen10_hevc_scaling_conversion_param *scale_param)
2581 {
2582 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2583 struct gen10_hevc_enc_state *hevc_state;
2584 struct i965_gpe_context *gpe_context;
2585 int media_function;
2586 struct gpe_media_object_walker_parameter media_object_walker_param;
2587 struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
2588
2589 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2590
2591 gpe_context = &(vme_context->scaling_context.gpe_context);
2592
2593 gen8_gpe_context_init(ctx, gpe_context);
2594 gen9_gpe_reset_binding_table(ctx, gpe_context);
2595
2596 gen10_hevc_enc_scaling_curbe(ctx, encode_state, encoder_context, gpe_context, scale_param);
2597 gen10_hevc_enc_scaling_surfaces(ctx, encode_state, encoder_context, gpe_context, scale_param);
2598
2599 gen8_gpe_setup_interface_data(ctx, gpe_context);
2600
2601 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2602 if (scale_param->scale_flag.ds_type == GEN10_NONE_DS) {
2603 kernel_walker_param.resolution_x = hevc_state->frame_width >> 3;
2604 kernel_walker_param.resolution_y = hevc_state->frame_height >> 3;
2605 media_function = GEN10_HEVC_MEDIA_STATE_NO_SCALING;
2606 } else if (scale_param->scale_flag.ds_type == GEN10_2X_DS) {
2607 kernel_walker_param.resolution_x = ALIGN(hevc_state->frame_width >> 1, 64) >> 3;
2608 kernel_walker_param.resolution_y = ALIGN(hevc_state->frame_height >> 1, 64) >> 3;
2609 media_function = GEN10_HEVC_MEDIA_STATE_2X_SCALING;
2610 } else if (scale_param->scale_flag.ds_type == GEN10_4X_DS ||
2611 scale_param->scale_flag.ds_type == GEN10_2X_4X_DS) {
2612 kernel_walker_param.resolution_x = hevc_state->frame_width_4x >> 3;
2613 kernel_walker_param.resolution_y = hevc_state->frame_height_4x >> 3;
2614
2615 if (scale_param->scale_flag.ds_type == GEN10_4X_DS)
2616 media_function = GEN10_HEVC_MEDIA_STATE_4X_SCALING;
2617 else
2618 media_function = GEN10_HEVC_MEDIA_STATE_2X_4X_SCALING;
2619 } else {
2620 kernel_walker_param.resolution_x = hevc_state->frame_width_16x >> 3;
2621 kernel_walker_param.resolution_y = hevc_state->frame_height_16x >> 3;
2622
2623 media_function = GEN10_HEVC_MEDIA_STATE_16X_SCALING;
2624 }
2625 kernel_walker_param.no_dependency = 1;
2626
2627 gen10_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
2628
2629 gen10_run_kernel_media_object_walker(ctx, encoder_context,
2630 gpe_context,
2631 media_function,
2632 &media_object_walker_param);
2633 }
2634
/*
 * Produce the converted/downscaled copies of one source surface.
 *
 * Depending on the encoder state this runs up to two scaling-kernel
 * passes: the first handles the 10-bit->8-bit conversion and/or the
 * 2x/4x downscale; the second (HME 16x only) downscales the 4x surface
 * by another 4x. Results land in the per-surface private storage.
 *
 * @input_surface        optional raw source; falls back to obj_surface
 * @obj_surface          surface owning the scaled/converted outputs
 * @only_for_reference   when set, mark the surface done and skip the 16x pass
 */
static void
gen10_hevc_enc_conv_scaling_surface(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    struct object_surface *input_surface,
                                    struct object_surface *obj_surface,
                                    int only_for_reference)
{
    struct gen10_hevc_enc_context *vme_context;
    struct gen10_hevc_enc_state *hevc_state;
    struct gen10_hevc_scaling_conversion_param scale_param;
    struct gen10_hevc_surface_priv *surface_priv;

    vme_context = encoder_context->vme_context;
    hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    /* NOTE(review): private_data is dereferenced below without a NULL
     * check — presumably it is always attached before this is called;
     * verify against the surface-init path. */
    surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);

    /* Nothing to do: 8-bit input, 32-LCU, and no HME means neither
     * conversion nor any downscale pass is needed. */
    if (!hevc_state->is_10bit &&
        !hevc_state->is_64lcu &&
        !hevc_state->hme_supported)
        return;

    memset(&scale_param, 0, sizeof(scale_param));

    scale_param.input_surface = input_surface ? input_surface : obj_surface;
    scale_param.input_width = hevc_state->frame_width;
    scale_param.input_height = hevc_state->frame_height;
    scale_param.output_4x_width = hevc_state->frame_width_4x;
    scale_param.output_4x_height = hevc_state->frame_height_4x;
    scale_param.scaled_2x_surface = surface_priv->scaled_2x_surface;
    scale_param.scaled_4x_surface = surface_priv->scaled_4x_surface;
    scale_param.converted_output_surface = surface_priv->converted_surface;

    if (hevc_state->is_10bit)
        scale_param.scale_flag.conv_enable = GEN10_DEPTH_CONV_ENABLE;

    scale_param.scale_flag.is_64lcu = hevc_state->is_64lcu;

    /* Pick the downscale stage(s) for the first pass; MB statistics are
     * dumped only when a 4x pass runs and BRC is on. */
    scale_param.scale_flag.dump_enable = 0;
    if (hevc_state->is_64lcu && hevc_state->hme_supported) {
        scale_param.scale_flag.ds_type = GEN10_2X_4X_DS;
        scale_param.scale_flag.dump_enable = hevc_state->brc.brc_enabled ? 1 : 0;
    } else if (hevc_state->is_64lcu)
        scale_param.scale_flag.ds_type = GEN10_2X_DS;
    else if (hevc_state->hme_supported) {
        scale_param.scale_flag.ds_type = GEN10_4X_DS;
        scale_param.scale_flag.dump_enable = hevc_state->brc.brc_enabled ? 1 : 0;
    } else
        scale_param.scale_flag.ds_type = GEN10_NONE_DS;

    gen10_hevc_enc_scaling_kernel(ctx, encode_state,
                                  encoder_context,
                                  &scale_param);

    if (only_for_reference)
        surface_priv->conv_scaling_done = 1;

    /* The 16x surface is only needed by 16x HME for the current frame. */
    if (!hevc_state->b16xme_supported ||
        only_for_reference)
        return;

    /* Second pass: 4x -> 16x, reusing the "4x" output slot. */
    memset(&scale_param, 0, sizeof(scale_param));

    scale_param.input_surface = surface_priv->scaled_4x_surface;
    scale_param.scaled_4x_surface = surface_priv->scaled_16x_surface;
    scale_param.input_width = hevc_state->frame_width_4x;
    scale_param.input_height = hevc_state->frame_height_4x;
    scale_param.output_4x_width = hevc_state->frame_width_16x;
    scale_param.output_4x_height = hevc_state->frame_height_16x;

    scale_param.scale_flag.ds_type = GEN10_16X_DS;

    gen10_hevc_enc_scaling_kernel(ctx, encode_state,
                                  encoder_context,
                                  &scale_param);
}
2711
2712 #define GEN10_HEVC_HME_STAGE_4X_NO_16X 0
2713 #define GEN10_HEVC_HME_STAGE_4X_AFTER_16X 1
2714 #define GEN10_HEVC_HME_STAGE_16X 2
2715
/*
 * Fill the curbe (constant data) for the HME motion-estimation kernel.
 *
 * Programs search parameters, reference counts from the first slice, the
 * HME stage (4x-only, 4x-after-16x, or 16x), and the binding-table
 * indices the kernel uses for its input/output surfaces.
 *
 * @hme_level  GEN10_HEVC_HME_LEVEL_4X or the 16x level
 * @dist_type  distortion mode; GEN10_HEVC_ME_DIST_TYPE_INTER_BRC selects
 *             the inter/BRC path, otherwise the intra-distortion path
 */
static void
gen10_hevc_enc_me_curbe(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context,
                        struct i965_gpe_context *gpe_context,
                        uint32_t hme_level,
                        int dist_type)
{
    struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
    struct gen10_hevc_enc_state *hevc_state;
    gen10_hevc_me_curbe_data *me_curbe;
    VAEncSliceParameterBufferHEVC *slice_param;
    VAEncSequenceParameterBufferHEVC *seq_param;

    hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;

    /* Slice type / ref counts are taken from the first slice only. */
    seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;

    me_curbe = i965_gpe_context_map_curbe(gpe_context);

    if (!me_curbe)
        return;

    memset(me_curbe, 0, sizeof(gen10_hevc_me_curbe_data));

    /* Frame dimensions in MV units (8-pixel granularity) at 4x. */
    me_curbe->dw0.rounded_frame_width_in_mv_for4x = hevc_state->frame_width_4x >> 3;
    me_curbe->dw0.rounded_frame_height_in_mv_for4x = hevc_state->frame_height_4x >> 3;

    me_curbe->dw2.sub_pel_mode = 3;
    me_curbe->dw2.bme_disable_fbr = 1;
    me_curbe->dw2.inter_sad_adj = 2;

    me_curbe->dw3.adaptive_search_en = 1;
    me_curbe->dw3.ime_ref_window_size = 1; // From the HW-spec

    me_curbe->dw4.quarter_quad_tree_cand = 1; // 32x32 split is enabled.
    me_curbe->dw4.bi_weight = 32; // default weight.

    /* Full search-path length; search start centered on the IME window. */
    me_curbe->dw5.len_sp = 0x3F;
    me_curbe->dw5.max_num_su = 0x3F;
    me_curbe->dw5.start_center0_x = ((gen10_hevc_ime_ref_window_size[1][0] - 32) >> 3) & 0xF;
    me_curbe->dw5.start_center0_y = ((gen10_hevc_ime_ref_window_size[1][1] - 32) >> 3) & 0xF;

    /* HME stage selection: only the inter/BRC path distinguishes 16x and
     * 4x-after-16x; the intra path always runs plain 4x. */
    me_curbe->dw6.slice_type = (dist_type == GEN10_HEVC_ME_DIST_TYPE_INTER_BRC) ? 1 : 0;
    if (dist_type == GEN10_HEVC_ME_DIST_TYPE_INTER_BRC) {
        if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
            me_curbe->dw6.hme_stage =
                (hevc_state->b16xme_enabled) ? GEN10_HEVC_HME_STAGE_4X_AFTER_16X :
                GEN10_HEVC_HME_STAGE_4X_NO_16X;
        else
            me_curbe->dw6.hme_stage = GEN10_HEVC_HME_STAGE_16X;
    } else
        me_curbe->dw6.hme_stage = GEN10_HEVC_HME_STAGE_4X_NO_16X;

    /* Active reference counts per list; L1 is dropped for low-delay B. */
    if (slice_param->slice_type == HEVC_SLICE_I) {
        me_curbe->dw6.num_ref_l0 = 0;
        me_curbe->dw6.num_ref_l1 = 0;
    } else if (slice_param->slice_type == HEVC_SLICE_P) {
        me_curbe->dw6.num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        me_curbe->dw6.num_ref_l1 = 0;
    } else {
        me_curbe->dw6.num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        me_curbe->dw6.num_ref_l1 = hevc_state->low_delay ? 0 : slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    /* Frame dimensions in MV units at 16x. */
    me_curbe->dw7.rounded_frame_width_in_mv_for16x = hevc_state->frame_width_16x >> 3;
    me_curbe->dw7.rounded_frame_height_in_mv_for16x = hevc_state->frame_height_16x >> 3;

    /* Search path */
    memcpy(&me_curbe->ime_search_path_03, gen10_hevc_me_search_path,
           sizeof(gen10_hevc_me_search_path));

    me_curbe->dw24.coding_unit_size = 1;
    me_curbe->dw24.coding_unit_partition_mode = 0;
    me_curbe->dw24.coding_unit_prediction_mode = 1;

    /* Downscaled frame size seen by the kernel: /4 at 4x level, /16 at 16x. */
    if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
        me_curbe->dw25.frame_width_in_pixel_cs = hevc_state->frame_width >> 2;
        me_curbe->dw25.frame_height_in_pixel_cs = hevc_state->frame_height >> 2;
    } else {
        me_curbe->dw25.frame_width_in_pixel_cs = hevc_state->frame_width >> 4;
        me_curbe->dw25.frame_height_in_pixel_cs = hevc_state->frame_height >> 4;
    }

    me_curbe->dw27.intra_compute_type = 1;

    /* Fixed penalties for non-DC intra modes per block size. */
    me_curbe->dw28.penalty_intra32x32_nondc = 36;
    me_curbe->dw28.penalty_intra16x16_nondc = 12;
    me_curbe->dw28.penalty_intra8x8_nondc = 4;

    me_curbe->dw30.mode4_cost = 13;
    me_curbe->dw30.mode5_cost = 9;
    me_curbe->dw30.mode6_cost = 13;
    me_curbe->dw30.mode7_cost = 3;
    me_curbe->dw31.mode8_cost = 9;

    me_curbe->dw32.sicintra_neighbor_avail_flag = 0x3F;
    me_curbe->dw32.sic_inter_sad_measure = 0x02;
    me_curbe->dw32.sic_intra_sad_measure = 0x02;

    me_curbe->dw33.sic_log2_min_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;

    /* Binding-table indices; must match gen10_hevc_enc_me_surfaces(). */
    me_curbe->dw34.bti_hme_output_mv_data_surface = GEN10_HEVC_HME_OUTPUT_MV_DATA;
    me_curbe->dw35.bti_16xinput_mv_data_surface = GEN10_HEVC_HME_16xINPUT_MV_DATA;
    me_curbe->dw36.bti_4x_output_distortion_surface = GEN10_HEVC_HME_4xOUTPUT_DISTORTION;
    me_curbe->dw37.bti_vme_input_surface = GEN10_HEVC_HME_VME_PRED_CURR_PIC_IDX0;
    me_curbe->dw38.bti_4xds_surface = GEN10_HEVC_HME_4xDS_INPUT;
    me_curbe->dw39.bti_brc_distortion_surface = GEN10_HEVC_HME_BRC_DISTORTION;
    me_curbe->dw40.bti_mv_and_distortion_sum_surface = GEN10_HEVC_HME_MV_AND_DISTORTION_SUM;

    i965_gpe_context_unmap_curbe(gpe_context);
}
2829
2830 static void
gen10_hevc_enc_me_surfaces(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context,uint32_t hme_level,int dist_type)2831 gen10_hevc_enc_me_surfaces(VADriverContextP ctx,
2832 struct encode_state *encode_state,
2833 struct intel_encoder_context *encoder_context,
2834 struct i965_gpe_context *gpe_context,
2835 uint32_t hme_level,
2836 int dist_type)
2837 {
2838 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2839 struct gen10_hevc_enc_state *hevc_state;
2840 struct gen10_hevc_enc_frame_info *frame_info;
2841 struct gen10_hevc_enc_common_res *common_res;
2842 struct object_surface *obj_surface, *vme_surface;
2843 struct gen10_hevc_surface_priv *surface_priv;
2844 struct i965_gpe_resource *res_source;
2845 int input_bti, i;
2846
2847 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2848 frame_info = &vme_context->frame_info;
2849 common_res = &vme_context->common_res;
2850
2851 obj_surface = encode_state->reconstructed_object;
2852
2853 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
2854
2855 if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2856 vme_surface = surface_priv->scaled_4x_surface;
2857 res_source = &vme_context->res_s4x_memv_data_surface;
2858 } else {
2859 vme_surface = surface_priv->scaled_16x_surface;
2860 res_source = &vme_context->res_s16x_memv_data_surface;
2861 }
2862
2863 input_bti = 0;
2864 i965_add_buffer_2d_gpe_surface(ctx, gpe_context, res_source,
2865 1, I965_SURFACEFORMAT_R8_UNORM,
2866 GEN10_HEVC_HME_OUTPUT_MV_DATA);
2867
2868 if (hme_level == GEN10_HEVC_HME_LEVEL_4X && hevc_state->b16xme_enabled)
2869 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2870 &vme_context->res_s16x_memv_data_surface,
2871 1, I965_SURFACEFORMAT_R8_UNORM,
2872 GEN10_HEVC_HME_16xINPUT_MV_DATA);
2873
2874 if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2875 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2876 &vme_context->res_s4x_me_dist_surface,
2877 1, I965_SURFACEFORMAT_R8_UNORM,
2878 GEN10_HEVC_HME_4xOUTPUT_DISTORTION);
2879
2880 input_bti = GEN10_HEVC_HME_VME_PRED_CURR_PIC_IDX0;
2881
2882 i965_add_adv_gpe_surface(ctx, gpe_context,
2883 vme_surface,
2884 input_bti);
2885 input_bti++;
2886
2887 for (i = 0; i < 4; i++) {
2888 struct object_surface *tmp_surface, *input_surface;
2889 struct gen10_hevc_surface_priv *tmp_hevc_surface;
2890
2891 if (frame_info->mapped_ref_idx_list0[i] >= 0)
2892 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
2893 else
2894 tmp_surface = NULL;
2895
2896 if (tmp_surface && tmp_surface->private_data) {
2897 tmp_hevc_surface = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
2898
2899 if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2900 input_surface = tmp_hevc_surface->scaled_4x_surface;
2901 else
2902 input_surface = tmp_hevc_surface->scaled_16x_surface;
2903
2904 i965_add_adv_gpe_surface(ctx, gpe_context,
2905 input_surface,
2906 input_bti + 2 * i);
2907 } else
2908 i965_add_adv_gpe_surface(ctx, gpe_context,
2909 vme_surface,
2910 input_bti + 2 * i);
2911
2912 if (frame_info->mapped_ref_idx_list1[i] >= 0)
2913 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
2914 else
2915 tmp_surface = NULL;
2916
2917 if (tmp_surface && tmp_surface->private_data) {
2918 tmp_hevc_surface = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
2919
2920 if (hme_level == GEN10_HEVC_HME_LEVEL_4X)
2921 input_surface = tmp_hevc_surface->scaled_4x_surface;
2922 else
2923 input_surface = tmp_hevc_surface->scaled_16x_surface;
2924
2925 i965_add_adv_gpe_surface(ctx, gpe_context,
2926 input_surface,
2927 input_bti + 2 * i + 1);
2928 } else
2929 i965_add_adv_gpe_surface(ctx, gpe_context,
2930 vme_surface,
2931 input_bti + 2 * i + 1);
2932 }
2933
2934 if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2935 i965_add_2d_gpe_surface(ctx,
2936 gpe_context,
2937 vme_surface,
2938 0,
2939 1,
2940 I965_SURFACEFORMAT_R8_UNORM,
2941 GEN10_HEVC_HME_4xDS_INPUT);
2942
2943 if (dist_type != GEN10_HEVC_ME_DIST_TYPE_INTRA)
2944 res_source = &vme_context->res_brc_me_dist_surface;
2945 else
2946 res_source = &vme_context->res_brc_intra_dist_surface;
2947
2948 i965_add_buffer_2d_gpe_surface(ctx, gpe_context, res_source,
2949 1, I965_SURFACEFORMAT_R8_UNORM,
2950 GEN10_HEVC_HME_BRC_DISTORTION);
2951 }
2952
2953 i965_add_buffer_gpe_surface(ctx, gpe_context,
2954 &vme_context->res_mv_dist_sum_buffer,
2955 1,
2956 vme_context->res_mv_dist_sum_buffer.size,
2957 0,
2958 GEN10_HEVC_HME_MV_AND_DISTORTION_SUM);
2959 }
2960
2961 static void
gen10_hevc_enc_me_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int hme_level,int dist_type)2962 gen10_hevc_enc_me_kernel(VADriverContextP ctx,
2963 struct encode_state *encode_state,
2964 struct intel_encoder_context *encoder_context,
2965 int hme_level,
2966 int dist_type)
2967 {
2968 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
2969 struct gen10_hevc_enc_state *hevc_state;
2970 struct i965_gpe_context *gpe_context;
2971 int media_function;
2972 struct gpe_media_object_walker_parameter media_object_walker_param;
2973 struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
2974
2975 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
2976
2977 gpe_context = &(vme_context->me_context.gpe_context);
2978
2979 gen8_gpe_context_init(ctx, gpe_context);
2980 gen9_gpe_reset_binding_table(ctx, gpe_context);
2981
2982 gen10_hevc_enc_me_curbe(ctx, encode_state, encoder_context, gpe_context, hme_level, dist_type);
2983 gen10_hevc_enc_me_surfaces(ctx, encode_state, encoder_context, gpe_context, hme_level, dist_type);
2984
2985 gen8_gpe_setup_interface_data(ctx, gpe_context);
2986
2987 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2988
2989 if (hme_level == GEN10_HEVC_HME_LEVEL_4X) {
2990 kernel_walker_param.resolution_x = hevc_state->frame_width_4x >> 5;
2991 kernel_walker_param.resolution_y = hevc_state->frame_height_4x >> 5;
2992
2993 media_function = GEN10_HEVC_MEDIA_STATE_4XME;
2994 } else {
2995 kernel_walker_param.resolution_x = hevc_state->frame_width_16x >> 5;
2996 kernel_walker_param.resolution_y = hevc_state->frame_height_16x >> 5;
2997
2998 media_function = GEN10_HEVC_MEDIA_STATE_16XME;
2999 }
3000
3001 kernel_walker_param.no_dependency = 1;
3002
3003 gen10_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
3004
3005 gen10_run_kernel_media_object_walker(ctx, encoder_context,
3006 gpe_context,
3007 media_function,
3008 &media_object_walker_param);
3009 }
3010
3011 #define LUTMODE_INTRA_NONPRED_HEVC 0x00
3012 #define LUTMODE_INTRA_32x32_HEVC 0x01
3013 #define LUTMODE_INTRA_16x16_HEVC 0x02
3014 #define LUTMODE_INTRA_8x8_HEVC 0x03
3015 #define LUTMODE_INTER_32x16_HEVC 0x04
3016 #define LUTMODE_INTER_16x32_HEVC 0x04
3017 #define LUTMODE_INTER_AMP_HEVC 0x04
3018 #define LUTMODE_INTER_16x16_HEVC 0x05
3019 #define LUTMODE_INTER_16x8_HEVC 0x06
3020 #define LUTMODE_INTER_8x16_HEVC 0x06
3021 #define LUTMODE_INTER_8x8_HEVC 0x07
3022 #define LUTMODE_INTER_32x32_HEVC 0x08
3023 #define LUTMODE_INTER_BIDIR_HEVC 0x09
3024 #define LUTMODE_REF_ID_HEVC 0x0A
3025 #define LUTMODE_INTRA_CHROMA_HEVC 0x0B
3026
3027 #define LAMBDA_RD_IDX 0x10
3028 #define LAMBDA_MD_IDX 0x11
3029 #define TUSAD_THR_IDX 0x12
3030
3031 #define MAX_MODE_COST 0x20
3032
3033 static uint8_t
map_44_lut_value(uint32_t value,uint8_t max)3034 map_44_lut_value(uint32_t value,
3035 uint8_t max)
3036 {
3037 uint32_t max_cost = 0;
3038 int data = 0;
3039 uint8_t ret = 0;
3040
3041 if (value == 0)
3042 return 0;
3043
3044 max_cost = ((max & 15) << (max >> 4));
3045 if (value >= max_cost)
3046 return max;
3047
3048 data = (int)(log((double)value) / log(2.)) - 3;
3049 if (data < 0)
3050 data = 0;
3051
3052 ret = (uint8_t)((data << 4) +
3053 (int)((value + (data == 0 ? 0 : (1 << (data - 1)))) >> data));
3054 ret = (ret & 0xf) == 0 ? (ret | 8) : ret;
3055
3056 return ret;
3057 }
3058
3059 static void
gen10_hevc_calc_costs(uint32_t * mode_cost,int slice_type,int qp,bool b_lcu64)3060 gen10_hevc_calc_costs(uint32_t *mode_cost, int slice_type, int qp, bool b_lcu64)
3061 {
3062 unsigned short lambda_md;
3063 unsigned int lambda_rd;
3064 unsigned int tu_sad_thres;
3065 float qp_value;
3066 double lambda;
3067 double intra_weigh_factor;
3068 double inter_weigh_factor;
3069 double qp_scale, cost_scale;
3070 int lcu_idx;
3071
3072 if (!mode_cost)
3073 return;
3074
3075 if (slice_type == HEVC_SLICE_I) {
3076 qp_scale = 5.0;
3077 cost_scale = 1.0;
3078 } else {
3079 qp_scale = 0.55;
3080 cost_scale = 2.0;
3081 }
3082
3083 if (b_lcu64)
3084 lcu_idx = 1;
3085 else
3086 lcu_idx = 0;
3087
3088 qp_value = qp - 12;
3089 if (qp_value < 0)
3090 qp_value = 0;
3091
3092 lambda = sqrt(qp_scale * pow(2.0, qp_value / 3.0));
3093 lambda_rd = (unsigned int)(qp_scale * pow(2.0, qp_value / 3.0) * 256 + 0.5);
3094 lambda_md = (unsigned short)(lambda * 256 + 0.5);
3095 tu_sad_thres = (unsigned int)(sqrt(0.85 * pow(2.0, qp_value / 3.0)) * 0.4 * 256 + 0.5);
3096
3097 inter_weigh_factor = cost_scale * lambda;
3098 intra_weigh_factor = inter_weigh_factor * gen10_hevc_lambda_factor[slice_type][qp];
3099
3100 mode_cost[LAMBDA_RD_IDX] = lambda_rd;
3101 mode_cost[LAMBDA_MD_IDX] = lambda_md;
3102 mode_cost[TUSAD_THR_IDX] = tu_sad_thres;
3103
3104 mode_cost[LUTMODE_INTRA_NONPRED_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_NONPRED_HEVC]), 0x6f);
3105 mode_cost[LUTMODE_INTRA_32x32_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_32x32_HEVC]), 0x8f);
3106 mode_cost[LUTMODE_INTRA_16x16_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_16x16_HEVC]), 0x8f);
3107 mode_cost[LUTMODE_INTRA_8x8_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_8x8_HEVC]), 0x8f);
3108 mode_cost[LUTMODE_INTRA_CHROMA_HEVC] = map_44_lut_value((uint32_t)(intra_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTRA_CHROMA_HEVC]), 0x6f);
3109
3110 mode_cost[LUTMODE_INTER_32x32_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_32x32_HEVC]), 0x8f);
3111 mode_cost[LUTMODE_INTER_32x16_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_32x16_HEVC]), 0x8f);
3112 mode_cost[LUTMODE_INTER_16x16_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_16x16_HEVC]), 0x6f);
3113 mode_cost[LUTMODE_INTER_16x8_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_16x8_HEVC]), 0x6f);
3114 mode_cost[LUTMODE_INTER_8x8_HEVC] = map_44_lut_value((uint32_t)(0.45 * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_8x8_HEVC]), 0x6f);
3115
3116 mode_cost[LUTMODE_INTER_BIDIR_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_INTER_BIDIR_HEVC]), 0x6f);
3117 if (slice_type != HEVC_SLICE_I)
3118 mode_cost[LUTMODE_REF_ID_HEVC] = map_44_lut_value((uint32_t)(inter_weigh_factor * gen10_hevc_mode_bits[lcu_idx][slice_type][LUTMODE_REF_ID_HEVC]), 0x6f);
3119 else
3120 mode_cost[LUTMODE_REF_ID_HEVC] = 0;
3121 }
3122
3123 static void
gen10_hevc_enc_generate_regions_in_slice_control(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)3124 gen10_hevc_enc_generate_regions_in_slice_control(VADriverContextP ctx,
3125 struct encode_state *encode_state,
3126 struct intel_encoder_context *encoder_context)
3127 {
3128 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3129 struct gen10_hevc_enc_state *hevc_state;
3130 VAEncSliceParameterBufferHEVC *slice_param;
3131 gen10_hevc_concurrent_tg_data *pregion;
3132 int i, k, slice, num_regions, height, num_slices;
3133 int num_wf_in_region;
3134 uint32_t frame_width_in_ctb, frame_height_in_ctb;
3135 bool is_arbitary_slices;
3136 int slice_starty[I965_MAX_NUM_SLICE + 1];
3137 int regions_start_table[64];
3138 uint32_t start_offset_to_region[16];
3139 int16_t data_tmp[32][32];
3140 int max_height;
3141 int log2_lcu_size;
3142 int copy_blk_size = 0;
3143
3144 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3145
3146 memset(slice_starty, 0, sizeof(slice_starty));
3147 memset(regions_start_table, 0, sizeof(regions_start_table));
3148 memset(data_tmp, 0, sizeof(data_tmp));
3149 memset(&hevc_state->hevc_wf_param, 0, sizeof(hevc_state->hevc_wf_param));
3150 memset(start_offset_to_region, 0, sizeof(start_offset_to_region));
3151
3152 frame_width_in_ctb = vme_context->frame_info.width_in_lcu;
3153 frame_height_in_ctb = vme_context->frame_info.height_in_lcu;
3154 if (hevc_state->is_64lcu) {
3155 log2_lcu_size = 6;
3156 copy_blk_size = 22;
3157 } else {
3158 log2_lcu_size = 5;
3159 copy_blk_size = 18;
3160 }
3161
3162 is_arbitary_slices = false;
3163 for (slice = 0; slice < encode_state->num_slice_params_ext; slice++) {
3164 slice_param = NULL;
3165 if (encode_state->slice_params_ext[slice] &&
3166 encode_state->slice_params_ext[slice]->buffer)
3167 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice]->buffer;
3168
3169 if (!slice_param)
3170 continue;
3171
3172 if (slice_param->slice_segment_address % frame_width_in_ctb) {
3173 is_arbitary_slices = true;
3174 } else {
3175 slice_starty[slice] = slice_param->slice_segment_address / frame_width_in_ctb;
3176 }
3177 }
3178
3179 slice_starty[encode_state->num_slice_params_ext] = frame_height_in_ctb;
3180
3181 regions_start_table[0] = 0;
3182 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + 0] = 0;
3183 num_regions = 1;
3184
3185 if (is_arbitary_slices) {
3186 height = frame_height_in_ctb;
3187 num_slices = 1;
3188 max_height = height;
3189 if (hevc_state->num_regions_in_slice > 1) {
3190 num_wf_in_region = (frame_width_in_ctb + 2 * (frame_height_in_ctb - 1) + hevc_state->num_regions_in_slice - 1) /
3191 hevc_state->num_regions_in_slice;
3192
3193 num_regions = hevc_state->num_regions_in_slice;
3194
3195 for (i = 1; i < hevc_state->num_regions_in_slice; i++) {
3196 int front = i * num_wf_in_region;
3197
3198 if (front < frame_width_in_ctb) {
3199 regions_start_table[i] = front;
3200 } else if (((front - frame_width_in_ctb + 1) & 1) == 0) {
3201 regions_start_table[i] = frame_width_in_ctb - 1;
3202 } else {
3203 regions_start_table[i] = frame_width_in_ctb - 2;
3204 }
3205
3206 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + i] = (front - regions_start_table[i]) >> 1;
3207 }
3208 }
3209 } else {
3210 int start_y = 0, slice_height;
3211 int slice_is_merged = 0;
3212
3213 max_height = 0;
3214 num_slices = encode_state->num_slice_params_ext;
3215
3216 for (slice = 0; slice < num_slices; slice++) {
3217 slice_height = slice_starty[slice + 1] - slice_starty[slice];
3218
3219 if (slice_height > max_height)
3220 max_height = slice_height;
3221 }
3222
3223 while (!slice_is_merged) {
3224 int new_num_slices = 1;
3225
3226 start_y = 0;
3227
3228 for (slice = 1; slice < num_slices; slice++) {
3229 if ((slice_starty[slice + 1] - start_y) <= max_height) {
3230 slice_starty[slice] = -1;
3231 } else {
3232 start_y = slice_starty[slice];
3233 }
3234 }
3235
3236 for (slice = 1; slice < num_slices; slice++) {
3237 if (slice_starty[slice] > 0) {
3238 slice_starty[new_num_slices] = slice_starty[slice];
3239 new_num_slices++;
3240 }
3241 }
3242
3243 num_slices = new_num_slices;
3244 slice_starty[num_slices] = frame_height_in_ctb;
3245
3246 if (num_slices * hevc_state->num_regions_in_slice <= 16) {
3247 slice_is_merged = 1;
3248 } else {
3249 int num = 1;
3250
3251 max_height = frame_height_in_ctb;
3252
3253 for (slice = 0; slice < num_slices - 1; slice++) {
3254 if ((slice_starty[slice + 2] - slice_starty[slice]) <= max_height) {
3255 max_height = slice_starty[slice + 2] - slice_starty[slice];
3256 num = slice + 1;
3257 }
3258 }
3259
3260 for (slice = num; slice < num_slices; slice++)
3261 slice_starty[slice] = slice_starty[slice + 1];
3262
3263 num_slices--;
3264 }
3265 }
3266
3267 num_wf_in_region = (frame_width_in_ctb + 2 * (max_height - 1) + hevc_state->num_regions_in_slice - 1) /
3268 hevc_state->num_regions_in_slice;
3269 num_regions = num_slices * hevc_state->num_regions_in_slice;
3270
3271 for (slice = 0; slice < num_slices; slice++) {
3272 regions_start_table[slice * hevc_state->num_regions_in_slice] = 0;
3273 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (slice * hevc_state->num_regions_in_slice)] = slice_starty[slice];
3274
3275 for (i = 1; i < hevc_state->num_regions_in_slice; i++) {
3276 int front = i * num_wf_in_region;
3277
3278 if (front < frame_width_in_ctb)
3279 regions_start_table[slice * hevc_state->num_regions_in_slice + i] = front;
3280 else if (((front - frame_width_in_ctb + 1) & 1) == 0)
3281 regions_start_table[slice * hevc_state->num_regions_in_slice + i] = frame_width_in_ctb - 1;
3282 else
3283 regions_start_table[slice * hevc_state->num_regions_in_slice + i] = frame_width_in_ctb - 2;
3284
3285 regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (slice * hevc_state->num_regions_in_slice + i)] = slice_starty[slice] +
3286 ((front - regions_start_table[i]) >> 1);
3287 }
3288 }
3289 height = max_height;
3290 }
3291
3292 for (k = 0; k < num_slices; k++) {
3293 int nearest_reg = 0, delta, tmp_y;
3294 int min_delta = hevc_state->frame_height;
3295 int cur_lcu_pel_y = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (k * hevc_state->num_regions_in_slice)] << log2_lcu_size;
3296 int ts_width = frame_width_in_ctb;
3297 int ts_height = height;
3298 int offset_y = -((ts_width + 1) >> 1);
3299 int offset_delta = ((ts_width + ((ts_height - 1) << 1)) + (hevc_state->num_regions_in_slice - 1)) / (hevc_state->num_regions_in_slice);
3300
3301 for (i = 0; i < num_regions; i++) {
3302 if (regions_start_table[i] == 0) {
3303 delta = cur_lcu_pel_y - (regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + i] << log2_lcu_size);
3304
3305 if (delta >= 0) {
3306 if (delta < min_delta) {
3307 min_delta = delta;
3308 nearest_reg = i;
3309 }
3310 }
3311 }
3312
3313 start_offset_to_region[k] = 2 * regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + nearest_reg];
3314 }
3315 for (i = 0; i < hevc_state->num_regions_in_slice; i++) {
3316 data_tmp[k * hevc_state->num_regions_in_slice + i][0] = slice_starty[k] * frame_width_in_ctb;
3317 data_tmp[k * hevc_state->num_regions_in_slice + i][1] = (k == (num_slices - 1)) ?
3318 frame_width_in_ctb * frame_height_in_ctb : slice_starty[k + 1] * frame_width_in_ctb;
3319 data_tmp[k * hevc_state->num_regions_in_slice + i][2] = k * hevc_state->num_regions_in_slice + i;
3320 if (!hevc_state->is_64lcu && hevc_state->num_regions_in_slice == 1) {
3321 continue;
3322 }
3323
3324 data_tmp[k * hevc_state->num_regions_in_slice + i][3] = height;
3325 data_tmp[k * hevc_state->num_regions_in_slice + i][4] = regions_start_table[nearest_reg + i];
3326 data_tmp[k * hevc_state->num_regions_in_slice + i][5] = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (nearest_reg + i)];
3327 data_tmp[k * hevc_state->num_regions_in_slice + i][6] = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + nearest_reg];
3328 tmp_y = regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + (nearest_reg + hevc_state->num_regions_in_slice)];
3329 data_tmp[k * hevc_state->num_regions_in_slice + i][7] = (tmp_y != 0) ? tmp_y : frame_height_in_ctb;
3330 data_tmp[k * hevc_state->num_regions_in_slice + i][8] = offset_y + regions_start_table[GEN10_HEVC_REGION_START_Y_OFFSET + nearest_reg] + ((i * offset_delta) >> 1);
3331 if (hevc_state->is_64lcu) {
3332 data_tmp[k * hevc_state->num_regions_in_slice + i][9] = (frame_width_in_ctb + 2 * (max_height - 1) + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
3333 data_tmp[k * hevc_state->num_regions_in_slice + i][10] = num_regions;
3334 }
3335 }
3336 }
3337
3338
3339 pregion = (gen10_hevc_concurrent_tg_data *) i965_map_gpe_resource(&vme_context->res_concurrent_tg_data);
3340 if (!pregion)
3341 return;
3342
3343 memset(pregion, 0, vme_context->res_concurrent_tg_data.size);
3344
3345 for (i = 0; i < 16; i++) {
3346 memcpy(pregion, data_tmp[i], copy_blk_size);
3347 pregion++;
3348 }
3349
3350 hevc_state->hevc_wf_param.max_height_in_region = max_height;
3351 hevc_state->hevc_wf_param.num_regions = num_regions;
3352 hevc_state->hevc_wf_param.num_unit_in_wf = (frame_width_in_ctb + 2 * (max_height - 1) + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
3353
3354 i965_unmap_gpe_resource(&vme_context->res_concurrent_tg_data);
3355 }
3356
3357 static void
gen10_hevc_enc_generate_lculevel_data(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)3358 gen10_hevc_enc_generate_lculevel_data(VADriverContextP ctx,
3359 struct encode_state *encode_state,
3360 struct intel_encoder_context *encoder_context)
3361 {
3362 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3363 VAEncPictureParameterBufferHEVC *pic_param;
3364 VAEncSliceParameterBufferHEVC *slice_param;
3365 gen10_hevc_lcu_level_data *plcu_level_data;
3366 int ui_start_lcu, slice_idx, i;
3367
3368 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3369
3370 plcu_level_data = (gen10_hevc_lcu_level_data *)
3371 i965_map_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
3372
3373 if (!plcu_level_data)
3374 return;
3375
3376 slice_idx = 0;
3377 for (ui_start_lcu = 0, slice_idx = 0; slice_idx < encode_state->num_slice_params_ext; slice_idx++) {
3378
3379 slice_param = NULL;
3380 if (encode_state->slice_params_ext[slice_idx] &&
3381 encode_state->slice_params_ext[slice_idx]->buffer)
3382 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_idx]->buffer;
3383
3384 if (!slice_param)
3385 continue;
3386
3387 for (i = 0; i < slice_param->num_ctu_in_slice; i++, plcu_level_data++) {
3388 plcu_level_data->slice_start_lcu_idx = ui_start_lcu;
3389 plcu_level_data->slice_end_lcu_idx = ui_start_lcu + slice_param->num_ctu_in_slice;
3390 plcu_level_data->slice_id = slice_idx + 1;
3391 plcu_level_data->slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
3392 }
3393
3394 ui_start_lcu += slice_param->num_ctu_in_slice;
3395 }
3396
3397 i965_unmap_gpe_resource(&vme_context->res_lculevel_input_data_buffer);
3398 }
3399
3400 static void
gen10_hevc_enc_mbenc_intra_curbe(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)3401 gen10_hevc_enc_mbenc_intra_curbe(VADriverContextP ctx,
3402 struct encode_state *encode_state,
3403 struct intel_encoder_context *encoder_context,
3404 struct i965_gpe_context *gpe_context)
3405 {
3406 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3407 struct gen10_hevc_enc_state *hevc_state;
3408 gen10_hevc_mbenc_intra_curbe_data *mbenc_curbe;
3409 VAEncSliceParameterBufferHEVC *slice_param;
3410 VAEncPictureParameterBufferHEVC *pic_param;
3411 VAEncSequenceParameterBufferHEVC *seq_param;
3412 int slice_qp;
3413 unsigned int mode_cost[MAX_MODE_COST];
3414 int tu_idx;
3415
3416 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3417
3418 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
3419 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
3420 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3421 mbenc_curbe = i965_gpe_context_map_curbe(gpe_context);
3422
3423 if (!mbenc_curbe)
3424 return;
3425
3426 memset(mbenc_curbe, 0, sizeof(gen10_hevc_mbenc_intra_curbe_data));
3427
3428 if (hevc_state->brc.target_usage < 3)
3429 tu_idx = 0;
3430 else if (hevc_state->brc.target_usage < 7)
3431 tu_idx = 1;
3432 else
3433 tu_idx = 2;
3434
3435 mbenc_curbe->dw0.frame_width_in_pixel = hevc_state->frame_width;
3436 mbenc_curbe->dw0.frame_height_in_pixel = hevc_state->frame_height;
3437
3438 mbenc_curbe->dw1.penalty_intra32x32_nondc_pred = 36;
3439 mbenc_curbe->dw1.penalty_intra16x16_nondc_pred = 12;
3440 mbenc_curbe->dw1.penalty_intra8x8_nondc_pred = 4;
3441
3442 mbenc_curbe->dw2.intra_sad_measure_adj = 2;
3443 slice_qp = slice_param->slice_qp_delta + pic_param->pic_init_qp;
3444 gen10_hevc_calc_costs(mode_cost, HEVC_SLICE_I, slice_qp, hevc_state->is_64lcu);
3445
3446 mbenc_curbe->dw3.mode0_cost = mode_cost[0];
3447 mbenc_curbe->dw3.mode1_cost = mode_cost[1];
3448 mbenc_curbe->dw3.mode2_cost = mode_cost[2];
3449 mbenc_curbe->dw3.mode3_cost = mode_cost[3];
3450
3451 mbenc_curbe->dw4.mode4_cost = mode_cost[4];
3452 mbenc_curbe->dw4.mode5_cost = mode_cost[5];
3453 mbenc_curbe->dw4.mode6_cost = mode_cost[6];
3454 mbenc_curbe->dw4.mode7_cost = mode_cost[7];
3455
3456 mbenc_curbe->dw5.mode8_cost = mode_cost[8];
3457 mbenc_curbe->dw5.mode9_cost = mode_cost[9];
3458 mbenc_curbe->dw5.ref_id_cost = mode_cost[10];
3459 mbenc_curbe->dw5.chroma_intra_mode_cost = mode_cost[11];
3460
3461 mbenc_curbe->dw6.log2_min_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3462 mbenc_curbe->dw6.log2_max_cu_size = seq_param->log2_diff_max_min_luma_coding_block_size +
3463 seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3464 mbenc_curbe->dw6.log2_max_tu_size = seq_param->log2_diff_max_min_transform_block_size +
3465 seq_param->log2_min_transform_block_size_minus2 + 2;
3466 mbenc_curbe->dw6.log2_min_tu_size = seq_param->log2_min_transform_block_size_minus2 + 2;
3467 if (seq_param->max_transform_hierarchy_depth_intra)
3468 mbenc_curbe->dw6.max_tr_depth_intra = gen10_hevc_tu_settings[GEN10_LOG2_TU_MAX_DEPTH_INTRA_TU_PARAM][tu_idx];
3469 else
3470 mbenc_curbe->dw6.max_tr_depth_intra = 0;
3471
3472 mbenc_curbe->dw6.tu_split_flag = 1;
3473
3474 mbenc_curbe->dw7.concurrent_group_num = 1;
3475 mbenc_curbe->dw7.slice_qp = slice_qp;
3476 mbenc_curbe->dw7.enc_tu_decision_mode = gen10_hevc_tu_settings[GEN10_ENC_TU_DECISION_MODE_TU_PARAM][tu_idx];
3477
3478 mbenc_curbe->dw8.lambda_rd = mode_cost[LAMBDA_RD_IDX];
3479 mbenc_curbe->dw9.lambda_md = mode_cost[LAMBDA_MD_IDX];
3480 mbenc_curbe->dw10.intra_tusad_thr = mode_cost[TUSAD_THR_IDX];
3481
3482 mbenc_curbe->dw11.slice_type = HEVC_SLICE_I;
3483
3484 if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP)
3485 mbenc_curbe->dw11.qp_type = GEN10_HEVC_QP_TYPE_CONSTANT;
3486 else
3487 mbenc_curbe->dw11.qp_type = hevc_state->brc.lcu_brc_enabled ? GEN10_HEVC_QP_TYPE_CU_LEVEL : GEN10_HEVC_QP_TYPE_FRAME;
3488
3489 mbenc_curbe->dw11.enc_qt_decision_mode = gen10_hevc_tu_settings[GEN10_ENC_QT_DECISION_MODE_TU_PARAM][tu_idx];
3490
3491 mbenc_curbe->dw12.pcm_8x8_sad_threshold = 4700;
3492
3493 mbenc_curbe->dw16.bti_vme_intra_pred_surface = GEN10_HEVC_MBENC_INTRA_VME_PRED_CURR_PIC_IDX0;
3494 mbenc_curbe->dw17.bti_curr_picture_y = GEN10_HEVC_MBENC_INTRA_CURR_Y;
3495 mbenc_curbe->dw18.bti_enc_curecord_surface = GEN10_HEVC_MBENC_INTRA_INTERMEDIATE_CU_RECORD;
3496 mbenc_curbe->dw19.bti_pak_obj_cmd_surface = GEN10_HEVC_MBENC_INTRA_PAK_OBJ0;
3497 mbenc_curbe->dw20.bti_cu_packet_for_pak_surface = GEN10_HEVC_MBENC_INTRA_PAK_CU_RECORD;
3498 mbenc_curbe->dw21.bti_internal_scratch_surface = GEN10_HEVC_MBENC_INTRA_SCRATCH_SURFACE;
3499 mbenc_curbe->dw22.bti_cu_based_qp_surface = GEN10_HEVC_MBENC_INTRA_CU_QP_DATA;
3500 mbenc_curbe->dw23.bti_const_data_lut_surface = GEN10_HEVC_MBENC_INTRA_CONST_DATA_LUT;
3501 mbenc_curbe->dw24.bti_lcu_level_data_input_surface = GEN10_HEVC_MBENC_INTRA_LCU_LEVEL_DATA_INPUT;
3502 mbenc_curbe->dw25.bti_concurrent_tg_data_surface = GEN10_HEVC_MBENC_INTRA_CONCURRENT_TG_DATA;
3503 mbenc_curbe->dw26.bti_brc_combined_enc_param_surface = GEN10_HEVC_MBENC_INTRA_BRC_COMBINED_ENC_PARAMETER_SURFACE;
3504 mbenc_curbe->dw27.bti_cu_split_surface = GEN10_HEVC_MBENC_INTRA_CU_SPLIT_SURFACE,
3505 mbenc_curbe->dw28.bti_debug_surface = GEN10_HEVC_MBENC_INTRA_DEBUG_DUMP;
3506
3507 i965_gpe_context_unmap_curbe(gpe_context);
3508 }
3509
3510 static int
gen10_hevc_compute_diff_poc(VADriverContextP ctx,VAPictureHEVC * curr_pic,VAPictureHEVC * ref_pic)3511 gen10_hevc_compute_diff_poc(VADriverContextP ctx,
3512 VAPictureHEVC *curr_pic,
3513 VAPictureHEVC *ref_pic)
3514 {
3515 struct i965_driver_data *i965 = i965_driver_data(ctx);
3516 struct object_surface *obj_surface = NULL;
3517 int diff_poc = 0;
3518
3519 if (ref_pic->picture_id != VA_INVALID_SURFACE)
3520 obj_surface = SURFACE(ref_pic->picture_id);
3521
3522 if (!obj_surface || (ref_pic->flags & VA_PICTURE_HEVC_INVALID))
3523 return diff_poc;
3524
3525 diff_poc = curr_pic->pic_order_cnt - ref_pic->pic_order_cnt;
3526
3527 if (diff_poc < -128)
3528 diff_poc = -128;
3529 else if (diff_poc > 127)
3530 diff_poc = 127;
3531
3532 return diff_poc;
3533 }
3534
3535 static void
gen10_hevc_enc_mbenc_inter_curbe(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)3536 gen10_hevc_enc_mbenc_inter_curbe(VADriverContextP ctx,
3537 struct encode_state *encode_state,
3538 struct intel_encoder_context *encoder_context,
3539 struct i965_gpe_context *gpe_context)
3540 {
3541 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3542 struct gen10_hevc_enc_state *hevc_state;
3543 gen10_hevc_mbenc_inter_curbe_data *mbenc_curbe;
3544 VAEncSliceParameterBufferHEVC *slice_param;
3545 VAEncPictureParameterBufferHEVC *pic_param;
3546 VAEncSequenceParameterBufferHEVC *seq_param;
3547 int slice_qp;
3548 int tu_idx;
3549
3550 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3551
3552 seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
3553 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
3554 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
3555 mbenc_curbe = i965_gpe_context_map_curbe(gpe_context);
3556
3557 if (!mbenc_curbe)
3558 return;
3559
3560 memset(mbenc_curbe, 0, sizeof(gen10_hevc_mbenc_inter_curbe_data));
3561
3562 if (hevc_state->brc.target_usage < 3)
3563 tu_idx = 0;
3564 else if (hevc_state->brc.target_usage < 7)
3565 tu_idx = 1;
3566 else
3567 tu_idx = 2;
3568
3569 slice_qp = slice_param->slice_qp_delta + pic_param->pic_init_qp;
3570 mbenc_curbe->dw0.frame_width_in_pixel = hevc_state->frame_width;
3571 mbenc_curbe->dw0.frame_height_in_pixel = hevc_state->frame_height;
3572
3573 mbenc_curbe->dw1.log2_min_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3574 mbenc_curbe->dw1.log2_max_cu_size = seq_param->log2_diff_max_min_luma_coding_block_size +
3575 seq_param->log2_min_luma_coding_block_size_minus3 + 3;
3576 mbenc_curbe->dw1.log2_max_tu_size = seq_param->log2_diff_max_min_transform_block_size +
3577 seq_param->log2_min_transform_block_size_minus2 + 2;
3578 mbenc_curbe->dw1.log2_min_tu_size = seq_param->log2_min_transform_block_size_minus2 + 2;
3579
3580 if (seq_param->max_transform_hierarchy_depth_intra)
3581 mbenc_curbe->dw1.max_tr_depth_intra = gen10_hevc_tu_settings[GEN10_LOG2_TU_MAX_DEPTH_INTRA_TU_PARAM][tu_idx];
3582 else
3583 mbenc_curbe->dw1.max_tr_depth_intra = 0;
3584
3585 if (seq_param->max_transform_hierarchy_depth_inter)
3586 mbenc_curbe->dw1.max_tr_depth_inter = gen10_hevc_tu_settings[GEN10_LOG2_TU_MAX_DEPTH_INTER_TU_PARAM][tu_idx];
3587 else
3588 mbenc_curbe->dw1.max_tr_depth_inter = 0;
3589 mbenc_curbe->dw1.log2_para_merge_level = 2;
3590 mbenc_curbe->dw1.max_num_ime_search_center = 6;
3591
3592 mbenc_curbe->dw2.hme_flag = hevc_state->hme_enabled ? 3 : 0;
3593 mbenc_curbe->dw2.super_hme_enable = hevc_state->b16xme_enabled ? 1 : 0;
3594 mbenc_curbe->dw2.hme_coarse_stage = 1;
3595 mbenc_curbe->dw2.hme_subpel_mode = 3;
3596 if (hevc_state->brc.brc_method == GEN10_HEVC_BRC_CQP)
3597 mbenc_curbe->dw2.qp_type = GEN10_HEVC_QP_TYPE_CONSTANT;
3598 else
3599 mbenc_curbe->dw2.qp_type = hevc_state->brc.lcu_brc_enabled ? GEN10_HEVC_QP_TYPE_CU_LEVEL : GEN10_HEVC_QP_TYPE_FRAME;
3600
3601 if (hevc_state->num_regions_in_slice > 1)
3602 mbenc_curbe->dw2.regions_in_slice_splits_enable = 1;
3603 else
3604 mbenc_curbe->dw2.regions_in_slice_splits_enable = 0;
3605
3606 mbenc_curbe->dw3.active_num_child_threads_cu64 = 0;
3607 mbenc_curbe->dw3.active_num_child_threads_cu32_0 = 0;
3608 mbenc_curbe->dw3.active_num_child_threads_cu32_1 = 0;
3609 mbenc_curbe->dw3.active_num_child_threads_cu32_2 = 0;
3610 mbenc_curbe->dw3.active_num_child_threads_cu32_3 = 0;
3611 mbenc_curbe->dw3.slice_qp = slice_qp;
3612
3613 mbenc_curbe->dw4.skip_mode_enable = 1;
3614 mbenc_curbe->dw4.adaptive_enable = 1;
3615 mbenc_curbe->dw4.ime_ref_window_size = 1;
3616 mbenc_curbe->dw4.hevc_min_cu_ctrl = seq_param->log2_min_luma_coding_block_size_minus3;
3617
3618 mbenc_curbe->dw5.subpel_mode = 3;
3619 mbenc_curbe->dw5.inter_sad_measure = 2;
3620 mbenc_curbe->dw5.intra_sad_measure = 2;
3621 mbenc_curbe->dw5.len_sp = 63;
3622 mbenc_curbe->dw5.max_num_su = 63;
3623 mbenc_curbe->dw5.refid_cost_mode = 1;
3624
3625 mbenc_curbe->dw7.max_num_merge_cand = slice_param->max_num_merge_cand;
3626 mbenc_curbe->dw7.slice_type = slice_param->slice_type;
3627 mbenc_curbe->dw7.temporal_mvp_enable = seq_param->seq_fields.bits.sps_temporal_mvp_enabled_flag;
3628 mbenc_curbe->dw7.mvp_collocated_from_l0 = slice_param->slice_fields.bits.collocated_from_l0_flag;
3629 mbenc_curbe->dw7.same_ref_list = hevc_state->is_same_ref_list;
3630 if (slice_param->slice_type == HEVC_SLICE_B)
3631 mbenc_curbe->dw7.is_low_delay = hevc_state->low_delay;
3632 else
3633 mbenc_curbe->dw7.is_low_delay = 1;
3634
3635 mbenc_curbe->dw7.num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3636 if (slice_param->slice_type == HEVC_SLICE_B)
3637 mbenc_curbe->dw7.num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3638 else
3639 mbenc_curbe->dw7.num_ref_idx_l1 = 0;
3640
3641 mbenc_curbe->dw8.fwd_poc_num_l0_mtb_0 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3642 &slice_param->ref_pic_list0[0]);
3643 mbenc_curbe->dw8.fwd_poc_num_l0_mtb_1 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3644 &slice_param->ref_pic_list0[1]);
3645 mbenc_curbe->dw9.fwd_poc_num_l0_mtb_2 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3646 &slice_param->ref_pic_list0[2]);
3647 mbenc_curbe->dw9.fwd_poc_num_l0_mtb_3 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3648 &slice_param->ref_pic_list0[3]);
3649 if (slice_param->slice_type == HEVC_SLICE_B) {
3650 mbenc_curbe->dw8.bwd_poc_num_l1_mtb_0 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3651 &slice_param->ref_pic_list1[0]);
3652 mbenc_curbe->dw8.bwd_poc_num_l1_mtb_1 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3653 &slice_param->ref_pic_list1[1]);
3654 mbenc_curbe->dw9.bwd_poc_num_l1_mtb_2 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3655 &slice_param->ref_pic_list1[2]);
3656 mbenc_curbe->dw9.bwd_poc_num_l1_mtb_3 = gen10_hevc_compute_diff_poc(ctx, &pic_param->decoded_curr_pic,
3657 &slice_param->ref_pic_list1[3]);
3658 }
3659
3660 mbenc_curbe->dw13.ref_frame_hor_size = hevc_state->frame_width;
3661 mbenc_curbe->dw13.ref_frame_ver_size = hevc_state->frame_height;
3662
3663 mbenc_curbe->dw15.concurrent_gop_num = hevc_state->hevc_wf_param.num_regions;
3664 mbenc_curbe->dw15.total_thread_num_per_lcu = gen10_hevc_tu_settings[GEN10_TOTAL_THREAD_NUM_PER_LCU_TU_PARAM][tu_idx];
3665 mbenc_curbe->dw15.regions_in_slice_split_count = hevc_state->num_regions_in_slice;
3666
3667 mbenc_curbe->dw1.max_num_ime_search_center = gen10_hevc_tu_settings[GEN10_MAX_NUM_IME_SEARCH_CENTER_TU_PARAM][tu_idx];
3668
3669 if (hevc_state->is_64lcu)
3670 mbenc_curbe->dw2.enable_cu64_check = gen10_hevc_tu_settings[GEN10_ENABLE_CU64_CHECK_TU_PARAM][tu_idx];
3671 else
3672 mbenc_curbe->dw2.enable_cu64_check = 0;
3673
3674 mbenc_curbe->dw2.enc_trans_simplify = gen10_hevc_tu_settings[GEN10_ENC_TRANSFORM_SIMPLIFY_TU_PARAM][tu_idx];
3675 mbenc_curbe->dw2.enc_tu_dec_mode = gen10_hevc_tu_settings[GEN10_ENC_TU_DECISION_MODE_TU_PARAM][tu_idx];
3676 mbenc_curbe->dw2.enc_tu_dec_for_all_qt = gen10_hevc_tu_settings[GEN10_ENC_TU_DECISION_FOR_ALL_QT_TU_PARAM][tu_idx];
3677 mbenc_curbe->dw2.coef_bit_est_mode = gen10_hevc_tu_settings[GEN10_COEF_BIT_EST_MODE_TU_PARAM][tu_idx];
3678 mbenc_curbe->dw2.enc_skip_dec_mode = gen10_hevc_tu_settings[GEN10_ENC_SKIP_DECISION_MODE_TU_PARAM][tu_idx];
3679 mbenc_curbe->dw2.enc_qt_dec_mode = gen10_hevc_tu_settings[GEN10_ENC_QT_DECISION_MODE_TU_PARAM][tu_idx];
3680 mbenc_curbe->dw2.lcu32_enc_rd_dec_mode_for_all_qt = gen10_hevc_tu_settings[GEN10_ENC_RD_DECISION_MODE_FOR_ALL_QT_TU_PARAM][tu_idx];
3681 mbenc_curbe->dw2.lcu64_cu64_skip_check_only = (tu_idx == 1);
3682 mbenc_curbe->dw2.sic_dys_run_path_mode = gen10_hevc_tu_settings[GEN10_SIC_DYNAMIC_RUN_PATH_MODE][tu_idx];
3683
3684 if (hevc_state->is_64lcu) {
3685 mbenc_curbe->dw16.bti_curr_picture_y =
3686 GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y;
3687 mbenc_curbe->dw17.bti_enc_curecord_surface =
3688 GEN10_HEVC_MBENC_INTER_LCU64_CU32_ENC_CU_RECORD;
3689 mbenc_curbe->dw18.bti_lcu64_enc_curecord2_surface =
3690 GEN10_HEVC_MBENC_INTER_LCU64_SECOND_CU32_ENC_CU_RECORD;
3691 mbenc_curbe->dw19.bti_lcu64_pak_objcmd_surface =
3692 GEN10_HEVC_MBENC_INTER_LCU64_PAK_OBJ0;
3693 mbenc_curbe->dw20.bti_lcu64_pak_curecord_surface =
3694 GEN10_HEVC_MBENC_INTER_LCU64_PAK_CU_RECORD;
3695 mbenc_curbe->dw21.bti_lcu64_vme_intra_inter_pred_surface =
3696 GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_IDX0;
3697 mbenc_curbe->dw22.bti_lcu64_cu16_qpdata_input_surface =
3698 GEN10_HEVC_MBENC_INTER_LCU64_CU16x16_QP_DATA;
3699 mbenc_curbe->dw23.bti_lcu64_cu32_enc_const_table_surface =
3700 GEN10_HEVC_MBENC_INTER_LCU64_CU32_ENC_CONST_TABLE;
3701 mbenc_curbe->dw24.bti_lcu64_colocated_mvdata_surface =
3702 GEN10_HEVC_MBENC_INTER_LCU64_COLOCATED_CU_MV_DATA;
3703 mbenc_curbe->dw25.bti_lcu64_hme_pred_surface =
3704 GEN10_HEVC_MBENC_INTER_LCU64_HME_MOTION_PREDICTOR_DATA;
3705 mbenc_curbe->dw26.bti_lcu64_lculevel_data_input_surface =
3706 GEN10_HEVC_MBENC_INTER_LCU64_LCU_LEVEL_DATA_INPUT;
3707 mbenc_curbe->dw27.bti_lcu64_cu32_enc_scratch_surface =
3708 GEN10_HEVC_MBENC_INTER_LCU64_CU32_LCU_ENC_SCRATCH_SURFACE;
3709 mbenc_curbe->dw28.bti_lcu64_64x64_dist_surface =
3710 GEN10_HEVC_MBENC_INTER_LCU64_64X64_DISTORTION_SURFACE;
3711 mbenc_curbe->dw29.bti_lcu64_concurrent_tg_data_surface =
3712 GEN10_HEVC_MBENC_INTER_LCU64_CONCURRENT_TG_DATA;
3713 mbenc_curbe->dw30.bti_lcu64_brc_combined_enc_param_surface =
3714 GEN10_HEVC_MBENC_INTER_LCU64_BRC_COMBINED_ENC_PARAMETER_SURFACE;
3715 mbenc_curbe->dw31.bti_lcu64_cu32_jbq1d_buf_surface =
3716 GEN10_HEVC_MBENC_INTER_LCU64_CU32_JOB_QUEUE_1D_SURFACE;
3717 mbenc_curbe->dw32.bti_lcu64_cu32_jbq2d_buf_surface =
3718 GEN10_HEVC_MBENC_INTER_LCU64_CU32_JOB_QUEUE_2D_SURFACE;
3719 mbenc_curbe->dw33.bti_lcu64_cu32_residual_scratch_surface =
3720 GEN10_HEVC_MBENC_INTER_LCU64_CU32_RESIDUAL_DATA_SCRATCH_SURFACE;
3721 mbenc_curbe->dw34.bti_lcu64_cusplit_surface =
3722 GEN10_HEVC_MBENC_INTER_LCU64_CU_SPLIT_DATA_SURFACE;
3723 mbenc_curbe->dw35.bti_lcu64_curr_picture_y_2xds =
3724 GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y_2xDS;
3725 mbenc_curbe->dw36.bti_lcu64_intermediate_curecord_surface =
3726 GEN10_HEVC_MBENC_INTER_LCU64_INTERMEDIATE_CU_RECORD;
3727 mbenc_curbe->dw37.bti_lcu64_const_data_lut_surface =
3728 GEN10_HEVC_MBENC_INTER_LCU64_CONST64_DATA_LUT;
3729 mbenc_curbe->dw38.bti_lcu64_lcu_storage_surface =
3730 GEN10_HEVC_MBENC_INTER_LCU64_LCU_STORAGE_SURFACE;
3731 mbenc_curbe->dw39.bti_lcu64_vme_inter_pred_2xds_surface =
3732 GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
3733 mbenc_curbe->dw40.bti_lcu64_cu64_jbq1d_surface =
3734 GEN10_HEVC_MBENC_INTER_LCU64_JOB_QUEUE_1D_SURFACE;
3735 mbenc_curbe->dw41.bti_lcu64_cu64_jbq2d_surface =
3736 GEN10_HEVC_MBENC_INTER_LCU64_JOB_QUEUE_2D_SURFACE;
3737 mbenc_curbe->dw42.bti_lcu64_cu64_residual_scratch_surface =
3738 GEN10_HEVC_MBENC_INTER_LCU64_RESIDUAL_DATA_SCRATCH_SURFACE;
3739 mbenc_curbe->dw43.bti_lcu64_debug_surface =
3740 GEN10_HEVC_MBENC_INTER_LCU64_DEBUG_SURFACE;
3741 } else {
3742 mbenc_curbe->dw16.bti_curr_picture_y =
3743 GEN10_HEVC_MBENC_INTER_LCU32_CURR_Y;
3744 mbenc_curbe->dw17.bti_enc_curecord_surface =
3745 GEN10_HEVC_MBENC_INTER_LCU32_ENC_CU_RECORD;
3746 mbenc_curbe->dw18.bti_lcu32_pak_objcmd_surface =
3747 GEN10_HEVC_MBENC_INTER_LCU32_PAK_OBJ0;
3748 mbenc_curbe->dw19.bti_lcu32_pak_curecord_surface =
3749 GEN10_HEVC_MBENC_INTER_LCU32_PAK_CU_RECORD;
3750 mbenc_curbe->dw20.bti_lcu32_vme_intra_inter_pred_surface =
3751 GEN10_HEVC_MBENC_INTER_LCU32_VME_PRED_CURR_PIC_IDX0;
3752 mbenc_curbe->dw21.bti_lcu32_cu16_qpdata_input_surface =
3753 GEN10_HEVC_MBENC_INTER_LCU32_CU16x16_QP_DATA;
3754 mbenc_curbe->dw22.bti_lcu32_enc_const_table_surface =
3755 GEN10_HEVC_MBENC_INTER_LCU32_ENC_CONST_TABLE;
3756 mbenc_curbe->dw23.bti_lcu32_colocated_mvdata_surface =
3757 GEN10_HEVC_MBENC_INTER_LCU32_COLOCATED_CU_MV_DATA;
3758 mbenc_curbe->dw24.bti_lcu32_hme_pred_data_surface =
3759 GEN10_HEVC_MBENC_INTER_LCU32_HME_MOTION_PREDICTOR_DATA;
3760 mbenc_curbe->dw25.bti_lcu32_lculevel_data_input_surface =
3761 GEN10_HEVC_MBENC_INTER_LCU32_LCU_LEVEL_DATA_INPUT;
3762 mbenc_curbe->dw26.bti_lcu32_enc_scratch_surface =
3763 GEN10_HEVC_MBENC_INTER_LCU32_LCU_ENC_SCRATCH_SURFACE;
3764 mbenc_curbe->dw27.bti_lcu32_concurrent_tg_data_surface =
3765 GEN10_HEVC_MBENC_INTER_LCU32_CONCURRENT_TG_DATA;
3766 mbenc_curbe->dw28.bti_lcu32_brc_combined_enc_param_surface =
3767 GEN10_HEVC_MBENC_INTER_LCU32_BRC_COMBINED_ENC_PARAMETER_SURFACE;
3768 mbenc_curbe->dw29.bti_lcu32_jbq_scratch_surface =
3769 GEN10_HEVC_MBENC_INTER_LCU32_JOB_QUEUE_SCRATCH_SURFACE;
3770 mbenc_curbe->dw30.bti_lcu32_cusplit_data_surface =
3771 GEN10_HEVC_MBENC_INTER_LCU32_CU_SPLIT_DATA_SURFACE,
3772 mbenc_curbe->dw31.bti_lcu32_residual_scratch_surface =
3773 GEN10_HEVC_MBENC_INTER_LCU32_RESIDUAL_DATA_SCRATCH_SURFACE,
3774 mbenc_curbe->dw32.bti_lcu32_debug_surface =
3775 GEN10_HEVC_MBENC_INTER_LCU32_DEBUG_SURFACE;
3776 }
3777
3778 i965_gpe_context_unmap_curbe(gpe_context);
3779 }
3780
3781 static void
gen10_hevc_enc_mbenc_intra_surfaces(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)3782 gen10_hevc_enc_mbenc_intra_surfaces(VADriverContextP ctx,
3783 struct encode_state *encode_state,
3784 struct intel_encoder_context *encoder_context,
3785 struct i965_gpe_context *gpe_context)
3786 {
3787 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
3788 struct gen10_hevc_enc_state *hevc_state;
3789 struct object_surface *obj_surface;
3790 struct object_surface *vme_surface;
3791 struct gen10_hevc_surface_priv *surface_priv;
3792 int input_bti, i;
3793
3794 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
3795
3796 obj_surface = encode_state->reconstructed_object;
3797
3798 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
3799
3800 if (hevc_state->is_10bit)
3801 vme_surface = surface_priv->converted_surface;
3802 else
3803 vme_surface = encode_state->input_yuv_object;
3804
3805 input_bti = GEN10_HEVC_MBENC_INTRA_VME_PRED_CURR_PIC_IDX0;
3806 i965_add_adv_gpe_surface(ctx, gpe_context,
3807 vme_surface,
3808 input_bti);
3809 input_bti++;
3810
3811 for (i = 0; i < 8; i++) {
3812 i965_add_adv_gpe_surface(ctx, gpe_context,
3813 vme_surface,
3814 input_bti);
3815 input_bti++;
3816 }
3817
3818 input_bti = GEN10_HEVC_MBENC_INTRA_CURR_Y;
3819
3820 i965_add_2d_gpe_surface(ctx,
3821 gpe_context,
3822 vme_surface,
3823 0,
3824 1,
3825 I965_SURFACEFORMAT_R8_UNORM,
3826 input_bti);
3827 i965_add_2d_gpe_surface(ctx,
3828 gpe_context,
3829 vme_surface,
3830 1,
3831 1,
3832 I965_SURFACEFORMAT_R16_UINT,
3833 input_bti + 1);
3834
3835
3836 input_bti = GEN10_HEVC_MBENC_INTRA_INTERMEDIATE_CU_RECORD;
3837 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3838 &vme_context->res_temp_curecord_lcu32_surface,
3839 1, I965_SURFACEFORMAT_R8_UNORM,
3840 input_bti);
3841
3842 i965_add_buffer_gpe_surface(ctx, gpe_context,
3843 &vme_context->res_mb_code_surface,
3844 0,
3845 BYTES2UINT32(hevc_state->cu_records_offset),
3846 0,
3847 input_bti + 1);
3848
3849
3850 i965_add_buffer_gpe_surface(ctx, gpe_context,
3851 &vme_context->res_mb_code_surface,
3852 0,
3853 BYTES2UINT32(vme_context->res_mb_code_surface.size - hevc_state->cu_records_offset),
3854 hevc_state->cu_records_offset,
3855 input_bti + 2);
3856
3857 input_bti = GEN10_HEVC_MBENC_INTRA_SCRATCH_SURFACE;
3858 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3859 &vme_context->res_scratch_surface,
3860 1, I965_SURFACEFORMAT_R8_UNORM,
3861 input_bti);
3862
3863 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3864 &vme_context->res_16x16_qp_data_surface,
3865 1, I965_SURFACEFORMAT_R8_UNORM,
3866 input_bti + 1);
3867
3868 i965_add_buffer_gpe_surface(ctx, gpe_context,
3869 &vme_context->res_enc_const_table_intra,
3870 0,
3871 BYTES2UINT32(vme_context->res_enc_const_table_intra.size),
3872 0,
3873 input_bti + 2);
3874
3875 i965_add_buffer_gpe_surface(ctx, gpe_context,
3876 &vme_context->res_lculevel_input_data_buffer,
3877 0,
3878 BYTES2UINT32(vme_context->res_lculevel_input_data_buffer.size),
3879 0,
3880 input_bti + 3);
3881
3882 i965_add_buffer_gpe_surface(ctx, gpe_context,
3883 &vme_context->res_concurrent_tg_data,
3884 0,
3885 BYTES2UINT32(vme_context->res_concurrent_tg_data.size),
3886 0,
3887 input_bti + 4);
3888
3889 i965_add_buffer_gpe_surface(ctx, gpe_context,
3890 &vme_context->res_brc_input_enc_kernel_buffer,
3891 0,
3892 BYTES2UINT32(vme_context->res_brc_input_enc_kernel_buffer.size),
3893 0,
3894 input_bti + 5);
3895
3896 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3897 &vme_context->res_cu_split_surface,
3898 1, I965_SURFACEFORMAT_R8_UNORM,
3899 input_bti + 6);
3900
3901 i965_add_buffer_gpe_surface(ctx, gpe_context,
3902 &vme_context->res_kernel_trace_data,
3903 0,
3904 BYTES2UINT32(vme_context->res_kernel_trace_data.size),
3905 0,
3906 input_bti + 7);
3907 }
3908
3909 static void
gen10_hevc_enc_mbenc_inter_lcu32_surfaces(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context,
                                          struct i965_gpe_context *gpe_context)
{
    /*
     * Bind every surface used by the MBEnc inter kernel, LCU32 variant:
     * the current picture (2D luma/chroma views plus the VME view), the
     * temporary CU record surface, the PAK object buffer, the VME
     * reference picture lists (forward refs at even BTI offsets,
     * backward refs at odd offsets), per-16x16 QP data, the inter
     * constant table, the optional TMVP collocated-MV and HME predictor
     * inputs, and the remaining scratch/debug buffers.
     */
    struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
    struct gen10_hevc_enc_state *hevc_state;
    struct gen10_hevc_enc_frame_info *frame_info;
    struct gen10_hevc_enc_common_res *common_res;
    VAEncSliceParameterBufferHEVC *slice_param;
    VAEncPictureParameterBufferHEVC *pic_param;
    struct object_surface *obj_surface, *vme_surface;
    struct gen10_hevc_surface_priv *surface_priv;
    struct object_surface *l0_surface = NULL, *l1_surface = NULL, *tmp_surface;
    int input_bti, i;

    hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
    frame_info = &vme_context->frame_info;
    common_res = &vme_context->common_res;

    pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;

    obj_surface = encode_state->reconstructed_object;

    surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);

    /* 10-bit content is consumed through its pre-converted 8-bit copy. */
    if (hevc_state->is_10bit)
        vme_surface = surface_priv->converted_surface;
    else
        vme_surface = encode_state->input_yuv_object;

    /* Current picture: luma plane, then the chroma (UV) plane. */
    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CURR_Y;
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            vme_surface,
                            0,
                            1,
                            I965_SURFACEFORMAT_R8_UNORM,
                            input_bti);
    i965_add_2d_gpe_surface(ctx,
                            gpe_context,
                            vme_surface,
                            1,
                            1,
                            I965_SURFACEFORMAT_R16_UINT,
                            input_bti + 1);

    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_ENC_CU_RECORD;
    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                   &vme_context->res_temp_curecord_lcu32_surface,
                                   1, I965_SURFACEFORMAT_R8_UNORM,
                                   input_bti);

    /* PAK object buffer: commands up to cu_records_offset at BTI+0,
     * the CU record area after the offset at BTI+1. */
    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_PAK_OBJ0;
    i965_add_buffer_gpe_surface(ctx, gpe_context,
                                &vme_context->res_mb_code_surface,
                                0,
                                BYTES2UINT32(hevc_state->cu_records_offset),
                                0,
                                input_bti);
    i965_add_buffer_gpe_surface(ctx, gpe_context,
                                &vme_context->res_mb_code_surface,
                                0,
                                BYTES2UINT32(vme_context->res_mb_code_surface.size -
                                             hevc_state->cu_records_offset),
                                hevc_state->cu_records_offset,
                                input_bti + 1);

    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_VME_PRED_CURR_PIC_IDX0;

    i965_add_adv_gpe_surface(ctx, gpe_context,
                             vme_surface,
                             input_bti);

    /* Resolve fallback surfaces for reference-list entries that have no
     * usable surface attached. */
    if (frame_info->mapped_ref_idx_list0[0] >= 0)
        l0_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[0]].obj_surface;
    else
        l0_surface = NULL;

    if (!l0_surface || !l0_surface->private_data)
        l0_surface = vme_surface;
    else {
        surface_priv = (struct gen10_hevc_surface_priv *)(l0_surface->private_data);
        if (hevc_state->is_10bit)
            l0_surface = surface_priv->converted_surface;
    }

    if (slice_param->slice_type == HEVC_SLICE_B) {
        /* Reference index 0 is valid, so test with ">= 0" (was "> 0",
         * which wrongly discarded the first backward reference and fell
         * back to the L0 surface). */
        if (frame_info->mapped_ref_idx_list1[0] >= 0)
            l1_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
        else
            l1_surface = NULL;

        if (!l1_surface || !l1_surface->private_data)
            l1_surface = l0_surface;
        else {
            surface_priv = (struct gen10_hevc_surface_priv *)(l1_surface->private_data);
            if (hevc_state->is_10bit)
                l1_surface = surface_priv->converted_surface;
        }
    }

    /* VME reference list: up to 4 pairs, L0 (fwd) at even offsets and
     * L1 (bwd) at odd offsets. */
    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_VME_PRED_FWD_PIC_IDX0;
    for (i = 0; i < 4; i++) {
        if (frame_info->mapped_ref_idx_list0[i] >= 0)
            tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
        else
            tmp_surface = NULL;

        if (tmp_surface && tmp_surface->private_data) {
            surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
            if (hevc_state->is_10bit)
                tmp_surface = surface_priv->converted_surface;

            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     tmp_surface,
                                     input_bti + 2 * i);
        } else
            i965_add_adv_gpe_surface(ctx, gpe_context,
                                     l0_surface,
                                     input_bti + 2 * i);

        if (slice_param->slice_type == HEVC_SLICE_B) {
            /* Fetch entry [i], matching the guard: the old code indexed
             * with [0] and therefore bound the wrong L1 reference for
             * i > 0 (copy/paste bug; the LCU64 path uses [i]). */
            if (frame_info->mapped_ref_idx_list1[i] >= 0)
                tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
            else
                tmp_surface = NULL;

            if (tmp_surface && tmp_surface->private_data) {
                surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
                if (hevc_state->is_10bit)
                    tmp_surface = surface_priv->converted_surface;

                i965_add_adv_gpe_surface(ctx, gpe_context,
                                         tmp_surface,
                                         input_bti + 2 * i + 1);
            } else
                i965_add_adv_gpe_surface(ctx, gpe_context,
                                         l1_surface,
                                         input_bti + 2 * i + 1);
        }
    }

    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CU16x16_QP_DATA;
    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                   &vme_context->res_16x16_qp_data_surface,
                                   1, I965_SURFACEFORMAT_R8_UNORM,
                                   input_bti);

    i965_add_buffer_gpe_surface(ctx, gpe_context,
                                &vme_context->res_enc_const_table_inter,
                                0,
                                BYTES2UINT32(vme_context->res_enc_const_table_inter.size),
                                0,
                                input_bti + 1);

    /* Collocated MV buffer for temporal MVP, when enabled and a valid
     * collocated reference is given. */
    if (slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag &&
        (pic_param->collocated_ref_pic_index != 0xFF)) {
        obj_surface = common_res->reference_pics[pic_param->collocated_ref_pic_index].obj_surface;
        if (obj_surface && obj_surface->private_data) {
            surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);

            i965_add_buffer_gpe_surface(ctx, gpe_context,
                                        &surface_priv->motion_vector_temporal,
                                        0,
                                        BYTES2UINT32(surface_priv->motion_vector_temporal.size),
                                        0,
                                        input_bti + 2);
        }
    }

    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_HME_MOTION_PREDICTOR_DATA;
    if (hevc_state->hme_enabled) {
        i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                       &vme_context->res_s4x_memv_data_surface,
                                       1, I965_SURFACEFORMAT_R8_UNORM,
                                       input_bti);
    }

    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_LCU_LEVEL_DATA_INPUT;
    i965_add_buffer_gpe_surface(ctx, gpe_context,
                                &vme_context->res_lculevel_input_data_buffer,
                                0,
                                BYTES2UINT32(vme_context->res_lculevel_input_data_buffer.size),
                                0,
                                input_bti);

    i965_add_buffer_gpe_surface(ctx, gpe_context,
                                &vme_context->res_enc_scratch_buffer,
                                0,
                                BYTES2UINT32(vme_context->res_enc_scratch_buffer.size),
                                0,
                                input_bti + 1);

    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CONCURRENT_TG_DATA;
    i965_add_buffer_gpe_surface(ctx, gpe_context,
                                &vme_context->res_concurrent_tg_data,
                                0,
                                BYTES2UINT32(vme_context->res_concurrent_tg_data.size),
                                0,
                                input_bti);

    i965_add_buffer_gpe_surface(ctx, gpe_context,
                                &vme_context->res_brc_input_enc_kernel_buffer,
                                0,
                                BYTES2UINT32(vme_context->res_brc_input_enc_kernel_buffer.size),
                                0,
                                input_bti + 1);

    /* Job queue header is bound as a raw buffer, so its size stays in
     * bytes (no BYTES2UINT32). */
    i965_add_buffer_gpe_surface(ctx, gpe_context,
                                &vme_context->res_jbq_header_buffer,
                                1,
                                vme_context->res_jbq_header_buffer.size,
                                0,
                                input_bti + 2);

    input_bti = GEN10_HEVC_MBENC_INTER_LCU32_CU_SPLIT_DATA_SURFACE;
    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                   &vme_context->res_cu_split_surface,
                                   1, I965_SURFACEFORMAT_R8_UNORM,
                                   input_bti);

    i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
                                   &vme_context->res_residual_scratch_lcu32_surface,
                                   1, I965_SURFACEFORMAT_R8_UNORM,
                                   input_bti + 1);

    i965_add_buffer_gpe_surface(ctx, gpe_context,
                                &vme_context->res_kernel_trace_data,
                                0,
                                BYTES2UINT32(vme_context->res_kernel_trace_data.size),
                                0,
                                input_bti + 2);
}
4146
4147 static void
gen10_hevc_enc_mbenc_inter_lcu64_surfaces(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)4148 gen10_hevc_enc_mbenc_inter_lcu64_surfaces(VADriverContextP ctx,
4149 struct encode_state *encode_state,
4150 struct intel_encoder_context *encoder_context,
4151 struct i965_gpe_context *gpe_context)
4152 {
4153 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
4154 struct gen10_hevc_enc_state *hevc_state;
4155 struct gen10_hevc_enc_common_res *common_res;
4156 struct gen10_hevc_enc_frame_info *frame_info;
4157 struct object_surface *obj_surface, *vme_surface;
4158 struct gen10_hevc_surface_priv *surface_priv;
4159 struct object_surface *l0_surface, *l1_surface, *tmp_surface;
4160 VAEncSliceParameterBufferHEVC *slice_param;
4161 VAEncPictureParameterBufferHEVC *pic_param;
4162 int input_bti, i;
4163
4164 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
4165 frame_info = &vme_context->frame_info;
4166 common_res = &vme_context->common_res;
4167
4168 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
4169 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
4170
4171 obj_surface = encode_state->reconstructed_object;
4172
4173 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4174
4175 if (hevc_state->is_10bit)
4176 vme_surface = surface_priv->converted_surface;
4177 else
4178 vme_surface = encode_state->input_yuv_object;
4179
4180 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y;
4181 i965_add_2d_gpe_surface(ctx,
4182 gpe_context,
4183 vme_surface,
4184 0,
4185 1,
4186 I965_SURFACEFORMAT_R8_UNORM,
4187 input_bti);
4188 i965_add_2d_gpe_surface(ctx,
4189 gpe_context,
4190 vme_surface,
4191 1,
4192 1,
4193 I965_SURFACEFORMAT_R16_UINT,
4194 input_bti + 1);
4195
4196 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU32_ENC_CU_RECORD;
4197 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4198 &vme_context->res_temp_curecord_lcu32_surface,
4199 1, I965_SURFACEFORMAT_R8_UNORM,
4200 input_bti);
4201
4202 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4203 &vme_context->res_temp2_curecord_lcu32_surface,
4204 1, I965_SURFACEFORMAT_R8_UNORM,
4205 input_bti + 1);
4206
4207 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_PAK_OBJ0;
4208 i965_add_buffer_gpe_surface(ctx, gpe_context,
4209 &vme_context->res_mb_code_surface,
4210 1,
4211 hevc_state->cu_records_offset,
4212 0,
4213 input_bti);
4214 i965_add_buffer_gpe_surface(ctx, gpe_context,
4215 &vme_context->res_mb_code_surface,
4216 0,
4217 vme_context->res_mb_code_surface.size,
4218 hevc_state->cu_records_offset,
4219 input_bti + 1);
4220
4221 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_IDX0;
4222
4223 i965_add_adv_gpe_surface(ctx, gpe_context,
4224 vme_surface,
4225 input_bti);
4226
4227 if (frame_info->mapped_ref_idx_list0[0] >= 0)
4228 l0_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[0]].obj_surface;
4229 else
4230 l0_surface = NULL;
4231
4232 if (!l0_surface || !l0_surface->private_data)
4233 l0_surface = vme_surface;
4234 else {
4235 surface_priv = (struct gen10_hevc_surface_priv *)(l0_surface->private_data);
4236 if (hevc_state->is_10bit)
4237 l0_surface = surface_priv->converted_surface;
4238 }
4239
4240 l1_surface = l0_surface;
4241 if (slice_param->slice_type == HEVC_SLICE_B) {
4242 if (frame_info->mapped_ref_idx_list1[0] > 0)
4243 l1_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
4244 else
4245 l1_surface = NULL;
4246
4247 if (!l1_surface || !l1_surface->private_data)
4248 l1_surface = l0_surface;
4249 else {
4250 surface_priv = (struct gen10_hevc_surface_priv *)(l1_surface->private_data);
4251 if (hevc_state->is_10bit)
4252 l1_surface = surface_priv->converted_surface;
4253 }
4254 }
4255
4256 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_FWD_PIC_IDX0;
4257 for (i = 0; i < 4; i++) {
4258 if (frame_info->mapped_ref_idx_list0[i] >= 0)
4259 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
4260 else
4261 tmp_surface = NULL;
4262
4263 if (tmp_surface && tmp_surface->private_data) {
4264 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4265 if (hevc_state->is_10bit)
4266 tmp_surface = surface_priv->converted_surface;
4267
4268 i965_add_adv_gpe_surface(ctx, gpe_context,
4269 tmp_surface,
4270 input_bti + 2 * i);
4271 } else
4272 i965_add_adv_gpe_surface(ctx, gpe_context,
4273 l0_surface,
4274 input_bti + 2 * i);
4275
4276 if (slice_param->slice_type == HEVC_SLICE_B) {
4277 if (frame_info->mapped_ref_idx_list1[i] >= 0)
4278 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
4279 else
4280 tmp_surface = NULL;
4281
4282 if (tmp_surface && tmp_surface->private_data) {
4283 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4284 if (hevc_state->is_10bit)
4285 tmp_surface = surface_priv->converted_surface;
4286
4287 i965_add_adv_gpe_surface(ctx, gpe_context,
4288 tmp_surface,
4289 input_bti + 2 * i + 1);
4290 } else
4291 i965_add_adv_gpe_surface(ctx, gpe_context,
4292 l1_surface,
4293 input_bti + 2 * i + 1);
4294 }
4295 }
4296
4297 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU16x16_QP_DATA;
4298 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4299 &vme_context->res_16x16_qp_data_surface,
4300 1, I965_SURFACEFORMAT_R8_UNORM,
4301 input_bti);
4302
4303 i965_add_buffer_gpe_surface(ctx, gpe_context,
4304 &vme_context->res_enc_const_table_inter,
4305 0,
4306 vme_context->res_enc_const_table_inter.size,
4307 0,
4308 input_bti + 1);
4309
4310 if (slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag &&
4311 (pic_param->collocated_ref_pic_index != 0xFF)) {
4312 obj_surface = common_res->reference_pics[pic_param->collocated_ref_pic_index].obj_surface;
4313 if (obj_surface && obj_surface->private_data) {
4314 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4315
4316 i965_add_buffer_gpe_surface(ctx, gpe_context,
4317 &surface_priv->motion_vector_temporal,
4318 0,
4319 surface_priv->motion_vector_temporal.size,
4320 0,
4321 input_bti + 2);
4322 }
4323 }
4324
4325 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_HME_MOTION_PREDICTOR_DATA;
4326 if (hevc_state->hme_enabled) {
4327 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4328 &vme_context->res_s4x_memv_data_surface,
4329 1, I965_SURFACEFORMAT_R8_UNORM,
4330 input_bti);
4331 }
4332
4333 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_LCU_LEVEL_DATA_INPUT;
4334 i965_add_buffer_gpe_surface(ctx, gpe_context,
4335 &vme_context->res_lculevel_input_data_buffer,
4336 0,
4337 vme_context->res_lculevel_input_data_buffer.size,
4338 0,
4339 input_bti);
4340
4341 i965_add_buffer_gpe_surface(ctx, gpe_context,
4342 &vme_context->res_enc_scratch_buffer,
4343 0,
4344 vme_context->res_enc_scratch_buffer.size,
4345 0,
4346 input_bti + 1);
4347
4348 i965_add_buffer_gpe_surface(ctx, gpe_context,
4349 &vme_context->res_64x64_dist_buffer,
4350 1,
4351 vme_context->res_64x64_dist_buffer.size,
4352 0,
4353 input_bti + 2);
4354
4355
4356
4357 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CONCURRENT_TG_DATA;
4358 i965_add_buffer_gpe_surface(ctx, gpe_context,
4359 &vme_context->res_concurrent_tg_data,
4360 0,
4361 vme_context->res_concurrent_tg_data.size,
4362 0,
4363 input_bti);
4364
4365 i965_add_buffer_gpe_surface(ctx, gpe_context,
4366 &vme_context->res_brc_input_enc_kernel_buffer,
4367 0,
4368 vme_context->res_brc_input_enc_kernel_buffer.size,
4369 0,
4370 input_bti + 1);
4371
4372
4373 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU32_JOB_QUEUE_1D_SURFACE;
4374 i965_add_buffer_gpe_surface(ctx, gpe_context,
4375 &vme_context->res_jbq_header_buffer,
4376 1,
4377 vme_context->res_jbq_header_buffer.size,
4378 0,
4379 input_bti);
4380
4381 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4382 &vme_context->res_jbq_data_lcu32_surface,
4383 1, I965_SURFACEFORMAT_R8_UNORM,
4384 input_bti + 1);
4385
4386 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4387 &vme_context->res_residual_scratch_lcu32_surface,
4388 1, I965_SURFACEFORMAT_R8_UNORM,
4389 input_bti + 2);
4390
4391
4392 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CU_SPLIT_DATA_SURFACE;
4393 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4394 &vme_context->res_cu_split_surface,
4395 1, I965_SURFACEFORMAT_R8_UNORM,
4396 input_bti);
4397
4398 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_CURR_Y_2xDS;
4399 obj_surface = encode_state->reconstructed_object;
4400 surface_priv = (struct gen10_hevc_surface_priv *)(obj_surface->private_data);
4401 vme_surface = surface_priv->scaled_2x_surface;
4402
4403 i965_add_2d_gpe_surface(ctx,
4404 gpe_context,
4405 vme_surface,
4406 0,
4407 1,
4408 I965_SURFACEFORMAT_R8_UNORM,
4409 input_bti);
4410
4411 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_INTERMEDIATE_CU_RECORD;
4412 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4413 &vme_context->res_temp_curecord_surface_lcu64,
4414 1, I965_SURFACEFORMAT_R8_UNORM,
4415 input_bti);
4416
4417 i965_add_buffer_gpe_surface(ctx, gpe_context,
4418 &vme_context->res_enc_const_table_inter_lcu64,
4419 1,
4420 vme_context->res_enc_const_table_inter_lcu64.size,
4421 0,
4422 input_bti + 1);
4423
4424 i965_add_buffer_gpe_surface(ctx, gpe_context,
4425 &vme_context->res_enc_scratch_lcu64_buffer,
4426 1,
4427 vme_context->res_enc_scratch_lcu64_buffer.size,
4428 0,
4429 input_bti + 2);
4430
4431 if (frame_info->mapped_ref_idx_list0[0] >= 0)
4432 l0_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[0]].obj_surface;
4433 else
4434 l0_surface = NULL;
4435
4436 if (!l0_surface || !l0_surface->private_data) {
4437 l0_surface = vme_surface;
4438 } else {
4439 surface_priv = (struct gen10_hevc_surface_priv *)(l0_surface->private_data);
4440 l0_surface = surface_priv->scaled_2x_surface;
4441 }
4442
4443 l1_surface = l0_surface;
4444 if (slice_param->slice_type == HEVC_SLICE_B) {
4445 if (frame_info->mapped_ref_idx_list1[0] > 0)
4446 l1_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[0]].obj_surface;
4447 else
4448 l1_surface = NULL;
4449
4450 if (!l1_surface || !l1_surface->private_data)
4451 l1_surface = l0_surface;
4452 else {
4453 surface_priv = (struct gen10_hevc_surface_priv *)(l1_surface->private_data);
4454 l1_surface = surface_priv->scaled_2x_surface;
4455 }
4456 }
4457
4458 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
4459 i965_add_adv_gpe_surface(ctx, gpe_context,
4460 vme_surface,
4461 input_bti);
4462
4463 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_VME_PRED_CURR_PIC_2xDS_IDX0;
4464 for (i = 0; i < 4; i++) {
4465 if (frame_info->mapped_ref_idx_list0[i] >= 0)
4466 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list0[i]].obj_surface;
4467 else
4468 tmp_surface = NULL;
4469
4470 if (tmp_surface && tmp_surface->private_data) {
4471 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4472 tmp_surface = surface_priv->scaled_2x_surface;
4473
4474 i965_add_adv_gpe_surface(ctx, gpe_context,
4475 tmp_surface,
4476 input_bti + 2 * i);
4477 } else {
4478 i965_add_adv_gpe_surface(ctx, gpe_context,
4479 l0_surface,
4480 input_bti + 2 * i);
4481
4482 }
4483
4484 if (slice_param->slice_type == HEVC_SLICE_B) {
4485 if (frame_info->mapped_ref_idx_list1[i] >= 0)
4486 tmp_surface = common_res->reference_pics[frame_info->mapped_ref_idx_list1[i]].obj_surface;
4487 else
4488 tmp_surface = NULL;
4489
4490 if (tmp_surface && tmp_surface->private_data) {
4491 surface_priv = (struct gen10_hevc_surface_priv *)(tmp_surface->private_data);
4492 tmp_surface = surface_priv->scaled_2x_surface;
4493
4494 i965_add_adv_gpe_surface(ctx, gpe_context,
4495 tmp_surface,
4496 input_bti + 2 * i + 1);
4497 } else
4498 i965_add_adv_gpe_surface(ctx, gpe_context,
4499 l1_surface,
4500 input_bti + 2 * i + 1);
4501 }
4502 }
4503
4504 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_JOB_QUEUE_1D_SURFACE;
4505
4506 i965_add_buffer_gpe_surface(ctx, gpe_context,
4507 &vme_context->res_jbq_header_lcu64_buffer,
4508 1,
4509 vme_context->res_jbq_header_lcu64_buffer.size,
4510 0,
4511 input_bti);
4512
4513 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4514 &vme_context->res_jbq_data_lcu64_surface,
4515 1, I965_SURFACEFORMAT_R8_UNORM,
4516 input_bti + 1);
4517
4518 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_RESIDUAL_DATA_SCRATCH_SURFACE;
4519 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
4520 &vme_context->res_residual_scratch_lcu64_surface,
4521 1, I965_SURFACEFORMAT_R8_UNORM,
4522 input_bti);
4523
4524
4525 input_bti = GEN10_HEVC_MBENC_INTER_LCU64_DEBUG_SURFACE;
4526 i965_add_buffer_gpe_surface(ctx, gpe_context,
4527 &vme_context->res_kernel_trace_data,
4528 0,
4529 vme_context->res_kernel_trace_data.size,
4530 0,
4531 input_bti);
4532 }
4533
4534 static void
gen10_hevc_mbenc_init_walker_param(struct gen10_hevc_enc_state * hevc_state,struct gen10_hevc_enc_kernel_walker_parameter * kernel_walker_param,struct gpe_media_object_walker_parameter * media_object_walker_param,struct gen10_hevc_gpe_scoreboard * hw_scoreboard)4535 gen10_hevc_mbenc_init_walker_param(struct gen10_hevc_enc_state *hevc_state,
4536 struct gen10_hevc_enc_kernel_walker_parameter *kernel_walker_param,
4537 struct gpe_media_object_walker_parameter *media_object_walker_param,
4538 struct gen10_hevc_gpe_scoreboard *hw_scoreboard)
4539 {
4540 int mw_26zx_h_factor;
4541
4542 if (kernel_walker_param->use_custom_walker == 0) {
4543 hw_scoreboard->scoreboard0.mask = 0x7F;
4544 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4545 hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4546
4547 hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4548 hw_scoreboard->dw1.scoreboard1.delta_y0 = 0;
4549
4550 hw_scoreboard->dw1.scoreboard1.delta_x1 = 0;
4551 hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4552
4553 hw_scoreboard->dw1.scoreboard1.delta_x2 = 1;
4554 hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4555
4556 hw_scoreboard->dw1.scoreboard1.delta_x3 = -1;
4557 hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4558
4559 hw_scoreboard->dw2.scoreboard2.delta_x4 = 0;
4560 hw_scoreboard->dw2.scoreboard2.delta_y4 = 0;
4561 hw_scoreboard->dw2.scoreboard2.delta_x5 = 0;
4562 hw_scoreboard->dw2.scoreboard2.delta_y5 = 0;
4563 hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4564 hw_scoreboard->dw2.scoreboard2.delta_y6 = 0;
4565 hw_scoreboard->dw2.scoreboard2.delta_x7 = 0;
4566 hw_scoreboard->dw2.scoreboard2.delta_y7 = 0;
4567
4568 gen10_init_media_object_walker_parameter(kernel_walker_param, media_object_walker_param);
4569 return;
4570 }
4571
4572 media_object_walker_param->color_count_minus1 = hevc_state->hevc_wf_param.num_regions - 1;
4573
4574 media_object_walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
4575
4576 media_object_walker_param->local_loop_exec_count = 0xFFF;
4577 media_object_walker_param->global_loop_exec_count = 0xFFF;
4578
4579 switch (kernel_walker_param->walker_degree) {
4580 case GEN10_WALKER_26_DEGREE:
4581 if (hevc_state->num_regions_in_slice > 1) {
4582 int thread_space_width = kernel_walker_param->resolution_x;
4583 int thread_space_height = hevc_state->hevc_wf_param.max_height_in_region;
4584
4585 int ts_width = thread_space_width;
4586 int ts_height = thread_space_height;
4587 int tmp_height = (ts_height + 1) & 0xfffe;
4588 ts_height = tmp_height;
4589 tmp_height = ((ts_width + 1) >> 1) + ((ts_width + ((tmp_height - 1) << 1)) + (2 * hevc_state->num_regions_in_slice - 1)) / (2 * hevc_state->num_regions_in_slice);
4590
4591 media_object_walker_param->block_resolution.x = ts_width;
4592 media_object_walker_param->block_resolution.y = tmp_height;
4593
4594 media_object_walker_param->global_start.x = 0;
4595 media_object_walker_param->global_start.y = 0;
4596
4597 media_object_walker_param->global_resolution.x = ts_width;
4598 media_object_walker_param->global_resolution.y = tmp_height;
4599
4600 media_object_walker_param->local_start.x = (ts_width + 1) & 0xfffe;;
4601 media_object_walker_param->local_start.y = 0;
4602
4603 media_object_walker_param->local_end.x = 0;
4604 media_object_walker_param->local_end.y = 0;
4605
4606 media_object_walker_param->global_outer_loop_stride.x = ts_width;
4607 media_object_walker_param->global_outer_loop_stride.y = 0;
4608
4609 media_object_walker_param->global_inner_loop_unit.x = 0;
4610 media_object_walker_param->global_inner_loop_unit.y = tmp_height;
4611
4612 media_object_walker_param->scoreboard_mask = 0x7F;
4613 media_object_walker_param->local_outer_loop_stride.x = 1;
4614 media_object_walker_param->local_outer_loop_stride.y = 0;
4615 media_object_walker_param->local_inner_loop_unit.x = -2;
4616 media_object_walker_param->local_inner_loop_unit.y = 1;
4617
4618 media_object_walker_param->global_loop_exec_count = 0;
4619 media_object_walker_param->local_loop_exec_count = (thread_space_width + (ts_height - 1) * 2 + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
4620 } else {
4621 media_object_walker_param->block_resolution.x = kernel_walker_param->resolution_x;
4622 media_object_walker_param->block_resolution.y = kernel_walker_param->resolution_y;
4623
4624 media_object_walker_param->global_resolution.x = media_object_walker_param->block_resolution.x;
4625 media_object_walker_param->global_resolution.y = media_object_walker_param->block_resolution.y;
4626
4627 media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->block_resolution.x;
4628 media_object_walker_param->global_outer_loop_stride.y = 0;
4629
4630 media_object_walker_param->global_inner_loop_unit.x = 0;
4631 media_object_walker_param->global_inner_loop_unit.y = media_object_walker_param->block_resolution.y;
4632
4633 media_object_walker_param->scoreboard_mask = 0x7F;
4634 media_object_walker_param->local_outer_loop_stride.x = 1;
4635 media_object_walker_param->local_outer_loop_stride.y = 0;
4636 media_object_walker_param->local_inner_loop_unit.x = -2;
4637 media_object_walker_param->local_inner_loop_unit.y = 1;
4638 }
4639
4640 {
4641 hw_scoreboard->scoreboard0.mask = 0x7F;
4642 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4643
4644 hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4645 hw_scoreboard->dw1.scoreboard1.delta_y0 = 0;
4646
4647 hw_scoreboard->dw1.scoreboard1.delta_x1 = -1;
4648 hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4649
4650 hw_scoreboard->dw1.scoreboard1.delta_x2 = 0;
4651 hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4652
4653 hw_scoreboard->dw1.scoreboard1.delta_x3 = 1;
4654 hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4655
4656 hw_scoreboard->dw2.scoreboard2.delta_x4 = 0;
4657 hw_scoreboard->dw2.scoreboard2.delta_y4 = 0;
4658
4659 hw_scoreboard->dw2.scoreboard2.delta_x5 = 0;
4660 hw_scoreboard->dw2.scoreboard2.delta_y5 = 0;
4661
4662 hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4663 hw_scoreboard->dw2.scoreboard2.delta_y6 = 0;
4664
4665 hw_scoreboard->dw2.scoreboard2.delta_x7 = 0;
4666 hw_scoreboard->dw2.scoreboard2.delta_y7 = 0;
4667 }
4668 break;
4669 case GEN10_WALKER_26Z_DEGREE: {
4670 media_object_walker_param->scoreboard_mask = 0x7f;
4671
4672 media_object_walker_param->global_resolution.x = kernel_walker_param->resolution_x;
4673 media_object_walker_param->global_resolution.y = kernel_walker_param->resolution_y;
4674
4675 media_object_walker_param->global_outer_loop_stride.x = 2;
4676 media_object_walker_param->global_outer_loop_stride.y = 0;
4677
4678 media_object_walker_param->global_inner_loop_unit.x = 0xFFF - 4 + 1;
4679 media_object_walker_param->global_inner_loop_unit.y = 2;
4680
4681 media_object_walker_param->local_outer_loop_stride.x = 0;
4682 media_object_walker_param->local_outer_loop_stride.y = 1;
4683 media_object_walker_param->local_inner_loop_unit.x = 1;
4684 media_object_walker_param->local_inner_loop_unit.y = 0;
4685
4686 media_object_walker_param->block_resolution.x = 2;
4687 media_object_walker_param->block_resolution.y = 2;
4688 }
4689
4690 {
4691 hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4692 hw_scoreboard->scoreboard0.mask = 0x7F;
4693 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4694
4695 hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4696 hw_scoreboard->dw1.scoreboard1.delta_y0 = 1;
4697
4698 hw_scoreboard->dw1.scoreboard1.delta_x1 = -1;
4699 hw_scoreboard->dw1.scoreboard1.delta_y1 = 0;
4700
4701 hw_scoreboard->dw1.scoreboard1.delta_x2 = -1;
4702 hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4703
4704 hw_scoreboard->dw1.scoreboard1.delta_x3 = 0;
4705 hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4706
4707 hw_scoreboard->dw2.scoreboard2.delta_x4 = 1;
4708 hw_scoreboard->dw2.scoreboard2.delta_y4 = -1;
4709 }
4710 break;
4711 case GEN10_WALKER_26X_DEGREE:
4712 if (hevc_state->num_regions_in_slice > 1) {
4713 int thread_space_width = ALIGN(hevc_state->frame_width, 32) >> 5;
4714 int ts_width = thread_space_width;
4715 int ts_height = hevc_state->hevc_wf_param.max_height_in_region;
4716 int tmp_height = (ts_height + 1) & 0xfffe;
4717 ts_height = tmp_height;
4718 tmp_height = ((ts_width + 1) >> 1) + ((ts_width + ((tmp_height - 1) << 1)) + (2 * hevc_state->num_regions_in_slice - 1)) / (2 * hevc_state->num_regions_in_slice);
4719 tmp_height *= (hevc_state->thread_num_per_ctb);
4720
4721 media_object_walker_param->scoreboard_mask = 0xff;
4722
4723 media_object_walker_param->global_resolution.x = ts_width;
4724 media_object_walker_param->global_resolution.y = tmp_height;
4725
4726 media_object_walker_param->global_start.x = 0;
4727 media_object_walker_param->global_start.y = 0;
4728
4729 media_object_walker_param->local_start.x = (ts_width + 1) & 0xfffe;
4730 media_object_walker_param->local_start.y = 0;
4731
4732 media_object_walker_param->local_end.x = 0;
4733 media_object_walker_param->local_end.y = 0;
4734
4735 media_object_walker_param->global_outer_loop_stride.x = ts_width;
4736 media_object_walker_param->global_outer_loop_stride.y = 0;
4737
4738 media_object_walker_param->global_inner_loop_unit.x = 0;
4739 media_object_walker_param->global_inner_loop_unit.y = tmp_height;
4740
4741 media_object_walker_param->local_outer_loop_stride.x = 1;
4742 media_object_walker_param->local_outer_loop_stride.y = 0;
4743 media_object_walker_param->local_inner_loop_unit.x = -2;
4744 media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4745 media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4746 media_object_walker_param->mid_loop_unit_x = 0;
4747 media_object_walker_param->mid_loop_unit_y = 1;
4748
4749 media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4750 media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4751
4752 media_object_walker_param->global_loop_exec_count = 0;
4753 media_object_walker_param->local_loop_exec_count = (thread_space_width + (ts_height - 1) * 2 + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
4754 } else {
4755 media_object_walker_param->scoreboard_mask = 0xff;
4756
4757 media_object_walker_param->global_resolution.x = kernel_walker_param->resolution_x;
4758 media_object_walker_param->global_resolution.y = kernel_walker_param->resolution_y * hevc_state->thread_num_per_ctb;
4759
4760 media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->global_resolution.x;
4761 media_object_walker_param->global_outer_loop_stride.y = 0;
4762
4763 media_object_walker_param->global_inner_loop_unit.x = 0;
4764 media_object_walker_param->global_inner_loop_unit.y = media_object_walker_param->global_resolution.y;
4765
4766 media_object_walker_param->local_outer_loop_stride.x = 1;
4767 media_object_walker_param->local_outer_loop_stride.y = 0;
4768 media_object_walker_param->local_inner_loop_unit.x = 0xFFF - 2 + 1; // -2 in 2's compliment format;
4769 media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4770 media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4771 media_object_walker_param->mid_loop_unit_x = 0;
4772 media_object_walker_param->mid_loop_unit_y = 1;
4773
4774 media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4775 media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4776 }
4777
4778 {
4779 hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4780 hw_scoreboard->scoreboard0.mask = 0xff;
4781 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4782
4783 hw_scoreboard->dw1.scoreboard1.delta_x0 = -1;
4784 hw_scoreboard->dw1.scoreboard1.delta_y0 = hevc_state->thread_num_per_ctb - 1;
4785
4786 hw_scoreboard->dw1.scoreboard1.delta_x1 = -1;
4787 hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4788
4789 hw_scoreboard->dw1.scoreboard1.delta_x2 = 0;
4790 hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4791
4792 hw_scoreboard->dw1.scoreboard1.delta_x3 = 1;
4793 hw_scoreboard->dw1.scoreboard1.delta_y3 = -1;
4794
4795 hw_scoreboard->dw2.scoreboard2.delta_x4 = 0;
4796 hw_scoreboard->dw2.scoreboard2.delta_y4 = -hevc_state->thread_num_per_ctb;
4797
4798 hw_scoreboard->dw2.scoreboard2.delta_x5 = 0;
4799 hw_scoreboard->dw2.scoreboard2.delta_y5 = -2;
4800
4801 hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4802 hw_scoreboard->dw2.scoreboard2.delta_y6 = -3;
4803
4804 hw_scoreboard->dw2.scoreboard2.delta_x7 = 0;
4805 hw_scoreboard->dw2.scoreboard2.delta_y7 = -4;
4806 }
4807
4808 break;
4809 case GEN10_WALKER_26ZX_DEGREE:
4810 mw_26zx_h_factor = 5;
4811
4812 if (hevc_state->num_regions_in_slice > 1) {
4813 int thread_space_width = ALIGN(hevc_state->frame_width, 64) >> 6;
4814 int thread_space_height = hevc_state->hevc_wf_param.max_height_in_region;
4815 int sp_width = (thread_space_width + 1) & 0xfffe;
4816 int sp_height = (thread_space_height + 1) & 0xfffe;
4817 int wf_num = (sp_width + (sp_height - 1) * 2 + hevc_state->num_regions_in_slice - 1) / hevc_state->num_regions_in_slice;
4818 sp_height = ((sp_width + 1) >> 1) + ((sp_width + ((sp_height - 1) << 1)) + (2 * hevc_state->num_regions_in_slice - 1)) / (2 * hevc_state->num_regions_in_slice);
4819 int ts_width = sp_width * mw_26zx_h_factor;
4820 int ts_height = sp_height * (hevc_state->thread_num_per_ctb);
4821
4822 media_object_walker_param->scoreboard_mask = 0xff;
4823
4824 media_object_walker_param->global_resolution.x = ts_width;
4825 media_object_walker_param->global_resolution.y = ts_height;
4826
4827 media_object_walker_param->global_start.x = 0;
4828 media_object_walker_param->global_start.y = 0;
4829
4830 media_object_walker_param->local_start.x = media_object_walker_param->global_resolution.x;
4831 media_object_walker_param->local_start.y = 0;
4832
4833 media_object_walker_param->local_end.x = 0;
4834 media_object_walker_param->local_end.y = 0;
4835
4836 media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->global_resolution.x;
4837 media_object_walker_param->global_outer_loop_stride.y = 0;
4838
4839 media_object_walker_param->global_inner_loop_unit.x = 0;
4840 media_object_walker_param->global_inner_loop_unit.y = media_object_walker_param->global_resolution.y;
4841
4842 media_object_walker_param->local_outer_loop_stride.x = 1;
4843 media_object_walker_param->local_outer_loop_stride.y = 0;
4844 media_object_walker_param->local_inner_loop_unit.x = -mw_26zx_h_factor * 2;
4845 media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4846 media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4847 media_object_walker_param->mid_loop_unit_x = 0;
4848 media_object_walker_param->mid_loop_unit_y = 1;
4849
4850 media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4851 media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4852
4853 media_object_walker_param->global_loop_exec_count = 0;
4854 media_object_walker_param->local_loop_exec_count = (wf_num + 1) * mw_26zx_h_factor;
4855 } else {
4856 media_object_walker_param->scoreboard_mask = 0xff;
4857
4858 media_object_walker_param->global_resolution.x = kernel_walker_param->resolution_x * mw_26zx_h_factor;
4859 media_object_walker_param->global_resolution.y = kernel_walker_param->resolution_y * hevc_state->thread_num_per_ctb;
4860
4861 media_object_walker_param->global_outer_loop_stride.x = media_object_walker_param->global_resolution.x;
4862 media_object_walker_param->global_outer_loop_stride.y = 0;
4863
4864 media_object_walker_param->global_inner_loop_unit.x = 0;
4865 media_object_walker_param->global_inner_loop_unit.y = media_object_walker_param->global_resolution.y;
4866
4867 media_object_walker_param->local_outer_loop_stride.x = 1;
4868 media_object_walker_param->local_outer_loop_stride.y = 0;
4869 media_object_walker_param->local_inner_loop_unit.x = 0xFFF - 10 + 1; // -10 in 2's compliment format;
4870 media_object_walker_param->local_inner_loop_unit.y = hevc_state->thread_num_per_ctb;
4871 media_object_walker_param->middle_loop_extra_steps = hevc_state->thread_num_per_ctb - 1;
4872 media_object_walker_param->mid_loop_unit_x = 0;
4873 media_object_walker_param->mid_loop_unit_y = 1;
4874
4875 media_object_walker_param->block_resolution.x = media_object_walker_param->global_resolution.x;
4876 media_object_walker_param->block_resolution.y = media_object_walker_param->global_resolution.y;
4877 }
4878
4879 {
4880 hw_scoreboard->scoreboard0.mask = 0xff;
4881 hw_scoreboard->scoreboard0.type = hevc_state->use_hw_non_stalling_scoreboard;
4882 hw_scoreboard->scoreboard0.enable = hevc_state->use_hw_scoreboard;
4883
4884 hw_scoreboard->dw1.scoreboard1.delta_x0 = -5;
4885 hw_scoreboard->dw1.scoreboard1.delta_y0 = -1;
4886
4887 hw_scoreboard->dw1.scoreboard1.delta_x1 = -2;
4888 hw_scoreboard->dw1.scoreboard1.delta_y1 = -1;
4889
4890 hw_scoreboard->dw1.scoreboard1.delta_x2 = 3;
4891 hw_scoreboard->dw1.scoreboard1.delta_y2 = -1;
4892
4893 hw_scoreboard->dw1.scoreboard1.delta_x3 = -1;
4894 hw_scoreboard->dw1.scoreboard1.delta_y3 = 0;
4895
4896 hw_scoreboard->dw2.scoreboard2.delta_x4 = -2;
4897 hw_scoreboard->dw2.scoreboard2.delta_y4 = 0;
4898
4899 hw_scoreboard->dw2.scoreboard2.delta_x5 = -5;
4900 hw_scoreboard->dw2.scoreboard2.delta_y5 = hevc_state->thread_num_per_ctb - 1;
4901
4902 hw_scoreboard->dw2.scoreboard2.delta_x6 = 0;
4903 hw_scoreboard->dw2.scoreboard2.delta_y6 = -1;
4904
4905 hw_scoreboard->dw2.scoreboard2.delta_x7 = 5;
4906 hw_scoreboard->dw2.scoreboard2.delta_y7 = -1;
4907 }
4908 break;
4909 default:
4910 break;
4911 }
4912
4913 return;
4914 }
4915
4916 static void
gen10_hevc_update_scoreboard(struct i965_gpe_context * gpe_context,struct gen10_hevc_gpe_scoreboard * scoreboard)4917 gen10_hevc_update_scoreboard(struct i965_gpe_context *gpe_context,
4918 struct gen10_hevc_gpe_scoreboard *scoreboard)
4919 {
4920 if (!gpe_context || !scoreboard)
4921 return;
4922
4923 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard->scoreboard0.mask;
4924 gpe_context->vfe_desc5.scoreboard0.type = scoreboard->scoreboard0.type;
4925 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard->scoreboard0.enable;
4926
4927 gpe_context->vfe_desc6.dword = scoreboard->dw1.value;
4928 gpe_context->vfe_desc7.dword = scoreboard->dw2.value;
4929 return;
4930 }
4931
4932 static void
gen10_hevc_enc_mbenc_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int mbenc_type)4933 gen10_hevc_enc_mbenc_kernel(VADriverContextP ctx,
4934 struct encode_state *encode_state,
4935 struct intel_encoder_context *encoder_context,
4936 int mbenc_type)
4937 {
4938 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
4939 struct gen10_hevc_enc_state *hevc_state;
4940 struct i965_gpe_context *gpe_context;
4941 int media_function;
4942 struct gpe_media_object_walker_parameter media_object_walker_param;
4943 struct gen10_hevc_enc_kernel_walker_parameter kernel_walker_param;
4944 struct gen10_hevc_gpe_scoreboard hw_scoreboard;
4945 int mbenc_idx;
4946
4947 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
4948
4949 if (mbenc_type != GEN10_HEVC_MBENC_INTRA)
4950 gen10_hevc_enc_generate_regions_in_slice_control(ctx, encode_state, encoder_context);
4951
4952 switch (mbenc_type) {
4953 case GEN10_HEVC_MBENC_INTER_LCU32:
4954 mbenc_idx = GEN10_HEVC_MBENC_INTER_LCU32_KRNIDX_G10;
4955 media_function = GEN10_HEVC_MEDIA_STATE_MBENC_LCU32;
4956 break;
4957 case GEN10_HEVC_MBENC_INTER_LCU64:
4958 mbenc_idx = GEN10_HEVC_MBENC_INTER_LCU64_KRNIDX_G10;
4959 media_function = GEN10_HEVC_MEDIA_STATE_MBENC_LCU64;
4960 break;
4961 case GEN10_HEVC_MBENC_INTRA:
4962 default:
4963 mbenc_idx = GEN10_HEVC_MBENC_I_KRNIDX_G10;
4964 media_function = GEN10_HEVC_MEDIA_STATE_MBENC_INTRA;
4965 break;
4966 }
4967
4968 gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_idx]);
4969
4970 memset(&hw_scoreboard, 0, sizeof(hw_scoreboard));
4971 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
4972 gen8_gpe_context_init(ctx, gpe_context);
4973 gen9_gpe_reset_binding_table(ctx, gpe_context);
4974
4975 kernel_walker_param.use_scoreboard = hevc_state->use_hw_scoreboard;
4976 kernel_walker_param.use_custom_walker = 0;
4977 if (mbenc_type == GEN10_HEVC_MBENC_INTRA)
4978 gen10_hevc_enc_mbenc_intra_curbe(ctx, encode_state, encoder_context, gpe_context);
4979 else
4980 gen10_hevc_enc_mbenc_inter_curbe(ctx, encode_state, encoder_context, gpe_context);
4981
4982 if (mbenc_type == GEN10_HEVC_MBENC_INTRA) {
4983 gen10_hevc_enc_mbenc_intra_surfaces(ctx, encode_state, encoder_context, gpe_context);
4984 kernel_walker_param.resolution_x = ALIGN(hevc_state->frame_width, 32) >> 5;
4985 kernel_walker_param.resolution_y = ALIGN(hevc_state->frame_height, 32) >> 5;
4986 if (hevc_state->is_64lcu) {
4987 kernel_walker_param.walker_degree = GEN10_WALKER_26_DEGREE;// 26_DEGREE
4988 kernel_walker_param.use_custom_walker = 1;
4989 } else {
4990 kernel_walker_param.use_vertical_scan = 1;
4991 }
4992 } else if (mbenc_type == GEN10_HEVC_MBENC_INTER_LCU32) {
4993 gen10_hevc_enc_mbenc_inter_lcu32_surfaces(ctx, encode_state, encoder_context, gpe_context);
4994 kernel_walker_param.resolution_x = ALIGN(hevc_state->frame_width, 32) >> 5;
4995 kernel_walker_param.resolution_y = ALIGN(hevc_state->frame_height, 32) >> 5;
4996 kernel_walker_param.use_custom_walker = 1;
4997 if (hevc_state->brc.target_usage == 7)
4998 kernel_walker_param.walker_degree = GEN10_WALKER_26_DEGREE;
4999 else
5000 kernel_walker_param.walker_degree = GEN10_WALKER_26X_DEGREE;
5001 } else {
5002 gen10_hevc_enc_mbenc_inter_lcu64_surfaces(ctx, encode_state, encoder_context, gpe_context);
5003 kernel_walker_param.resolution_x = vme_context->frame_info.width_in_lcu;
5004 kernel_walker_param.resolution_y = vme_context->frame_info.height_in_lcu;
5005 kernel_walker_param.use_custom_walker = 1;
5006 kernel_walker_param.walker_degree = GEN10_WALKER_26ZX_DEGREE;
5007 }
5008
5009 gen10_hevc_enc_generate_lculevel_data(ctx, encode_state, encoder_context);
5010
5011 memset(&hw_scoreboard, 0, sizeof(hw_scoreboard));
5012 memset(&media_object_walker_param, 0, sizeof(media_object_walker_param));
5013
5014 gen10_hevc_mbenc_init_walker_param(hevc_state, &kernel_walker_param,
5015 &media_object_walker_param,
5016 &hw_scoreboard);
5017
5018 gen10_hevc_update_scoreboard(gpe_context, &hw_scoreboard);
5019
5020 gen8_gpe_setup_interface_data(ctx, gpe_context);
5021
5022 gen10_run_kernel_media_object_walker(ctx, encoder_context,
5023 gpe_context,
5024 media_function,
5025 &media_object_walker_param);
5026 }
5027
5028 static VAStatus
gen10_hevc_vme_pipeline_prepare(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)5029 gen10_hevc_vme_pipeline_prepare(VADriverContextP ctx,
5030 struct encode_state *encode_state,
5031 struct intel_encoder_context *encoder_context)
5032 {
5033 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
5034 struct gen10_hevc_enc_state *hevc_state;
5035 struct gen10_hevc_enc_frame_info *frame_info;
5036 struct gen10_hevc_enc_common_res *common_res;
5037 int i;
5038
5039 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
5040 frame_info = &vme_context->frame_info;
5041 common_res = &vme_context->common_res;
5042
5043 if (hevc_state->is_64lcu || hevc_state->is_10bit) {
5044 if (frame_info->picture_coding_type != HEVC_SLICE_I) {
5045 for (i = 0; i < 16; i++) {
5046 if (common_res->reference_pics[i].obj_surface == NULL)
5047 continue;
5048
5049 gen10_hevc_enc_conv_scaling_surface(ctx, encode_state,
5050 encoder_context,
5051 NULL,
5052 common_res->reference_pics[i].obj_surface,
5053 1);
5054 }
5055 }
5056 }
5057
5058 gen10_hevc_enc_conv_scaling_surface(ctx, encode_state, encoder_context,
5059 common_res->uncompressed_pic.obj_surface,
5060 common_res->reconstructed_pic.obj_surface,
5061 0);
5062
5063 return VA_STATUS_SUCCESS;
5064 }
5065
5066 static VAStatus
gen10_hevc_vme_pipeline(VADriverContextP ctx,VAProfile profile,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)5067 gen10_hevc_vme_pipeline(VADriverContextP ctx,
5068 VAProfile profile,
5069 struct encode_state *encode_state,
5070 struct intel_encoder_context *encoder_context)
5071 {
5072 struct gen10_hevc_enc_context *vme_context = encoder_context->vme_context;
5073 struct gen10_hevc_enc_state *hevc_state;
5074 struct gen10_hevc_enc_frame_info *frame_info;
5075 VAStatus va_status = VA_STATUS_SUCCESS;
5076
5077 if (!vme_context || !vme_context->enc_priv_state)
5078 return VA_STATUS_ERROR_INVALID_CONTEXT;
5079
5080 hevc_state = (struct gen10_hevc_enc_state *)vme_context->enc_priv_state;
5081 frame_info = &vme_context->frame_info;
5082
5083 va_status = gen10_hevc_enc_init_parameters(ctx, encode_state, encoder_context);
5084 if (va_status != VA_STATUS_SUCCESS)
5085 return va_status;
5086
5087 va_status = gen10_hevc_vme_pipeline_prepare(ctx, encode_state, encoder_context);
5088 if (va_status != VA_STATUS_SUCCESS)
5089 return va_status;
5090
5091 if (hevc_state->brc.brc_reset || !hevc_state->brc.brc_inited) {
5092 gen10_hevc_enc_brc_init_reset(ctx, encode_state, encoder_context);
5093
5094 hevc_state->brc.brc_inited = 1;
5095 hevc_state->brc.brc_reset = 0;
5096 }
5097
5098 if (frame_info->picture_coding_type == HEVC_SLICE_I) {
5099 gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5100 GEN10_HEVC_HME_LEVEL_4X,
5101 GEN10_HEVC_ME_DIST_TYPE_INTRA_BRC);
5102 } else {
5103 if (hevc_state->hme_enabled) {
5104 if (hevc_state->b16xme_enabled)
5105 gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5106 GEN10_HEVC_HME_LEVEL_16X,
5107 GEN10_HEVC_ME_DIST_TYPE_INTER_BRC);
5108
5109
5110
5111 gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5112 GEN10_HEVC_HME_LEVEL_4X,
5113 GEN10_HEVC_ME_DIST_TYPE_INTER_BRC);
5114 }
5115 }
5116
5117 gen10_hevc_enc_me_kernel(ctx, encode_state, encoder_context,
5118 GEN10_HEVC_HME_LEVEL_4X,
5119 GEN10_HEVC_ME_DIST_TYPE_INTRA);
5120
5121 gen10_hevc_enc_brc_frame_update_kernel(ctx, encode_state,
5122 encoder_context);
5123
5124 gen10_hevc_enc_brc_lcu_update_kernel(ctx, encode_state,
5125 encoder_context);
5126
5127 if (frame_info->picture_coding_type == HEVC_SLICE_I)
5128 gen10_hevc_enc_mbenc_kernel(ctx, encode_state, encoder_context,
5129 GEN10_HEVC_MBENC_INTRA);
5130 else
5131 gen10_hevc_enc_mbenc_kernel(ctx, encode_state, encoder_context,
5132 (hevc_state->is_64lcu ?
5133 GEN10_HEVC_MBENC_INTER_LCU64 :
5134 GEN10_HEVC_MBENC_INTER_LCU32));
5135
5136
5137 #if 0
5138 if (hevc_state->frame_number == 0) {
5139 struct gen10_hevc_surface_priv *surface_priv = NULL;
5140
5141 surface_priv = (struct gen10_hevc_surface_priv *)encode_state->reconstructed_object->private_data;
5142 //print_out_obj_surface(ctx, surface_priv->scaled_4x_surface_id, 1);
5143
5144 //print_out_gpe_resource(&vme_context->res_mb_code_surface, 0,
5145 // hevc_state->cu_records_offset, 1, 0, 0, 64);
5146 //print_out_gpe_resource(&vme_context->res_mb_code_surface, 0,
5147 // 0, 1, 0, 0, 64);
5148 //print_out_gpe_resource(&vme_context->res_s4x_me_dist_surface, 0,
5149 // 0, 1, 0, 0, 64);
5150
5151 //return VA_STATUS_ERROR_INVALID_PARAMETER;
5152 }
5153 #endif
5154 return VA_STATUS_SUCCESS;
5155 }
5156
5157 static void
gen10_hevc_hcp_pipe_mode_select(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct intel_batchbuffer * batch)5158 gen10_hevc_hcp_pipe_mode_select(VADriverContextP ctx,
5159 struct encode_state *encode_state,
5160 struct intel_encoder_context *encoder_context,
5161 struct intel_batchbuffer *batch)
5162 {
5163 struct gen10_hevc_enc_context *pak_context;
5164 struct gen10_hevc_enc_state *hevc_state;
5165 gen10_hcp_pipe_mode_select_param param;
5166
5167 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5168 hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5169
5170 memset(¶m, 0, sizeof(param));
5171
5172 param.dw1.codec_select = GEN10_HCP_ENCODE;
5173 param.dw1.codec_standard_select = GEN10_HCP_HEVC_CODEC;
5174 param.dw1.sao_first_pass = hevc_state->sao_first_pass_flag;
5175 param.dw1.rdoq_enabled = hevc_state->rdoq_enabled;
5176 param.dw1.pak_frame_level_streamout_enabled = 1;
5177
5178 if (hevc_state->brc.brc_enabled &&
5179 hevc_state->curr_pak_idx != (hevc_state->num_sao_passes - 1))
5180 param.dw1.pak_streamout_enabled = 1;
5181
5182 gen10_hcp_pipe_mode_select(ctx, batch, ¶m);
5183 }
5184
5185 static void
gen10_hevc_hcp_multi_surfaces(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct intel_batchbuffer * batch)5186 gen10_hevc_hcp_multi_surfaces(VADriverContextP ctx,
5187 struct encode_state *encode_state,
5188 struct intel_encoder_context *encoder_context,
5189 struct intel_batchbuffer *batch)
5190 {
5191 struct gen10_hevc_enc_context *pak_context;
5192 gen10_hcp_surface_state_param param;
5193 struct object_surface *obj_surface;
5194 int i = 0;
5195
5196 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5197
5198 for (i = 0; i < 2; i++) {
5199 if (i == 0)
5200 obj_surface = pak_context->common_res.reconstructed_pic.obj_surface;
5201 else
5202 obj_surface = pak_context->common_res.uncompressed_pic.obj_surface;
5203
5204 memset(¶m, 0, sizeof(param));
5205
5206 param.dw1.surface_pitch = obj_surface->width - 1;
5207 param.dw1.surface_id = (i == 0 ? GEN10_HCP_DECODE_SURFACE_ID :
5208 GEN10_HCP_INPUT_SURFACE_ID);
5209 param.dw2.y_cb_offset = obj_surface->y_cb_offset;
5210
5211 if (obj_surface->fourcc == VA_FOURCC_P010)
5212 param.dw2.surface_format = SURFACE_FORMAT_P010;
5213 else if (obj_surface->fourcc == VA_FOURCC_NV12)
5214 param.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5215 else
5216 assert(0);
5217
5218 gen10_hcp_surface_state(ctx, batch, ¶m);
5219 }
5220 }
5221
5222 static void
gen10_hevc_hcp_pipe_buf_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct intel_batchbuffer * batch)5223 gen10_hevc_hcp_pipe_buf_state(VADriverContextP ctx,
5224 struct encode_state *encode_state,
5225 struct intel_encoder_context *encoder_context,
5226 struct intel_batchbuffer *batch)
5227 {
5228 struct gen10_hevc_enc_context *pak_context;
5229 struct gen10_hevc_surface_priv *surface_priv;
5230 gen10_hcp_pipe_buf_addr_state_param param;
5231 struct gen10_hevc_enc_common_res *common_res;
5232 int i;
5233
5234 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5235
5236 common_res = &pak_context->common_res;
5237 surface_priv = (struct gen10_hevc_surface_priv *)common_res->
5238 reconstructed_pic.obj_surface->private_data;
5239
5240 memset(¶m, 0, sizeof(param));
5241
5242 param.reconstructed = &common_res->reconstructed_pic.gpe_res;
5243 param.deblocking_filter_line = &common_res->deblocking_filter_line_buffer;
5244 param.deblocking_filter_tile_line = &common_res->deblocking_filter_tile_line_buffer;
5245 param.deblocking_filter_tile_column = &common_res->deblocking_filter_tile_column_buffer;
5246 param.metadata_line = &common_res->metadata_line_buffer;
5247 param.metadata_tile_line = &common_res->metadata_tile_line_buffer;
5248 param.metadata_tile_column = &common_res->metadata_tile_column_buffer;
5249 param.sao_line = &common_res->sao_line_buffer;
5250 param.sao_tile_line = &common_res->sao_tile_line_buffer;
5251 param.sao_tile_column = &common_res->sao_tile_column_buffer;
5252
5253 if (surface_priv)
5254 param.current_motion_vector_temporal = &surface_priv->motion_vector_temporal;
5255
5256 for (i = 0; i < 8; i++) {
5257 if (common_res->reference_pics[i].obj_surface)
5258 param.reference_picture[i] = &common_res->reference_pics[i].gpe_res;
5259 }
5260
5261 param.uncompressed_picture = &common_res->uncompressed_pic.gpe_res;
5262 param.streamout_data_destination = &common_res->streamout_data_destination_buffer;
5263 param.picture_status = &common_res->picture_status_buffer;
5264 param.ildb_streamout = &common_res->ildb_streamout_buffer;
5265
5266 for (i = 0; i < 8; i++) {
5267 if (common_res->reference_pics[i].obj_surface) {
5268 surface_priv = (struct gen10_hevc_surface_priv *)common_res->
5269 reference_pics[i].obj_surface->private_data;
5270 if (surface_priv)
5271 param.collocated_motion_vector_temporal[i] =
5272 &surface_priv->motion_vector_temporal;
5273 }
5274 }
5275
5276 param.sao_streamout_data_destination = &common_res->sao_streamout_data_destination_buffer;
5277 param.frame_statics_streamout_data_destination =
5278 &common_res->frame_statics_streamout_data_destination_buffer;
5279 param.sse_source_pixel_rowstore = &common_res->sse_source_pixel_rowstore_buffer;
5280
5281 gen10_hcp_pipe_buf_addr_state(ctx, batch, ¶m);
5282 }
5283
5284 static void
gen10_hevc_hcp_ind_obj_base_addr_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct intel_batchbuffer * batch)5285 gen10_hevc_hcp_ind_obj_base_addr_state(VADriverContextP ctx,
5286 struct encode_state *encode_state,
5287 struct intel_encoder_context *encoder_context,
5288 struct intel_batchbuffer *batch)
5289 {
5290 struct gen10_hevc_enc_context *pak_context;
5291 struct gen10_hevc_enc_state *hevc_state;
5292 gen10_hcp_ind_obj_base_addr_state_param param;
5293
5294 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5295 hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5296
5297 memset(¶m, 0, sizeof(param));
5298
5299 param.ind_cu_obj_bse = &pak_context->res_mb_code_surface;
5300 param.ind_cu_obj_bse_offset = hevc_state->cu_records_offset;
5301
5302 param.ind_pak_bse = &pak_context->common_res.compressed_bitstream.gpe_res;
5303 param.ind_pak_bse_offset = pak_context->common_res.compressed_bitstream.offset;
5304 param.ind_pak_bse_upper = pak_context->common_res.compressed_bitstream.end_offset;
5305
5306 gen10_hcp_ind_obj_base_addr_state(ctx, batch, ¶m);
5307 }
5308
5309 static void
gen10_hevc_hcp_qm_fqm_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct intel_batchbuffer * batch)5310 gen10_hevc_hcp_qm_fqm_state(VADriverContextP ctx,
5311 struct encode_state *encode_state,
5312 struct intel_encoder_context *encoder_context,
5313 struct intel_batchbuffer *batch)
5314 {
5315 struct gen10_hevc_enc_context *pak_context;
5316
5317 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5318
5319 gen10_hevc_enc_hcp_set_qm_fqm_states(ctx, batch, &pak_context->frame_info);
5320 }
5321
/* Program HCP_PIC_STATE for the HEVC PAK engine.
 *
 * Translates VA-API sequence/picture/slice parameters into the
 * picture-level state (CU/TU size limits, coding-tool enable flags,
 * QP offsets, bit depths) and emits the command into @batch.
 */
static void
gen10_hevc_hcp_pic_state(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context,
                         struct intel_batchbuffer *batch)
{
    struct gen10_hevc_enc_context *pak_context;
    struct gen10_hevc_enc_state *hevc_state;
    VAEncSequenceParameterBufferHEVC *seq_param;
    VAEncPictureParameterBufferHEVC *pic_param;
    VAEncSliceParameterBufferHEVC *slice_param;
    struct gen10_hevc_enc_frame_info *frame_info;
    gen10_hcp_pic_state_param param;

    pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
    hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;

    frame_info = &pak_context->frame_info;
    seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    /* Slice-level flags below are taken from the first slice only. */
    slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;

    memset(&param, 0, sizeof(param));
    param.dw1.frame_width_in_cu_minus1 = frame_info->width_in_cu - 1;
    param.dw1.frame_height_in_cu_minus1 = frame_info->height_in_cu - 1;
    param.dw1.pak_transform_skip = pic_param->pic_fields.bits.transform_skip_enabled_flag;

    /* CU/TU size fields are expressed as log2 values derived from the
     * SPS min sizes plus the max/min deltas. PCM is not used (0). */
    param.dw2.min_cu_size = seq_param->log2_min_luma_coding_block_size_minus3;
    param.dw2.lcu_size = seq_param->log2_min_luma_coding_block_size_minus3 +
                         seq_param->log2_diff_max_min_luma_coding_block_size;
    param.dw2.min_tu_size = seq_param->log2_min_transform_block_size_minus2;
    param.dw2.max_tu_size = seq_param->log2_min_transform_block_size_minus2 +
                            seq_param->log2_diff_max_min_transform_block_size;
    param.dw2.min_pcm_size = 0;
    param.dw2.max_pcm_size = 0;

    /* SAO is only enabled for 8-bit luma (bit_depth_luma_minus8 == 0). */
    if ((slice_param->slice_fields.bits.slice_sao_luma_flag ||
         slice_param->slice_fields.bits.slice_sao_chroma_flag) &&
        !frame_info->bit_depth_luma_minus8)
        param.dw4.sao_enabled_flag = 1;

    if (pic_param->pic_fields.bits.cu_qp_delta_enabled_flag) {
        param.dw4.cu_qp_delta_enabled_flag = 1;
        param.dw4.diff_cu_qp_delta_depth = pic_param->diff_cu_qp_delta_depth;
    }

    param.dw4.pcm_loop_filter_disable_flag = seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag;
    param.dw4.weighted_bipred_flag = pic_param->pic_fields.bits.weighted_bipred_flag;
    param.dw4.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
    param.dw4.transform_skip_enabled_flag = pic_param->pic_fields.bits.transform_skip_enabled_flag;
    param.dw4.amp_enabled_flag = seq_param->seq_fields.bits.amp_enabled_flag;
    param.dw4.transquant_bypass_enabled_flag = pic_param->pic_fields.bits.transquant_bypass_enabled_flag;
    param.dw4.strong_intra_smoothing_enabled_flag = seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag;

    /* NOTE(review): cb is loaded from pps_cr_qp_offset and cr from
     * pps_cb_qp_offset -- this looks swapped relative to the VA-API
     * field names. Confirm against the HCP_PIC_STATE register layout
     * before "fixing"; the hardware field naming may differ. */
    param.dw5.pic_cb_qp_offset = pic_param->pps_cr_qp_offset & 0x1f;
    param.dw5.pic_cr_qp_offset = pic_param->pps_cb_qp_offset & 0x1f;
    param.dw5.max_transform_hierarchy_depth_intra = seq_param->max_transform_hierarchy_depth_intra;
    param.dw5.max_transform_hierarchy_depth_inter = seq_param->max_transform_hierarchy_depth_inter;
    param.dw5.pcm_sample_bit_depth_chroma_minus1 = seq_param->pcm_sample_bit_depth_chroma_minus1;
    param.dw5.pcm_sample_bit_depth_luma_minus1 = seq_param->pcm_sample_bit_depth_luma_minus1;
    param.dw5.bit_depth_chroma_minus8 = seq_param->seq_fields.bits.bit_depth_chroma_minus8;
    param.dw5.bit_depth_luma_minus8 = seq_param->seq_fields.bits.bit_depth_luma_minus8;

    param.dw6.lcu_max_bits_allowed = frame_info->ctu_max_bitsize_allowed;

    /* CQP-style fixed settings: rho-domain RC off, SSE stats on. */
    param.dw19.rho_domain_rc_enabled = 0;
    param.dw19.rho_domain_frame_qp = 0;
    param.dw19.fraction_qp_adj_enabled = 0;
    param.dw19.first_slice_segment_in_pic_flag = 1;
    param.dw19.nal_unit_type_flag = 1;
    param.dw19.sse_enabled = 1;
    param.dw19.rhoq_enabled = hevc_state->rdoq_enabled;

    gen10_hcp_pic_state(ctx, batch, &param);
}
5397
5398 static void
gen10_hevc_hcp_rdoq_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct intel_batchbuffer * batch)5399 gen10_hevc_hcp_rdoq_state(VADriverContextP ctx,
5400 struct encode_state *encode_state,
5401 struct intel_encoder_context *encoder_context,
5402 struct intel_batchbuffer *batch)
5403 {
5404 struct gen10_hevc_enc_context *pak_context;
5405 gen10_hcp_rdoq_state_param param;
5406
5407 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5408
5409 memset(¶m, 0, sizeof(param));
5410
5411 memcpy(param.lambda_intra_luma, pak_context->lambda_param.lambda_intra[0],
5412 sizeof(param.lambda_intra_luma));
5413 memcpy(param.lambda_intra_chroma, pak_context->lambda_param.lambda_intra[1],
5414 sizeof(param.lambda_intra_chroma));
5415 memcpy(param.lambda_inter_luma, pak_context->lambda_param.lambda_inter[0],
5416 sizeof(param.lambda_inter_luma));
5417 memcpy(param.lambda_inter_chroma, pak_context->lambda_param.lambda_inter[1],
5418 sizeof(param.lambda_inter_chroma));
5419
5420 gen10_hcp_rdoq_state(ctx, batch, ¶m);
5421 }
5422
5423 static void
gen10_hevc_pak_picture_level(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)5424 gen10_hevc_pak_picture_level(VADriverContextP ctx,
5425 struct encode_state *encode_state,
5426 struct intel_encoder_context *encoder_context)
5427 {
5428 struct intel_batchbuffer *batch = encoder_context->base.batch;
5429 struct gen10_hevc_enc_context *pak_context;
5430 struct gen10_hevc_enc_state *hevc_state;
5431
5432 pak_context = (struct gen10_hevc_enc_context *)encoder_context->mfc_context;
5433 hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
5434
5435 gen10_hevc_hcp_pipe_mode_select(ctx, encode_state, encoder_context, batch);
5436 gen10_hevc_hcp_multi_surfaces(ctx, encode_state, encoder_context, batch);
5437 gen10_hevc_hcp_pipe_buf_state(ctx, encode_state, encoder_context, batch);
5438 gen10_hevc_hcp_ind_obj_base_addr_state(ctx, encode_state, encoder_context, batch);
5439 gen10_hevc_hcp_qm_fqm_state(ctx, encode_state, encoder_context, batch);
5440
5441 if (hevc_state->brc.brc_enabled) {
5442 struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5443
5444 memset(&second_level_batch, 0, sizeof(second_level_batch));
5445 second_level_batch.offset = GEN10_HEVC_BRC_IMG_STATE_SIZE_PER_PASS *
5446 hevc_state->curr_pak_idx;
5447 second_level_batch.is_second_level = 1;
5448 second_level_batch.bo = pak_context->res_brc_pic_image_state_write_buffer.bo;
5449
5450 gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5451 } else
5452 gen10_hevc_hcp_pic_state(ctx, encode_state, encoder_context, batch);
5453
5454 if (hevc_state->rdoq_enabled)
5455 gen10_hevc_hcp_rdoq_state(ctx, encode_state, encoder_context, batch);
5456 }
5457
5458 static void
gen10_hevc_hcp_weightoffset(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct intel_batchbuffer * batch,int slice_index)5459 gen10_hevc_hcp_weightoffset(VADriverContextP ctx,
5460 struct encode_state *encode_state,
5461 struct intel_encoder_context *encoder_context,
5462 struct intel_batchbuffer *batch,
5463 int slice_index)
5464 {
5465 VAEncPictureParameterBufferHEVC *pic_param;
5466 VAEncSliceParameterBufferHEVC *slice_param;
5467
5468 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5469 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
5470
5471 gen10_hevc_enc_hcp_set_weight_offsets(ctx, batch, pic_param, slice_param);
5472 }
5473
5474 static void
gen10_hevc_ref_idx_lists(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct intel_batchbuffer * batch,int slice_index)5475 gen10_hevc_ref_idx_lists(VADriverContextP ctx,
5476 struct encode_state *encode_state,
5477 struct intel_encoder_context *encoder_context,
5478 struct intel_batchbuffer *batch,
5479 int slice_index)
5480 {
5481 VAEncPictureParameterBufferHEVC *pic_param;
5482 VAEncSliceParameterBufferHEVC *slice_param;
5483
5484 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5485 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
5486
5487 if (slice_param->slice_type != HEVC_SLICE_I)
5488 gen10_hevc_enc_hcp_set_ref_idx_lists(ctx, batch, pic_param, slice_param);
5489 }
5490
5491 static void
gen10_hevc_hcp_slice_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct intel_batchbuffer * batch,int slice_index)5492 gen10_hevc_hcp_slice_state(VADriverContextP ctx,
5493 struct encode_state *encode_state,
5494 struct intel_encoder_context *encoder_context,
5495 struct intel_batchbuffer *batch,
5496 int slice_index)
5497 {
5498 struct gen10_hevc_enc_context *pak_context;
5499 struct gen10_hevc_enc_state *hevc_state;
5500 VAEncPictureParameterBufferHEVC *pic_param;
5501 VAEncSliceParameterBufferHEVC *slice_param;
5502 gen10_hcp_slice_state_param param;
5503 int last_slice, slice_qp, qp_idx;
5504
5505 pak_context = (struct gen10_hevc_enc_context *) encoder_context->mfc_context;
5506 hevc_state = (struct gen10_hevc_enc_state *)pak_context->enc_priv_state;
5507
5508 pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
5509 slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
5510
5511 memset(¶m, 0, sizeof(param));
5512
5513 param.dw1.slice_start_ctu_x = slice_param->slice_segment_address %
5514 pak_context->frame_info.width_in_lcu;
5515 param.dw1.slice_start_ctu_y = slice_param->slice_segment_address /
5516 pak_context->frame_info.width_in_lcu;
5517
5518 if (slice_index == encode_state->num_slice_params_ext - 1) {
5519 param.dw2.next_slice_start_ctu_x = 0;
5520 param.dw2.next_slice_start_ctu_y = 0;
5521
5522 last_slice = 1;
5523 } else {
5524 last_slice = slice_param->slice_segment_address + slice_param->num_ctu_in_slice;
5525
5526 param.dw2.next_slice_start_ctu_x = last_slice %
5527 pak_context->frame_info.width_in_lcu;
5528 param.dw2.next_slice_start_ctu_y = last_slice /
5529 pak_context->frame_info.width_in_lcu;
5530
5531 last_slice = 0;
5532 }
5533
5534 param.dw3.slice_type = slice_param->slice_type;
5535 param.dw3.last_slice_flag = last_slice;
5536 param.dw3.slice_temporal_mvp_enabled = slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag;
5537 param.dw3.slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
5538 param.dw3.slice_cb_qp_offset = slice_param->slice_cb_qp_offset;
5539 param.dw3.slice_cr_qp_offset = slice_param->slice_cr_qp_offset;
5540
5541 param.dw4.deblocking_filter_disable = slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag;
5542 param.dw4.tc_offset_div2 = slice_param->slice_tc_offset_div2 & 0xf;
5543 param.dw4.beta_offset_div2 = slice_param->slice_beta_offset_div2 & 0xf;
5544 param.dw4.sao_chroma_flag = slice_param->slice_fields.bits.slice_sao_chroma_flag;
5545 param.dw4.sao_luma_flag = slice_param->slice_fields.bits.slice_sao_luma_flag;
5546 param.dw4.mvd_l1_zero_flag = slice_param->slice_fields.bits.mvd_l1_zero_flag;
5547 param.dw4.is_low_delay = slice_param->slice_type != HEVC_SLICE_B ? 1 : hevc_state->low_delay;
5548 param.dw4.collocated_from_l0_flag = slice_param->slice_fields.bits.collocated_from_l0_flag;
5549 param.dw4.chroma_log2_weight_denom = slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom;
5550 param.dw4.luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
5551 param.dw4.cabac_init_flag = slice_param->slice_fields.bits.cabac_init_flag;
5552 param.dw4.max_merge_idx = slice_param->max_num_merge_cand - 1;
5553
5554 if (pic_param->collocated_ref_pic_index != 0xFF)
5555 param.dw4.collocated_ref_idx = pic_param->collocated_ref_pic_index;
5556
5557 param.dw6.round_intra = 10;
5558 param.dw6.round_inter = 4;
5559
5560 param.dw7.cabac_zero_word_insertion_enabled = 1;
5561 param.dw7.emulation_byte_insert_enabled = 1;
5562 param.dw7.slice_data_enabled = 1;
5563 param.dw7.header_insertion_enabled = 1;
5564
5565 if (pic_param->pic_fields.bits.transform_skip_enabled_flag) {
5566 slice_qp = pak_context->frame_info.slice_qp;
5567
5568 if (slice_qp <= 22)
5569 qp_idx = 0;
5570 else if (slice_qp <= 27)
5571 qp_idx = 1;
5572 else if (slice_qp <= 32)
5573 qp_idx = 2;
5574 else
5575 qp_idx = 3;
5576
5577 param.dw9.transform_skip_lambda = gen10_hevc_tr_lambda_coeffs[slice_qp];
5578
5579 if (slice_param->slice_type == HEVC_SLICE_I) {
5580 param.dw10.transform_skip_zero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][0][0];
5581 param.dw10.transform_skip_nonezero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][1][0];
5582 param.dw10.transform_skip_zero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][0][1] + 32;
5583 param.dw10.transform_skip_nonezero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][0][0][1][1] + 32;
5584 } else {
5585 param.dw10.transform_skip_zero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][0][0];
5586 param.dw10.transform_skip_nonezero_factor0 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][1][0];
5587 param.dw10.transform_skip_zero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][0][1] + 32;
5588 param.dw10.transform_skip_nonezero_factor1 = gen10_hevc_tr_skip_coeffs[qp_idx][1][0][1][1] + 32;
5589 }
5590 }
5591
5592 gen10_hcp_slice_state(ctx, batch, ¶m);
5593 }
5594
/* Emit the slice-level PAK commands for every slice in the frame:
 * reference index lists, weighted prediction offsets, HCP_SLICE_STATE,
 * packed/slice headers, and finally a second-level batch-buffer start
 * that points at the MB code produced by the VME stage.
 */
static void
gen10_hevc_pak_slice_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    VAEncSliceParameterBufferHEVC *slice_param;
    int slice_index;
    int i, j;

    /* slice_index counts slices across all parameter buffers; it is the
     * value passed down to the per-slice helpers. */
    slice_index = 0;
    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        for (j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {
            /* NOTE(review): this indexes slice_params_ext by slice_index
             * rather than [i] with element offset j — correct only if every
             * buffer holds a single element (num_elements == 1); verify
             * against how the helpers below index the same array. */
            slice_param = (VAEncSliceParameterBufferHEVC *)(encode_state->slice_params_ext[slice_index]->buffer);

            gen10_hevc_ref_idx_lists(ctx, encode_state, encoder_context, batch, slice_index);

            gen10_hevc_hcp_weightoffset(ctx, encode_state, encoder_context,
                                        batch, slice_index);

            gen10_hevc_hcp_slice_state(ctx, encode_state, encoder_context,
                                       batch, slice_index);

            /* Packed headers (VPS/SPS/PPS/SEI) go out once, before the
             * first slice only. */
            if (slice_index == 0)
                gen10_hevc_enc_insert_packed_header(ctx, encode_state, encoder_context,
                                                    batch);

            gen10_hevc_enc_insert_slice_header(ctx, encode_state, encoder_context,
                                               batch, slice_index);


            /* Chain into the MB-code surface at this slice's offset
             * (32 bytes per CTU address) as a second-level batch. */
            memset(&second_level_batch, 0, sizeof(second_level_batch));
            second_level_batch.offset = 32 * slice_param->slice_segment_address;
            second_level_batch.is_second_level = 1;
            second_level_batch.bo = pak_context->res_mb_code_surface.bo;

            gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

            slice_index++;
        }
    }
}
5639
5640 static void
gen10_hevc_read_mfc_status(VADriverContextP ctx,struct intel_encoder_context * encoder_context)5641 gen10_hevc_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5642 {
5643 struct intel_batchbuffer *batch = encoder_context->base.batch;
5644 struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
5645 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5646 struct gpe_mi_store_data_imm_parameter mi_store_data_imm_param;
5647 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5648 struct gen10_hevc_enc_status_buffer *status_buffer;
5649 struct gen10_hevc_enc_state *hevc_state;
5650 int write_pak_idx;
5651
5652 hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
5653 status_buffer = &pak_context->status_buffer;
5654
5655 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5656 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5657
5658 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5659 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5660 mi_store_reg_mem_param.offset = status_buffer->status_bytes_per_frame_offset;
5661 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bytes_per_frame_offset;
5662 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5663
5664 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5665 mi_store_reg_mem_param.offset = status_buffer->status_image_mask_offset;
5666 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_image_mask_offset;
5667 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5668
5669 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5670 mi_store_reg_mem_param.offset = status_buffer->status_image_ctrl_offset;
5671 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5672 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5673
5674 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5675 mi_store_reg_mem_param.offset = status_buffer->status_qp_status_offset;
5676 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_qp_status_offset;
5677 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5678
5679 mi_store_reg_mem_param.bo = status_buffer->gpe_res.bo;
5680 mi_store_reg_mem_param.offset = status_buffer->status_bs_se_bitcount_offset;
5681 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bs_se_bitcount_offset;
5682 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5683
5684 write_pak_idx = hevc_state->curr_pak_stat_index;
5685 mi_store_reg_mem_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5686 mi_store_reg_mem_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_bs_frame);
5687 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bytes_per_frame_offset;
5688 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5689
5690 mi_store_reg_mem_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5691 mi_store_reg_mem_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_bs_frame_noheader);
5692 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_bs_frame_no_header_offset;
5693 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5694
5695 mi_store_reg_mem_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5696 mi_store_reg_mem_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_image_status_control);
5697 mi_store_reg_mem_param.mmio_offset = status_buffer->mmio_image_ctrl_offset;
5698 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5699
5700 memset(&mi_store_data_imm_param, 0, sizeof(mi_store_data_imm_param));
5701 mi_store_data_imm_param.bo = pak_context->res_brc_pak_statistics_buffer[write_pak_idx].bo;
5702 mi_store_data_imm_param.offset = offsetof(gen10_hevc_pak_stats_info, hcp_image_status_ctl_last_pass);
5703 mi_store_data_imm_param.dw0 = hevc_state->curr_pak_idx;
5704 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_param);
5705
5706 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5707 }
5708
/* BRC preparation hook for the PAK stage: intentionally a no-op on the
 * gen10 HEVC path (BRC work is handled elsewhere). */
static void
gen10_hevc_pak_brc_prepare(struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    (void)encode_state;
    (void)encoder_context;
}
5715
/* PAK-context destructor: intentionally empty — the PAK context is the
 * same object as the VME context (see gen10_hevc_pak_context_init), and
 * the VME destructor owns the teardown. */
static void
gen10_hevc_pak_context_destroy(void *context)
{
    (void)context;
}
5721
/* Run the multi-pass PAK pipeline for one frame on the BSD (video) ring.
 *
 * Each pass emits picture-level and slice-level commands plus a status
 * readback. Pass 0 clears the image-status-control register; later passes
 * (when BRC is enabled) conditionally terminate the batch if the previous
 * pass already met the BRC target, otherwise reload the control register
 * from the status buffer. An optional extra pass re-encodes the frame when
 * a second SAO pass is needed.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_CONTEXT if the
 * PAK context or its private state is missing.
 */
static VAStatus
gen10_hevc_pak_pipeline(VADriverContextP ctx,
                        VAProfile profile,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen10_hevc_enc_context *pak_context = encoder_context->mfc_context;
    struct gen10_hevc_enc_status_buffer *status_buffer;
    struct gen10_hevc_enc_state *hevc_state;
    struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
    struct gpe_mi_load_register_mem_parameter mi_load_reg_mem;
    struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
    int i;

    if (!pak_context || !pak_context->enc_priv_state)
        return VA_STATUS_ERROR_INVALID_CONTEXT;

    hevc_state = (struct gen10_hevc_enc_state *) pak_context->enc_priv_state;
    status_buffer = &pak_context->status_buffer;

    /* Pin the batch to BSD ring 0 when two BSD rings exist. */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);

    intel_batchbuffer_emit_mi_flush(batch);

    for (hevc_state->curr_pak_idx = 0;
         hevc_state->curr_pak_idx < hevc_state->num_pak_passes;
         hevc_state->curr_pak_idx++) {
        if (hevc_state->curr_pak_idx == 0) {
            /* First pass: clear the image status control register. */
            memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
            mi_load_reg_imm.mmio_offset = status_buffer->mmio_image_ctrl_offset;
            mi_load_reg_imm.data = 0;
            gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
        } else if (hevc_state->brc.brc_enabled) {
            /* Repass: stop the batch early if the image-status mask says
             * the previous pass already satisfied the BRC constraints. */
            memset(&mi_cond_end, 0, sizeof(mi_cond_end));
            mi_cond_end.offset = status_buffer->status_image_mask_offset;
            mi_cond_end.bo = status_buffer->gpe_res.bo;
            mi_cond_end.compare_data = 0;
            gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
                                                     &mi_cond_end);

            /* Otherwise reload the image status control captured by the
             * previous pass's status readback. */
            memset(&mi_load_reg_mem, 0, sizeof(mi_load_reg_mem));
            mi_load_reg_mem.mmio_offset = status_buffer->mmio_image_ctrl_offset;
            mi_load_reg_mem.bo = status_buffer->gpe_res.bo;
            mi_load_reg_mem.offset = status_buffer->status_image_ctrl_offset;
            gen8_gpe_mi_load_register_mem(ctx, batch, &mi_load_reg_mem);
        }

        gen10_hevc_pak_picture_level(ctx, encode_state, encoder_context);
        gen10_hevc_pak_slice_level(ctx, encode_state, encoder_context);
        gen10_hevc_read_mfc_status(ctx, encoder_context);
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    /* Optional extra encode pass when a second SAO pass is required. */
    if (hevc_state->sao_2nd_needed) {
        if (i965->intel.has_bsd2)
            intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
        else
            intel_batchbuffer_start_atomic_bcs(batch, 0x1000);

        intel_batchbuffer_emit_mi_flush(batch);

        /* Pad with 64 MI_NOOPs before re-running the picture/slice level
         * commands. NOTE(review): presumably a hardware workaround —
         * confirm against the PRM / original commit. */
        BEGIN_BCS_BATCH(batch, 64);
        for (i = 0; i < 64; i++)
            OUT_BCS_BATCH(batch, MI_NOOP);

        ADVANCE_BCS_BATCH(batch);
        gen10_hevc_pak_picture_level(ctx, encode_state, encoder_context);
        gen10_hevc_pak_slice_level(ctx, encode_state, encoder_context);
        gen10_hevc_read_mfc_status(ctx, encoder_context);
        intel_batchbuffer_end_atomic(batch);
        intel_batchbuffer_flush(batch);
    }

    /* Flip the ping-pong index for the PAK statistics buffer. */
    hevc_state->curr_pak_stat_index ^= 1;

    hevc_state->frame_number++;

    return VA_STATUS_SUCCESS;
}
5808
5809 static void
gen10_hevc_vme_context_destroy(void * context)5810 gen10_hevc_vme_context_destroy(void *context)
5811 {
5812 struct gen10_hevc_enc_context *vme_context = context;
5813 int i;
5814
5815 if (!vme_context)
5816 return;
5817
5818 gen10_hevc_free_enc_resources(context);
5819
5820 gen10_hevc_enc_free_common_resource(&vme_context->common_res);
5821
5822 gen8_gpe_context_destroy(&vme_context->scaling_context.gpe_context);
5823
5824 gen8_gpe_context_destroy(&vme_context->me_context.gpe_context);
5825
5826 for (i = 0; i < GEN10_HEVC_BRC_NUM; i++)
5827 gen8_gpe_context_destroy(&vme_context->brc_context.gpe_contexts[i]);
5828
5829 for (i = 0; i < GEN10_HEVC_MBENC_NUM; i++)
5830 gen8_gpe_context_destroy(&vme_context->mbenc_context.gpe_contexts[i]);
5831
5832 if (vme_context->enc_priv_state)
5833 free(vme_context->enc_priv_state);
5834
5835 free(vme_context);
5836 }
5837
5838 Bool
gen10_hevc_vme_context_init(VADriverContextP ctx,struct intel_encoder_context * encoder_context)5839 gen10_hevc_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5840 {
5841 struct gen10_hevc_enc_context *vme_context = NULL;
5842 struct gen10_hevc_enc_state *hevc_state = NULL;
5843
5844 vme_context = calloc(1, sizeof(struct gen10_hevc_enc_context));
5845 hevc_state = calloc(1, sizeof(struct gen10_hevc_enc_state));
5846
5847 if (!vme_context || !hevc_state) {
5848 if (vme_context)
5849 free(vme_context);
5850
5851 if (hevc_state)
5852 free(hevc_state);
5853
5854 return false;
5855 }
5856
5857 vme_context->enc_priv_state = hevc_state;
5858
5859 gen10_hevc_vme_init_kernels_context(ctx, encoder_context, vme_context);
5860
5861 hevc_state->use_hw_scoreboard = 1;
5862 hevc_state->use_hw_non_stalling_scoreboard = 0;
5863 hevc_state->num_regions_in_slice = 1;
5864 hevc_state->rdoq_enabled = 1;
5865
5866 encoder_context->vme_context = vme_context;
5867 encoder_context->vme_pipeline = gen10_hevc_vme_pipeline;
5868 encoder_context->vme_context_destroy = gen10_hevc_vme_context_destroy;
5869
5870 return true;
5871 }
5872
5873 static VAStatus
gen10_hevc_get_coded_status(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct i965_coded_buffer_segment * coded_buf_seg)5874 gen10_hevc_get_coded_status(VADriverContextP ctx,
5875 struct intel_encoder_context *encoder_context,
5876 struct i965_coded_buffer_segment *coded_buf_seg)
5877 {
5878 struct gen10_hevc_enc_status *enc_status;
5879
5880 if (!encoder_context || !coded_buf_seg)
5881 return VA_STATUS_ERROR_INVALID_BUFFER;
5882
5883 enc_status = (struct gen10_hevc_enc_status *)coded_buf_seg->codec_private_data;
5884 coded_buf_seg->base.size = enc_status->bytes_per_frame;
5885
5886 return VA_STATUS_SUCCESS;
5887 }
5888
5889 Bool
gen10_hevc_pak_context_init(VADriverContextP ctx,struct intel_encoder_context * encoder_context)5890 gen10_hevc_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5891 {
5892 struct gen10_hevc_enc_context *pak_context = encoder_context->vme_context;
5893
5894 if (!pak_context)
5895 return false;
5896
5897 encoder_context->mfc_context = pak_context;
5898 encoder_context->mfc_context_destroy = gen10_hevc_pak_context_destroy;
5899 encoder_context->mfc_pipeline = gen10_hevc_pak_pipeline;
5900 encoder_context->mfc_brc_prepare = gen10_hevc_pak_brc_prepare;
5901 encoder_context->get_status = gen10_hevc_get_coded_status;
5902
5903 return true;
5904 }
5905