1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
14 * of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Qu Pengfei <Pengfei.Qu@intel.com>
26 *
27 */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <math.h>
33 #include <assert.h>
34
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
41 #include "gen9_mfc.h"
42 #include "gen6_vme.h"
43 #include "intel_media.h"
44
/* HRD (hypothetical reference decoder) buffer status codes produced by the
 * bit-rate-control logic; the *_WITH_*_QP variants indicate the violation
 * persists even at the QP limit. */
typedef enum _gen6_brc_status {
    BRC_NO_HRD_VIOLATION = 0,
    BRC_UNDERFLOW = 1,
    BRC_OVERFLOW = 2,
    BRC_UNDERFLOW_WITH_MAX_QP = 3,
    BRC_OVERFLOW_WITH_MIN_QP = 4,
} gen6_brc_status;
52
/* BRC define */

/* Clamp x into [min, max] in place.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement
 * that is safe inside unbraced if/else bodies, and every use of the
 * arguments is parenthesized against operator-precedence surprises.
 * Note: the arguments are evaluated more than once, so do not pass
 * expressions with side effects. */
#define BRC_CLIP(x, min, max)                                               \
    do {                                                                    \
        (x) = (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)));      \
    } while (0)

#define BRC_P_B_QP_DIFF 4
#define BRC_I_P_QP_DIFF 2
#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)

#define BRC_PWEIGHT 0.6  /* weight if P slice with comparison to I slice */
#define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */

#define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
#define BRC_CY 0.1 /* weight for */
#define BRC_CX_UNDERFLOW 5.
#define BRC_CX_OVERFLOW -4.

#define BRC_PI_0_5 1.5707963267948966192313216916398

/* intel buffer write */
/* Drop any previous bo reference held by gen_buffer and allocate a fresh
 * 4 KiB-aligned buffer of the given size/name.  The expansion no longer
 * carries a trailing semicolon, so callers terminate the macro like a
 * normal statement and the do/while(0) idiom actually protects unbraced
 * if/else usage. */
#define ALLOC_ENCODER_BUFFER(gen_buffer, string, size) do {             \
        dri_bo_unreference(gen_buffer->bo);                             \
        gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr,               \
                                      string,                          \
                                      size,                             \
                                      0x1000);                          \
        assert(gen_buffer->bo);                                         \
    } while (0)
82
83
/* Emit a 2-DW graphics address for buf_bo (a 64-bit relocation when the
 * buffer exists, two zero DWs otherwise), optionally followed by one DW of
 * memory-attribute (MOCS) state when 'ma' is non-zero.  'is_target' marks
 * the buffer as GPU-writable via the write domain of the relocation.
 * Relies on 'batch' and 'i965' being in scope at the call site. */
#define OUT_BUFFER_X(buf_bo, is_target, ma)  do {                       \
        if (buf_bo) {                                                   \
            OUT_BCS_RELOC64(batch,                                      \
                            buf_bo,                                     \
                            I915_GEM_DOMAIN_INSTRUCTION,                \
                            is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0, \
                            0);                                         \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
        if (ma)                                                         \
            OUT_BCS_BATCH(batch, i965->intel.mocs_state);               \
    } while (0)

#define OUT_BUFFER_MA_TARGET(buf_bo)       OUT_BUFFER_X(buf_bo, 1, 1)
#define OUT_BUFFER_MA_REFERENCE(buf_bo)    OUT_BUFFER_X(buf_bo, 0, 1)
#define OUT_BUFFER_NMA_TARGET(buf_bo)      OUT_BUFFER_X(buf_bo, 1, 0)
#define OUT_BUFFER_NMA_REFERENCE(buf_bo)   OUT_BUFFER_X(buf_bo, 0, 0)


#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
/* The 'index' argument is parenthesized so that expression arguments
 * (e.g. SURFACE_STATE_OFFSET(base + i)) expand correctly. */
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define HCP_SOFTWARE_SKYLAKE    1

#define NUM_HCPE_KERNEL 2

/* VME inter mb-mode / sub-mb-shape encodings — NOTE(review): meanings
 * inferred from the macro names; confirm against the VME kernel interface. */
#define INTER_MODE_MASK         0x03
#define INTER_8X8               0x03
#define INTER_16X8              0x01
#define INTER_8X16              0x02
#define SUBMB_SHAPE_MASK        0x00FF00

#define INTER_MV8               (4 << 20)
#define INTER_MV32              (6 << 20)
122
123 /* HEVC */
124
125 /* utils */
126 static void
hevc_gen_default_iq_matrix_encoder(VAQMatrixBufferHEVC * iq_matrix)127 hevc_gen_default_iq_matrix_encoder(VAQMatrixBufferHEVC *iq_matrix)
128 {
129 /* Flat_4x4_16 */
130 memset(&iq_matrix->scaling_lists_4x4, 16, sizeof(iq_matrix->scaling_lists_4x4));
131
132 /* Flat_8x8_16 */
133 memset(&iq_matrix->scaling_lists_8x8, 16, sizeof(iq_matrix->scaling_lists_8x8));
134
135 /* Flat_16x16_16 */
136 memset(&iq_matrix->scaling_lists_16x16, 16, sizeof(iq_matrix->scaling_lists_16x16));
137
138 /* Flat_32x32_16 */
139 memset(&iq_matrix->scaling_lists_32x32, 16, sizeof(iq_matrix->scaling_lists_32x32));
140
141 /* Flat_16x16_dc_16 */
142 memset(&iq_matrix->scaling_list_dc_16x16, 16, sizeof(iq_matrix->scaling_list_dc_16x16));
143
144 /* Flat_32x32_dc_16 */
145 memset(&iq_matrix->scaling_list_dc_32x32, 16, sizeof(iq_matrix->scaling_list_dc_32x32));
146 }
147
148 /* HEVC picture and slice state related */
149
150 static void
gen9_hcpe_pipe_mode_select(VADriverContextP ctx,int standard_select,struct intel_encoder_context * encoder_context)151 gen9_hcpe_pipe_mode_select(VADriverContextP ctx,
152 int standard_select,
153 struct intel_encoder_context *encoder_context)
154 {
155 struct i965_driver_data *i965 = i965_driver_data(ctx);
156 struct intel_batchbuffer *batch = encoder_context->base.batch;
157
158 assert(standard_select == HCP_CODEC_HEVC);
159
160 if (IS_KBL(i965->intel.device_info) ||
161 IS_GLK(i965->intel.device_info)) {
162 BEGIN_BCS_BATCH(batch, 6);
163
164 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
165 } else {
166 BEGIN_BCS_BATCH(batch, 4);
167
168 OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
169 }
170
171 OUT_BCS_BATCH(batch,
172 (standard_select << 5) |
173 (0 << 3) | /* disable Pic Status / Error Report */
174 HCP_CODEC_SELECT_ENCODE);
175 OUT_BCS_BATCH(batch, 0);
176 OUT_BCS_BATCH(batch, 0);
177
178 if (IS_KBL(i965->intel.device_info) ||
179 IS_GLK(i965->intel.device_info)) {
180 OUT_BCS_BATCH(batch, 0);
181 OUT_BCS_BATCH(batch, 0);
182 }
183
184 ADVANCE_BCS_BATCH(batch);
185 }
186
187 static void
gen9_hcpe_surface_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)188 gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state,
189 struct intel_encoder_context *encoder_context)
190 {
191 struct intel_batchbuffer *batch = encoder_context->base.batch;
192 struct object_surface *obj_surface = encode_state->reconstructed_object;
193 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
194 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
195 unsigned int surface_format = SURFACE_FORMAT_PLANAR_420_8;
196
197 /* to do */
198 unsigned int y_cb_offset;
199
200 assert(obj_surface);
201
202 if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
203 || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0)) {
204 assert(obj_surface->fourcc == VA_FOURCC_P010);
205 surface_format = SURFACE_FORMAT_P010;
206 }
207
208 y_cb_offset = obj_surface->y_cb_offset;
209
210 BEGIN_BCS_BATCH(batch, 3);
211 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
212 OUT_BCS_BATCH(batch,
213 (1 << 28) | /* surface id */
214 (mfc_context->surface_state.w_pitch - 1)); /* pitch - 1 */
215 OUT_BCS_BATCH(batch,
216 surface_format << 28 |
217 y_cb_offset);
218 ADVANCE_BCS_BATCH(batch);
219
220 BEGIN_BCS_BATCH(batch, 3);
221 OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
222 OUT_BCS_BATCH(batch,
223 (0 << 28) | /* surface id */
224 (mfc_context->surface_state.w_pitch - 1)); /* pitch - 1 */
225 OUT_BCS_BATCH(batch,
226 surface_format << 28 |
227 y_cb_offset);
228 ADVANCE_BCS_BATCH(batch);
229 }
230
/*
 * Program HCP_PIPE_BUF_ADDR_STATE: hands the HCP engine every buffer
 * address needed to encode one frame — the reconstructed output surface,
 * the deblocking/metadata/SAO row-store scratch buffers, the per-frame
 * motion-vector temporal buffer, up to 8 reference surfaces, the
 * uncompressed source picture, and the collocated-MV buffers.
 * KBL/GLK use a longer 104-DW layout; other gen9 parts use 95 DWs.
 * The DW numbers in the comments follow the 95-DW layout.
 */
static void
gen9_hcpe_pipe_buf_addr_state(VADriverContextP ctx, struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    GenHevcSurface *hcpe_hevc_surface;
    dri_bo *bo;
    unsigned int i;

    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 104);

        OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
    } else {
        BEGIN_BCS_BATCH(batch, 95);

        OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));
    }

    /* The reconstructed surface must exist and carry the HEVC private data
     * (motion-vector temporal bo) set up earlier in the pipeline. */
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface && obj_surface->bo);
    hcpe_hevc_surface = obj_surface->private_data;
    assert(hcpe_hevc_surface && hcpe_hevc_surface->motion_vector_temporal_bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_line_buffer.bo);              /* DW 22..24 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_line_buffer.bo);         /* DW 25..27 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_column_buffer.bo);       /* DW 28..30 */
    OUT_BUFFER_MA_TARGET(hcpe_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* here only max 8 reference allowed */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        bo = mfc_context->reference_surfaces[i].bo;

        if (bo) {
            OUT_BUFFER_NMA_REFERENCE(bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0); /* DW 53, memory address attributes */

    OUT_BUFFER_MA_TARGET(mfc_context->uncompressed_picture_source.bo); /* DW 54..56, uncompressed picture source */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, ignore */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, ignore */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, ignore */

    /* Collocated MV temporal buffers; the last array slot is deliberately
     * not emitted here (loop runs to size - 1). */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->current_collocated_mv_temporal_buffer) - 1; i++) {
        bo = mfc_context->current_collocated_mv_temporal_buffer[i].bo;

        if (bo) {
            OUT_BUFFER_NMA_REFERENCE(bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0); /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(NULL); /* DW 83..85, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 86..88, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 89..91, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 92..94, ignore for HEVC */

    /* Extra zero DWs present only in the longer KBL/GLK command layout. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        for (i = 0; i < 9; i++)
            OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
311
/*
 * Program HCP_IND_OBJ_BASE_ADDR_STATE (14 DWs): points the HCP pipe at the
 * indirect CU record buffer and at the PAK-BSE output (compressed
 * bitstream) buffer, whose valid range is [offset, end_offset].
 */
static void
gen9_hcpe_ind_obj_base_addr_state(VADriverContextP ctx,
                                  struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;

    /* to do */
    BEGIN_BCS_BATCH(batch, 14);

    OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
    OUT_BUFFER_MA_REFERENCE(NULL); /* DW 1..3 ignore for encoder */
    OUT_BUFFER_NMA_REFERENCE(NULL); /* DW 4..5, Upper Bound */
    OUT_BUFFER_MA_TARGET(mfc_context->hcp_indirect_cu_object.bo); /* DW 6..8, CU */
    /* DW 9..11, PAK-BSE start address (relocation + MOCS) */
    OUT_BCS_RELOC64(batch,
                    mfc_context->hcp_indirect_pak_bse_object.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    mfc_context->hcp_indirect_pak_bse_object.offset);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);
    /* DW 12..13, PAK-BSE upper bound */
    OUT_BCS_RELOC64(batch,
                    mfc_context->hcp_indirect_pak_bse_object.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    mfc_context->hcp_indirect_pak_bse_object.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
340
341 static void
gen9_hcpe_fqm_state(VADriverContextP ctx,int size_id,int color_component,int pred_type,int dc,unsigned int * fqm,int fqm_length,struct intel_encoder_context * encoder_context)342 gen9_hcpe_fqm_state(VADriverContextP ctx,
343 int size_id,
344 int color_component,
345 int pred_type,
346 int dc,
347 unsigned int *fqm,
348 int fqm_length,
349 struct intel_encoder_context *encoder_context)
350 {
351 struct intel_batchbuffer *batch = encoder_context->base.batch;
352 unsigned int fqm_buffer[32];
353
354 assert(fqm_length <= 32);
355 assert(sizeof(*fqm) == 4);
356 memset(fqm_buffer, 0, sizeof(fqm_buffer));
357 memcpy(fqm_buffer, fqm, fqm_length * 4);
358
359 BEGIN_BCS_BATCH(batch, 34);
360
361 OUT_BCS_BATCH(batch, HCP_FQM_STATE | (34 - 2));
362 OUT_BCS_BATCH(batch,
363 dc << 16 |
364 color_component << 3 |
365 size_id << 1 |
366 pred_type);
367 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
368
369 ADVANCE_BCS_BATCH(batch);
370 }
371
372
373 static void
gen9_hcpe_hevc_fqm_state(VADriverContextP ctx,struct intel_encoder_context * encoder_context)374 gen9_hcpe_hevc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
375 {
376 unsigned int qm[32] = {
377 0x10001000, 0x10001000, 0x10001000, 0x10001000,
378 0x10001000, 0x10001000, 0x10001000, 0x10001000,
379 0x10001000, 0x10001000, 0x10001000, 0x10001000,
380 0x10001000, 0x10001000, 0x10001000, 0x10001000,
381 0x10001000, 0x10001000, 0x10001000, 0x10001000,
382 0x10001000, 0x10001000, 0x10001000, 0x10001000,
383 0x10001000, 0x10001000, 0x10001000, 0x10001000,
384 0x10001000, 0x10001000, 0x10001000, 0x10001000
385 };
386
387 gen9_hcpe_fqm_state(ctx,
388 0, 0, 0, 0,
389 qm, 8,
390 encoder_context);
391 gen9_hcpe_fqm_state(ctx,
392 0, 0, 1, 0,
393 qm, 8,
394 encoder_context);
395 gen9_hcpe_fqm_state(ctx,
396 1, 0, 0, 0,
397 qm, 32,
398 encoder_context);
399 gen9_hcpe_fqm_state(ctx,
400 1, 0, 1, 0,
401 qm, 32,
402 encoder_context);
403 gen9_hcpe_fqm_state(ctx,
404 2, 0, 0, 0x1000,
405 qm, 0,
406 encoder_context);
407 gen9_hcpe_fqm_state(ctx,
408 2, 0, 1, 0x1000,
409 qm, 0,
410 encoder_context);
411 gen9_hcpe_fqm_state(ctx,
412 3, 0, 0, 0x1000,
413 qm, 0,
414 encoder_context);
415 gen9_hcpe_fqm_state(ctx,
416 3, 0, 1, 0x1000,
417 qm, 0,
418 encoder_context);
419 }
420
421 static void
gen9_hcpe_qm_state(VADriverContextP ctx,int size_id,int color_component,int pred_type,int dc,unsigned int * qm,int qm_length,struct intel_encoder_context * encoder_context)422 gen9_hcpe_qm_state(VADriverContextP ctx,
423 int size_id,
424 int color_component,
425 int pred_type,
426 int dc,
427 unsigned int *qm,
428 int qm_length,
429 struct intel_encoder_context *encoder_context)
430 {
431 struct intel_batchbuffer *batch = encoder_context->base.batch;
432 unsigned int qm_buffer[16];
433
434 assert(qm_length <= 16);
435 assert(sizeof(*qm) == 4);
436 memset(qm_buffer, 0, sizeof(qm_buffer));
437 memcpy(qm_buffer, qm, qm_length * 4);
438
439 BEGIN_BCS_BATCH(batch, 18);
440
441 OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
442 OUT_BCS_BATCH(batch,
443 dc << 5 |
444 color_component << 3 |
445 size_id << 1 |
446 pred_type);
447 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
448
449 ADVANCE_BCS_BATCH(batch);
450 }
451
452 static void
gen9_hcpe_hevc_qm_state(VADriverContextP ctx,struct intel_encoder_context * encoder_context)453 gen9_hcpe_hevc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
454 {
455
456 int i;
457
458 unsigned int qm[16] = {
459 0x10101010, 0x10101010, 0x10101010, 0x10101010,
460 0x10101010, 0x10101010, 0x10101010, 0x10101010,
461 0x10101010, 0x10101010, 0x10101010, 0x10101010,
462 0x10101010, 0x10101010, 0x10101010, 0x10101010
463 };
464
465 for (i = 0; i < 6; i++) {
466 gen9_hcpe_qm_state(ctx,
467 0, i % 3, i / 3, 0,
468 qm, 4,
469 encoder_context);
470 }
471
472 for (i = 0; i < 6; i++) {
473 gen9_hcpe_qm_state(ctx,
474 1, i % 3, i / 3, 0,
475 qm, 16,
476 encoder_context);
477 }
478
479 for (i = 0; i < 6; i++) {
480 gen9_hcpe_qm_state(ctx,
481 2, i % 3, i / 3, 16,
482 qm, 16,
483 encoder_context);
484 }
485
486 for (i = 0; i < 2; i++) {
487 gen9_hcpe_qm_state(ctx,
488 3, 0, i % 2, 16,
489 qm, 16,
490 encoder_context);
491 }
492 }
493
/*
 * Program HCP_PIC_STATE for HEVC encoding: picture geometry, coding-block
 * and transform-size limits, tool-enable bits, and the PAK rate-control
 * clamps (max LCU bits, frame bitrate min/max, delta-QP ranges).
 * KBL/GLK use the longer 31-DW form; other gen9 parts use 19 DWs.
 */
static void
gen9_hcpe_hevc_pic_state(VADriverContextP ctx, struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferHEVC *pic_param ;
    VAEncSequenceParameterBufferHEVC *seq_param ;

    int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
    int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
    /*
     * 7.4.3.1
     *
     * When not present, the value of loop_filter_across_tiles_enabled_flag
     * is inferred to be equal to 1.
     *
     * NOTE(review): despite the spec note above, this variable is
     * initialized to 0 and forced back to 0 below for the encoder path.
     */
    int loop_filter_across_tiles_enabled_flag = 0;
    pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;

    /* CTB geometry derived from the sequence parameters */
    int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    /* raw bits per CTB assuming 8 bpp, 4:2:0 (3/2 samples per pixel) */
    double rawctubits = 8 * 3 * ctb_size * ctb_size / 2.0;
    int maxctubits = (int)(5 * rawctubits / 3) ;
    double bitrate = (double)encoder_context->brc.bits_per_second[0];
    double framebitrate = bitrate / 32 / 8; //32 byte unit
    int minframebitrate = 0;//(int) (framebitrate * 3 / 10);
    int maxframebitrate = (int)(framebitrate * 10 / 10);
    int maxdeltaframebitrate = 0x1c5c; //(int) (framebitrate * 1/ 10);
    int mindeltaframebitrate = 0; //(int) (framebitrate * 1/ 10);
    int minframesize = 0;//(int)(rawframebits * 1/50);

    if (seq_param->seq_fields.bits.pcm_enabled_flag) {
        max_pcm_size_minus3 = seq_param->log2_max_pcm_luma_coding_block_size_minus3;
        min_pcm_size_minus3 = seq_param->log2_min_pcm_luma_coding_block_size_minus3;
        pcm_sample_bit_depth_luma_minus1 = (seq_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
        pcm_sample_bit_depth_chroma_minus1 = (seq_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
    } else {
        /* PCM disabled: clamp the reported max PCM size to the CTB size, capped at 2 */
        max_pcm_size_minus3 = MIN(seq_param->log2_min_luma_coding_block_size_minus3 + seq_param->log2_diff_max_min_luma_coding_block_size, 2);
    }

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;

    /* set zero for encoder */
    loop_filter_across_tiles_enabled_flag = 0;

    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 31);

        OUT_BCS_BATCH(batch, HCP_PIC_STATE | (31 - 2));
    } else {
        BEGIN_BCS_BATCH(batch, 19);

        OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));
    }

    /* DW 1, frame size in minimum coding blocks */
    OUT_BCS_BATCH(batch,
                  mfc_context->pic_size.picture_height_in_min_cb_minus1 << 16 |
                  0 << 14 |
                  mfc_context->pic_size.picture_width_in_min_cb_minus1);
    /* DW 2, PCM / transform / coding block size limits */
    OUT_BCS_BATCH(batch,
                  max_pcm_size_minus3 << 10 |
                  min_pcm_size_minus3 << 8 |
                  (seq_param->log2_min_transform_block_size_minus2 +
                   seq_param->log2_diff_max_min_transform_block_size) << 6 |
                  seq_param->log2_min_transform_block_size_minus2 << 4 |
                  (seq_param->log2_min_luma_coding_block_size_minus3 +
                   seq_param->log2_diff_max_min_luma_coding_block_size) << 2 |
                  seq_param->log2_min_luma_coding_block_size_minus3);
    OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
    /* DW 4, coding tool enable bits */
    OUT_BCS_BATCH(batch,
                  ((IS_KBL(i965->intel.device_info) || IS_GLK(i965->intel.device_info)) ?
                   1 : 0) << 27 | /* CU packet structure is 0 for SKL */
                  seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
                  pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
                  seq_param->seq_fields.bits.amp_enabled_flag << 23 |
                  pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
                  0 << 21 | /* 0 for encoder !(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)*/
                  0 << 20 | /* 0 for encoder !!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_FIELD_PIC)*/
                  pic_param->pic_fields.bits.weighted_pred_flag << 19 |
                  pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
                  pic_param->pic_fields.bits.tiles_enabled_flag << 17 | /* 0 for encoder */
                  pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
                  loop_filter_across_tiles_enabled_flag << 15 |
                  pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 | /* 0 for encoder */
                  pic_param->log2_parallel_merge_level_minus2 << 10 | /* 0 for encoder */
                  pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 | /* 0 for encoder */
                  seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8 |
                  (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 | /* 0 for encoder */
                  pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 | /* 0 for encoder */
                  seq_param->seq_fields.bits.pcm_enabled_flag << 4 |
                  seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag << 3 | /* 0 for encoder */
                  0);
    /* DW 5, bit depths, transform hierarchy, chroma QP offsets */
    OUT_BCS_BATCH(batch,
                  seq_param->seq_fields.bits.bit_depth_luma_minus8 << 27 | /* 10 bit for KBL+*/
                  seq_param->seq_fields.bits.bit_depth_chroma_minus8 << 24 | /* 10 bit for KBL+ */
                  pcm_sample_bit_depth_luma_minus1 << 20 |
                  pcm_sample_bit_depth_chroma_minus1 << 16 |
                  seq_param->max_transform_hierarchy_depth_inter << 13 | /* for encoder */
                  seq_param->max_transform_hierarchy_depth_intra << 10 | /* for encoder */
                  (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
                  (pic_param->pps_cb_qp_offset & 0x1f));
    OUT_BCS_BATCH(batch,
                  0 << 29 | /* must be 0 for encoder */
                  maxctubits); /* DW 6, max LCU bit size allowed for encoder */
    OUT_BCS_BATCH(batch,
                  0 << 31 | /* frame bitrate max unit */
                  maxframebitrate); /* DW 7, frame bitrate max 0:13 */
    OUT_BCS_BATCH(batch,
                  0 << 31 | /* frame bitrate min unit */
                  minframebitrate); /* DW 8, frame bitrate min 0:13 */
    OUT_BCS_BATCH(batch,
                  maxdeltaframebitrate << 16 | /* frame bitrate max delta ,help to select deltaQP of slice*/
                  mindeltaframebitrate); /* DW 9,(0,14) frame bitrate min delta ,help to select deltaQP of slice*/
    OUT_BCS_BATCH(batch, 0x07050402); /* DW 10, frame delta qp max */
    OUT_BCS_BATCH(batch, 0x0d0b0908); /* DW 11, frame delta qp max (upper bytes) */
    OUT_BCS_BATCH(batch, 0); /* DW 12, frame delta qp min */
    OUT_BCS_BATCH(batch, 0); /* DW 13 */
    OUT_BCS_BATCH(batch, 0x04030200); /* DW 14, frame delta qp max range */
    OUT_BCS_BATCH(batch, 0x100c0806); /* DW 15 */
    OUT_BCS_BATCH(batch, 0x04030200); /* DW 16, frame delta qp min range */
    OUT_BCS_BATCH(batch, 0x100c0806); /* DW 17 */
    OUT_BCS_BATCH(batch,
                  0 << 30 |
                  minframesize); /* DW 18, min frame size units */

    /* Extra zero DWs present only in the longer KBL/GLK command layout. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        int i = 0;

        for (i = 0; i < 12; i++)
            OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
635
636
637 static void
gen9_hcpe_hevc_insert_object(VADriverContextP ctx,struct intel_encoder_context * encoder_context,unsigned int * insert_data,int lenght_in_dws,int data_bits_in_last_dw,int skip_emul_byte_count,int is_last_header,int is_end_of_slice,int emulation_flag,struct intel_batchbuffer * batch)638 gen9_hcpe_hevc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
639 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
640 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
641 struct intel_batchbuffer *batch)
642 {
643 if (batch == NULL)
644 batch = encoder_context->base.batch;
645
646 if (data_bits_in_last_dw == 0)
647 data_bits_in_last_dw = 32;
648
649 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
650
651 OUT_BCS_BATCH(batch, HCP_INSERT_PAK_OBJECT | (lenght_in_dws + 2 - 2));
652 OUT_BCS_BATCH(batch,
653 (0 << 31) | /* inline payload */
654 (0 << 16) | /* always start at offset 0 */
655 (0 << 15) | /* HeaderLengthExcludeFrmSize */
656 (data_bits_in_last_dw << 8) |
657 (skip_emul_byte_count << 4) |
658 (!!emulation_flag << 3) |
659 ((!!is_last_header) << 2) |
660 ((!!is_end_of_slice) << 1) |
661 (0 << 0)); /* Reserved */
662 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
663
664 ADVANCE_BCS_BATCH(batch);
665 }
666 /*
// To do: future
668 static uint8_t
669 intel_get_ref_idx_state_1(VAPictureHEVC *va_pic, unsigned int frame_store_id)
670 {
671 unsigned int is_long_term =
672 !!(va_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE);
673 unsigned int is_top_field =
674 !!!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
675 unsigned int is_bottom_field =
676 !!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
677
678 return ((is_long_term << 6) |
679 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
680 (frame_store_id << 1) |
681 ((is_top_field ^ 1) & is_bottom_field));
682 }
683 */
/*
 * Emit HCP_REF_IDX_STATE (18 DWs) for one reference picture list.
 *
 * list: 0 for RefPicList0, 1 for RefPicList1.  The active reference count
 * and list entries come from the first slice parameter buffer; the
 * frame-store index is resolved by matching the VME's used reference
 * object against the DPB (encode_state->reference_objects).
 */
static void
gen9_hcpe_ref_idx_state_1(struct intel_batchbuffer *batch,
                          int list,
                          struct intel_encoder_context *encoder_context,
                          struct encode_state *encode_state)
{
    int i;
    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
    VAPictureHEVC *ref_list = (list ? slice_param->ref_pic_list1 : slice_param->ref_pic_list0);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct object_surface *obj_surface;
    int frame_index;

    int ref_idx_l0 = (vme_context->ref_index_in_mb[list] & 0xff);

    /* NOTE(review): after this range check ref_idx_l0 is not used again in
     * this function — confirm whether the clamp is still needed. */
    if (ref_idx_l0 > 3) {
        WARN_ONCE("ref_idx_l0 is out of range\n");
        ref_idx_l0 = 0;
    }

    /* Find which DPB slot holds the reference surface the VME used. */
    obj_surface = vme_context->used_reference_objects[list];
    frame_index = -1;
    for (i = 0; i < 16; i++) {
        if (obj_surface &&
            obj_surface == encode_state->reference_objects[i]) {
            frame_index = i;
            break;
        }
    }
    /* On a miss frame_index stays -1: only a warning is issued and the
     * (negative) index is still programmed into DW below. */
    if (frame_index == -1) {
        WARN_ONCE("RefPicList 0 or 1 is not found in DPB!\n");
    }

    BEGIN_BCS_BATCH(batch, 18);

    OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
    OUT_BCS_BATCH(batch,
                  num_ref_minus1 << 1 |
                  list);

    /* One DW per possible reference slot; unused slots are zero. */
    for (i = 0; i < 16; i++) {
        if (i < MIN((num_ref_minus1 + 1), 15)) {
            VAPictureHEVC *ref_pic = &ref_list[i];
            VAPictureHEVC *curr_pic = &pic_param->decoded_curr_pic;

            OUT_BCS_BATCH(batch,
                          1 << 15 | /* NOTE(review): original comment said "bottom_field_flag 0" but the bit is set to 1 — confirm intent */
                          0 << 14 | /* field_pic_flag 0 */
                          !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 | /* 1 when reference is long-term */
                          0 << 12 | /* disable WP */
                          0 << 11 | /* disable WP */
                          frame_index << 8 |
                          (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff)); /* POC delta to current picture */
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    ADVANCE_BCS_BATCH(batch);
}
746
747 void
intel_hcpe_hevc_ref_idx_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)748 intel_hcpe_hevc_ref_idx_state(VADriverContextP ctx,
749 struct encode_state *encode_state,
750 struct intel_encoder_context *encoder_context
751 )
752 {
753 struct intel_batchbuffer *batch = encoder_context->base.batch;
754 VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
755
756 if (slice_param->slice_type == HEVC_SLICE_I)
757 return;
758
759 gen9_hcpe_ref_idx_state_1(batch, 0, encoder_context, encode_state);
760
761 if (slice_param->slice_type == HEVC_SLICE_P)
762 return;
763
764 gen9_hcpe_ref_idx_state_1(batch, 1, encoder_context, encode_state);
765 }
766
/*
 * Program HCP_SLICE_STATE for one slice: slice position in CTBs, QP and
 * chroma offsets, deblocking/SAO/merge controls, and the PAK insertion
 * enables.  KBL/GLK use the longer 11-DW form; other gen9 parts use 9 DWs.
 *
 * Multi-slice encoding is only supported when each slice starts at the
 * beginning of a CTB row (asserted below).
 */
static void
gen9_hcpe_hevc_slice_state(VADriverContextP ctx,
                           VAEncPictureParameterBufferHEVC *pic_param,
                           VAEncSliceParameterBufferHEVC *slice_param,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           struct intel_batchbuffer *batch)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int slice_type = slice_param->slice_type;

    /* CTB geometry and slice coverage in CTB units */
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
    int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
    int last_slice = (((slice_param->slice_segment_address + slice_param->num_ctu_in_slice) == (width_in_ctb * height_in_ctb)) ? 1 : 0);

    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;

    slice_hor_pos = slice_param->slice_segment_address % width_in_ctb;
    slice_ver_pos = slice_param->slice_segment_address / width_in_ctb;

    next_slice_hor_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) % width_in_ctb;
    next_slice_ver_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) / width_in_ctb;

    /* only support multi slice begin from row start address */
    assert((slice_param->slice_segment_address % width_in_ctb) == 0);

    /* For the final slice the "next slice" position is adjusted: a
     * single-slice frame reports the bottom of the picture, a multi-slice
     * frame reports (0, 0). */
    if (last_slice == 1) {
        if (slice_param->slice_segment_address == 0) {
            next_slice_hor_pos = 0;
            next_slice_ver_pos = height_in_ctb;
        } else {
            next_slice_hor_pos = 0;
            next_slice_ver_pos = 0;
        }
    }

    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        BEGIN_BCS_BATCH(batch, 11);

        OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (11 - 2));
    } else {
        BEGIN_BCS_BATCH(batch, 9);

        OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));
    }

    /* DW 1, current slice start position in CTBs */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 16 |
                  slice_hor_pos);
    /* DW 2, next slice start position in CTBs */
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);
    /* DW 3, slice QP/offsets and slice-level flags */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
                  (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
                  (pic_param->pic_init_qp + slice_param->slice_qp_delta) << 6 |
                  slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag << 5 |
                  slice_param->slice_fields.bits.dependent_slice_segment_flag << 4 |
                  last_slice << 2 |
                  slice_type);
    /* DW 4, merge/weight/deblocking/SAO controls */
    OUT_BCS_BATCH(batch,
                  0 << 26 |
                  (slice_param->max_num_merge_cand - 1)  << 23 |
                  slice_param->slice_fields.bits.cabac_init_flag << 22 |
                  slice_param->luma_log2_weight_denom << 19 |
                  (slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) << 16 |
                  slice_param->slice_fields.bits.collocated_from_l0_flag << 15 |
                  (slice_type != HEVC_SLICE_B) << 14 |
                  slice_param->slice_fields.bits.mvd_l1_zero_flag << 13 |
                  slice_param->slice_fields.bits.slice_sao_luma_flag << 12 |
                  slice_param->slice_fields.bits.slice_sao_chroma_flag << 11 |
                  slice_param->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag << 10 |
                  (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
                  (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
                  slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag);
    OUT_BCS_BATCH(batch, 0); /* DW 5 ,ignore for encoder.*/
    /* DW 6 — NOTE(review): the two 4s are written unexplained; meaning not
     * derivable from this file, confirm against the HCP_SLICE_STATE spec. */
    OUT_BCS_BATCH(batch,
                  4 << 26 |
                  4 << 20 |
                  0);
    /* DW 7, PAK insertion controls */
    OUT_BCS_BATCH(batch,
                  1 << 10 |  /* header insertion enable */
                  1 << 9  |  /* slice data enable */
                  1 << 8  |  /* tail insertion enable, must at end of frame, not slice */
                  1 << 2  |  /* RBSP or EBSP, EmulationByteSliceInsertEnable */
                  1 << 1  |  /* cabacZeroWordInsertionEnable */
                  0);        /* Ignored for decoding */
    OUT_BCS_BATCH(batch, 0); /* PAK-BSE data start offset */

    /* Extra zero DWs present only in the longer KBL/GLK command layout. */
    if (IS_KBL(i965->intel.device_info) ||
        IS_GLK(i965->intel.device_info)) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    ADVANCE_BCS_BATCH(batch);
}
869
/* HEVC pipe line related */

/* Program all per-picture HCP states for one HEVC encode pass.
 *
 * Commands are emitted in the order the HCP pipeline is programmed:
 * pipe mode select, surface state, buffer/indirect-object addressing,
 * then the quantization matrices, picture state and reference index
 * state.  Keep this sequence as-is; each call appends commands to the
 * batch in order.
 */
static void gen9_hcpe_hevc_pipeline_picture_programing(VADriverContextP ctx,
        struct encode_state *encode_state,
        struct intel_encoder_context *encoder_context)
{
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, HCP_CODEC_HEVC, encoder_context);
    mfc_context->set_surface_state(ctx, encode_state, encoder_context);
    gen9_hcpe_pipe_buf_addr_state(ctx, encode_state, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);

    /* quantization matrices (regular and forward) plus picture-level state */
    mfc_context->qm_state(ctx, encoder_context);
    mfc_context->fqm_state(ctx, encoder_context);
    mfc_context->pic_state(ctx, encode_state, encoder_context);
    intel_hcpe_hevc_ref_idx_state(ctx, encode_state, encoder_context);
}
887
/* (Re)initialize the per-frame HCP encoder state: compute the picture
 * geometry in CTBs/CUs/MBs, release all buffers from the previous frame
 * and allocate the internal HCP line/tile/SAO/metadata buffers, the
 * indirect CU record buffer and the auxiliary slice batch buffer.
 *
 * Called once per frame before slice programming.
 */
static void gen9_hcpe_init(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    dri_bo *bo;
    int i, size = 0;
    int slice_batchbuffer_size;
    int slice_type = slice_param->slice_type;
    int is_inter = (slice_type != HEVC_SLICE_I);

    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    int cu_size = 1 << log2_cu_size;

    int width_in_ctb = ALIGN(pSequenceParameter->pic_width_in_luma_samples , ctb_size) / ctb_size;
    int height_in_ctb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, ctb_size) / ctb_size;
    int width_in_cu = ALIGN(pSequenceParameter->pic_width_in_luma_samples , cu_size) / cu_size;
    int height_in_cu = ALIGN(pSequenceParameter->pic_height_in_luma_samples, cu_size) / cu_size;
    int width_in_mb = ALIGN(pSequenceParameter->pic_width_in_luma_samples , 16) / 16;
    int height_in_mb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, 16) / 16;

    /* CU records per CTB depend on the CTB size (16 per 32x32, etc.) */
    int num_cu_record = 64;
    /* internal buffers are sized in 8-bit units normally, 10-bit needs
     * twice the room, hence the smaller shift */
    int size_shift = 3;

    if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0)
        || (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
        size_shift = 2;

    if (log2_ctb_size == 5) num_cu_record = 16;
    else if (log2_ctb_size == 4) num_cu_record = 4;
    else if (log2_ctb_size == 6) num_cu_record = 64;

    /* frame size in samples, cu, ctu, mb */
    mfc_context->pic_size.picture_width_in_samples = pSequenceParameter->pic_width_in_luma_samples;
    mfc_context->pic_size.picture_height_in_samples = pSequenceParameter->pic_height_in_luma_samples;
    mfc_context->pic_size.ctb_size = ctb_size;
    mfc_context->pic_size.picture_width_in_ctbs = width_in_ctb;
    mfc_context->pic_size.picture_height_in_ctbs = height_in_ctb;
    mfc_context->pic_size.min_cb_size = cu_size;
    mfc_context->pic_size.picture_width_in_min_cb_minus1 = width_in_cu - 1;
    mfc_context->pic_size.picture_height_in_min_cb_minus1 = height_in_cu - 1;
    mfc_context->pic_size.picture_width_in_mbs = width_in_mb;
    mfc_context->pic_size.picture_height_in_mbs = height_in_mb;

    /* Fix: scale the slice batch buffer with the total CTB count
     * (width * height).  The previous width_in_ctb * width_in_ctb
     * under-allocated for pictures taller than they are wide. */
    slice_batchbuffer_size = 64 * width_in_ctb * height_in_ctb + 4096 +
                             (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;

    /* Encode common setup for HCP */
    /* deblocking */
    dri_bo_unreference(mfc_context->deblocking_filter_line_buffer.bo);
    mfc_context->deblocking_filter_line_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->deblocking_filter_tile_line_buffer.bo);
    mfc_context->deblocking_filter_tile_line_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->deblocking_filter_tile_column_buffer.bo);
    mfc_context->deblocking_filter_tile_column_buffer.bo = NULL;

    /* input source */
    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    /* metadata */
    dri_bo_unreference(mfc_context->metadata_line_buffer.bo);
    mfc_context->metadata_line_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->metadata_tile_line_buffer.bo);
    mfc_context->metadata_tile_line_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->metadata_tile_column_buffer.bo);
    mfc_context->metadata_tile_column_buffer.bo = NULL;

    /* sao */
    dri_bo_unreference(mfc_context->sao_line_buffer.bo);
    mfc_context->sao_line_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->sao_tile_line_buffer.bo);
    mfc_context->sao_tile_line_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->sao_tile_column_buffer.bo);
    mfc_context->sao_tile_column_buffer.bo = NULL;

    /* mv temporal buffer */
    for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
        if (mfc_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
            dri_bo_unreference(mfc_context->current_collocated_mv_temporal_buffer[i].bo);
        mfc_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
    }

    /* reference */
    for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
        if (mfc_context->reference_surfaces[i].bo != NULL)
            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    /* indirect data CU recording */
    dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
    mfc_context->hcp_indirect_cu_object.bo = NULL;

    dri_bo_unreference(mfc_context->hcp_indirect_pak_bse_object.bo);
    mfc_context->hcp_indirect_pak_bse_object.bo = NULL;

    /* Current internal buffer for HCP */

    size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> size_shift;
    size <<= 6;
    ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_line_buffer), "line buffer", size);
    ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);

    size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> size_shift;
    size <<= 6;
    ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);

    /* metadata buffers are sized differently for inter and intra frames */
    if (is_inter) {
        size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 188 + 9 * width_in_ctb + 1023) >> 9;
        size <<= 6;
        ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);

        size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 172 + 9 * width_in_ctb + 1023) >> 9;
        size <<= 6;
        ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);

        size = (((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4) * 176 + 89 * width_in_ctb + 1023) >> 9;
        size <<= 6;
        ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
    } else {
        size = (pSequenceParameter->pic_width_in_luma_samples + 8 * width_in_ctb + 1023) >> 9;
        size <<= 6;
        ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);

        size = (pSequenceParameter->pic_width_in_luma_samples + 16 * width_in_ctb + 1023) >> 9;
        size <<= 6;
        ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);

        size = (pSequenceParameter->pic_height_in_luma_samples + 8 * height_in_ctb + 1023) >> 9;
        size <<= 6;
        ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
    }

    size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> size_shift;
    size <<= 6;
    ALLOC_ENCODER_BUFFER((&mfc_context->sao_line_buffer), "sao line buffer", size);

    size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> size_shift;
    size <<= 6;
    ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_line_buffer), "sao tile line buffer", size);

    size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> size_shift;
    size <<= 6;
    ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_column_buffer), "sao tile column buffer", size);

    /* 16 dwords (one CU record) per possible CU in the frame.
     * Note: the redundant second unreference of hcp_indirect_cu_object.bo
     * that used to sit here was removed — the bo was already released
     * and NULLed above. */
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Indirect data CU Buffer",
                      width_in_ctb * height_in_ctb * num_cu_record * 16 * 4,
                      0x1000);
    assert(bo);
    mfc_context->hcp_indirect_cu_object.bo = bo;

    /* to do pak bse object buffer */
    /* to do current collocated mv temporal buffer */

    dri_bo_unreference(mfc_context->hcp_batchbuffer_surface.bo);
    mfc_context->hcp_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.pitch = 16;
    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
    mfc_context->aux_batchbuffer_surface.size_block = 16;
}
1074
gen9_hcpe_run(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1075 static VAStatus gen9_hcpe_run(VADriverContextP ctx,
1076 struct encode_state *encode_state,
1077 struct intel_encoder_context *encoder_context)
1078 {
1079 struct intel_batchbuffer *batch = encoder_context->base.batch;
1080
1081 intel_batchbuffer_flush(batch); //run the pipeline
1082
1083 return VA_STATUS_SUCCESS;
1084 }
1085
1086
1087 static VAStatus
gen9_hcpe_stop(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int * encoded_bits_size)1088 gen9_hcpe_stop(VADriverContextP ctx,
1089 struct encode_state *encode_state,
1090 struct intel_encoder_context *encoder_context,
1091 int *encoded_bits_size)
1092 {
1093 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1094 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1095 VACodedBufferSegment *coded_buffer_segment;
1096
1097 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1098 assert(vaStatus == VA_STATUS_SUCCESS);
1099 *encoded_bits_size = coded_buffer_segment->size * 8;
1100 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1101
1102 return VA_STATUS_SUCCESS;
1103 }
1104
1105
/* Count the header bytes that emulation-prevention processing must skip
 * at the start of a packed HEVC header: any leading zeros, the 3- or
 * 4-byte start code, and the 2-byte NAL unit header.
 *
 * buf          packed header bytes (expected to contain a 000001 or
 *              00000001 start code)
 * bits_length  length of the packed data in bits
 *
 * Returns the byte count to skip, or 0 when no start code is found.
 */
int intel_hevc_find_skipemulcnt(unsigned char *buf, int bits_length)
{
    int i, found;
    int leading_zero_cnt, byte_length, zero_byte;
    int nal_unit_type;
    int skip_cnt = 0;

#define NAL_UNIT_TYPE_MASK 0x7e
#define HW_MAX_SKIP_LENGTH 15

    byte_length = ALIGN(bits_length, 32) >> 3;

    /* scan for the 000001 (or 00000001) start prefix */
    leading_zero_cnt = 0;
    found = 0;
    for (i = 0; i < byte_length - 4; i++) {
        if (((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)) ||
            ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) {
            found = 1;
            break;
        }
        leading_zero_cnt++;
    }
    if (!found) {
        /* warning message is complained. But anyway it will be inserted. */
        WARN_ONCE("Invalid packed header data. "
                  "Can't find the 000001 start_prefix code\n");
        return 0;
    }
    i = leading_zero_cnt;

    /* a 4-byte start code has one extra zero byte before 000001 */
    zero_byte = 0;
    if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)))
        zero_byte = 1;

    skip_cnt = leading_zero_cnt + zero_byte + 3;

    /* the unit header byte is accounted.
     * Fix: in HEVC the nal_unit_type lives in bits 6..1 of the first
     * header byte, so the 0x7e-masked value must be shifted right once
     * before comparing.  (The branch below is currently empty, so this
     * does not change behavior, but the old unshifted value was wrong.) */
    nal_unit_type = ((buf[skip_cnt]) & NAL_UNIT_TYPE_MASK) >> 1;
    skip_cnt += 1;
    skip_cnt += 1; /* two bytes length of nal headers in hevc */

    if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
        /* more unit header bytes are accounted for MVC/SVC */
        //skip_cnt += 3;
    }
    if (skip_cnt > HW_MAX_SKIP_LENGTH) {
        WARN_ONCE("Too many leading zeros are padded for packed data. "
                  "It is beyond the HW range.!!!\n");
    }
    return skip_cnt;
}
1159
1160 #ifdef HCP_SOFTWARE_SKYLAKE
1161
1162 static int
gen9_hcpe_hevc_pak_object(VADriverContextP ctx,int lcu_x,int lcu_y,int isLast_ctb,struct intel_encoder_context * encoder_context,int cu_count_in_lcu,unsigned int split_coding_unit_flag,struct intel_batchbuffer * batch)1163 gen9_hcpe_hevc_pak_object(VADriverContextP ctx, int lcu_x, int lcu_y, int isLast_ctb,
1164 struct intel_encoder_context *encoder_context,
1165 int cu_count_in_lcu, unsigned int split_coding_unit_flag,
1166 struct intel_batchbuffer *batch)
1167 {
1168 struct i965_driver_data *i965 = i965_driver_data(ctx);
1169 int len_in_dwords = 3;
1170
1171 if (IS_KBL(i965->intel.device_info) ||
1172 IS_GLK(i965->intel.device_info))
1173 len_in_dwords = 5;
1174
1175 if (batch == NULL)
1176 batch = encoder_context->base.batch;
1177
1178 BEGIN_BCS_BATCH(batch, len_in_dwords);
1179
1180 OUT_BCS_BATCH(batch, HCP_PAK_OBJECT | (len_in_dwords - 2));
1181 OUT_BCS_BATCH(batch,
1182 (((isLast_ctb > 0) ? 1 : 0) << 31) | /* last ctb?*/
1183 ((cu_count_in_lcu - 1) << 24) | /* No motion vector */
1184 split_coding_unit_flag);
1185
1186 OUT_BCS_BATCH(batch, (lcu_y << 16) | lcu_x); /* LCU for Y*/
1187
1188 if (IS_KBL(i965->intel.device_info) ||
1189 IS_GLK(i965->intel.device_info)) {
1190 OUT_BCS_BATCH(batch, 0);
1191 OUT_BCS_BATCH(batch, 0);
1192 }
1193
1194 ADVANCE_BCS_BATCH(batch);
1195
1196 return len_in_dwords;
1197 }
1198
/* Dword offsets and bit masks for parsing the AVC VME output message
 * that the software HEVC PAK path reuses (here 1 CU == one 16x16 MB). */
#define AVC_INTRA_RDO_OFFSET    4
#define AVC_INTER_RDO_OFFSET    10
#define AVC_INTER_MSG_OFFSET    8
#define AVC_INTER_MV_OFFSET     48
#define AVC_RDO_MASK            0xFFFF

/* intra MB mode field (msg[0] bits 5:4) and its values */
#define AVC_INTRA_MODE_MASK     0x30
#define AVC_INTRA_16X16         0x00
#define AVC_INTRA_8X8           0x01
#define AVC_INTRA_4X4           0x02

/* inter MB partitioning field (msg[0] bits 1:0) and its values */
#define AVC_INTER_MODE_MASK     0x03
#define AVC_INTER_8X8           0x03
#define AVC_INTER_8X16          0x02
#define AVC_INTER_16X8          0x01
#define AVC_INTER_16X16         0x00
#define AVC_SUBMB_SHAPE_MASK    0x00FF00

/* VME output message, write back message */
#define AVC_INTER_SUBMB_PRE_MODE_MASK   0x00ff0000
/* NOTE(review): identical redefinition of AVC_SUBMB_SHAPE_MASK (defined
 * above) — legal in C since the replacement list matches, but redundant. */
#define AVC_SUBMB_SHAPE_MASK    0x00FF00
1220
/* here 1 MB = 1CU = 16x16 */
/* Convert one intra MB of AVC VME output (msg) into a 16-dword HEVC CU
 * record inside the indirect CU object that HCP_PAK consumes.
 *
 * qp        CU QP written into the record
 * msg       VME write-back message for this MB
 * cu_index  record slot within the CTB
 * index     sub-block index used to pick the per-8x8 intra mode bits
 *
 * AVC intra prediction modes are remapped to the nearest HEVC intra
 * modes via the two lookup tables below.  All motion fields are zeroed
 * (intra CU), and interpred_idc is forced to 0xff.
 */
static void
gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int qp, unsigned int *msg,
                                      int ctb_x, int ctb_y,
                                      int mb_x, int mb_y,
                                      int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index, int index)
{
    /* here cu == mb, so we use mb address as the cu address */
    /* to fill the indirect cu by the vme out */
    /* NOTE(review): these tables have 9 entries but are indexed with a
     * value masked by 0xf (0..15) — relies on the VME producing only
     * valid AVC modes 0..8; anything else reads out of bounds. TODO confirm. */
    static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
    static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    unsigned char * cu_record_ptr = NULL;
    unsigned int * cu_msg = NULL;
    /* CU record address: num_cu_record slots per CTB, 16 dwords each */
    int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
    int mb_address_in_ctb = 0;  /* always 0 here: addressing is per cu_index */
    int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
    int zero = 0;
    int is_inter = 0;
    int intraMbMode = 0;
    int cu_part_mode = 0;
    int intraMode[4];
    int inerpred_idc = 0;
    int intra_chroma_mode = 5;
    int cu_size = 1;
    int tu_size = 0x55;
    int tu_count = 4;
    int chroma_mode_remap[4] = {5, 4, 3, 2};

    /* intra CU: mark all prediction directions as unused */
    if (!is_inter) inerpred_idc = 0xff;

    intraMbMode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;

    /* AVC chroma mode (msg[3] bits 1:0) remapped to the HEVC encoding */
    intra_chroma_mode = (msg[3] & 0x3);
    intra_chroma_mode = chroma_mode_remap[intra_chroma_mode];
    if (intraMbMode == AVC_INTRA_16X16) {
        cu_part_mode = 0; //2Nx2N
        cu_size = 1;
        tu_size = 0x55;
        tu_count = 4;
        /* single 16x16 mode replicated into all four slots */
        intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
    } else if (intraMbMode == AVC_INTRA_8X8) {
        cu_part_mode = 0; //2Nx2N
        cu_size = 0;
        tu_size = 0;
        tu_count = 4;
        /* one 4-bit mode per 8x8 sub-block, selected by index */
        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];

    } else { // for 4x4 to use 8x8 replace
        cu_part_mode = 3; //NxN
        cu_size = 0;
        tu_size = 0;
        tu_count = 4;
        /* NOTE(review): (index << 4) + 12 reaches 60 for index == 3;
         * shifting a 32-bit msg[1] by >= 32 is undefined behavior.
         * Presumably only small index values occur here — TODO confirm
         * the caller's index range for the 4x4 path. */
        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 0xf];
        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 0xf];
        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 0xf];
        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 0xf];

    }

    cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
    /* get the mb info from the vme out */
    cu_msg = (unsigned int *)(cu_record_ptr + cu_address);

    cu_msg[0] = (inerpred_idc << 24 |   /* interpred_idc[3:0][1:0] */
                 zero << 23 |           /* reserved */
                 qp << 16 |             /* CU_qp */
                 zero << 11 |           /* reserved */
                 intra_chroma_mode << 8 |   /* intra_chroma_mode */
                 zero << 7 |            /* IPCM_enable , reserved for SKL*/
                 cu_part_mode << 4 |    /* cu_part_mode */
                 zero << 3 |            /* cu_transquant_bypass_flag */
                 is_inter << 2 |        /* cu_pred_mode: 0 = intra here */
                 cu_size                /* cu_size */
                );
    cu_msg[1] = (zero << 30 |           /* reserved */
                 intraMode[3] << 24 |   /* intra_mode */
                 zero << 22 |           /* reserved */
                 intraMode[2] << 16 |   /* intra_mode */
                 zero << 14 |           /* reserved */
                 intraMode[1] << 8 |    /* intra_mode */
                 zero << 6 |            /* reserved */
                 intraMode[0]           /* intra_mode */
                );
    /* l0: 4 MV (x,y); l1; 4 MV (x,y) — all zero for an intra CU */
    cu_msg[2] = (zero << 16 |           /* mvx_l0[1] */
                 zero                   /* mvx_l0[0] */
                );
    cu_msg[3] = (zero << 16 |           /* mvx_l0[3] */
                 zero                   /* mvx_l0[2] */
                );
    cu_msg[4] = (zero << 16 |           /* mvy_l0[1] */
                 zero                   /* mvy_l0[0] */
                );
    cu_msg[5] = (zero << 16 |           /* mvy_l0[3] */
                 zero                   /* mvy_l0[2] */
                );

    cu_msg[6] = (zero << 16 |           /* mvx_l1[1] */
                 zero                   /* mvx_l1[0] */
                );
    cu_msg[7] = (zero << 16 |           /* mvx_l1[3] */
                 zero                   /* mvx_l1[2] */
                );
    cu_msg[8] = (zero << 16 |           /* mvy_l1[1] */
                 zero                   /* mvy_l1[0] */
                );
    cu_msg[9] = (zero << 16 |           /* mvy_l1[3] */
                 zero                   /* mvy_l1[2] */
                );

    cu_msg[10] = (zero << 28 |          /* ref_idx_l1[3] */
                  zero << 24 |          /* ref_idx_l1[2] */
                  zero << 20 |          /* ref_idx_l1[1] */
                  zero << 16 |          /* ref_idx_l1[0] */
                  zero << 12 |          /* ref_idx_l0[3] */
                  zero << 8 |           /* ref_idx_l0[2] */
                  zero << 4 |           /* ref_idx_l0[1] */
                  zero                  /* ref_idx_l0[0] */
                 );

    cu_msg[11] = tu_size;               /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
    cu_msg[12] = ((tu_count - 1) << 28 |    /* tu count - 1 */
                  zero << 16 |          /* reserved */
                  zero                  /* tu_xform_Yskip[15:0] */
                 );
    cu_msg[13] = (zero << 16 |          /* tu_xform_Vskip[15:0] */
                  zero                  /* tu_xform_Uskip[15:0] */
                 );
    cu_msg[14] = zero ;
    cu_msg[15] = zero ;
}
1362
/* here 1 MB = 1CU = 16x16 */
/* Convert one inter MB of AVC VME output (msg) into a 16-dword HEVC CU
 * record inside the indirect CU object that HCP_PAK consumes.
 *
 * qp        CU QP written into the record
 * msg       VME write-back message for this MB (MVs start at dword 4)
 * cu_index  record slot within the CTB
 * index     8x8 sub-block index used when the MB is AVC_INTER_8X8
 *
 * The VME MVs are reshuffled *in place* inside msg so that mv_ptr[0..7]
 * holds the four l0/l1 (x,y) pairs in the layout the CU record expects;
 * the statement order within each partition branch matters because some
 * source slots are overwritten.  Reference indices are taken from
 * vme_context->ref_index_in_mb.
 */
static void
gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int qp, unsigned int *msg,
                                      int ctb_x, int ctb_y,
                                      int mb_x, int mb_y,
                                      int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index, int index)
{
    /* here cu == mb, so we use mb address as the cu address */
    /* to fill the indirect cu by the vme out */
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned char * cu_record_ptr = NULL;
    unsigned int * cu_msg = NULL;
    /* CU record address: num_cu_record slots per CTB, 16 dwords each */
    int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
    int mb_address_in_ctb = 0;  /* always 0 here: addressing is per cu_index */
    int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
    int zero = 0;
    int cu_part_mode = 0;
    int submb_pre_mode = 0;
    int is_inter = 1;
    int cu_size = 1;
    int tu_size = 0x55;
    int tu_count = 4;
    int inter_mode = 0;

    unsigned int *mv_ptr;
    {
        inter_mode = (msg[0] & AVC_INTER_MODE_MASK);
        submb_pre_mode = (msg[1] & AVC_INTER_SUBMB_PRE_MODE_MASK) >> 16;
#define MSG_MV_OFFSET 4
        mv_ptr = msg + MSG_MV_OFFSET;
        /* MV of VME output is based on 16 sub-blocks. So it is necessary
         * to convert them to be compatible with the format of AVC_PAK
         * command.
         */
        /* 0/2/4/6/8... : l0, 1/3/5/7...: l1 ; now it only support 16x16,16x8,8x16,8x8*/

        if (inter_mode == AVC_INTER_16X16) {
            /* replicate the single MV pair into all four slots */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[6] = mv_ptr[0];
            mv_ptr[7] = mv_ptr[1];
            cu_part_mode = 0;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;
        } else if (inter_mode == AVC_INTER_8X16) {
            /* left half from slot 0, right half from slot 8 */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[6] = mv_ptr[8];
            mv_ptr[7] = mv_ptr[9];
            cu_part_mode = 1;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;
        } else if (inter_mode == AVC_INTER_16X8) {
            /* top half from slot 0, bottom half from slots 16/24; the
             * reads at >= 16 are unaffected by the writes to 2..7 above */
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
            cu_part_mode = 2;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;
        } else if (inter_mode == AVC_INTER_8X8) {
            /* one 8x8 CU: replicate the MV pair of sub-block 'index'.
             * Note mv_ptr[0]/[1] are overwritten first, so for index 0
             * the later copies read the already-rewritten values (same
             * data, so the result is unchanged). */
            mv_ptr[0] = mv_ptr[index * 8 + 0 ];
            mv_ptr[1] = mv_ptr[index * 8 + 1 ];
            mv_ptr[2] = mv_ptr[index * 8 + 0 ];
            mv_ptr[3] = mv_ptr[index * 8 + 1 ];
            mv_ptr[4] = mv_ptr[index * 8 + 0 ];
            mv_ptr[5] = mv_ptr[index * 8 + 1 ];
            mv_ptr[6] = mv_ptr[index * 8 + 0 ];
            mv_ptr[7] = mv_ptr[index * 8 + 1 ];
            cu_part_mode = 0;
            cu_size = 0;
            tu_size = 0x0;
            tu_count = 4;

        } else {
            /* unexpected partitioning: fall back to 16x16 behavior */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[6] = mv_ptr[0];
            mv_ptr[7] = mv_ptr[1];
            cu_part_mode = 0;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;

        }
    }

    cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
    /* get the mb info from the vme out */
    cu_msg = (unsigned int *)(cu_record_ptr + cu_address);

    cu_msg[0] = (submb_pre_mode << 24 | /* interpred_idc[3:0][1:0] */
                 zero << 23 |           /* reserved */
                 qp << 16 |             /* CU_qp */
                 zero << 11 |           /* reserved */
                 5 << 8 |               /* intra_chroma_mode */
                 zero << 7 |            /* IPCM_enable , reserved for SKL*/
                 cu_part_mode << 4 |    /* cu_part_mode */
                 zero << 3 |            /* cu_transquant_bypass_flag */
                 is_inter << 2 |        /* cu_pred_mode: 1 = inter here */
                 cu_size                /* cu_size */
                );
    cu_msg[1] = (zero << 30 |           /* reserved */
                 zero << 24 |           /* intra_mode */
                 zero << 22 |           /* reserved */
                 zero << 16 |           /* intra_mode */
                 zero << 14 |           /* reserved */
                 zero << 8 |            /* intra_mode */
                 zero << 6 |            /* reserved */
                 zero                   /* intra_mode */
                );
    /* l0: 4 MV (x,y); l1; 4 MV (x,y) — each mv_ptr dword packs y:x 16-bit halves */
    cu_msg[2] = ((mv_ptr[2] & 0xffff) << 16 |   /* mvx_l0[1] */
                 (mv_ptr[0] & 0xffff)           /* mvx_l0[0] */
                );
    cu_msg[3] = ((mv_ptr[6] & 0xffff) << 16 |   /* mvx_l0[3] */
                 (mv_ptr[4] & 0xffff)           /* mvx_l0[2] */
                );
    cu_msg[4] = ((mv_ptr[2] & 0xffff0000) |     /* mvy_l0[1] */
                 (mv_ptr[0] & 0xffff0000) >> 16 /* mvy_l0[0] */
                );
    cu_msg[5] = ((mv_ptr[6] & 0xffff0000) |     /* mvy_l0[3] */
                 (mv_ptr[4] & 0xffff0000) >> 16 /* mvy_l0[2] */
                );

    cu_msg[6] = ((mv_ptr[3] & 0xffff) << 16 |   /* mvx_l1[1] */
                 (mv_ptr[1] & 0xffff)           /* mvx_l1[0] */
                );
    cu_msg[7] = ((mv_ptr[7] & 0xffff) << 16 |   /* mvx_l1[3] */
                 (mv_ptr[5] & 0xffff)           /* mvx_l1[2] */
                );
    cu_msg[8] = ((mv_ptr[3] & 0xffff0000) |     /* mvy_l1[1] */
                 (mv_ptr[1] & 0xffff0000) >> 16 /* mvy_l1[0] */
                );
    cu_msg[9] = ((mv_ptr[7] & 0xffff0000) |     /* mvy_l1[3] */
                 (mv_ptr[5] & 0xffff0000) >> 16 /* mvy_l1[2] */
                );

    /* 4-bit reference indices unpacked from the per-list packed bytes */
    cu_msg[10] = (((vme_context->ref_index_in_mb[1] >> 24) & 0xf) << 28 |   /* ref_idx_l1[3] */
                  ((vme_context->ref_index_in_mb[1] >> 16) & 0xf) << 24 |   /* ref_idx_l1[2] */
                  ((vme_context->ref_index_in_mb[1] >> 8) & 0xf) << 20 |    /* ref_idx_l1[1] */
                  ((vme_context->ref_index_in_mb[1] >> 0) & 0xf) << 16 |    /* ref_idx_l1[0] */
                  ((vme_context->ref_index_in_mb[0] >> 24) & 0xf) << 12 |   /* ref_idx_l0[3] */
                  ((vme_context->ref_index_in_mb[0] >> 16) & 0xf) << 8 |    /* ref_idx_l0[2] */
                  ((vme_context->ref_index_in_mb[0] >> 8) & 0xf) << 4 |     /* ref_idx_l0[1] */
                  ((vme_context->ref_index_in_mb[0] >> 0) & 0xf)            /* ref_idx_l0[0] */
                 );

    cu_msg[11] = tu_size;               /* tu_size 00000000 00000000 00000000 10101010 or 0x0*/
    cu_msg[12] = ((tu_count - 1) << 28 |    /* tu count - 1 */
                  zero << 16 |          /* reserved */
                  zero                  /* tu_xform_Yskip[15:0] */
                 );
    cu_msg[13] = (zero << 16 |          /* tu_xform_Vskip[15:0] */
                  zero                  /* tu_xform_Uskip[15:0] */
                 );
    cu_msg[14] = zero ;
    cu_msg[15] = zero ;
}
1537
/* Pre-built split_coding_unit_flag fields for HCP_PAK_OBJECT, one per
 * CTB size.  Bit 20 marks the top-level split; bits 16..19 mark the
 * four 32x32 quadrant splits.
 * NOTE(review): HEVC_SPLIT_CU_FLAG_8_8 has the same value as
 * HEVC_SPLIT_CU_FLAG_32_32 — presumably intentional (the 8x8 case is
 * expressed at a different CTB size), but worth confirming. */
#define HEVC_SPLIT_CU_FLAG_64_64 ((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
#define HEVC_SPLIT_CU_FLAG_32_32 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
#define HEVC_SPLIT_CU_FLAG_16_16 ((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
#define HEVC_SPLIT_CU_FLAG_8_8   ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
1542
1543
1544 void
intel_hevc_slice_insert_packed_data(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int slice_index,struct intel_batchbuffer * slice_batch)1545 intel_hevc_slice_insert_packed_data(VADriverContextP ctx,
1546 struct encode_state *encode_state,
1547 struct intel_encoder_context *encoder_context,
1548 int slice_index,
1549 struct intel_batchbuffer *slice_batch)
1550 {
1551 int count, i, start_index;
1552 unsigned int length_in_bits;
1553 VAEncPackedHeaderParameterBuffer *param = NULL;
1554 unsigned int *header_data = NULL;
1555 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1556 int slice_header_index;
1557
1558 if (encode_state->slice_header_index[slice_index] == 0)
1559 slice_header_index = -1;
1560 else
1561 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1562
1563 count = encode_state->slice_rawdata_count[slice_index];
1564 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1565
1566 for (i = 0; i < count; i++) {
1567 unsigned int skip_emul_byte_cnt;
1568
1569 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1570
1571 param = (VAEncPackedHeaderParameterBuffer *)
1572 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1573
1574 /* skip the slice header packed data type as it is lastly inserted */
1575 if (param->type == VAEncPackedHeaderSlice)
1576 continue;
1577
1578 length_in_bits = param->bit_length;
1579
1580 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1581
1582 /* as the slice header is still required, the last header flag is set to
1583 * zero.
1584 */
1585 mfc_context->insert_object(ctx,
1586 encoder_context,
1587 header_data,
1588 ALIGN(length_in_bits, 32) >> 5,
1589 length_in_bits & 0x1f,
1590 skip_emul_byte_cnt,
1591 0,
1592 0,
1593 !param->has_emulation_bytes,
1594 slice_batch);
1595 }
1596
1597 if (slice_header_index == -1) {
1598 unsigned char *slice_header = NULL;
1599 int slice_header_length_in_bits = 0;
1600 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1601 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1602 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1603
1604 /* For the Normal HEVC */
1605 slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter,
1606 pPicParameter,
1607 pSliceParameter,
1608 &slice_header,
1609 0);
1610 mfc_context->insert_object(ctx, encoder_context,
1611 (unsigned int *)slice_header,
1612 ALIGN(slice_header_length_in_bits, 32) >> 5,
1613 slice_header_length_in_bits & 0x1f,
1614 5, /* first 6 bytes are start code + nal unit type */
1615 1, 0, 1, slice_batch);
1616 free(slice_header);
1617 } else {
1618 unsigned int skip_emul_byte_cnt;
1619
1620 header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
1621
1622 param = (VAEncPackedHeaderParameterBuffer *)
1623 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
1624 length_in_bits = param->bit_length;
1625
1626 /* as the slice header is the last header data for one slice,
1627 * the last header flag is set to one.
1628 */
1629 skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1630
1631 mfc_context->insert_object(ctx,
1632 encoder_context,
1633 header_data,
1634 ALIGN(length_in_bits, 32) >> 5,
1635 length_in_bits & 0x1f,
1636 skip_emul_byte_cnt,
1637 1,
1638 0,
1639 !param->has_emulation_bytes,
1640 slice_batch);
1641 }
1642
1643 return;
1644 }
1645
1646 static void
gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int slice_index,struct intel_batchbuffer * slice_batch)1647 gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
1648 struct encode_state *encode_state,
1649 struct intel_encoder_context *encoder_context,
1650 int slice_index,
1651 struct intel_batchbuffer *slice_batch)
1652 {
1653 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1654 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1655 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
1656 VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
1657 VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
1658 int qp_slice = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1659 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1660 //unsigned char *slice_header = NULL; // for future use
1661 //int slice_header_length_in_bits = 0;
1662 unsigned int tail_data[] = { 0x0, 0x0 };
1663 int slice_type = pSliceParameter->slice_type;
1664
1665 int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
1666 int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
1667 int ctb_size = 1 << log2_ctb_size;
1668 int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
1669 int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
1670 int last_slice = (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice) == (width_in_ctb * height_in_ctb);
1671 int ctb_width_in_mb = (ctb_size + 15) / 16;
1672 int i_ctb, ctb_x, ctb_y;
1673 unsigned int split_coding_unit_flag = 0;
1674 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
1675 int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % ctb_size) > 0 ? 1 : 0;
1676 int col_pad_flag = (pSequenceParameter->pic_width_in_luma_samples % ctb_size) > 0 ? 1 : 0;
1677
1678 int is_intra = (slice_type == HEVC_SLICE_I);
1679 unsigned int *msg = NULL;
1680 unsigned char *msg_ptr = NULL;
1681 int macroblock_address = 0;
1682 int num_cu_record = 64;
1683 int cu_count = 1;
1684 int tmp_mb_mode = 0;
1685 int mb_x = 0, mb_y = 0;
1686 int mb_addr = 0;
1687 int cu_index = 0;
1688 int inter_rdo, intra_rdo;
1689 int qp;
1690 int drop_cu_row_in_last_mb = 0;
1691 int drop_cu_column_in_last_mb = 0;
1692
1693 if (log2_ctb_size == 5) num_cu_record = 16;
1694 else if (log2_ctb_size == 4) num_cu_record = 4;
1695 else if (log2_ctb_size == 6) num_cu_record = 64;
1696
1697 qp = qp_slice;
1698 if (rate_control_mode == VA_RC_CBR) {
1699 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1700 if (slice_type == HEVC_SLICE_B) {
1701 if (pSequenceParameter->ip_period == 1) {
1702 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1703
1704 } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
1705 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
1706 }
1707 }
1708 if (encode_state->slice_header_index[slice_index] == 0) {
1709 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1710 }
1711 }
1712
1713 /* only support for 8-bit pixel bit-depth */
1714 assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 <= 2);
1715 assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 >= 0 && pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 <= 2);
1716 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1717 assert(qp >= 0 && qp < 52);
1718
1719 {
1720 gen9_hcpe_hevc_slice_state(ctx,
1721 pPicParameter,
1722 pSliceParameter,
1723 encode_state, encoder_context,
1724 slice_batch);
1725
1726 if (slice_index == 0)
1727 intel_hcpe_hevc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1728
1729 intel_hevc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
1730
1731 /*
1732 slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header, slice_index);
1733 int skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)slice_header, slice_header_length_in_bits);
1734
1735 mfc_context->insert_object(ctx, encoder_context,
1736 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1737 skip_emul_byte_cnt,
1738 1, 0, 1, slice_batch);
1739 free(slice_header);
1740 */
1741 }
1742
1743
1744
1745 split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 : ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1746
1747 dri_bo_map(vme_context->vme_output.bo , 1);
1748 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1749 dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);
1750
1751 for (i_ctb = pSliceParameter->slice_segment_address; i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
1752 int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice - 1));
1753 int ctb_height_in_mb_internal = ctb_width_in_mb;
1754 int ctb_width_in_mb_internal = ctb_width_in_mb;
1755 int max_cu_num_in_mb = 4;
1756
1757 ctb_x = i_ctb % width_in_ctb;
1758 ctb_y = i_ctb / width_in_ctb;
1759
1760 drop_cu_row_in_last_mb = 0;
1761 drop_cu_column_in_last_mb = 0;
1762
1763 if (ctb_y == (height_in_ctb - 1) && row_pad_flag) {
1764 ctb_height_in_mb_internal = (pSequenceParameter->pic_height_in_luma_samples - (ctb_y * ctb_size) + 15) / 16;
1765
1766 if ((log2_cu_size == 3) && (pSequenceParameter->pic_height_in_luma_samples % 16))
1767 drop_cu_row_in_last_mb = (16 - (pSequenceParameter->pic_height_in_luma_samples % 16)) >> log2_cu_size;
1768 }
1769
1770 if (ctb_x == (width_in_ctb - 1) && col_pad_flag) {
1771 ctb_width_in_mb_internal = (pSequenceParameter->pic_width_in_luma_samples - (ctb_x * ctb_size) + 15) / 16;
1772
1773 if ((log2_cu_size == 3) && (pSequenceParameter->pic_width_in_luma_samples % 16))
1774 drop_cu_column_in_last_mb = (16 - (pSequenceParameter->pic_width_in_luma_samples % 16)) >> log2_cu_size;
1775 }
1776
1777 mb_x = 0;
1778 mb_y = 0;
1779 macroblock_address = ctb_y * width_in_mbs * ctb_width_in_mb + ctb_x * ctb_width_in_mb;
1780 split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
1781 cu_count = 1;
1782 cu_index = 0;
1783 mb_addr = 0;
1784 msg = NULL;
1785 for (mb_y = 0; mb_y < ctb_height_in_mb_internal; mb_y++) {
1786 mb_addr = macroblock_address + mb_y * width_in_mbs ;
1787 for (mb_x = 0; mb_x < ctb_width_in_mb_internal; mb_x++) {
1788 max_cu_num_in_mb = 4;
1789 if (drop_cu_row_in_last_mb && (mb_y == ctb_height_in_mb_internal - 1))
1790 max_cu_num_in_mb /= 2;
1791
1792 if (drop_cu_column_in_last_mb && (mb_x == ctb_width_in_mb_internal - 1))
1793 max_cu_num_in_mb /= 2;
1794
1795 /* get the mb info from the vme out */
1796 msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);
1797
1798 inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
1799 intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
1800 /*fill to indirect cu */
1801 /*to do */
1802 if (is_intra || intra_rdo < inter_rdo) {
1803 /* fill intra cu */
1804 tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
1805 if (max_cu_num_in_mb < 4) {
1806 if (tmp_mb_mode == AVC_INTRA_16X16) {
1807 msg[0] = (msg[0] & !AVC_INTRA_MODE_MASK) | (AVC_INTRA_8X8 << 4);
1808 tmp_mb_mode = AVC_INTRA_8X8;
1809 }
1810
1811 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1812 if (--max_cu_num_in_mb > 0)
1813 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 2);
1814
1815 if (ctb_width_in_mb == 2)
1816 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1817 else if (ctb_width_in_mb == 1)
1818 split_coding_unit_flag |= 0x1 << 20;
1819 } else if (tmp_mb_mode == AVC_INTRA_16X16) {
1820 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1821 } else { // for 4x4 to use 8x8 replace
1822 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1823 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 1);
1824 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 2);
1825 gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 3);
1826 if (ctb_width_in_mb == 2)
1827 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1828 else if (ctb_width_in_mb == 1)
1829 split_coding_unit_flag |= 0x1 << 20;
1830 }
1831 } else {
1832 msg += AVC_INTER_MSG_OFFSET;
1833 /* fill inter cu */
1834 tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
1835 if (max_cu_num_in_mb < 4) {
1836 if (tmp_mb_mode != AVC_INTER_8X8) {
1837 msg[0] = (msg[0] & !AVC_INTER_MODE_MASK) | AVC_INTER_8X8;
1838 tmp_mb_mode = AVC_INTER_8X8;
1839 }
1840 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1841 if (--max_cu_num_in_mb > 0)
1842 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 1);
1843
1844 if (ctb_width_in_mb == 2)
1845 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1846 else if (ctb_width_in_mb == 1)
1847 split_coding_unit_flag |= 0x1 << 20;
1848 } else if (tmp_mb_mode == AVC_INTER_8X8) {
1849 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1850 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 1);
1851 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 2);
1852 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 3);
1853 if (ctb_width_in_mb == 2)
1854 split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
1855 else if (ctb_width_in_mb == 1)
1856 split_coding_unit_flag |= 0x1 << 20;
1857
1858 } else if (tmp_mb_mode == AVC_INTER_16X16 ||
1859 tmp_mb_mode == AVC_INTER_8X16 ||
1860 tmp_mb_mode == AVC_INTER_16X8) {
1861 gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type, cu_index++, 0);
1862 }
1863 }
1864 mb_addr++;
1865 }
1866 }
1867
1868 cu_count = cu_index;
1869 // PAK object fill accordingly.
1870 gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, cu_count, split_coding_unit_flag, slice_batch);
1871 }
1872
1873 dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
1874 dri_bo_unmap(vme_context->vme_output.bo);
1875
1876 if (last_slice) {
1877 mfc_context->insert_object(ctx, encoder_context,
1878 tail_data, 2, 8,
1879 2, 1, 1, 0, slice_batch);
1880 } else {
1881 mfc_context->insert_object(ctx, encoder_context,
1882 tail_data, 1, 8,
1883 1, 1, 1, 0, slice_batch);
1884 }
1885 }
1886
1887 static dri_bo *
gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1888 gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
1889 struct encode_state *encode_state,
1890 struct intel_encoder_context *encoder_context)
1891 {
1892 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
1893 struct intel_batchbuffer *batch;
1894 dri_bo *batch_bo;
1895 int i;
1896
1897 batch = mfc_context->aux_batchbuffer;
1898 batch_bo = batch->buffer;
1899
1900 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1901 gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1902 }
1903
1904 intel_batchbuffer_align(batch, 8);
1905
1906 BEGIN_BCS_BATCH(batch, 2);
1907 OUT_BCS_BATCH(batch, 0);
1908 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1909 ADVANCE_BCS_BATCH(batch);
1910
1911 dri_bo_reference(batch_bo);
1912 intel_batchbuffer_free(batch);
1913 mfc_context->aux_batchbuffer = NULL;
1914
1915 return batch_bo;
1916 }
1917
1918 #else
1919
1920 #endif
1921
1922 static void
gen9_hcpe_hevc_pipeline_programing(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1923 gen9_hcpe_hevc_pipeline_programing(VADriverContextP ctx,
1924 struct encode_state *encode_state,
1925 struct intel_encoder_context *encoder_context)
1926 {
1927 struct i965_driver_data *i965 = i965_driver_data(ctx);
1928 struct intel_batchbuffer *batch = encoder_context->base.batch;
1929 dri_bo *slice_batch_bo;
1930
1931 #ifdef HCP_SOFTWARE_SKYLAKE
1932 slice_batch_bo = gen9_hcpe_hevc_software_batchbuffer(ctx, encode_state, encoder_context);
1933 #else
1934 slice_batch_bo = gen9_hcpe_hevc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1935 #endif
1936
1937 // begin programing
1938 if (i965->intel.has_bsd2)
1939 intel_batchbuffer_start_atomic_bcs_override(batch, 0x4000, BSD_RING0);
1940 else
1941 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1942 intel_batchbuffer_emit_mi_flush(batch);
1943
1944 // picture level programing
1945 gen9_hcpe_hevc_pipeline_picture_programing(ctx, encode_state, encoder_context);
1946
1947 BEGIN_BCS_BATCH(batch, 3);
1948 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
1949 OUT_BCS_RELOC64(batch,
1950 slice_batch_bo,
1951 I915_GEM_DOMAIN_COMMAND, 0,
1952 0);
1953 ADVANCE_BCS_BATCH(batch);
1954
1955 // end programing
1956 intel_batchbuffer_end_atomic(batch);
1957
1958 dri_bo_unreference(slice_batch_bo);
1959 }
1960
/*
 * Insert one application-supplied packed header (selected by its index in
 * encode_state->packed_header_data[]) into the slice batch. Does nothing
 * when no data was supplied for that index. The "last header" flag is
 * always zero here: the slice header still follows.
 */
static void
hcpe_hevc_insert_packed_header(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               int idx,
                               struct intel_batchbuffer *slice_batch)
{
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncPackedHeaderParameterBuffer *param;
    unsigned int *header_data;
    unsigned int length_in_bits;
    unsigned int skip_emul_byte_cnt;

    if (!encode_state->packed_header_data[idx])
        return;

    assert(encode_state->packed_header_param[idx]);
    param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
    header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
    length_in_bits = param->bit_length;

    skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
    mfc_context->insert_object(ctx,
                               encoder_context,
                               header_data,
                               ALIGN(length_in_bits, 32) >> 5,
                               length_in_bits & 0x1f,
                               skip_emul_byte_cnt,
                               0, /* not the last header of the slice */
                               0,
                               !param->has_emulation_bytes,
                               slice_batch);
}

/*
 * Emit the stream-level packed headers (VPS, SPS, PPS, SEI) at the start
 * of the first slice. The four cases previously duplicated the same
 * insertion code; they now share hcpe_hevc_insert_packed_header().
 */
void intel_hcpe_hevc_pipeline_header_programing(VADriverContextP ctx,
                                                struct encode_state *encode_state,
                                                struct intel_encoder_context *encoder_context,
                                                struct intel_batchbuffer *slice_batch)
{
    hcpe_hevc_insert_packed_header(ctx, encode_state, encoder_context,
                                   va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS),
                                   slice_batch);

    /* The SPS slot sits immediately after the VPS slot. */
    hcpe_hevc_insert_packed_header(ctx, encode_state, encoder_context,
                                   va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS) + 1,
                                   slice_batch);

    hcpe_hevc_insert_packed_header(ctx, encode_state, encoder_context,
                                   va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_PPS),
                                   slice_batch);

    hcpe_hevc_insert_packed_header(ctx, encode_state, encoder_context,
                                   va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_SEI),
                                   slice_batch);
}
2065
intel_hcpe_hevc_prepare(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2066 VAStatus intel_hcpe_hevc_prepare(VADriverContextP ctx,
2067 struct encode_state *encode_state,
2068 struct intel_encoder_context *encoder_context)
2069 {
2070 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2071 struct object_surface *obj_surface;
2072 struct object_buffer *obj_buffer;
2073 GenHevcSurface *hevc_encoder_surface;
2074 dri_bo *bo;
2075 VAStatus vaStatus = VA_STATUS_SUCCESS;
2076 int i;
2077 struct i965_coded_buffer_segment *coded_buffer_segment;
2078
2079 /*Setup all the input&output object*/
2080
2081 /* Setup current frame and current direct mv buffer*/
2082 obj_surface = encode_state->reconstructed_object;
2083
2084 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2085 assert(hevc_encoder_surface);
2086
2087 if (hevc_encoder_surface) {
2088 hevc_encoder_surface->has_p010_to_nv12_done = 0;
2089 hevc_encoder_surface->base.frame_store_id = -1;
2090 mfc_context->current_collocated_mv_temporal_buffer[NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS - 1].bo = hevc_encoder_surface->motion_vector_temporal_bo;
2091 dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
2092 }
2093
2094 mfc_context->surface_state.width = obj_surface->orig_width;
2095 mfc_context->surface_state.height = obj_surface->orig_height;
2096 mfc_context->surface_state.w_pitch = obj_surface->width;
2097 mfc_context->surface_state.h_pitch = obj_surface->height;
2098
2099 /* Setup reference frames and direct mv buffers*/
2100 for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2101 obj_surface = encode_state->reference_objects[i];
2102
2103 if (obj_surface && obj_surface->bo) {
2104 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
2105 dri_bo_reference(obj_surface->bo);
2106
2107 /* Check MV temporal buffer */
2108 hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;
2109 assert(hevc_encoder_surface);
2110
2111 if (hevc_encoder_surface) {
2112 hevc_encoder_surface->base.frame_store_id = -1;
2113 /* Setup MV temporal buffer */
2114 mfc_context->current_collocated_mv_temporal_buffer[i].bo = hevc_encoder_surface->motion_vector_temporal_bo;
2115 dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
2116 }
2117 } else {
2118 break;
2119 }
2120 }
2121
2122
2123 mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
2124 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
2125
2126 obj_buffer = encode_state->coded_buf_object;
2127 bo = obj_buffer->buffer_store->bo;
2128 mfc_context->hcp_indirect_pak_bse_object.bo = bo;
2129 mfc_context->hcp_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
2130 mfc_context->hcp_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
2131 dri_bo_reference(mfc_context->hcp_indirect_pak_bse_object.bo);
2132
2133 dri_bo_map(bo, 1);
2134 coded_buffer_segment = (struct i965_coded_buffer_segment *)(bo->virtual);
2135 coded_buffer_segment->mapped = 0;
2136 coded_buffer_segment->codec = encoder_context->codec;
2137 dri_bo_unmap(bo);
2138
2139 return vaStatus;
2140 }
2141
2142 /* HEVC BRC related */
2143
2144 static void
intel_hcpe_bit_rate_control_context_init(struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2145 intel_hcpe_bit_rate_control_context_init(struct encode_state *encode_state,
2146 struct intel_encoder_context *encoder_context)
2147 {
2148 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2149 VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
2150 int ctb_size = 16;
2151 int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
2152 int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
2153
2154 double fps = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
2155 double bitrate = encoder_context->brc.bits_per_second[0];
2156 int inter_mb_size = bitrate * 1.0 / (fps + 4.0) / width_in_mbs / height_in_mbs;
2157 int intra_mb_size = inter_mb_size * 5.0;
2158 int i;
2159
2160 mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_mb_size = intra_mb_size;
2161 mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
2162 mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_mb_size = inter_mb_size;
2163 mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2164 mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_mb_size = inter_mb_size;
2165 mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
2166
2167 for (i = 0 ; i < 3; i++) {
2168 mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
2169 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
2170 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
2171 mfc_context->bit_rate_control_context[i].GrowInit = 6;
2172 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
2173 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
2174 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
2175
2176 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
2177 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
2178 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
2179 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
2180 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
2181 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
2182 }
2183
2184 mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord = (intra_mb_size + 16) / 16;
2185 mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord = (inter_mb_size + 16) / 16;
2186 mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord = (inter_mb_size + 16) / 16;
2187
2188 mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord * 1.5;
2189 mfc_context->bit_rate_control_context[HEVC_SLICE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord * 1.5;
2190 mfc_context->bit_rate_control_context[HEVC_SLICE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord * 1.5;
2191 }
2192
/*
 * Initialize the HEVC bit rate control (BRC) state: per-slice-type target
 * frame sizes derived from the GOP structure, the HRD buffer model, and
 * the starting QP values interpolated from rough QP1/QP51 size estimates.
 */
static void intel_hcpe_brc_init(struct encode_state *encode_state,
                                struct intel_encoder_context* encoder_context)
{
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;

    double bitrate = (double)encoder_context->brc.bits_per_second[0];
    double framerate = (double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den;
    int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
    int intra_period = pSequenceParameter->intra_period;
    int ip_period = pSequenceParameter->ip_period;
    /* Rough per-frame size bounds at QP1 (largest) and QP51 (smallest),
     * assuming 4:2:0 sampling (3/2 samples per pixel, 8 bits each). */
    double qp1_size = 0.1 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
    double qp51_size = 0.001 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
    double bpf;
    /* HRD buffer is clamped to [1x, 32x] seconds of bitrate; default 8x. */
    int ratio_min = 1;
    int ratio_max = 32;
    int ratio = 8;
    double buffer_size = 0;
    int bpp = 1;

    /* 10-bit content costs two bytes per sample. */
    if ((pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 > 0) ||
        (pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 > 0))
        bpp = 2;

    qp1_size = qp1_size * bpp;
    qp51_size = qp51_size * bpp;

    /* Derive the P/B frame counts per GOP from intra_period/ip_period. */
    if (pSequenceParameter->ip_period) {
        pnum = (intra_period + ip_period - 1) / ip_period - 1;
        bnum = intra_period - inum - pnum;
    }

    mfc_context->brc.mode = encoder_context->rate_control_mode;

    /* Split the GOP bit budget across frame types with fixed weights. */
    mfc_context->brc.target_frame_size[HEVC_SLICE_I] = (int)((double)((bitrate * intra_period) / framerate) /
                                                             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
    mfc_context->brc.target_frame_size[HEVC_SLICE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
    mfc_context->brc.target_frame_size[HEVC_SLICE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];

    mfc_context->brc.gop_nums[HEVC_SLICE_I] = inum;
    mfc_context->brc.gop_nums[HEVC_SLICE_P] = pnum;
    mfc_context->brc.gop_nums[HEVC_SLICE_B] = bnum;

    bpf = mfc_context->brc.bits_per_frame = bitrate / framerate;

    if (!encoder_context->brc.hrd_buffer_size) {
        /* No HRD parameters supplied: buffer = 8x bitrate, start half full.
         * NOTE(review): the (double) cast below applies to the comparison
         * result, not an operand — harmless but confusing; confirm intent. */
        mfc_context->hrd.buffer_size = bitrate * ratio;
        mfc_context->hrd.current_buffer_fullness =
            (double)(bitrate * ratio / 2 < mfc_context->hrd.buffer_size) ?
            bitrate * ratio / 2 : mfc_context->hrd.buffer_size / 2.;
    } else {
        /* Clamp the supplied HRD buffer size to [1x, 32x] of the bitrate. */
        buffer_size = (double)encoder_context->brc.hrd_buffer_size;
        if (buffer_size < bitrate * ratio_min) {
            buffer_size = bitrate * ratio_min;
        } else if (buffer_size > bitrate * ratio_max) {
            buffer_size = bitrate * ratio_max ;
        }
        mfc_context->hrd.buffer_size = buffer_size;
        if (encoder_context->brc.hrd_initial_buffer_fullness) {
            /* Use the supplied initial fullness, falling back to half the
             * buffer when it exceeds the buffer size. */
            mfc_context->hrd.current_buffer_fullness =
                (double)(encoder_context->brc.hrd_initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
                encoder_context->brc.hrd_initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
        } else {
            mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size / 2.;

        }
    }

    mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size / 2.;
    mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size / qp1_size;
    mfc_context->hrd.violation_noted = 0;

    /* Pick the starting P-slice QP by linear interpolation between the
     * QP51 and QP1 frame-size estimates, clamped at the extremes. */
    if ((bpf > qp51_size) && (bpf < qp1_size)) {
        mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
    } else if (bpf >= qp1_size)
        mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 1;
    else if (bpf <= qp51_size)
        mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51;

    mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
    mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;

    /* Keep the initial QPs inside sane per-type ranges. */
    BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 36);
    BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 40);
    BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 45);
}
2279
intel_hcpe_update_hrd(struct encode_state * encode_state,struct gen9_hcpe_context * mfc_context,int frame_bits)2280 int intel_hcpe_update_hrd(struct encode_state *encode_state,
2281 struct gen9_hcpe_context *mfc_context,
2282 int frame_bits)
2283 {
2284 double prev_bf = mfc_context->hrd.current_buffer_fullness;
2285
2286 mfc_context->hrd.current_buffer_fullness -= frame_bits;
2287
2288 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
2289 mfc_context->hrd.current_buffer_fullness = prev_bf;
2290 return BRC_UNDERFLOW;
2291 }
2292
2293 mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
2294 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
2295 if (mfc_context->brc.mode == VA_RC_VBR)
2296 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
2297 else {
2298 mfc_context->hrd.current_buffer_fullness = prev_bf;
2299 return BRC_OVERFLOW;
2300 }
2301 }
2302 return BRC_NO_HRD_VIOLATION;
2303 }
2304
/*
 * Post-packing bitrate control for HEVC CBR encoding.
 *
 * Given the number of bits produced by the frame just packed
 * (frame_bits), predict the QP for the next frame of the same slice
 * type, check HRD buffer compliance via intel_hcpe_update_hrd(), and
 * correct the QPs of the other slice types so they keep their usual
 * relative offsets (BRC_I_P_QP_DIFF, BRC_P_B_QP_DIFF, BRC_I_B_QP_DIFF).
 *
 * Returns:
 *   BRC_NO_HRD_VIOLATION        - frame accepted, QPs updated
 *   BRC_UNDERFLOW / BRC_OVERFLOW - caller should re-encode with new QP
 *   BRC_UNDERFLOW_WITH_MAX_QP / BRC_OVERFLOW_WITH_MIN_QP
 *                               - violation cannot be repaired by QP alone
 */
int intel_hcpe_brc_postpack(struct encode_state *encode_state,
                            struct gen9_hcpe_context *mfc_context,
                            int frame_bits)
{
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    int slicetype = pSliceParameter->slice_type;
    int qpi = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
    int qpp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
    int qpb = mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     * x - how far we are from HRD buffer borders
     * y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;

    /* B slices at an IP-period boundary are rate-controlled as P slices. */
    if (slicetype == HEVC_SLICE_B) {
        if (pSequenceParameter->ip_period == 1) {
            slicetype = HEVC_SLICE_P;
        } else if (mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1) {
            slicetype = HEVC_SLICE_P;
        }
    }

    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;

    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
    /* frame_size_alpha damps the correction: the more frames of this
     * type per GOP (capped at 30), the smaller the per-frame step.
     * A very small HRD buffer (< 5 frames) disables the damping. */
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
                      (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* First-order model: coded bits are roughly inversely proportional
     * to QP, so scale the previous QP by the size ratio. */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    /* checking whether HRD compliance is still met */
    sts = intel_hcpe_update_hrd(encode_state, mfc_context, frame_bits);

    /* calculating QP delta as some function of HRD fullness:
     * x in [-1, 1] measures distance from the target fullness
     * (positive when the buffer is below target), y > 0 measures
     * headroom to the nearest buffer border. */
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness;
        y = mfc_context->hrd.current_buffer_fullness;
    } else {
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    /* exp(-1/y) approaches 1 as the headroom shrinks; sin() supplies
     * the sign and smooth shape of the correction. */
    delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (slicetype == HEVC_SLICE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == HEVC_SLICE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // HEVC_SLICE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        /* Force QP upward to shrink the next attempt. */
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        /* Force QP downward to grow the next attempt. */
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < 1) { // < 0 (?) overflow with minQP
            qpn = 1;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;

    return sts;
}
2432
intel_hcpe_hrd_context_init(struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2433 static void intel_hcpe_hrd_context_init(struct encode_state *encode_state,
2434 struct intel_encoder_context *encoder_context)
2435 {
2436 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2437 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2438 unsigned int target_bit_rate = encoder_context->brc.bits_per_second[0];
2439
2440 // current we only support CBR mode.
2441 if (rate_control_mode == VA_RC_CBR) {
2442 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
2443 mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
2444 mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
2445 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
2446 mfc_context->vui_hrd.i_frame_number = 0;
2447
2448 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
2449 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
2450 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
2451 }
2452
2453 }
2454
2455 void
intel_hcpe_hrd_context_update(struct encode_state * encode_state,struct gen9_hcpe_context * mfc_context)2456 intel_hcpe_hrd_context_update(struct encode_state *encode_state,
2457 struct gen9_hcpe_context *mfc_context)
2458 {
2459 mfc_context->vui_hrd.i_frame_number++;
2460 }
2461
/*
 * Verify that the slices submitted for this picture cover the whole
 * frame: sum the CTUs of every slice and compare against the picture's
 * CTB grid.  Returns 0 when the frame is fully covered, 1 otherwise.
 */
int intel_hcpe_interlace_check(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    VAEncSliceParameterBufferHEVC *slice_param;
    VAEncSequenceParameterBufferHEVC *seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    /* CTB size = min CU size scaled by the max/min log2 difference. */
    int log2_ctb_size = (seq_param->log2_min_luma_coding_block_size_minus3 + 3) +
                        seq_param->log2_diff_max_min_luma_coding_block_size;
    int ctb_size = 1 << log2_ctb_size;
    int ctb_cols = (seq_param->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
    int ctb_rows = (seq_param->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
    int total_ctus = 0;
    int i;

    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
        total_ctus += slice_param->num_ctu_in_slice;
    }

    return (total_ctus == ctb_cols * ctb_rows) ? 0 : 1;
}
2486
intel_hcpe_brc_prepare(struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2487 void intel_hcpe_brc_prepare(struct encode_state *encode_state,
2488 struct intel_encoder_context *encoder_context)
2489 {
2490 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2491 struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
2492
2493 if (rate_control_mode == VA_RC_CBR) {
2494 bool brc_updated;
2495 assert(encoder_context->codec != CODEC_MPEG2);
2496
2497 brc_updated = encoder_context->brc.need_reset;
2498
2499 /*Programing bit rate control */
2500 if ((mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord == 0) ||
2501 brc_updated) {
2502 intel_hcpe_bit_rate_control_context_init(encode_state, encoder_context);
2503 intel_hcpe_brc_init(encode_state, encoder_context);
2504 }
2505
2506 /*Programing HRD control */
2507 if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated)
2508 intel_hcpe_hrd_context_init(encode_state, encoder_context);
2509 }
2510 }
2511
2512 /* HEVC interface API for encoder */
2513
2514 static VAStatus
gen9_hcpe_hevc_encode_picture(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2515 gen9_hcpe_hevc_encode_picture(VADriverContextP ctx,
2516 struct encode_state *encode_state,
2517 struct intel_encoder_context *encoder_context)
2518 {
2519 struct gen9_hcpe_context *hcpe_context = encoder_context->mfc_context;
2520 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2521 int current_frame_bits_size;
2522 int sts;
2523
2524 for (;;) {
2525 gen9_hcpe_init(ctx, encode_state, encoder_context);
2526 intel_hcpe_hevc_prepare(ctx, encode_state, encoder_context);
2527 /*Programing bcs pipeline*/
2528 gen9_hcpe_hevc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
2529 gen9_hcpe_run(ctx, encode_state, encoder_context);
2530 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
2531 gen9_hcpe_stop(ctx, encode_state, encoder_context, ¤t_frame_bits_size);
2532 sts = intel_hcpe_brc_postpack(encode_state, hcpe_context, current_frame_bits_size);
2533 if (sts == BRC_NO_HRD_VIOLATION) {
2534 intel_hcpe_hrd_context_update(encode_state, hcpe_context);
2535 break;
2536 } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
2537 if (!hcpe_context->hrd.violation_noted) {
2538 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
2539 hcpe_context->hrd.violation_noted = 1;
2540 }
2541 return VA_STATUS_SUCCESS;
2542 }
2543 } else {
2544 break;
2545 }
2546 }
2547
2548 return VA_STATUS_SUCCESS;
2549 }
2550
2551 void
gen9_hcpe_context_destroy(void * context)2552 gen9_hcpe_context_destroy(void *context)
2553 {
2554 struct gen9_hcpe_context *hcpe_context = context;
2555 int i;
2556
2557 dri_bo_unreference(hcpe_context->deblocking_filter_line_buffer.bo);
2558 hcpe_context->deblocking_filter_line_buffer.bo = NULL;
2559
2560 dri_bo_unreference(hcpe_context->deblocking_filter_tile_line_buffer.bo);
2561 hcpe_context->deblocking_filter_tile_line_buffer.bo = NULL;
2562
2563 dri_bo_unreference(hcpe_context->deblocking_filter_tile_column_buffer.bo);
2564 hcpe_context->deblocking_filter_tile_column_buffer.bo = NULL;
2565
2566 dri_bo_unreference(hcpe_context->uncompressed_picture_source.bo);
2567 hcpe_context->uncompressed_picture_source.bo = NULL;
2568
2569 dri_bo_unreference(hcpe_context->metadata_line_buffer.bo);
2570 hcpe_context->metadata_line_buffer.bo = NULL;
2571
2572 dri_bo_unreference(hcpe_context->metadata_tile_line_buffer.bo);
2573 hcpe_context->metadata_tile_line_buffer.bo = NULL;
2574
2575 dri_bo_unreference(hcpe_context->metadata_tile_column_buffer.bo);
2576 hcpe_context->metadata_tile_column_buffer.bo = NULL;
2577
2578 dri_bo_unreference(hcpe_context->sao_line_buffer.bo);
2579 hcpe_context->sao_line_buffer.bo = NULL;
2580
2581 dri_bo_unreference(hcpe_context->sao_tile_line_buffer.bo);
2582 hcpe_context->sao_tile_line_buffer.bo = NULL;
2583
2584 dri_bo_unreference(hcpe_context->sao_tile_column_buffer.bo);
2585 hcpe_context->sao_tile_column_buffer.bo = NULL;
2586
2587 /* mv temporal buffer */
2588 for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
2589 if (hcpe_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
2590 dri_bo_unreference(hcpe_context->current_collocated_mv_temporal_buffer[i].bo);
2591 hcpe_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
2592 }
2593
2594 for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
2595 dri_bo_unreference(hcpe_context->reference_surfaces[i].bo);
2596 hcpe_context->reference_surfaces[i].bo = NULL;
2597 }
2598
2599 dri_bo_unreference(hcpe_context->hcp_indirect_cu_object.bo);
2600 hcpe_context->hcp_indirect_cu_object.bo = NULL;
2601
2602 dri_bo_unreference(hcpe_context->hcp_indirect_pak_bse_object.bo);
2603 hcpe_context->hcp_indirect_pak_bse_object.bo = NULL;
2604
2605 dri_bo_unreference(hcpe_context->hcp_batchbuffer_surface.bo);
2606 hcpe_context->hcp_batchbuffer_surface.bo = NULL;
2607
2608 dri_bo_unreference(hcpe_context->aux_batchbuffer_surface.bo);
2609 hcpe_context->aux_batchbuffer_surface.bo = NULL;
2610
2611 if (hcpe_context->aux_batchbuffer)
2612 intel_batchbuffer_free(hcpe_context->aux_batchbuffer);
2613
2614 hcpe_context->aux_batchbuffer = NULL;
2615
2616 free(hcpe_context);
2617 }
2618
gen9_hcpe_pipeline(VADriverContextP ctx,VAProfile profile,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2619 VAStatus gen9_hcpe_pipeline(VADriverContextP ctx,
2620 VAProfile profile,
2621 struct encode_state *encode_state,
2622 struct intel_encoder_context *encoder_context)
2623 {
2624 VAStatus vaStatus;
2625
2626 switch (profile) {
2627 case VAProfileHEVCMain:
2628 case VAProfileHEVCMain10:
2629 vaStatus = gen9_hcpe_hevc_encode_picture(ctx, encode_state, encoder_context);
2630 break;
2631
2632 default:
2633 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2634 break;
2635 }
2636
2637 return vaStatus;
2638 }
2639
gen9_hcpe_context_init(VADriverContextP ctx,struct intel_encoder_context * encoder_context)2640 Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2641 {
2642 struct gen9_hcpe_context *hcpe_context = calloc(1, sizeof(struct gen9_hcpe_context));
2643
2644 assert(hcpe_context);
2645 hcpe_context->pipe_mode_select = gen9_hcpe_pipe_mode_select;
2646 hcpe_context->set_surface_state = gen9_hcpe_surface_state;
2647 hcpe_context->ind_obj_base_addr_state = gen9_hcpe_ind_obj_base_addr_state;
2648 hcpe_context->pic_state = gen9_hcpe_hevc_pic_state;
2649 hcpe_context->qm_state = gen9_hcpe_hevc_qm_state;
2650 hcpe_context->fqm_state = gen9_hcpe_hevc_fqm_state;
2651 hcpe_context->insert_object = gen9_hcpe_hevc_insert_object;
2652 hcpe_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
2653
2654 encoder_context->mfc_context = hcpe_context;
2655 encoder_context->mfc_context_destroy = gen9_hcpe_context_destroy;
2656 encoder_context->mfc_pipeline = gen9_hcpe_pipeline;
2657 encoder_context->mfc_brc_prepare = intel_hcpe_brc_prepare;
2658
2659 hevc_gen_default_iq_matrix_encoder(&hcpe_context->iq_matrix_hevc);
2660
2661 return True;
2662 }
2663