1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
14 * of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE.
23 *
24 * Authors:
25 * Zhao, Yakui <yakui.zhao@intel.com>
26 *
27 */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
48
49 #define MAX_VP9_ENCODER_FRAMERATE 60
50 #define MAX_VP9_ENCODER_SURFACES 64
51
52 #define MAX_URB_SIZE 4096 /* In register */
53 #define NUM_KERNELS_PER_GPE_CONTEXT 1
54
55 #define VP9_BRC_KBPS 1000
56
57 #define BRC_KERNEL_CBR 0x0010
58 #define BRC_KERNEL_VBR 0x0020
59 #define BRC_KERNEL_AVBR 0x0040
60 #define BRC_KERNEL_CQL 0x0080
61
62 #define VP9_PIC_STATE_BUFFER_SIZE 192
63
/* One entry in the packed kernel binary's header table.
 * kernel_start_pointer holds the kernel's offset in 64-byte units;
 * consumers shift it left by 6 to obtain the byte offset.
 */
typedef struct _intel_kernel_header_ {
    uint32_t reserved : 6;
    uint32_t kernel_start_pointer : 26;
} intel_kernel_header;

/* Header table placed at the start of the VP9 encoder kernel binary.
 * The entry order below mirrors the layout in the binary; lookup code
 * indexes from a base entry (e.g. VP9_Enc_I_32x32 for MBENC) and uses
 * the following entry's start pointer to derive each kernel's size.
 */
typedef struct _intel_vp9_kernel_header {
    int nKernelCount;                        /* number of kernels in the binary */
    intel_kernel_header PLY_DSCALE;          /* scaling (2x/4x) kernel */
    intel_kernel_header VP9_ME_P;            /* motion estimation */
    intel_kernel_header VP9_Enc_I_32x32;     /* MBENC intra 32x32 */
    intel_kernel_header VP9_Enc_I_16x16;     /* MBENC intra 16x16 */
    intel_kernel_header VP9_Enc_P;           /* MBENC inter */
    intel_kernel_header VP9_Enc_TX;          /* MBENC transform */
    intel_kernel_header VP9_DYS;             /* dynamic scaling */

    intel_kernel_header VP9BRC_Intra_Distortion; /* BRC intra distortion */
    intel_kernel_header VP9BRC_Init;             /* BRC init */
    intel_kernel_header VP9BRC_Reset;            /* BRC reset */
    intel_kernel_header VP9BRC_Update;           /* BRC update (last entry) */
} intel_vp9_kernel_header;
84
85 #define DYS_1X_FLAG 0x01
86 #define DYS_4X_FLAG 0x02
87 #define DYS_16X_FLAG 0x04
88
89 struct vp9_surface_param {
90 uint32_t frame_width;
91 uint32_t frame_height;
92 };
93
/* Pack a signed integer into sign-magnitude form.
 * Bit (sign_bit_pos - 1) is the sign bit (set for negative values);
 * the low (sign_bit_pos - 1) bits carry the magnitude, truncated to
 * that width. Non-negative values are simply masked to the magnitude
 * field with the sign bit clear.
 */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    uint32_t mag_mask = (1u << (sign_bit_pos - 1)) - 1;
    uint32_t packed;

    if (val < 0)
        packed = (1u << (sign_bit_pos - 1)) | ((uint32_t)(-val) & mag_mask);
    else
        packed = (uint32_t)val & mag_mask;

    return packed;
}
105
106 static bool
intel_vp9_get_kernel_header_and_size(void * pvbinary,int binary_size,INTEL_VP9_ENC_OPERATION operation,int krnstate_idx,struct i965_kernel * ret_kernel)107 intel_vp9_get_kernel_header_and_size(
108 void *pvbinary,
109 int binary_size,
110 INTEL_VP9_ENC_OPERATION operation,
111 int krnstate_idx,
112 struct i965_kernel *ret_kernel)
113 {
114 typedef uint32_t BIN_PTR[4];
115
116 char *bin_start;
117 intel_vp9_kernel_header *pkh_table;
118 intel_kernel_header *pcurr_header, *pinvalid_entry, *pnext_header;
119 int next_krnoffset;
120
121 if (!pvbinary || !ret_kernel)
122 return false;
123
124 bin_start = (char *)pvbinary;
125 pkh_table = (intel_vp9_kernel_header *)pvbinary;
126 pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
127 next_krnoffset = binary_size;
128
129 if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X)) {
130 pcurr_header = &pkh_table->PLY_DSCALE;
131 } else if (operation == INTEL_VP9_ENC_ME) {
132 pcurr_header = &pkh_table->VP9_ME_P;
133 } else if (operation == INTEL_VP9_ENC_MBENC) {
134 pcurr_header = &pkh_table->VP9_Enc_I_32x32;
135 } else if (operation == INTEL_VP9_ENC_DYS) {
136 pcurr_header = &pkh_table->VP9_DYS;
137 } else if (operation == INTEL_VP9_ENC_BRC) {
138 pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
139 } else {
140 return false;
141 }
142
143 pcurr_header += krnstate_idx;
144 ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
145
146 pnext_header = (pcurr_header + 1);
147 if (pnext_header < pinvalid_entry) {
148 next_krnoffset = pnext_header->kernel_start_pointer << 6;
149 }
150 ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
151
152 return true;
153 }
154
155
156 static void
gen9_free_surfaces_vp9(void ** data)157 gen9_free_surfaces_vp9(void **data)
158 {
159 struct gen9_surface_vp9 *vp9_surface;
160
161 if (!data || !*data)
162 return;
163
164 vp9_surface = *data;
165
166 if (vp9_surface->scaled_4x_surface_obj) {
167 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
168 vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
169 vp9_surface->scaled_4x_surface_obj = NULL;
170 }
171
172 if (vp9_surface->scaled_16x_surface_obj) {
173 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
174 vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
175 vp9_surface->scaled_16x_surface_obj = NULL;
176 }
177
178 if (vp9_surface->dys_4x_surface_obj) {
179 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
180 vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
181 vp9_surface->dys_4x_surface_obj = NULL;
182 }
183
184 if (vp9_surface->dys_16x_surface_obj) {
185 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
186 vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
187 vp9_surface->dys_16x_surface_obj = NULL;
188 }
189
190 if (vp9_surface->dys_surface_obj) {
191 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
192 vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
193 vp9_surface->dys_surface_obj = NULL;
194 }
195
196 free(vp9_surface);
197
198 *data = NULL;
199
200 return;
201 }
202
203 static VAStatus
gen9_vp9_init_check_surfaces(VADriverContextP ctx,struct object_surface * obj_surface,struct vp9_surface_param * surface_param)204 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
205 struct object_surface *obj_surface,
206 struct vp9_surface_param *surface_param)
207 {
208 struct i965_driver_data *i965 = i965_driver_data(ctx);
209 struct gen9_surface_vp9 *vp9_surface;
210 int downscaled_width_4x, downscaled_height_4x;
211 int downscaled_width_16x, downscaled_height_16x;
212
213 if (!obj_surface || !obj_surface->bo)
214 return VA_STATUS_ERROR_INVALID_SURFACE;
215
216 if (obj_surface->private_data &&
217 obj_surface->free_private_data != gen9_free_surfaces_vp9) {
218 obj_surface->free_private_data(&obj_surface->private_data);
219 obj_surface->private_data = NULL;
220 }
221
222 if (obj_surface->private_data) {
223 /* if the frame width/height is already the same as the expected,
224 * it is unncessary to reallocate it.
225 */
226 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
227 if (vp9_surface->frame_width >= surface_param->frame_width ||
228 vp9_surface->frame_height >= surface_param->frame_height)
229 return VA_STATUS_SUCCESS;
230
231 obj_surface->free_private_data(&obj_surface->private_data);
232 obj_surface->private_data = NULL;
233 vp9_surface = NULL;
234 }
235
236 vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
237
238 if (!vp9_surface)
239 return VA_STATUS_ERROR_ALLOCATION_FAILED;
240
241 vp9_surface->ctx = ctx;
242 obj_surface->private_data = vp9_surface;
243 obj_surface->free_private_data = gen9_free_surfaces_vp9;
244
245 vp9_surface->frame_width = surface_param->frame_width;
246 vp9_surface->frame_height = surface_param->frame_height;
247
248 downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
249 downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
250
251 i965_CreateSurfaces(ctx,
252 downscaled_width_4x,
253 downscaled_height_4x,
254 VA_RT_FORMAT_YUV420,
255 1,
256 &vp9_surface->scaled_4x_surface_id);
257
258 vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
259
260 if (!vp9_surface->scaled_4x_surface_obj) {
261 return VA_STATUS_ERROR_ALLOCATION_FAILED;
262 }
263
264 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
265 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
266
267 downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
268 downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
269 i965_CreateSurfaces(ctx,
270 downscaled_width_16x,
271 downscaled_height_16x,
272 VA_RT_FORMAT_YUV420,
273 1,
274 &vp9_surface->scaled_16x_surface_id);
275 vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
276
277 if (!vp9_surface->scaled_16x_surface_obj) {
278 return VA_STATUS_ERROR_ALLOCATION_FAILED;
279 }
280
281 i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
282 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
283
284 return VA_STATUS_SUCCESS;
285 }
286
287 static VAStatus
gen9_vp9_check_dys_surfaces(VADriverContextP ctx,struct object_surface * obj_surface,struct vp9_surface_param * surface_param)288 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
289 struct object_surface *obj_surface,
290 struct vp9_surface_param *surface_param)
291 {
292 struct i965_driver_data *i965 = i965_driver_data(ctx);
293 struct gen9_surface_vp9 *vp9_surface;
294 int dys_width_4x, dys_height_4x;
295 int dys_width_16x, dys_height_16x;
296
297 /* As this is handled after the surface checking, it is unnecessary
298 * to check the surface bo and vp9_priv_surface again
299 */
300
301 vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
302
303 if (!vp9_surface)
304 return VA_STATUS_ERROR_INVALID_SURFACE;
305
306 /* if the frame_width/height of dys_surface is the same as
307 * the expected, it is unnecessary to allocate it again
308 */
309 if (vp9_surface->dys_frame_width == surface_param->frame_width &&
310 vp9_surface->dys_frame_height == surface_param->frame_height)
311 return VA_STATUS_SUCCESS;
312
313 if (vp9_surface->dys_4x_surface_obj) {
314 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
315 vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
316 vp9_surface->dys_4x_surface_obj = NULL;
317 }
318
319 if (vp9_surface->dys_16x_surface_obj) {
320 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
321 vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
322 vp9_surface->dys_16x_surface_obj = NULL;
323 }
324
325 if (vp9_surface->dys_surface_obj) {
326 i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
327 vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
328 vp9_surface->dys_surface_obj = NULL;
329 }
330
331 vp9_surface->dys_frame_width = surface_param->frame_width;
332 vp9_surface->dys_frame_height = surface_param->frame_height;
333
334 i965_CreateSurfaces(ctx,
335 surface_param->frame_width,
336 surface_param->frame_height,
337 VA_RT_FORMAT_YUV420,
338 1,
339 &vp9_surface->dys_surface_id);
340 vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
341
342 if (!vp9_surface->dys_surface_obj) {
343 return VA_STATUS_ERROR_ALLOCATION_FAILED;
344 }
345
346 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
347 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
348
349 dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
350 dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
351
352 i965_CreateSurfaces(ctx,
353 dys_width_4x,
354 dys_height_4x,
355 VA_RT_FORMAT_YUV420,
356 1,
357 &vp9_surface->dys_4x_surface_id);
358
359 vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
360
361 if (!vp9_surface->dys_4x_surface_obj) {
362 return VA_STATUS_ERROR_ALLOCATION_FAILED;
363 }
364
365 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
366 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
367
368 dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
369 dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
370 i965_CreateSurfaces(ctx,
371 dys_width_16x,
372 dys_height_16x,
373 VA_RT_FORMAT_YUV420,
374 1,
375 &vp9_surface->dys_16x_surface_id);
376 vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
377
378 if (!vp9_surface->dys_16x_surface_obj) {
379 return VA_STATUS_ERROR_ALLOCATION_FAILED;
380 }
381
382 i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
383 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
384
385 return VA_STATUS_SUCCESS;
386 }
387
388 static VAStatus
gen9_vp9_allocate_resources(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int allocate)389 gen9_vp9_allocate_resources(VADriverContextP ctx,
390 struct encode_state *encode_state,
391 struct intel_encoder_context *encoder_context,
392 int allocate)
393 {
394 struct i965_driver_data *i965 = i965_driver_data(ctx);
395 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
396 struct gen9_vp9_state *vp9_state;
397 int allocate_flag, i;
398 int res_size;
399 uint32_t frame_width_in_sb, frame_height_in_sb, frame_sb_num;
400 unsigned int width, height;
401
402 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
403
404 if (!vp9_state || !vp9_state->pic_param)
405 return VA_STATUS_ERROR_INVALID_PARAMETER;
406
407 /* the buffer related with BRC is not changed. So it is allocated
408 * based on the input parameter
409 */
410 if (allocate) {
411 i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
412 i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
413 i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
414 i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
415 i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
416 i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
417 i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
418 i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
419 i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
420 i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
421 i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
422
423 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
424 &vme_context->res_brc_history_buffer,
425 VP9_BRC_HISTORY_BUFFER_SIZE,
426 "Brc History buffer");
427 if (!allocate_flag)
428 goto failed_allocation;
429 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
430 &vme_context->res_brc_const_data_buffer,
431 VP9_BRC_CONSTANTSURFACE_SIZE,
432 "Brc Constant buffer");
433 if (!allocate_flag)
434 goto failed_allocation;
435
436 res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
437 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
438 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
439 &vme_context->res_brc_mbenc_curbe_write_buffer,
440 res_size,
441 "Brc Curbe write");
442 if (!allocate_flag)
443 goto failed_allocation;
444
445 res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
446 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
447 &vme_context->res_pic_state_brc_read_buffer,
448 res_size,
449 "Pic State Brc_read");
450 if (!allocate_flag)
451 goto failed_allocation;
452
453 res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
454 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
455 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
456 res_size,
457 "Pic State Brc_write Hfw_Read");
458 if (!allocate_flag)
459 goto failed_allocation;
460
461 res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
462 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
463 &vme_context->res_pic_state_hfw_write_buffer,
464 res_size,
465 "Pic State Hfw Write");
466 if (!allocate_flag)
467 goto failed_allocation;
468
469 res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
470 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
471 &vme_context->res_seg_state_brc_read_buffer,
472 res_size,
473 "Segment state brc_read");
474 if (!allocate_flag)
475 goto failed_allocation;
476
477 res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
478 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
479 &vme_context->res_seg_state_brc_write_buffer,
480 res_size,
481 "Segment state brc_write");
482 if (!allocate_flag)
483 goto failed_allocation;
484
485 res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
486 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
487 &vme_context->res_brc_bitstream_size_buffer,
488 res_size,
489 "Brc bitstream buffer");
490 if (!allocate_flag)
491 goto failed_allocation;
492
493 res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
494 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
495 &vme_context->res_brc_hfw_data_buffer,
496 res_size,
497 "mfw Brc data");
498 if (!allocate_flag)
499 goto failed_allocation;
500
501 res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
502 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
503 &vme_context->res_brc_mmdk_pak_buffer,
504 res_size,
505 "Brc mmdk_pak");
506 if (!allocate_flag)
507 goto failed_allocation;
508 }
509
510 /* If the width/height of allocated buffer is greater than the expected,
511 * it is unnecessary to allocate it again
512 */
513 if (vp9_state->res_width >= vp9_state->frame_width &&
514 vp9_state->res_height >= vp9_state->frame_height) {
515
516 return VA_STATUS_SUCCESS;
517 }
518 frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
519 frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
520 frame_sb_num = frame_width_in_sb * frame_height_in_sb;
521
522 i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
523 res_size = frame_width_in_sb * 64;
524 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
525 &vme_context->res_hvd_line_buffer,
526 res_size,
527 "VP9 hvd line line");
528 if (!allocate_flag)
529 goto failed_allocation;
530
531 i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
532 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
533 &vme_context->res_hvd_tile_line_buffer,
534 res_size,
535 "VP9 hvd tile_line line");
536 if (!allocate_flag)
537 goto failed_allocation;
538
539 i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
540 res_size = frame_width_in_sb * 18 * 64;
541 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
542 &vme_context->res_deblocking_filter_line_buffer,
543 res_size,
544 "VP9 deblocking filter line");
545 if (!allocate_flag)
546 goto failed_allocation;
547
548 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
549 res_size = frame_width_in_sb * 18 * 64;
550 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
551 &vme_context->res_deblocking_filter_tile_line_buffer,
552 res_size,
553 "VP9 deblocking tile line");
554 if (!allocate_flag)
555 goto failed_allocation;
556
557 i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
558 res_size = frame_height_in_sb * 17 * 64;
559 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
560 &vme_context->res_deblocking_filter_tile_col_buffer,
561 res_size,
562 "VP9 deblocking tile col");
563 if (!allocate_flag)
564 goto failed_allocation;
565
566 i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
567 res_size = frame_width_in_sb * 5 * 64;
568 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
569 &vme_context->res_metadata_line_buffer,
570 res_size,
571 "VP9 metadata line");
572 if (!allocate_flag)
573 goto failed_allocation;
574
575 i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
576 res_size = frame_width_in_sb * 5 * 64;
577 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
578 &vme_context->res_metadata_tile_line_buffer,
579 res_size,
580 "VP9 metadata tile line");
581 if (!allocate_flag)
582 goto failed_allocation;
583
584 i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
585 res_size = frame_height_in_sb * 5 * 64;
586 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
587 &vme_context->res_metadata_tile_col_buffer,
588 res_size,
589 "VP9 metadata tile col");
590 if (!allocate_flag)
591 goto failed_allocation;
592
593 i965_free_gpe_resource(&vme_context->res_prob_buffer);
594 res_size = 2048;
595 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
596 &vme_context->res_prob_buffer,
597 res_size,
598 "VP9 prob");
599 if (!allocate_flag)
600 goto failed_allocation;
601
602 i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
603 res_size = frame_sb_num * 64;
604 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
605 &vme_context->res_segmentid_buffer,
606 res_size,
607 "VP9 segment id");
608 if (!allocate_flag)
609 goto failed_allocation;
610
611 i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
612
613 i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
614 res_size = 29 * 64;
615 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
616 &vme_context->res_prob_delta_buffer,
617 res_size,
618 "VP9 prob delta");
619 if (!allocate_flag)
620 goto failed_allocation;
621
622 i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
623
624 i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
625 res_size = 29 * 64;
626 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
627 &vme_context->res_prob_delta_buffer,
628 res_size,
629 "VP9 prob delta");
630 if (!allocate_flag)
631 goto failed_allocation;
632
633 i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
634 res_size = 32 * 64;
635 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
636 &vme_context->res_compressed_input_buffer,
637 res_size,
638 "VP9 compressed_input buffer");
639 if (!allocate_flag)
640 goto failed_allocation;
641
642 i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
643 res_size = 193 * 64;
644 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
645 &vme_context->res_prob_counter_buffer,
646 res_size,
647 "VP9 prob counter");
648 if (!allocate_flag)
649 goto failed_allocation;
650
651 i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
652 res_size = frame_sb_num * 64;
653 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
654 &vme_context->res_tile_record_streamout_buffer,
655 res_size,
656 "VP9 tile record stream_out");
657 if (!allocate_flag)
658 goto failed_allocation;
659
660 i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
661 res_size = frame_sb_num * 64;
662 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
663 &vme_context->res_cu_stat_streamout_buffer,
664 res_size,
665 "VP9 CU stat stream_out");
666 if (!allocate_flag)
667 goto failed_allocation;
668
669 width = vp9_state->downscaled_width_4x_in_mb * 32;
670 height = vp9_state->downscaled_height_4x_in_mb * 16;
671 i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
672 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
673 &vme_context->s4x_memv_data_buffer,
674 width, height,
675 ALIGN(width, 64),
676 "VP9 4x MEMV data");
677 if (!allocate_flag)
678 goto failed_allocation;
679
680 width = vp9_state->downscaled_width_4x_in_mb * 8;
681 height = vp9_state->downscaled_height_4x_in_mb * 16;
682 i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
683 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
684 &vme_context->s4x_memv_distortion_buffer,
685 width, height,
686 ALIGN(width, 64),
687 "VP9 4x MEMV distorion");
688 if (!allocate_flag)
689 goto failed_allocation;
690
691 width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
692 height = vp9_state->downscaled_height_16x_in_mb * 16;
693 i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
694 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
695 &vme_context->s16x_memv_data_buffer,
696 width, height,
697 width,
698 "VP9 16x MEMV data");
699 if (!allocate_flag)
700 goto failed_allocation;
701
702 width = vp9_state->frame_width_in_mb * 16;
703 height = vp9_state->frame_height_in_mb * 8;
704 i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
705 allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
706 &vme_context->res_output_16x16_inter_modes,
707 width, height,
708 ALIGN(width, 64),
709 "VP9 output inter_mode");
710 if (!allocate_flag)
711 goto failed_allocation;
712
713 res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
714 16 * 4;
715 for (i = 0; i < 2; i++) {
716 i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
717 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
718 &vme_context->res_mode_decision[i],
719 res_size,
720 "VP9 mode decision");
721 if (!allocate_flag)
722 goto failed_allocation;
723
724 }
725
726 res_size = frame_sb_num * 9 * 64;
727 for (i = 0; i < 2; i++) {
728 i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
729 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
730 &vme_context->res_mv_temporal_buffer[i],
731 res_size,
732 "VP9 temporal mv");
733 if (!allocate_flag)
734 goto failed_allocation;
735 }
736
737 vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
738 res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
739 i965_free_gpe_resource(&vme_context->res_mb_code_surface);
740 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
741 &vme_context->res_mb_code_surface,
742 ALIGN(res_size, 4096),
743 "VP9 mb_code surface");
744 if (!allocate_flag)
745 goto failed_allocation;
746
747 res_size = 128;
748 i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
749 allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
750 &vme_context->res_pak_uncompressed_input_buffer,
751 ALIGN(res_size, 4096),
752 "VP9 pak_uncompressed_input");
753 if (!allocate_flag)
754 goto failed_allocation;
755
756 if (!vme_context->frame_header_data) {
757 /* allocate 512 bytes for generating the uncompressed header */
758 vme_context->frame_header_data = calloc(1, 512);
759 }
760
761 vp9_state->res_width = vp9_state->frame_width;
762 vp9_state->res_height = vp9_state->frame_height;
763
764 return VA_STATUS_SUCCESS;
765
766 failed_allocation:
767 return VA_STATUS_ERROR_ALLOCATION_FAILED;
768 }
769
/* Release every GPE buffer owned by the VP9 VME context, mirroring the
 * allocations done in gen9_vp9_allocate_resources().
 * NOTE(review): the BRC buffers are freed only when brc_enabled is set,
 * while gen9_vp9_allocate_resources() allocates them under its own
 * 'allocate' flag — this assumes the two flags always agree at the call
 * sites; verify, otherwise the BRC buffers could leak.
 */
static void
gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
{
    int i;
    struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;

    /* BRC-related buffers */
    if (vp9_state->brc_enabled) {
        i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
        i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
        i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
        i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
        i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
        i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
        i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
    }

    /* Resolution-dependent buffers (always freed) */
    i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
    i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
    i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
    i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
    i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
    i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_buffer);
    i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
    i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
    i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
    i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
    i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
    i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
    i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
    i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
    /* Double-buffered mode-decision and temporal-MV buffers */
    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
    }

    for (i = 0; i < 2; i++) {
        i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
    }

    i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
    i965_free_gpe_resource(&vme_context->res_mb_code_surface);
    i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);

    /* Heap buffer used for packing the uncompressed frame header */
    if (vme_context->frame_header_data) {
        free(vme_context->frame_header_data);
        vme_context->frame_header_data = NULL;
    }
    return;
}
826
/* Translate the encoder's kernel-walker description into the hardware
 * media-object-walker parameters.
 * Three walking patterns are produced:
 *   - no_dependency: plain raster scan, scoreboard disabled;
 *   - VP9_45Z_DEGREE: 45-degree zig-zag pattern over a transformed
 *     (x/2+1, 2*y) grid with a 4-step inner unit;
 *   - otherwise: 26-degree wavefront pattern.
 * The exact stride/unit values below encode hardware walking patterns;
 * they are kept verbatim.
 */
static void
gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
                                        struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
                                        struct gpe_media_object_walker_parameter *walker_param)
{
    memset(walker_param, 0, sizeof(*walker_param));

    walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;

    /* Default: walk the full resolution in both block and global terms */
    walker_param->block_resolution.x = kernel_walker_param->resolution_x;
    walker_param->block_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_resolution.x = kernel_walker_param->resolution_x;
    walker_param->global_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
    walker_param->global_outer_loop_stride.y = 0;

    walker_param->global_inner_loop_unit.x = 0;
    walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;

    walker_param->local_loop_exec_count = 0xFFFF; //MAX VALUE
    walker_param->global_loop_exec_count = 0xFFFF; //MAX VALUE

    if (kernel_walker_param->no_dependency) {
        /* No inter-block dependency: disable the scoreboard entirely */
        walker_param->scoreboard_mask = 0;
        walker_param->use_scoreboard = 0;
        // Raster scan walking pattern
        walker_param->local_outer_loop_stride.x = 0;
        walker_param->local_outer_loop_stride.y = 1;
        walker_param->local_inner_loop_unit.x = 1;
        walker_param->local_inner_loop_unit.y = 0;
        walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
        walker_param->local_end.y = 0;
    } else {
        walker_param->local_end.x = 0;
        walker_param->local_end.y = 0;

        if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
            // 45z degree
            walker_param->scoreboard_mask = 0x0F;

            walker_param->global_loop_exec_count = 0x3FF;
            walker_param->local_loop_exec_count = 0x3FF;

            /* Transformed grid: half the width (+1), double the height */
            walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
            walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;

            walker_param->global_start.x = 0;
            walker_param->global_start.y = 0;

            walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
            walker_param->global_outer_loop_stride.y = 0;

            walker_param->global_inner_loop_unit.x = 0;
            walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;

            walker_param->block_resolution.x = walker_param->global_resolution.x;
            walker_param->block_resolution.y = walker_param->global_resolution.y;

            walker_param->local_start.x = 0;
            walker_param->local_start.y = 0;

            walker_param->local_outer_loop_stride.x = 1;
            walker_param->local_outer_loop_stride.y = 0;

            /* Inner unit steps back one column and down four rows */
            walker_param->local_inner_loop_unit.x = -1;
            walker_param->local_inner_loop_unit.y = 4;

            walker_param->middle_loop_extra_steps = 3;
            walker_param->mid_loop_unit_x = 0;
            walker_param->mid_loop_unit_y = 1;
        } else {
            // 26 degree
            walker_param->scoreboard_mask = 0x0F;
            walker_param->local_outer_loop_stride.x = 1;
            walker_param->local_outer_loop_stride.y = 0;
            walker_param->local_inner_loop_unit.x = -2;
            walker_param->local_inner_loop_unit.y = 1;
        }
    }
}
909
910 static void
gen9_run_kernel_media_object(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context,int media_function,struct gpe_media_object_parameter * param)911 gen9_run_kernel_media_object(VADriverContextP ctx,
912 struct intel_encoder_context *encoder_context,
913 struct i965_gpe_context *gpe_context,
914 int media_function,
915 struct gpe_media_object_parameter *param)
916 {
917 struct intel_batchbuffer *batch = encoder_context->base.batch;
918 struct vp9_encode_status_buffer_internal *status_buffer;
919 struct gen9_vp9_state *vp9_state;
920 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
921
922 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
923 if (!vp9_state || !batch)
924 return;
925
926 intel_batchbuffer_start_atomic(batch, 0x1000);
927
928 status_buffer = &(vp9_state->status_buffer);
929 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
930 mi_store_data_imm.bo = status_buffer->bo;
931 mi_store_data_imm.offset = status_buffer->media_index_offset;
932 mi_store_data_imm.dw0 = media_function;
933 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
934
935 intel_batchbuffer_emit_mi_flush(batch);
936 gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
937 gen8_gpe_media_object(ctx, gpe_context, batch, param);
938 gen8_gpe_media_state_flush(ctx, gpe_context, batch);
939
940 gen9_gpe_pipeline_end(ctx, gpe_context, batch);
941
942 intel_batchbuffer_end_atomic(batch);
943
944 intel_batchbuffer_flush(batch);
945 }
946
947 static void
gen9_run_kernel_media_object_walker(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context,int media_function,struct gpe_media_object_walker_parameter * param)948 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
949 struct intel_encoder_context *encoder_context,
950 struct i965_gpe_context *gpe_context,
951 int media_function,
952 struct gpe_media_object_walker_parameter *param)
953 {
954 struct intel_batchbuffer *batch = encoder_context->base.batch;
955 struct vp9_encode_status_buffer_internal *status_buffer;
956 struct gen9_vp9_state *vp9_state;
957 struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
958
959 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
960 if (!vp9_state || !batch)
961 return;
962
963 intel_batchbuffer_start_atomic(batch, 0x1000);
964
965 intel_batchbuffer_emit_mi_flush(batch);
966
967 status_buffer = &(vp9_state->status_buffer);
968 memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
969 mi_store_data_imm.bo = status_buffer->bo;
970 mi_store_data_imm.offset = status_buffer->media_index_offset;
971 mi_store_data_imm.dw0 = media_function;
972 gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
973
974 gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
975 gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
976 gen8_gpe_media_state_flush(ctx, gpe_context, batch);
977
978 gen9_gpe_pipeline_end(ctx, gpe_context, batch);
979
980 intel_batchbuffer_end_atomic(batch);
981
982 intel_batchbuffer_flush(batch);
983 }
984
985 static
gen9_vp9_set_curbe_brc(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_brc_curbe_param * param)986 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
987 struct encode_state *encode_state,
988 struct i965_gpe_context *gpe_context,
989 struct intel_encoder_context *encoder_context,
990 struct gen9_vp9_brc_curbe_param *param)
991 {
992 VAEncSequenceParameterBufferVP9 *seq_param;
993 VAEncPictureParameterBufferVP9 *pic_param;
994 VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
995 vp9_brc_curbe_data *cmd;
996 double dbps_ratio, dInputBitsPerFrame;
997 struct gen9_vp9_state *vp9_state;
998
999 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1000
1001 pic_param = param->ppic_param;
1002 seq_param = param->pseq_param;
1003 segment_param = param->psegment_param;
1004
1005 cmd = i965_gpe_context_map_curbe(gpe_context);
1006
1007 if (!cmd)
1008 return;
1009
1010 memset(cmd, 0, sizeof(vp9_brc_curbe_data));
1011
1012 if (!vp9_state->dys_enabled) {
1013 cmd->dw0.frame_width = pic_param->frame_width_src;
1014 cmd->dw0.frame_height = pic_param->frame_height_src;
1015 } else {
1016 cmd->dw0.frame_width = pic_param->frame_width_dst;
1017 cmd->dw0.frame_height = pic_param->frame_height_dst;
1018 }
1019
1020 cmd->dw1.frame_type = vp9_state->picture_coding_type;
1021 cmd->dw1.segmentation_enable = 0;
1022 cmd->dw1.ref_frame_flags = vp9_state->ref_frame_flag;
1023 cmd->dw1.num_tlevels = 1;
1024
1025 switch (param->media_state_type) {
1026 case VP9_MEDIA_STATE_BRC_INIT_RESET: {
1027 cmd->dw3.max_level_ratiot0 = 0;
1028 cmd->dw3.max_level_ratiot1 = 0;
1029 cmd->dw3.max_level_ratiot2 = 0;
1030 cmd->dw3.max_level_ratiot3 = 0;
1031
1032 cmd->dw4.profile_level_max_frame = seq_param->max_frame_width *
1033 seq_param->max_frame_height;
1034 cmd->dw5.init_buf_fullness = vp9_state->init_vbv_buffer_fullness_in_bit;
1035 cmd->dw6.buf_size = vp9_state->vbv_buffer_size_in_bit;
1036 cmd->dw7.target_bit_rate = (vp9_state->target_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1037 VP9_BRC_KBPS;
1038 cmd->dw8.max_bit_rate = (vp9_state->max_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1039 VP9_BRC_KBPS;
1040 cmd->dw9.min_bit_rate = (vp9_state->min_bit_rate + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1041 VP9_BRC_KBPS;
1042 cmd->dw10.frame_ratem = vp9_state->framerate.num;
1043 cmd->dw11.frame_rated = vp9_state->framerate.den;
1044
1045 cmd->dw14.avbr_accuracy = 30;
1046 cmd->dw14.avbr_convergence = 150;
1047
1048 if (encoder_context->rate_control_mode == VA_RC_CBR) {
1049 cmd->dw12.brc_flag = BRC_KERNEL_CBR;
1050 cmd->dw8.max_bit_rate = cmd->dw7.target_bit_rate;
1051 cmd->dw9.min_bit_rate = 0;
1052 } else if (encoder_context->rate_control_mode == VA_RC_VBR) {
1053 cmd->dw12.brc_flag = BRC_KERNEL_VBR;
1054 } else {
1055 cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1056 cmd->dw16.cq_level = 30;
1057 }
1058 cmd->dw12.gopp = seq_param->intra_period - 1;
1059
1060 cmd->dw13.init_frame_width = pic_param->frame_width_src;
1061 cmd->dw13.init_frame_height = pic_param->frame_height_src;
1062
1063 cmd->dw15.min_qp = 1;
1064 cmd->dw15.max_qp = 255;
1065
1066 cmd->dw16.cq_level = 30;
1067
1068 cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1069 cmd->dw17.brc_overshoot_cbr_pct = 150;
1070
1071 dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1072 dbps_ratio = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1073 if (dbps_ratio < 0.1)
1074 dbps_ratio = 0.1;
1075 if (dbps_ratio > 3.5)
1076 dbps_ratio = 3.5;
1077
1078 *param->pbrc_init_reset_buf_size_in_bits = cmd->dw6.buf_size;
1079 *param->pbrc_init_reset_input_bits_per_frame = dInputBitsPerFrame;
1080 *param->pbrc_init_current_target_buf_full_in_bits = cmd->dw6.buf_size >> 1;
1081
1082 cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1083 cmd->dw18.pframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1084 cmd->dw18.pframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1085 cmd->dw18.pframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1086 cmd->dw19.pframe_deviation_threshold4 = (uint32_t)(50 * pow(0.3, dbps_ratio));
1087 cmd->dw19.pframe_deviation_threshold5 = (uint32_t)(50 * pow(0.46, dbps_ratio));
1088 cmd->dw19.pframe_deviation_threshold6 = (uint32_t)(50 * pow(0.7, dbps_ratio));
1089 cmd->dw19.pframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
1090
1091 cmd->dw20.vbr_deviation_threshold0 = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1092 cmd->dw20.vbr_deviation_threshold1 = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1093 cmd->dw20.vbr_deviation_threshold2 = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1094 cmd->dw20.vbr_deviation_threshold3 = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1095 cmd->dw21.vbr_deviation_threshold4 = (uint32_t)(100 * pow(0.4, dbps_ratio));
1096 cmd->dw21.vbr_deviation_threshold5 = (uint32_t)(100 * pow(0.5, dbps_ratio));
1097 cmd->dw21.vbr_deviation_threshold6 = (uint32_t)(100 * pow(0.75, dbps_ratio));
1098 cmd->dw21.vbr_deviation_threshold7 = (uint32_t)(100 * pow(0.9, dbps_ratio));
1099
1100 cmd->dw22.kframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1101 cmd->dw22.kframe_deviation_threshold1 = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1102 cmd->dw22.kframe_deviation_threshold2 = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1103 cmd->dw22.kframe_deviation_threshold3 = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1104 cmd->dw23.kframe_deviation_threshold4 = (uint32_t)(50 * pow(0.2, dbps_ratio));
1105 cmd->dw23.kframe_deviation_threshold5 = (uint32_t)(50 * pow(0.4, dbps_ratio));
1106 cmd->dw23.kframe_deviation_threshold6 = (uint32_t)(50 * pow(0.66, dbps_ratio));
1107 cmd->dw23.kframe_deviation_threshold7 = (uint32_t)(50 * pow(0.9, dbps_ratio));
1108
1109 break;
1110 }
1111 case VP9_MEDIA_STATE_BRC_UPDATE: {
1112 cmd->dw15.min_qp = 1;
1113 cmd->dw15.max_qp = 255;
1114
1115 cmd->dw25.frame_number = param->frame_number;
1116
1117 // Used in dynamic scaling. set to zero for now
1118 cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1119 cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
1120
1121 if (pic_param->pic_flags.bits.segmentation_enabled) {
1122 cmd->dw32.seg_delta_qp0 = segment_param->seg_data[0].segment_qindex_delta;
1123 cmd->dw32.seg_delta_qp1 = segment_param->seg_data[1].segment_qindex_delta;
1124 cmd->dw32.seg_delta_qp2 = segment_param->seg_data[2].segment_qindex_delta;
1125 cmd->dw32.seg_delta_qp3 = segment_param->seg_data[3].segment_qindex_delta;
1126
1127 cmd->dw33.seg_delta_qp4 = segment_param->seg_data[4].segment_qindex_delta;
1128 cmd->dw33.seg_delta_qp5 = segment_param->seg_data[5].segment_qindex_delta;
1129 cmd->dw33.seg_delta_qp6 = segment_param->seg_data[6].segment_qindex_delta;
1130 cmd->dw33.seg_delta_qp7 = segment_param->seg_data[7].segment_qindex_delta;
1131 }
1132
1133 //cmd->dw34.temporal_id = pPicParams->temporal_idi;
1134 cmd->dw34.temporal_id = 0;
1135 cmd->dw34.multi_ref_qp_check = param->multi_ref_qp_check;
1136
1137 cmd->dw35.max_num_pak_passes = param->brc_num_pak_passes;
1138 cmd->dw35.sync_async = 0;
1139 cmd->dw35.mbrc = param->mbbrc_enabled;
1140 if (*param->pbrc_init_current_target_buf_full_in_bits >
1141 ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1142 *param->pbrc_init_current_target_buf_full_in_bits -=
1143 (double)(*param->pbrc_init_reset_buf_size_in_bits);
1144 cmd->dw35.overflow = 1;
1145 } else
1146 cmd->dw35.overflow = 0;
1147
1148 cmd->dw24.target_size = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1149
1150 cmd->dw36.segmentation = pic_param->pic_flags.bits.segmentation_enabled;
1151
1152 *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1153
1154 cmd->dw38.qdelta_ydc = pic_param->luma_dc_qindex_delta;
1155 cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1156 cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1157
1158 break;
1159 }
1160 case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1161 cmd->dw2.intra_mode_disable = 0;
1162 break;
1163 default:
1164 break;
1165 }
1166
1167 cmd->dw48.brc_y4x_input_bti = VP9_BTI_BRC_SRCY4X_G9;
1168 cmd->dw49.brc_vme_coarse_intra_input_bti = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1169 cmd->dw50.brc_history_buffer_bti = VP9_BTI_BRC_HISTORY_G9;
1170 cmd->dw51.brc_const_data_input_bti = VP9_BTI_BRC_CONSTANT_DATA_G9;
1171 cmd->dw52.brc_distortion_bti = VP9_BTI_BRC_DISTORTION_G9;
1172 cmd->dw53.brc_mmdk_pak_output_bti = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1173 cmd->dw54.brc_enccurbe_input_bti = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1174 cmd->dw55.brc_enccurbe_output_bti = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1175 cmd->dw56.brc_pic_state_input_bti = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1176 cmd->dw57.brc_pic_state_output_bti = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1177 cmd->dw58.brc_seg_state_input_bti = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1178 cmd->dw59.brc_seg_state_output_bti = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1179 cmd->dw60.brc_bitstream_size_data_bti = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1180 cmd->dw61.brc_hfw_data_output_bti = VP9_BTI_BRC_HFW_DATA_G9;
1181
1182 i965_gpe_context_unmap_curbe(gpe_context);
1183 return;
1184 }
1185
1186 static void
gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)1187 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1188 struct encode_state *encode_state,
1189 struct intel_encoder_context *encoder_context,
1190 struct i965_gpe_context *gpe_context)
1191 {
1192 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1193
1194 i965_add_buffer_gpe_surface(ctx,
1195 gpe_context,
1196 &vme_context->res_brc_history_buffer,
1197 0,
1198 vme_context->res_brc_history_buffer.size,
1199 0,
1200 VP9_BTI_BRC_HISTORY_G9);
1201
1202 i965_add_buffer_2d_gpe_surface(ctx,
1203 gpe_context,
1204 &vme_context->s4x_memv_distortion_buffer,
1205 1,
1206 I965_SURFACEFORMAT_R8_UNORM,
1207 VP9_BTI_BRC_DISTORTION_G9);
1208 }
1209
1210 /* The function related with BRC */
1211 static VAStatus
gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1212 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1213 struct encode_state *encode_state,
1214 struct intel_encoder_context *encoder_context)
1215 {
1216 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1217 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1218 struct gpe_media_object_parameter media_object_param;
1219 struct i965_gpe_context *gpe_context;
1220 int gpe_index = VP9_BRC_INIT;
1221 int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1222 struct gen9_vp9_brc_curbe_param brc_initreset_curbe;
1223 VAEncPictureParameterBufferVP9 *pic_param;
1224 struct gen9_vp9_state *vp9_state;
1225
1226 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1227
1228 if (!vp9_state || !vp9_state->pic_param)
1229 return VA_STATUS_ERROR_INVALID_PARAMETER;
1230
1231 pic_param = vp9_state->pic_param;
1232
1233 if (vp9_state->brc_inited)
1234 gpe_index = VP9_BRC_RESET;
1235
1236 gpe_context = &brc_context->gpe_contexts[gpe_index];
1237
1238 gen8_gpe_context_init(ctx, gpe_context);
1239 gen9_gpe_reset_binding_table(ctx, gpe_context);
1240
1241 brc_initreset_curbe.media_state_type = media_function;
1242 brc_initreset_curbe.curr_frame = pic_param->reconstructed_frame;
1243 brc_initreset_curbe.ppic_param = vp9_state->pic_param;
1244 brc_initreset_curbe.pseq_param = vp9_state->seq_param;
1245 brc_initreset_curbe.psegment_param = vp9_state->segment_param;
1246 brc_initreset_curbe.frame_width = vp9_state->frame_width;
1247 brc_initreset_curbe.frame_height = vp9_state->frame_height;
1248 brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1249 &vp9_state->brc_init_current_target_buf_full_in_bits;
1250 brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1251 &vp9_state->brc_init_reset_buf_size_in_bits;
1252 brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1253 &vp9_state->brc_init_reset_input_bits_per_frame;
1254 brc_initreset_curbe.picture_coding_type = vp9_state->picture_coding_type;
1255 brc_initreset_curbe.initbrc = !vp9_state->brc_inited;
1256 brc_initreset_curbe.mbbrc_enabled = 0;
1257 brc_initreset_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1258
1259 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1260 gpe_context,
1261 encoder_context,
1262 &brc_initreset_curbe);
1263
1264 gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1265 gen8_gpe_setup_interface_data(ctx, gpe_context);
1266
1267 memset(&media_object_param, 0, sizeof(media_object_param));
1268 gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1269
1270 return VA_STATUS_SUCCESS;
1271 }
1272
1273 static void
gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)1274 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1275 struct encode_state *encode_state,
1276 struct intel_encoder_context *encoder_context,
1277 struct i965_gpe_context *gpe_context)
1278 {
1279 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1280
1281 struct object_surface *obj_surface;
1282 struct gen9_surface_vp9 *vp9_priv_surface;
1283
1284 /* sScaled4xSurface surface */
1285 obj_surface = encode_state->reconstructed_object;
1286
1287 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1288
1289 obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1290 i965_add_2d_gpe_surface(ctx, gpe_context,
1291 obj_surface,
1292 0, 1,
1293 I965_SURFACEFORMAT_R8_UNORM,
1294 VP9_BTI_BRC_SRCY4X_G9
1295 );
1296
1297 i965_add_adv_gpe_surface(ctx, gpe_context,
1298 obj_surface,
1299 VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1300
1301 i965_add_buffer_2d_gpe_surface(ctx,
1302 gpe_context,
1303 &vme_context->s4x_memv_distortion_buffer,
1304 1,
1305 I965_SURFACEFORMAT_R8_UNORM,
1306 VP9_BTI_BRC_DISTORTION_G9);
1307
1308 return;
1309 }
1310
1311 /* The function related with BRC */
1312 static VAStatus
gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1313 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1314 struct encode_state *encode_state,
1315 struct intel_encoder_context *encoder_context)
1316 {
1317 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1318 struct vp9_brc_context *brc_context = &vme_context->brc_context;
1319 struct i965_gpe_context *gpe_context;
1320 int gpe_index = VP9_BRC_INTRA_DIST;
1321 int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1322 struct gen9_vp9_brc_curbe_param brc_intra_dist_curbe;
1323 VAEncPictureParameterBufferVP9 *pic_param;
1324 struct gen9_vp9_state *vp9_state;
1325 struct gpe_media_object_walker_parameter media_object_walker_param;
1326 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1327
1328 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1329
1330 if (!vp9_state || !vp9_state->pic_param)
1331 return VA_STATUS_ERROR_INVALID_PARAMETER;
1332
1333 pic_param = vp9_state->pic_param;
1334
1335 gpe_context = &brc_context->gpe_contexts[gpe_index];
1336
1337 gen8_gpe_context_init(ctx, gpe_context);
1338 gen9_gpe_reset_binding_table(ctx, gpe_context);
1339
1340 brc_intra_dist_curbe.media_state_type = media_function;
1341 brc_intra_dist_curbe.curr_frame = pic_param->reconstructed_frame;
1342 brc_intra_dist_curbe.ppic_param = vp9_state->pic_param;
1343 brc_intra_dist_curbe.pseq_param = vp9_state->seq_param;
1344 brc_intra_dist_curbe.psegment_param = vp9_state->segment_param;
1345 brc_intra_dist_curbe.frame_width = vp9_state->frame_width;
1346 brc_intra_dist_curbe.frame_height = vp9_state->frame_height;
1347 brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1348 &vp9_state->brc_init_current_target_buf_full_in_bits;
1349 brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1350 &vp9_state->brc_init_reset_buf_size_in_bits;
1351 brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1352 &vp9_state->brc_init_reset_input_bits_per_frame;
1353 brc_intra_dist_curbe.picture_coding_type = vp9_state->picture_coding_type;
1354 brc_intra_dist_curbe.initbrc = !vp9_state->brc_inited;
1355 brc_intra_dist_curbe.mbbrc_enabled = 0;
1356 brc_intra_dist_curbe.ref_frame_flag = vp9_state->ref_frame_flag;
1357
1358 vme_context->pfn_set_curbe_brc(ctx, encode_state,
1359 gpe_context,
1360 encoder_context,
1361 &brc_intra_dist_curbe);
1362
1363 /* zero distortion buffer */
1364 i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1365
1366 gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1367 gen8_gpe_setup_interface_data(ctx, gpe_context);
1368
1369 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1370 kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
1371 kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
1372 kernel_walker_param.no_dependency = 1;
1373
1374 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1375
1376 gen9_run_kernel_media_object_walker(ctx, encoder_context,
1377 gpe_context,
1378 media_function,
1379 &media_object_walker_param);
1380
1381 return VA_STATUS_SUCCESS;
1382 }
1383
1384 static void
intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_resource * gpe_resource)1385 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1386 struct encode_state *encode_state,
1387 struct intel_encoder_context *encoder_context,
1388 struct i965_gpe_resource *gpe_resource)
1389 {
1390 struct gen9_vp9_state *vp9_state;
1391 VAEncPictureParameterBufferVP9 *pic_param;
1392 int frame_width_minus1, frame_height_minus1;
1393 int is_lossless = 0;
1394 int is_intra_only = 0;
1395 unsigned int last_frame_type;
1396 unsigned int ref_flags;
1397 unsigned int use_prev_frame_mvs, adapt_flag;
1398 struct gen9_surface_vp9 *vp9_surface = NULL;
1399 struct object_surface *obj_surface = NULL;
1400 uint32_t scale_h = 0;
1401 uint32_t scale_w = 0;
1402
1403 char *pdata;
1404 int i, j;
1405 unsigned int *cmd_ptr, cmd_value, tmp;
1406
1407 pdata = i965_map_gpe_resource(gpe_resource);
1408 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1409
1410 if (!vp9_state || !vp9_state->pic_param || !pdata)
1411 return;
1412
1413 pic_param = vp9_state->pic_param;
1414 frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1415 frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
1416 if ((pic_param->luma_ac_qindex == 0) &&
1417 (pic_param->luma_dc_qindex_delta == 0) &&
1418 (pic_param->chroma_ac_qindex_delta == 0) &&
1419 (pic_param->chroma_dc_qindex_delta == 0))
1420 is_lossless = 1;
1421
1422 if (pic_param->pic_flags.bits.frame_type)
1423 is_intra_only = pic_param->pic_flags.bits.intra_only;
1424
1425 last_frame_type = vp9_state->vp9_last_frame.frame_type;
1426
1427 use_prev_frame_mvs = 0;
1428 if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1429 last_frame_type = 0;
1430 ref_flags = 0;
1431 } else {
1432 ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1433 (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1434 (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
1435 );
1436 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1437 (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1438 (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1439 !pic_param->pic_flags.bits.intra_only &&
1440 vp9_state->vp9_last_frame.show_frame &&
1441 ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1442 !vp9_state->vp9_last_frame.intra_only)
1443 )
1444 use_prev_frame_mvs = 1;
1445 }
1446 adapt_flag = 0;
1447 if (!pic_param->pic_flags.bits.error_resilient_mode &&
1448 !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
1449 adapt_flag = 1;
1450
1451 for (i = 0; i < 4; i++) {
1452 uint32_t non_first_pass;
1453 non_first_pass = 1;
1454 if (i == 0)
1455 non_first_pass = 0;
1456
1457 cmd_ptr = (unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1458
1459 *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1460 *cmd_ptr++ = (frame_height_minus1 << 16 |
1461 frame_width_minus1);
1462 /* dw2 */
1463 *cmd_ptr++ = (0 << 31 | /* disable segment_in */
1464 0 << 30 | /* disable segment_out */
1465 is_lossless << 29 | /* loseless */
1466 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1467 (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1468 (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1469 (pic_param->sharpness_level << 23) |
1470 (pic_param->filter_level << 17) |
1471 (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1472 (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1473 (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1474 (last_frame_type << 13) |
1475 (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1476 (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1477 (use_prev_frame_mvs) << 10 |
1478 ref_flags |
1479 (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1480 (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1481 (is_intra_only << 2) |
1482 (adapt_flag << 1) |
1483 (pic_param->pic_flags.bits.frame_type) << 0);
1484
1485 *cmd_ptr++ = ((0 << 28) | /* VP9Profile0 */
1486 (0 << 24) | /* 8-bit depth */
1487 (0 << 22) | /* only 420 format */
1488 (0 << 0) | /* sse statistics */
1489 (pic_param->log2_tile_rows << 8) |
1490 (pic_param->log2_tile_columns << 0));
1491
1492 /* dw4..6 */
1493 if (pic_param->pic_flags.bits.frame_type &&
1494 !pic_param->pic_flags.bits.intra_only) {
1495 for (j = 0; j < 3; j++) {
1496 obj_surface = encode_state->reference_objects[j];
1497 scale_w = 0;
1498 scale_h = 0;
1499 if (obj_surface && obj_surface->private_data) {
1500 vp9_surface = obj_surface->private_data;
1501 scale_w = (vp9_surface->frame_width << 14) / pic_param->frame_width_dst;
1502 scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1503 *cmd_ptr++ = (scale_w << 16 |
1504 scale_h);
1505 } else
1506 *cmd_ptr++ = 0;
1507 }
1508 } else {
1509 *cmd_ptr++ = 0;
1510 *cmd_ptr++ = 0;
1511 *cmd_ptr++ = 0;
1512 }
1513 /* dw7..9 */
1514 for (j = 0; j < 3; j++) {
1515 obj_surface = encode_state->reference_objects[j];
1516 vp9_surface = NULL;
1517
1518 if (obj_surface && obj_surface->private_data) {
1519 vp9_surface = obj_surface->private_data;
1520 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1521 (vp9_surface->frame_width - 1);
1522 } else
1523 *cmd_ptr++ = 0;
1524 }
1525 /* dw10 */
1526 *cmd_ptr++ = 0;
1527 /* dw11 */
1528 *cmd_ptr++ = (1 << 1);
1529 *cmd_ptr++ = 0;
1530
1531 /* dw13 */
1532 *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1533 (0 << 24) | /* tail insertation */
1534 (pic_param->luma_ac_qindex << 16) |
1535 0 /* compressed header bin count */);
1536
1537 /* dw14 */
1538 tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1539 cmd_value = (tmp << 16);
1540 tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1541 cmd_value |= (tmp << 8);
1542 tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1543 cmd_value |= tmp;
1544 *cmd_ptr++ = cmd_value;
1545
1546 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1547 cmd_value = tmp;
1548 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1549 cmd_value |= (tmp << 8);
1550 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1551 cmd_value |= (tmp << 16);
1552 tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1553 cmd_value |= (tmp << 24);
1554 *cmd_ptr++ = cmd_value;
1555
1556 /* dw16 */
1557 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1558 cmd_value = tmp;
1559 tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1560 cmd_value |= (tmp << 8);
1561 *cmd_ptr++ = cmd_value;
1562
1563 /* dw17 */
1564 *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1565 (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1566 *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1567 (vp9_state->frame_header.bit_offset_lf_level << 16);
1568
1569 /* dw19 */
1570 *cmd_ptr++ = (1 << 26 | (1 << 25) |
1571 non_first_pass << 16);
1572 /* dw20 */
1573 *cmd_ptr++ = (1 << 31) | (256);
1574
1575 /* dw21 */
1576 *cmd_ptr++ = (0 << 31) | 1;
1577
1578 /* dw22-dw24. Frame_delta_qindex_range */
1579 *cmd_ptr++ = 0;
1580 *cmd_ptr++ = 0;
1581 *cmd_ptr++ = 0;
1582
1583 /* dw25-26. frame_delta_lf_range */
1584 *cmd_ptr++ = 0;
1585 *cmd_ptr++ = 0;
1586
1587 /* dw27. frame_delta_lf_min */
1588 *cmd_ptr++ = 0;
1589
1590 /* dw28..30 */
1591 *cmd_ptr++ = 0;
1592 *cmd_ptr++ = 0;
1593 *cmd_ptr++ = 0;
1594
1595 /* dw31 */
1596 /* dw31 is for restricting the compressed frames minimum size
1597 * and we don't impose any. */
1598 *cmd_ptr++ = 0;
1599
1600 /* dw32 */
1601 *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
1602
1603 *cmd_ptr++ = 0;
1604 *cmd_ptr++ = MI_BATCH_BUFFER_END;
1605 }
1606
1607 i965_unmap_gpe_resource(gpe_resource);
1608 }
1609
/*
 * Populate the binding table of the BRC-update GPE context.
 *
 * The numbered comments match the kernel's fixed binding-table slots
 * (VP9_BTI_BRC_*_G9).  Note that the MbEnc curbe buffer is bound at two
 * slots (input and output) with the same bo/offset, so the BRC kernel
 * can read it and rewrite it in place.  encode_state is unused here but
 * kept for the common add-surfaces prototype.
 */
static void
gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context,
                                 struct i965_gpe_context *brc_gpe_context,
                                 struct i965_gpe_context *mbenc_gpe_context)
{
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;

    /* 0. BRC history buffer */
    i965_add_buffer_gpe_surface(ctx,
                                brc_gpe_context,
                                &vme_context->res_brc_history_buffer,
                                0,
                                vme_context->res_brc_history_buffer.size,
                                0,
                                VP9_BTI_BRC_HISTORY_G9);

    /* 1. Constant data buffer */
    i965_add_buffer_gpe_surface(ctx,
                                brc_gpe_context,
                                &vme_context->res_brc_const_data_buffer,
                                0,
                                vme_context->res_brc_const_data_buffer.size,
                                0,
                                VP9_BTI_BRC_CONSTANT_DATA_G9);

    /* 2. Distortion 2D surface buffer (4x HME distortion output) */
    i965_add_buffer_2d_gpe_surface(ctx,
                                   brc_gpe_context,
                                   &vme_context->s4x_memv_distortion_buffer,
                                   1,
                                   I965_SURFACEFORMAT_R8_UNORM,
                                   VP9_BTI_BRC_DISTORTION_G9);

    /* 3. pak buffer */
    i965_add_buffer_gpe_surface(ctx,
                                brc_gpe_context,
                                &vme_context->res_brc_mmdk_pak_buffer,
                                0,
                                vme_context->res_brc_mmdk_pak_buffer.size,
                                0,
                                VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
    /* 4. Mbenc curbe input buffer (the curbe programmed by the MbEnc setup) */
    gen9_add_dri_buffer_gpe_surface(ctx,
                                    brc_gpe_context,
                                    mbenc_gpe_context->curbe.bo,
                                    0,
                                    ALIGN(mbenc_gpe_context->curbe.length, 64),
                                    mbenc_gpe_context->curbe.offset,
                                    VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
    /* 5. Mbenc curbe output buffer (same bo/offset as slot 4: updated in place) */
    gen9_add_dri_buffer_gpe_surface(ctx,
                                    brc_gpe_context,
                                    mbenc_gpe_context->curbe.bo,
                                    0,
                                    ALIGN(mbenc_gpe_context->curbe.length, 64),
                                    mbenc_gpe_context->curbe.offset,
                                    VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);

    /* 6. BRC_PIC_STATE read buffer */
    i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
                                &vme_context->res_pic_state_brc_read_buffer,
                                0,
                                vme_context->res_pic_state_brc_read_buffer.size,
                                0,
                                VP9_BTI_BRC_PIC_STATE_INPUT_G9);

    /* 7. BRC_PIC_STATE write buffer */
    i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
                                &vme_context->res_pic_state_brc_write_hfw_read_buffer,
                                0,
                                vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
                                0,
                                VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);

    /* 8. SEGMENT_STATE read buffer */
    i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
                                &vme_context->res_seg_state_brc_read_buffer,
                                0,
                                vme_context->res_seg_state_brc_read_buffer.size,
                                0,
                                VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);

    /* 9. SEGMENT_STATE write buffer */
    i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
                                &vme_context->res_seg_state_brc_write_buffer,
                                0,
                                vme_context->res_seg_state_brc_write_buffer.size,
                                0,
                                VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);

    /* 10. Bitstream size buffer */
    i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
                                &vme_context->res_brc_bitstream_size_buffer,
                                0,
                                vme_context->res_brc_bitstream_size_buffer.size,
                                0,
                                VP9_BTI_BRC_BITSTREAM_SIZE_G9);

    /* 11. HuC/HFW data buffer */
    i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
                                &vme_context->res_brc_hfw_data_buffer,
                                0,
                                vme_context->res_brc_hfw_data_buffer.size,
                                0,
                                VP9_BTI_BRC_HFW_DATA_G9);

    return;
}
1719
/*
 * Dispatch the BRC-update kernel for the current frame.
 *
 * This programs BOTH curbes: the MbEnc curbe for the upcoming MbEnc pass
 * (so the BRC kernel can adjust it through the input/output curbe
 * surfaces) and the BRC-update curbe itself.  It also refreshes the BRC
 * constant-data table and the PIC_STATE read buffer before running a
 * single media object.
 *
 * Returns VA_STATUS_SUCCESS, or an error if the private state is missing
 * or a buffer cannot be mapped.
 */
static VAStatus
gen9_vp9_brc_update_kernel(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct vp9_brc_context *brc_context = &vme_context->brc_context;
    struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
    int mbenc_index, gpe_index = VP9_BRC_UPDATE;
    int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
    int mbenc_function;
    struct gen9_vp9_brc_curbe_param brc_update_curbe_param;
    VAEncPictureParameterBufferVP9 *pic_param;
    struct gen9_vp9_state *vp9_state;
    struct gen9_vp9_mbenc_curbe_param mbenc_curbe_param;
    struct gpe_media_object_parameter media_object_param;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
    if (!vp9_state || !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    pic_param = vp9_state->pic_param;
    // Setup VP9 MbEnc Curbe
    /* picture_coding_type != 0 means inter frame; otherwise key frame. */
    if (vp9_state->picture_coding_type) {
        mbenc_function = VP9_MEDIA_STATE_MBENC_P;
        mbenc_index = VP9_MBENC_IDX_INTER;
    } else {
        mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
        mbenc_index = VP9_MBENC_IDX_KEY_32x32;
    }

    mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);

    memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));

    mbenc_curbe_param.ppic_param             = vp9_state->pic_param;
    mbenc_curbe_param.pseq_param             = vp9_state->seq_param;
    mbenc_curbe_param.psegment_param         = vp9_state->segment_param;
    //mbenc_curbe_param.ppRefList              = &(vp9_state->pRefList[0]);
    mbenc_curbe_param.last_ref_obj           = vp9_state->last_ref_obj;
    mbenc_curbe_param.golden_ref_obj         = vp9_state->golden_ref_obj;
    mbenc_curbe_param.alt_ref_obj            = vp9_state->alt_ref_obj;
    mbenc_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
    mbenc_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
    mbenc_curbe_param.hme_enabled            = vp9_state->hme_enabled;
    mbenc_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
    mbenc_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
    mbenc_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
    mbenc_curbe_param.media_state_type       = mbenc_function;

    vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
                                     mbenc_gpe_context,
                                     encoder_context,
                                     &mbenc_curbe_param);

    /* Tell the later MbEnc pass that its curbe is already programmed. */
    vp9_state->mbenc_curbe_set_in_brc_update = true;

    brc_gpe_context = &brc_context->gpe_contexts[gpe_index];

    gen8_gpe_context_init(ctx, brc_gpe_context);
    gen9_gpe_reset_binding_table(ctx, brc_gpe_context);

    memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));

    // Setup BRC Update Curbe
    brc_update_curbe_param.media_state_type = media_function;
    brc_update_curbe_param.curr_frame = pic_param->reconstructed_frame;
    brc_update_curbe_param.ppic_param = vp9_state->pic_param;
    brc_update_curbe_param.pseq_param = vp9_state->seq_param;
    brc_update_curbe_param.psegment_param = vp9_state->segment_param;
    brc_update_curbe_param.picture_coding_type = vp9_state->picture_coding_type;
    brc_update_curbe_param.frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
    brc_update_curbe_param.frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
    brc_update_curbe_param.hme_enabled = vp9_state->hme_enabled;
    brc_update_curbe_param.b_used_ref = 1;
    brc_update_curbe_param.frame_number = vp9_state->frame_number;
    brc_update_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
    brc_update_curbe_param.mbbrc_enabled = 0;
    brc_update_curbe_param.multi_ref_qp_check = vp9_state->multi_ref_qp_check;
    brc_update_curbe_param.brc_num_pak_passes = vp9_state->num_pak_passes;

    /* Pointers into the private state: the curbe setup reads and/or
     * updates these running BRC accumulators across frames. */
    brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
        &vp9_state->brc_init_current_target_buf_full_in_bits;
    brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
        &vp9_state->brc_init_reset_buf_size_in_bits;
    brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
        &vp9_state->brc_init_reset_input_bits_per_frame;

    vme_context->pfn_set_curbe_brc(ctx, encode_state,
                                   brc_gpe_context,
                                   encoder_context,
                                   &brc_update_curbe_param);


    // Check if the constant data surface is present
    if (vp9_state->brc_constant_buffer_supported) {
        char *brc_const_buffer;
        brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);

        if (!brc_const_buffer)
            return VA_STATUS_ERROR_OPERATION_FAILED;

        /* Pick the I- or P-frame constant table for the BRC kernel. */
        if (vp9_state->picture_coding_type)
            memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
                   sizeof(vp9_brc_const_data_p_g9));
        else
            memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
                   sizeof(vp9_brc_const_data_i_g9));

        i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
    }

    if (pic_param->pic_flags.bits.segmentation_enabled) {
        //reallocate the vme_state->mb_segment_map_surface
        /* this will be added later */
    }

    {
        pic_param->filter_level = 0;
        // clear the filter level value in picParams before programming pic state, as this value will be determined and updated by BRC.
        intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
                                                 encoder_context, &vme_context->res_pic_state_brc_read_buffer);
    }

    gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
                                     encoder_context,
                                     brc_gpe_context,
                                     mbenc_gpe_context);

    gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
    /* BRC update runs as a single media object (one thread). */
    memset(&media_object_param, 0, sizeof(media_object_param));
    gen9_run_kernel_media_object(ctx, encoder_context,
                                 brc_gpe_context,
                                 media_function,
                                 &media_object_param);
    return VA_STATUS_SUCCESS;
}
1857
/*
 * Fill the HME curbe for the 4x or 16x motion-estimation pass.
 *
 * Most fields are fixed VME tuning constants; comments below give the
 * decimal meaning of the non-obvious hex values.  Returns silently if
 * the curbe cannot be mapped.
 */
static
void gen9_vp9_set_curbe_me(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct i965_gpe_context *gpe_context,
                           struct intel_encoder_context *encoder_context,
                           struct gen9_vp9_me_curbe_param *param)
{
    vp9_me_curbe_data *me_cmd;
    int enc_media_state;
    int me_mode;
    unsigned int width, height;
    uint32_t l0_ref_frames;
    uint32_t scale_factor;

    /* Determine which stage of the HME hierarchy this invocation is:
     * 16x-before-4x, 4x-after-16x, or a standalone 4x pass. */
    if (param->b16xme_enabled) {
        if (param->use_16x_me)
            me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
        else
            me_mode = VP9_ENC_ME4X_AFTER_ME16X;
    } else {
        me_mode = VP9_ENC_ME4X_ONLY;
    }

    if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
        scale_factor = 16;
    else
        scale_factor = 4;

    if (param->use_16x_me)
        enc_media_state = VP9_MEDIA_STATE_16X_ME;
    else
        enc_media_state = VP9_MEDIA_STATE_4X_ME;

    me_cmd = i965_gpe_context_map_curbe(gpe_context);

    if (!me_cmd)
        return;

    memset(me_cmd, 0, sizeof(vp9_me_curbe_data));

    me_cmd->dw1.max_num_mvs          = 0x10;   /* up to 16 MVs */
    me_cmd->dw1.bi_weight            = 0x00;

    /* Search-unit limits: 0x39 = 57. */
    me_cmd->dw2.max_num_su           = 0x39;
    me_cmd->dw2.max_len_sp           = 0x39;

    me_cmd->dw3.sub_mb_part_mask     = 0x77;
    me_cmd->dw3.inter_sad            = 0x00;
    me_cmd->dw3.intra_sad            = 0x00;
    me_cmd->dw3.bme_disable_fbr      = 0x01;
    me_cmd->dw3.sub_pel_mode         = 0x03;

    /* Downscaled dimensions of the picture this pass operates on. */
    width = param->frame_width / scale_factor;
    height = param->frame_height / scale_factor;

    me_cmd->dw4.picture_width        = ALIGN(width, 16) / 16;
    me_cmd->dw4.picture_height_minus1 = ALIGN(height, 16) / 16 - 1;

    /* Reference search window: 0x30 x 0x28 = 48 x 40 pixels. */
    me_cmd->dw5.ref_width            = 0x30;
    me_cmd->dw5.ref_height           = 0x28;

    /* Only the 4x pass produces distortion output (consumed by BRC). */
    if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
        me_cmd->dw6.write_distortions = 0x01;

    /* The 4x pass after a 16x pass seeds its search from the 16x MVs. */
    me_cmd->dw6.use_mv_from_prev_step = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
    me_cmd->dw6.super_combine_dist   = 0x5;
    me_cmd->dw6.max_vmvr             = 0x7fc;  /* 2044 — max vertical MV range */

    /* Count the enabled L0 references from the low three bits of
     * ref_frame_flag (last/golden/alt). */
    l0_ref_frames = (param->ref_frame_flag & 0x01) +
                    !!(param->ref_frame_flag & 0x02) +
                    !!(param->ref_frame_flag & 0x04);
    me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
    me_cmd->dw13.num_ref_idx_l1_minus1 = 0;

    me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
    me_cmd->dw14.l1_ref_pic_polarity_bits = 0;

    me_cmd->dw15.mv_shift_factor       = 0x02;

    {
        /* Copy the IME diamond search-path table into the curbe starting
         * at byte offset 64 (i.e. dw16 onward). */
        memcpy((void *)((char *)me_cmd + 64),
               vp9_diamond_ime_search_path_delta,
               sizeof(vp9_diamond_ime_search_path_delta));
    }


    /* Binding-table indices the kernel uses for its surfaces. */
    me_cmd->dw32._4x_memv_output_data_surf_index     = VP9_BTI_ME_MV_DATA_SURFACE;
    me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
    me_cmd->dw34._4x_me_output_dist_surf_index       = VP9_BTI_ME_DISTORTION_SURFACE;
    me_cmd->dw35._4x_me_output_brc_dist_surf_index   = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
    me_cmd->dw36.vme_fwd_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L0;
    me_cmd->dw37.vme_bdw_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L1;

    i965_gpe_context_unmap_curbe(gpe_context);
}
1953
1954 static void
gen9_vp9_send_me_surface(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_me_surface_param * param)1955 gen9_vp9_send_me_surface(VADriverContextP ctx,
1956 struct encode_state *encode_state,
1957 struct i965_gpe_context *gpe_context,
1958 struct intel_encoder_context *encoder_context,
1959 struct gen9_vp9_me_surface_param *param)
1960 {
1961 struct i965_driver_data *i965 = i965_driver_data(ctx);
1962 struct object_surface *obj_surface;
1963 struct gen9_surface_vp9 *vp9_priv_surface;
1964 struct object_surface *input_surface;
1965 struct i965_gpe_resource *gpe_resource;
1966 int ref_bti;
1967
1968 obj_surface = SURFACE(param->curr_pic);
1969
1970 if (!obj_surface || !obj_surface->private_data)
1971 return;
1972
1973 vp9_priv_surface = obj_surface->private_data;
1974 if (param->use_16x_me) {
1975 gpe_resource = param->pres_16x_memv_data_buffer;
1976 } else {
1977 gpe_resource = param->pres_4x_memv_data_buffer;
1978 }
1979
1980 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1981 gpe_resource,
1982 1,
1983 I965_SURFACEFORMAT_R8_UNORM,
1984 VP9_BTI_ME_MV_DATA_SURFACE);
1985
1986 if (param->b16xme_enabled) {
1987 gpe_resource = param->pres_16x_memv_data_buffer;
1988 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1989 gpe_resource,
1990 1,
1991 I965_SURFACEFORMAT_R8_UNORM,
1992 VP9_BTI_16XME_MV_DATA_SURFACE);
1993 }
1994
1995 if (!param->use_16x_me) {
1996 gpe_resource = param->pres_me_brc_distortion_buffer;
1997
1998 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1999 gpe_resource,
2000 1,
2001 I965_SURFACEFORMAT_R8_UNORM,
2002 VP9_BTI_ME_BRC_DISTORTION_SURFACE);
2003
2004 gpe_resource = param->pres_me_distortion_buffer;
2005
2006 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2007 gpe_resource,
2008 1,
2009 I965_SURFACEFORMAT_R8_UNORM,
2010 VP9_BTI_ME_DISTORTION_SURFACE);
2011 }
2012
2013 if (param->use_16x_me)
2014 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2015 else
2016 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2017
2018 i965_add_adv_gpe_surface(ctx, gpe_context,
2019 input_surface,
2020 VP9_BTI_ME_CURR_PIC_L0);
2021
2022 ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
2023
2024
2025 if (param->last_ref_pic) {
2026 obj_surface = param->last_ref_pic;
2027 vp9_priv_surface = obj_surface->private_data;
2028
2029 if (param->use_16x_me)
2030 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2031 else
2032 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2033
2034 if (param->dys_enabled &&
2035 ((vp9_priv_surface->frame_width != param->frame_width) ||
2036 (vp9_priv_surface->frame_height != param->frame_height))) {
2037 if (param->use_16x_me)
2038 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2039 else
2040 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2041 }
2042 i965_add_adv_gpe_surface(ctx, gpe_context,
2043 input_surface,
2044 ref_bti);
2045 i965_add_adv_gpe_surface(ctx, gpe_context,
2046 input_surface,
2047 ref_bti + 1);
2048 ref_bti += 2;
2049 }
2050
2051 if (param->golden_ref_pic) {
2052 obj_surface = param->golden_ref_pic;
2053 vp9_priv_surface = obj_surface->private_data;
2054
2055 if (param->use_16x_me)
2056 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2057 else
2058 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2059
2060 if (param->dys_enabled &&
2061 ((vp9_priv_surface->frame_width != param->frame_width) ||
2062 (vp9_priv_surface->frame_height != param->frame_height))) {
2063 if (param->use_16x_me)
2064 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2065 else
2066 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2067 }
2068
2069 i965_add_adv_gpe_surface(ctx, gpe_context,
2070 input_surface,
2071 ref_bti);
2072 i965_add_adv_gpe_surface(ctx, gpe_context,
2073 input_surface,
2074 ref_bti + 1);
2075 ref_bti += 2;
2076 }
2077
2078 if (param->alt_ref_pic) {
2079 obj_surface = param->alt_ref_pic;
2080 vp9_priv_surface = obj_surface->private_data;
2081
2082 if (param->use_16x_me)
2083 input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2084 else
2085 input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2086
2087 if (param->dys_enabled &&
2088 ((vp9_priv_surface->frame_width != param->frame_width) ||
2089 (vp9_priv_surface->frame_height != param->frame_height))) {
2090 if (param->use_16x_me)
2091 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2092 else
2093 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2094 }
2095 i965_add_adv_gpe_surface(ctx, gpe_context,
2096 input_surface,
2097 ref_bti);
2098 i965_add_adv_gpe_surface(ctx, gpe_context,
2099 input_surface,
2100 ref_bti + 1);
2101 ref_bti += 2;
2102 }
2103
2104 return;
2105 }
2106
2107 static
gen9_me_add_surfaces_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context,int use_16x_me)2108 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2109 struct encode_state *encode_state,
2110 struct intel_encoder_context *encoder_context,
2111 struct i965_gpe_context *gpe_context,
2112 int use_16x_me)
2113 {
2114 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2115 struct gen9_vp9_me_surface_param me_surface_param;
2116 struct gen9_vp9_state *vp9_state;
2117
2118 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2119
2120 /* sScaled4xSurface surface */
2121 memset(&me_surface_param, 0, sizeof(me_surface_param));
2122 me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2123 me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2124 me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2125 me_surface_param.curr_pic = vp9_state->curr_frame;
2126 me_surface_param.pres_4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
2127 me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
2128 me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2129 me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2130
2131 if (use_16x_me) {
2132 me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2133 me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2134 } else {
2135 me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2136 me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2137 }
2138 me_surface_param.frame_width = vp9_state->frame_width;
2139 me_surface_param.frame_height = vp9_state->frame_height;
2140
2141 me_surface_param.use_16x_me = use_16x_me;
2142 me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2143 me_surface_param.dys_enabled = vp9_state->dys_in_use;
2144
2145 vme_context->pfn_send_me_surface(ctx, encode_state,
2146 gpe_context,
2147 encoder_context,
2148 &me_surface_param);
2149 return;
2150 }
2151
2152 static VAStatus
gen9_vp9_me_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int use_16x_me)2153 gen9_vp9_me_kernel(VADriverContextP ctx,
2154 struct encode_state *encode_state,
2155 struct intel_encoder_context *encoder_context,
2156 int use_16x_me)
2157 {
2158 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2159 struct i965_gpe_context *gpe_context;
2160 int media_function;
2161 struct gen9_vp9_me_curbe_param me_curbe_param;
2162 struct gen9_vp9_state *vp9_state;
2163 struct gpe_media_object_walker_parameter media_object_walker_param;
2164 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2165
2166 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2167 if (!vp9_state || !vp9_state->pic_param)
2168 return VA_STATUS_ERROR_INVALID_PARAMETER;
2169
2170 if (use_16x_me)
2171 media_function = VP9_MEDIA_STATE_16X_ME;
2172 else
2173 media_function = VP9_MEDIA_STATE_4X_ME;
2174
2175 gpe_context = &(vme_context->me_context.gpe_context);
2176
2177 gen8_gpe_context_init(ctx, gpe_context);
2178 gen9_gpe_reset_binding_table(ctx, gpe_context);
2179
2180 memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2181 me_curbe_param.ppic_param = vp9_state->pic_param;
2182 me_curbe_param.pseq_param = vp9_state->seq_param;
2183 me_curbe_param.frame_width = vp9_state->frame_width;
2184 me_curbe_param.frame_height = vp9_state->frame_height;
2185 me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2186 me_curbe_param.use_16x_me = use_16x_me;
2187 me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2188 vme_context->pfn_set_curbe_me(ctx, encode_state,
2189 gpe_context,
2190 encoder_context,
2191 &me_curbe_param);
2192
2193 gen9_me_add_surfaces_vp9(ctx, encode_state,
2194 encoder_context,
2195 gpe_context,
2196 use_16x_me);
2197
2198 gen8_gpe_setup_interface_data(ctx, gpe_context);
2199
2200 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2201 if (use_16x_me) {
2202 kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2203 kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2204 } else {
2205 kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2206 kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2207 }
2208 kernel_walker_param.no_dependency = 1;
2209
2210 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2211
2212 gen9_run_kernel_media_object_walker(ctx, encoder_context,
2213 gpe_context,
2214 media_function,
2215 &media_object_walker_param);
2216
2217 return VA_STATUS_SUCCESS;
2218 }
2219
2220 static void
gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_scaling_curbe_param * curbe_param)2221 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2222 struct encode_state *encode_state,
2223 struct i965_gpe_context *gpe_context,
2224 struct intel_encoder_context *encoder_context,
2225 struct gen9_vp9_scaling_curbe_param *curbe_param)
2226 {
2227 vp9_scaling4x_curbe_data_cm *curbe_cmd;
2228
2229 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2230
2231 if (!curbe_cmd)
2232 return;
2233
2234 memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2235
2236 curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2237 curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
2238
2239 curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2240 curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
2241
2242
2243 curbe_cmd->dw6.enable_mb_variance_output = 0;
2244 curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2245 curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2246
2247 if (curbe_param->mb_variance_output_enabled ||
2248 curbe_param->mb_pixel_average_output_enabled) {
2249 curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2250 }
2251
2252 i965_gpe_context_unmap_curbe(gpe_context);
2253 return;
2254 }
2255
2256 static void
gen9_vp9_send_scaling_surface(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_scaling_surface_param * scaling_surface_param)2257 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2258 struct encode_state *encode_state,
2259 struct i965_gpe_context *gpe_context,
2260 struct intel_encoder_context *encoder_context,
2261 struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2262 {
2263 vp9_bti_scaling_offset *scaling_bti;
2264 unsigned int surface_format;
2265
2266 scaling_bti = scaling_surface_param->p_scaling_bti;
2267
2268 if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2269 surface_format = I965_SURFACEFORMAT_R32_UNORM;
2270 else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2271 surface_format = I965_SURFACEFORMAT_R16_UNORM;
2272 else
2273 surface_format = I965_SURFACEFORMAT_R8_UNORM;
2274
2275 i965_add_2d_gpe_surface(ctx, gpe_context,
2276 scaling_surface_param->input_surface,
2277 0, 1, surface_format,
2278 scaling_bti->scaling_frame_src_y);
2279
2280 i965_add_2d_gpe_surface(ctx, gpe_context,
2281 scaling_surface_param->output_surface,
2282 0, 1, surface_format,
2283 scaling_bti->scaling_frame_dst_y);
2284
2285
2286 return;
2287 }
2288
/*
 * Run one downscaling pass: source frame -> 4x surface, or (when
 * use_16x_scaling is set) 4x surface -> 16x surface.  The 4x pass may
 * take the DYS-rescaled source when dynamic scaling changed the frame
 * size.  Dispatches a media-object walker over 8x8 blocks.
 *
 * Returns VA_STATUS_SUCCESS, or VA_STATUS_ERROR_INVALID_PARAMETER when
 * the encoder private state is missing.
 */
static VAStatus
gen9_vp9_scaling_kernel(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context,
                        int use_16x_scaling)
{
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct i965_gpe_context *gpe_context;
    int media_function;
    struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
    struct gen9_vp9_scaling_surface_param scaling_surface_param;
    struct gen9_vp9_state *vp9_state;
    VAEncPictureParameterBufferVP9 *pic_param;
    struct gpe_media_object_walker_parameter media_object_walker_param;
    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
    struct object_surface *obj_surface;
    struct object_surface *input_surface, *output_surface;
    struct gen9_surface_vp9 *vp9_priv_surface;
    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
    unsigned int input_frame_width, input_frame_height;
    unsigned int output_frame_width, output_frame_height;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
    if (!vp9_state || !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    pic_param = vp9_state->pic_param;

    if (use_16x_scaling)
        media_function = VP9_MEDIA_STATE_16X_SCALING;
    else
        media_function = VP9_MEDIA_STATE_4X_SCALING;

    /* Both 4x and 16x passes run on the same GPE context. */
    gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);

    gen8_gpe_context_init(ctx, gpe_context);
    gen9_gpe_reset_binding_table(ctx, gpe_context);

    /* The scaled surfaces hang off the reconstructed frame's private data. */
    obj_surface = encode_state->reconstructed_object;
    vp9_priv_surface = obj_surface->private_data;

    if (use_16x_scaling) {
        /* 16x pass: downscale the 4x surface by another factor of 4. */
        downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
        downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;

        input_surface  = vp9_priv_surface->scaled_4x_surface_obj;
        input_frame_width = vp9_state->frame_width_4x;
        input_frame_height = vp9_state->frame_height_4x;

        output_surface = vp9_priv_surface->scaled_16x_surface_obj;
        output_frame_width = vp9_state->frame_width_16x;
        output_frame_height = vp9_state->frame_height_16x;
    } else {
        /* 4x pass: downscale the source (or, with dynamic scaling active
         * and a src/dst size mismatch, the DYS-rescaled source). */
        downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
        downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;

        if (vp9_state->dys_in_use &&
            ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
             (pic_param->frame_height_src != pic_param->frame_height_dst)))
            input_surface = vp9_priv_surface->dys_surface_obj;
        else
            input_surface  = encode_state->input_yuv_object;

        input_frame_width = vp9_state->frame_width;
        input_frame_height = vp9_state->frame_height;

        output_surface = vp9_priv_surface->scaled_4x_surface_obj;
        output_frame_width = vp9_state->frame_width_4x;
        output_frame_height = vp9_state->frame_height_4x;
    }

    memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));

    scaling_curbe_param.input_picture_width  = input_frame_width;
    scaling_curbe_param.input_picture_height = input_frame_height;

    scaling_curbe_param.use_16x_scaling = use_16x_scaling;
    scaling_curbe_param.use_32x_scaling = 0;

    /* MB variance (for adaptive transform decision) only comes out of
     * the 4x pass. */
    if (use_16x_scaling)
        scaling_curbe_param.mb_variance_output_enabled = 0;
    else
        scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;

    scaling_curbe_param.blk8x8_stat_enabled = 0;

    vme_context->pfn_set_curbe_scaling(ctx, encode_state,
                                       gpe_context,
                                       encoder_context,
                                       &scaling_curbe_param);

    memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
    /* NOTE(review): the 4x BTI layout is reused for the 16x pass as well
     * — presumably the kernel's slot assignment is identical; confirm. */
    scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
    scaling_surface_param.input_surface = input_surface;
    scaling_surface_param.input_frame_width = input_frame_width;
    scaling_surface_param.input_frame_height = input_frame_height;

    scaling_surface_param.output_surface = output_surface;
    scaling_surface_param.output_frame_width = output_frame_width;
    scaling_surface_param.output_frame_height = output_frame_height;
    scaling_surface_param.scaling_out_use_16unorm_surf_fmt = 0;
    scaling_surface_param.scaling_out_use_32unorm_surf_fmt = 1;

    vme_context->pfn_send_scaling_surface(ctx, encode_state,
                                          gpe_context,
                                          encoder_context,
                                          &scaling_surface_param);

    gen8_gpe_setup_interface_data(ctx, gpe_context);

    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
    /* the scaling is based on 8x8 blk level */
    kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
    kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
    kernel_walker_param.no_dependency = 1;

    gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);

    gen9_run_kernel_media_object_walker(ctx, encoder_context,
                                        gpe_context,
                                        media_function,
                                        &media_object_walker_param);

    return VA_STATUS_SUCCESS;
}
2414
/*
 * Program the Gen9 8x8 AVS (adaptive video scaler) sampler state used by
 * the dynamic-scaling (DYS) kernel.  The dw* values are fixed filter
 * tuning constants; the two coefficient tables are copied from
 * gen9_vp9_avs_coeffs (first 17 entries, then 15 more starting at
 * element 17*8).  Returns silently if the sampler bo cannot be mapped.
 */
static void
gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
{
    struct gen9_sampler_8x8_avs *sampler_cmd;

    if (!gpe_context)
        return;

    /* NOTE(review): dri_bo_map's return value is not checked; the NULL
     * check on ->virtual below is the only failure handling, and the bo
     * is left unmapped in that path. */
    dri_bo_map(gpe_context->sampler.bo, 1);

    if (!gpe_context->sampler.bo->virtual)
        return;

    sampler_cmd = (struct gen9_sampler_8x8_avs *)
                  (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);

    memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));

    /* Edge-detection / gain parameters. */
    sampler_cmd->dw0.r3c_coefficient                = 15;
    sampler_cmd->dw0.r3x_coefficient                = 6;
    sampler_cmd->dw0.strong_edge_threshold          = 8;
    sampler_cmd->dw0.weak_edge_threshold            = 1;
    sampler_cmd->dw0.gain_factor                    = 32;

    sampler_cmd->dw2.r5c_coefficient                = 3;
    sampler_cmd->dw2.r5cx_coefficient               = 8;
    sampler_cmd->dw2.r5x_coefficient                = 9;
    sampler_cmd->dw2.strong_edge_weight             = 6;
    sampler_cmd->dw2.regular_weight                 = 3;
    sampler_cmd->dw2.non_edge_weight                = 2;
    sampler_cmd->dw2.global_noise_estimation        = 255;

    sampler_cmd->dw3.enable_8tap_adaptive_filter    = 0;
    sampler_cmd->dw3.cos_alpha                      = 79;
    sampler_cmd->dw3.sin_alpha                      = 101;

    sampler_cmd->dw5.diamond_du                     = 0;
    sampler_cmd->dw5.hs_margin                      = 3;
    sampler_cmd->dw5.diamond_alpha                  = 100;

    /* Chroma margin / slope parameters. */
    sampler_cmd->dw7.inv_margin_vyl                 = 3300;

    sampler_cmd->dw8.inv_margin_vyu                 = 1600;

    sampler_cmd->dw10.y_slope2                      = 24;
    sampler_cmd->dw10.s0l                           = 1792;

    sampler_cmd->dw12.y_slope1                      = 24;

    sampler_cmd->dw14.s0u                           = 256;

    sampler_cmd->dw15.s2u                           = 1792;
    sampler_cmd->dw15.s1u                           = 0;

    /* First bank of AVS filter coefficients (17 entries). */
    memcpy(sampler_cmd->coefficients,
           &gen9_vp9_avs_coeffs[0],
           17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));

    sampler_cmd->dw152.default_sharpness_level      = 255;
    sampler_cmd->dw152.max_derivative_4_pixels      = 7;
    sampler_cmd->dw152.max_derivative_8_pixels      = 20;
    sampler_cmd->dw152.transition_area_with_4_pixels = 4;
    sampler_cmd->dw152.transition_area_with_8_pixels = 5;

    /* Adaptive filtering bypassed for both axes. */
    sampler_cmd->dw153.bypass_x_adaptive_filtering  = 1;
    sampler_cmd->dw153.bypass_y_adaptive_filtering  = 1;
    sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;

    /* Second bank of coefficients (15 entries, starting at element 17*8). */
    memcpy(sampler_cmd->extra_coefficients,
           &gen9_vp9_avs_coeffs[17 * 8],
           15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));

    dri_bo_unmap(gpe_context->sampler.bo);
}
2489
2490 static void
gen9_vp9_set_curbe_dys(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_dys_curbe_param * curbe_param)2491 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2492 struct encode_state *encode_state,
2493 struct i965_gpe_context *gpe_context,
2494 struct intel_encoder_context *encoder_context,
2495 struct gen9_vp9_dys_curbe_param *curbe_param)
2496 {
2497 vp9_dys_curbe_data *curbe_cmd;
2498
2499 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2500
2501 if (!curbe_cmd)
2502 return;
2503
2504 memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2505
2506 curbe_cmd->dw0.input_frame_width = curbe_param->input_width;
2507 curbe_cmd->dw0.input_frame_height = curbe_param->input_height;
2508
2509 curbe_cmd->dw1.output_frame_width = curbe_param->output_width;
2510 curbe_cmd->dw1.output_frame_height = curbe_param->output_height;
2511
2512 curbe_cmd->dw2.delta_u = 1.0f / curbe_param->output_width;
2513 curbe_cmd->dw3.delta_v = 1.0f / curbe_param->output_height;
2514
2515 curbe_cmd->dw16.input_frame_nv12_bti = VP9_BTI_DYS_INPUT_NV12;
2516 curbe_cmd->dw17.output_frame_y_bti = VP9_BTI_DYS_OUTPUT_Y;
2517 curbe_cmd->dw18.avs_sample_idx = 0;
2518
2519 i965_gpe_context_unmap_curbe(gpe_context);
2520 }
2521
2522 static void
gen9_vp9_send_dys_surface(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_dys_surface_param * surface_param)2523 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2524 struct encode_state *encode_state,
2525 struct i965_gpe_context *gpe_context,
2526 struct intel_encoder_context *encoder_context,
2527 struct gen9_vp9_dys_surface_param *surface_param)
2528 {
2529
2530 if (surface_param->input_frame)
2531 i965_add_adv_gpe_surface(ctx,
2532 gpe_context,
2533 surface_param->input_frame,
2534 VP9_BTI_DYS_INPUT_NV12);
2535
2536 if (surface_param->output_frame) {
2537 i965_add_2d_gpe_surface(ctx,
2538 gpe_context,
2539 surface_param->output_frame,
2540 0,
2541 1,
2542 I965_SURFACEFORMAT_R8_UNORM,
2543 VP9_BTI_DYS_OUTPUT_Y);
2544
2545 i965_add_2d_gpe_surface(ctx,
2546 gpe_context,
2547 surface_param->output_frame,
2548 1,
2549 1,
2550 I965_SURFACEFORMAT_R16_UINT,
2551 VP9_BTI_DYS_OUTPUT_UV);
2552 }
2553
2554 return;
2555 }
2556
2557 static VAStatus
gen9_vp9_dys_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,gen9_vp9_dys_kernel_param * dys_kernel_param)2558 gen9_vp9_dys_kernel(VADriverContextP ctx,
2559 struct encode_state *encode_state,
2560 struct intel_encoder_context *encoder_context,
2561 gen9_vp9_dys_kernel_param *dys_kernel_param)
2562 {
2563 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2564 struct i965_gpe_context *gpe_context;
2565 int media_function;
2566 struct gen9_vp9_dys_curbe_param curbe_param;
2567 struct gen9_vp9_dys_surface_param surface_param;
2568 struct gpe_media_object_walker_parameter media_object_walker_param;
2569 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2570 unsigned int resolution_x, resolution_y;
2571
2572 media_function = VP9_MEDIA_STATE_DYS;
2573 gpe_context = &vme_context->dys_context.gpe_context;
2574
2575 //gen8_gpe_context_init(ctx, gpe_context);
2576 gen9_gpe_reset_binding_table(ctx, gpe_context);
2577
2578 /* sampler state is configured only when initializing the GPE context */
2579
2580 memset(&curbe_param, 0, sizeof(curbe_param));
2581 curbe_param.input_width = dys_kernel_param->input_width;
2582 curbe_param.input_height = dys_kernel_param->input_height;
2583 curbe_param.output_width = dys_kernel_param->output_width;
2584 curbe_param.output_height = dys_kernel_param->output_height;
2585 vme_context->pfn_set_curbe_dys(ctx, encode_state,
2586 gpe_context,
2587 encoder_context,
2588 &curbe_param);
2589
2590 // Add surface states
2591 memset(&surface_param, 0, sizeof(surface_param));
2592 surface_param.input_frame = dys_kernel_param->input_surface;
2593 surface_param.output_frame = dys_kernel_param->output_surface;
2594 surface_param.vert_line_stride = 0;
2595 surface_param.vert_line_stride_offset = 0;
2596
2597 vme_context->pfn_send_dys_surface(ctx,
2598 encode_state,
2599 gpe_context,
2600 encoder_context,
2601 &surface_param);
2602
2603 resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2604 resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2605
2606 gen8_gpe_setup_interface_data(ctx, gpe_context);
2607
2608 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2609 kernel_walker_param.resolution_x = resolution_x;
2610 kernel_walker_param.resolution_y = resolution_y;
2611 kernel_walker_param.no_dependency = 1;
2612
2613 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2614
2615 gen9_run_kernel_media_object_walker(ctx, encoder_context,
2616 gpe_context,
2617 media_function,
2618 &media_object_walker_param);
2619
2620 return VA_STATUS_SUCCESS;
2621 }
2622
2623 static VAStatus
gen9_vp9_run_dys_refframes(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2624 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2625 struct encode_state *encode_state,
2626 struct intel_encoder_context *encoder_context)
2627 {
2628 struct gen9_vp9_state *vp9_state;
2629 VAEncPictureParameterBufferVP9 *pic_param;
2630 gen9_vp9_dys_kernel_param dys_kernel_param;
2631 struct object_surface *obj_surface;
2632 struct object_surface *input_surface, *output_surface;
2633 struct gen9_surface_vp9 *vp9_priv_surface;
2634
2635 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2636
2637 if (!vp9_state || !vp9_state->pic_param)
2638 return VA_STATUS_ERROR_INVALID_PARAMETER;
2639
2640 pic_param = vp9_state->pic_param;
2641
2642 if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2643 (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2644 input_surface = encode_state->input_yuv_object;
2645 obj_surface = encode_state->reconstructed_object;
2646 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2647 output_surface = vp9_priv_surface->dys_surface_obj;
2648
2649 memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2650 dys_kernel_param.input_width = pic_param->frame_width_src;
2651 dys_kernel_param.input_height = pic_param->frame_height_src;
2652 dys_kernel_param.input_surface = input_surface;
2653 dys_kernel_param.output_width = pic_param->frame_width_dst;
2654 dys_kernel_param.output_height = pic_param->frame_height_dst;
2655 dys_kernel_param.output_surface = output_surface;
2656 gen9_vp9_dys_kernel(ctx, encode_state,
2657 encoder_context,
2658 &dys_kernel_param);
2659 }
2660
2661 if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2662 vp9_state->last_ref_obj) {
2663 obj_surface = vp9_state->last_ref_obj;
2664 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2665
2666 input_surface = obj_surface;
2667 output_surface = vp9_priv_surface->dys_surface_obj;
2668
2669 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2670 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2671 dys_kernel_param.input_surface = input_surface;
2672
2673 dys_kernel_param.output_width = pic_param->frame_width_dst;
2674 dys_kernel_param.output_height = pic_param->frame_height_dst;
2675 dys_kernel_param.output_surface = output_surface;
2676
2677 gen9_vp9_dys_kernel(ctx, encode_state,
2678 encoder_context,
2679 &dys_kernel_param);
2680
2681 if (vp9_state->hme_enabled) {
2682 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2683 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2684 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2685
2686 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2687 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2688 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2689
2690 gen9_vp9_dys_kernel(ctx, encode_state,
2691 encoder_context,
2692 &dys_kernel_param);
2693
2694 /* Does it really need to do the 16x HME if the
2695 * resolution is different?
2696 * Maybe it should be restricted
2697 */
2698 if (vp9_state->b16xme_enabled) {
2699 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2700 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2701 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2702
2703 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2704 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2705 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2706
2707 gen9_vp9_dys_kernel(ctx, encode_state,
2708 encoder_context,
2709 &dys_kernel_param);
2710 }
2711 }
2712 }
2713
2714 if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2715 vp9_state->golden_ref_obj) {
2716 obj_surface = vp9_state->golden_ref_obj;
2717 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2718
2719 input_surface = obj_surface;
2720 output_surface = vp9_priv_surface->dys_surface_obj;
2721
2722 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2723 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2724 dys_kernel_param.input_surface = input_surface;
2725
2726 dys_kernel_param.output_width = pic_param->frame_width_dst;
2727 dys_kernel_param.output_height = pic_param->frame_height_dst;
2728 dys_kernel_param.output_surface = output_surface;
2729
2730 gen9_vp9_dys_kernel(ctx, encode_state,
2731 encoder_context,
2732 &dys_kernel_param);
2733
2734 if (vp9_state->hme_enabled) {
2735 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2736 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2737 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2738
2739 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2740 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2741 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2742
2743 gen9_vp9_dys_kernel(ctx, encode_state,
2744 encoder_context,
2745 &dys_kernel_param);
2746
2747 /* Does it really need to do the 16x HME if the
2748 * resolution is different?
2749 * Maybe it should be restricted
2750 */
2751 if (vp9_state->b16xme_enabled) {
2752 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2753 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2754 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2755
2756 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2757 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2758 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2759
2760 gen9_vp9_dys_kernel(ctx, encode_state,
2761 encoder_context,
2762 &dys_kernel_param);
2763 }
2764 }
2765 }
2766
2767 if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2768 vp9_state->alt_ref_obj) {
2769 obj_surface = vp9_state->alt_ref_obj;
2770 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2771
2772 input_surface = obj_surface;
2773 output_surface = vp9_priv_surface->dys_surface_obj;
2774
2775 dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2776 dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2777 dys_kernel_param.input_surface = input_surface;
2778
2779 dys_kernel_param.output_width = pic_param->frame_width_dst;
2780 dys_kernel_param.output_height = pic_param->frame_height_dst;
2781 dys_kernel_param.output_surface = output_surface;
2782
2783 gen9_vp9_dys_kernel(ctx, encode_state,
2784 encoder_context,
2785 &dys_kernel_param);
2786
2787 if (vp9_state->hme_enabled) {
2788 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2789 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2790 dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2791
2792 dys_kernel_param.output_width = vp9_state->frame_width_4x;
2793 dys_kernel_param.output_height = vp9_state->frame_height_4x;
2794 dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2795
2796 gen9_vp9_dys_kernel(ctx, encode_state,
2797 encoder_context,
2798 &dys_kernel_param);
2799
2800 /* Does it really need to do the 16x HME if the
2801 * resolution is different?
2802 * Maybe it should be restricted
2803 */
2804 if (vp9_state->b16xme_enabled) {
2805 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2806 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2807 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2808
2809 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2810 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2811 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2812
2813 gen9_vp9_dys_kernel(ctx, encode_state,
2814 encoder_context,
2815 &dys_kernel_param);
2816 }
2817 }
2818 }
2819
2820 return VA_STATUS_SUCCESS;
2821 }
2822
2823 static void
gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_mbenc_curbe_param * curbe_param)2824 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2825 struct encode_state *encode_state,
2826 struct i965_gpe_context *gpe_context,
2827 struct intel_encoder_context *encoder_context,
2828 struct gen9_vp9_mbenc_curbe_param *curbe_param)
2829 {
2830 struct gen9_vp9_state *vp9_state;
2831 VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
2832 vp9_mbenc_curbe_data *curbe_cmd;
2833 VAEncPictureParameterBufferVP9 *pic_param;
2834 int i, segment_count;
2835 int seg_qindex;
2836 struct object_surface *obj_surface;
2837 struct gen9_surface_vp9 *vp9_priv_surface;
2838
2839 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2840
2841 if (!vp9_state || !vp9_state->pic_param)
2842 return;
2843
2844 pic_param = curbe_param->ppic_param;
2845 seg_param = curbe_param->psegment_param;
2846
2847 if (!seg_param) {
2848 memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
2849 seg_param = &tmp_seg_param;
2850 }
2851
2852 curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2853
2854 if (!curbe_cmd)
2855 return;
2856
2857 memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
2858
2859 if (vp9_state->dys_in_use) {
2860 curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
2861 curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
2862 } else {
2863 curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
2864 curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
2865 }
2866
2867 curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
2868
2869 curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
2870 if (pic_param->pic_flags.bits.segmentation_enabled)
2871 segment_count = 8;
2872 else
2873 segment_count = 1;
2874
2875 curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
2876
2877 //right now set them to normal settings
2878 if (curbe_param->picture_coding_type) {
2879 switch (vp9_state->target_usage) {
2880 case INTEL_ENC_VP9_TU_QUALITY:
2881 curbe_cmd->dw1.min_16for32_check = 0x00;
2882 curbe_cmd->dw2.multi_pred = 0x02;
2883 curbe_cmd->dw2.len_sp = 0x39;
2884 curbe_cmd->dw2.search_x = 0x30;
2885 curbe_cmd->dw2.search_y = 0x28;
2886 curbe_cmd->dw3.min_ref_for32_check = 0x01;
2887 curbe_cmd->dw4.skip16_threshold = 0x000A;
2888 curbe_cmd->dw4.disable_mr_threshold = 0x000C;
2889
2890 memcpy(&curbe_cmd->dw16,
2891 vp9_diamond_ime_search_path_delta,
2892 14 * sizeof(unsigned int));
2893 break;
2894 case INTEL_ENC_VP9_TU_PERFORMANCE:
2895 curbe_cmd->dw1.min_16for32_check = 0x02;
2896 curbe_cmd->dw2.multi_pred = 0x00;
2897 curbe_cmd->dw2.len_sp = 0x10;
2898 curbe_cmd->dw2.search_x = 0x20;
2899 curbe_cmd->dw2.search_y = 0x20;
2900 curbe_cmd->dw3.min_ref_for32_check = 0x03;
2901 curbe_cmd->dw4.skip16_threshold = 0x0014;
2902 curbe_cmd->dw4.disable_mr_threshold = 0x0016;
2903
2904 memcpy(&curbe_cmd->dw16,
2905 vp9_fullspiral_ime_search_path_delta,
2906 14 * sizeof(unsigned int));
2907
2908 break;
2909 default: // normal settings
2910 curbe_cmd->dw1.min_16for32_check = 0x01;
2911 curbe_cmd->dw2.multi_pred = 0x00;
2912 curbe_cmd->dw2.len_sp = 0x19;
2913 curbe_cmd->dw2.search_x = 0x30;
2914 curbe_cmd->dw2.search_y = 0x28;
2915 curbe_cmd->dw3.min_ref_for32_check = 0x02;
2916 curbe_cmd->dw4.skip16_threshold = 0x000F;
2917 curbe_cmd->dw4.disable_mr_threshold = 0x0011;
2918
2919 memcpy(&curbe_cmd->dw16,
2920 vp9_diamond_ime_search_path_delta,
2921 14 * sizeof(unsigned int));
2922 break;
2923 }
2924
2925 curbe_cmd->dw3.hme_enabled = curbe_param->hme_enabled;
2926 curbe_cmd->dw3.multi_ref_qp_check = curbe_param->multi_ref_qp_check;
2927 // co-located predictor must be disabled when dynamic scaling is enabled
2928 curbe_cmd->dw3.disable_temp_pred = vp9_state->dys_in_use;
2929 }
2930
2931 curbe_cmd->dw5.inter_round = 0;
2932 curbe_cmd->dw5.intra_round = 4;
2933 curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
2934
2935 for (i = 0; i < segment_count; i++) {
2936 seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
2937 + seg_param->seg_data[i].segment_qindex_delta;
2938
2939 seg_qindex = CLAMP(0, 255, seg_qindex);
2940
2941 if (curbe_param->picture_coding_type)
2942 memcpy(&curbe_cmd->segments[i],
2943 &intel_vp9_costlut_p[seg_qindex * 16],
2944 16 * sizeof(unsigned int));
2945 else
2946 memcpy(&curbe_cmd->segments[i],
2947 &intel_vp9_costlut_key[seg_qindex * 16],
2948 16 * sizeof(unsigned int));
2949 }
2950
2951 if (curbe_param->picture_coding_type) {
2952 if (curbe_cmd->dw3.multi_ref_qp_check) {
2953 if (curbe_param->ref_frame_flag & 0x01) {
2954 obj_surface = curbe_param->last_ref_obj;
2955 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2956 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2957 }
2958
2959 if (curbe_param->ref_frame_flag & 0x02) {
2960 obj_surface = curbe_param->golden_ref_obj;
2961 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2962 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2963 }
2964
2965 if (curbe_param->ref_frame_flag & 0x04) {
2966 obj_surface = curbe_param->alt_ref_obj;
2967 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2968 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2969 }
2970 }
2971 }
2972 curbe_cmd->dw160.enc_curr_y_surf_bti = VP9_BTI_MBENC_CURR_Y_G9;
2973 curbe_cmd->dw162.enc_curr_nv12_surf_bti = VP9_BTI_MBENC_CURR_NV12_G9;
2974 curbe_cmd->dw166.segmentation_map_bti = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
2975 curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
2976 curbe_cmd->dw167.tx_curbe_bti = VP9_BTI_MBENC_TX_CURBE_G9;
2977 curbe_cmd->dw168.hme_mvdata_bti = VP9_BTI_MBENC_HME_MV_DATA_G9;
2978 curbe_cmd->dw169.hme_distortion_bti = VP9_BTI_MBENC_HME_DISTORTION_G9;
2979 curbe_cmd->dw171.mode_decision_prev_bti = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
2980 curbe_cmd->dw172.mode_decision_bti = VP9_BTI_MBENC_MODE_DECISION_G9;
2981 curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
2982 curbe_cmd->dw174.cu_record_bti = VP9_BTI_MBENC_CU_RECORDS_G9;
2983 curbe_cmd->dw175.pak_data_bti = VP9_BTI_MBENC_PAK_DATA_G9;
2984
2985 i965_gpe_context_unmap_curbe(gpe_context);
2986 return;
2987 }
2988
2989 static void
gen9_vp9_send_mbenc_surface(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_mbenc_surface_param * mbenc_param)2990 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
2991 struct encode_state *encode_state,
2992 struct i965_gpe_context *gpe_context,
2993 struct intel_encoder_context *encoder_context,
2994 struct gen9_vp9_mbenc_surface_param *mbenc_param)
2995 {
2996 struct gen9_vp9_state *vp9_state;
2997 unsigned int res_size;
2998 unsigned int frame_width_in_sb, frame_height_in_sb;
2999 struct object_surface *obj_surface, *tmp_input;
3000 struct gen9_surface_vp9 *vp9_priv_surface;
3001 int media_function;
3002
3003 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3004
3005 if (!vp9_state || !vp9_state->pic_param)
3006 return;
3007
3008 frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3009 frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3010 media_function = mbenc_param->media_state_type;
3011
3012 switch (media_function) {
3013 case VP9_MEDIA_STATE_MBENC_I_32x32: {
3014 obj_surface = mbenc_param->curr_frame_obj;
3015
3016 i965_add_2d_gpe_surface(ctx,
3017 gpe_context,
3018 obj_surface,
3019 0,
3020 1,
3021 I965_SURFACEFORMAT_R8_UNORM,
3022 VP9_BTI_MBENC_CURR_Y_G9);
3023
3024 i965_add_2d_gpe_surface(ctx,
3025 gpe_context,
3026 obj_surface,
3027 1,
3028 1,
3029 I965_SURFACEFORMAT_R16_UINT,
3030 VP9_BTI_MBENC_CURR_UV_G9);
3031
3032
3033 if (mbenc_param->segmentation_enabled) {
3034 i965_add_buffer_2d_gpe_surface(ctx,
3035 gpe_context,
3036 mbenc_param->pres_segmentation_map,
3037 1,
3038 I965_SURFACEFORMAT_R8_UNORM,
3039 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3040
3041 }
3042
3043 res_size = 16 * mbenc_param->frame_width_in_mb *
3044 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3045 i965_add_buffer_gpe_surface(ctx,
3046 gpe_context,
3047 mbenc_param->pres_mode_decision,
3048 0,
3049 res_size / 4,
3050 0,
3051 VP9_BTI_MBENC_MODE_DECISION_G9);
3052
3053 break;
3054 }
3055 case VP9_MEDIA_STATE_MBENC_I_16x16: {
3056 obj_surface = mbenc_param->curr_frame_obj;
3057
3058 i965_add_2d_gpe_surface(ctx,
3059 gpe_context,
3060 obj_surface,
3061 0,
3062 1,
3063 I965_SURFACEFORMAT_R8_UNORM,
3064 VP9_BTI_MBENC_CURR_Y_G9);
3065
3066 i965_add_2d_gpe_surface(ctx,
3067 gpe_context,
3068 obj_surface,
3069 1,
3070 1,
3071 I965_SURFACEFORMAT_R16_UINT,
3072 VP9_BTI_MBENC_CURR_UV_G9);
3073
3074 i965_add_adv_gpe_surface(ctx, gpe_context,
3075 obj_surface,
3076 VP9_BTI_MBENC_CURR_NV12_G9);
3077
3078 if (mbenc_param->segmentation_enabled) {
3079 i965_add_buffer_2d_gpe_surface(ctx,
3080 gpe_context,
3081 mbenc_param->pres_segmentation_map,
3082 1,
3083 I965_SURFACEFORMAT_R8_UNORM,
3084 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3085
3086 }
3087
3088 res_size = 16 * mbenc_param->frame_width_in_mb *
3089 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3090 i965_add_buffer_gpe_surface(ctx,
3091 gpe_context,
3092 mbenc_param->pres_mode_decision,
3093 0,
3094 res_size / 4,
3095 0,
3096 VP9_BTI_MBENC_MODE_DECISION_G9);
3097
3098 res_size = 160;
3099
3100 gen9_add_dri_buffer_gpe_surface(ctx,
3101 gpe_context,
3102 mbenc_param->gpe_context_tx->curbe.bo,
3103 0,
3104 ALIGN(res_size, 64),
3105 mbenc_param->gpe_context_tx->curbe.offset,
3106 VP9_BTI_MBENC_TX_CURBE_G9);
3107
3108 break;
3109 }
3110 case VP9_MEDIA_STATE_MBENC_P: {
3111 obj_surface = mbenc_param->curr_frame_obj;
3112
3113 i965_add_2d_gpe_surface(ctx,
3114 gpe_context,
3115 obj_surface,
3116 0,
3117 1,
3118 I965_SURFACEFORMAT_R8_UNORM,
3119 VP9_BTI_MBENC_CURR_Y_G9);
3120
3121 i965_add_2d_gpe_surface(ctx, gpe_context,
3122 obj_surface,
3123 1,
3124 1,
3125 I965_SURFACEFORMAT_R16_UINT,
3126 VP9_BTI_MBENC_CURR_UV_G9);
3127
3128 i965_add_adv_gpe_surface(ctx, gpe_context,
3129 obj_surface,
3130 VP9_BTI_MBENC_CURR_NV12_G9);
3131
3132 if (mbenc_param->last_ref_obj) {
3133 obj_surface = mbenc_param->last_ref_obj;
3134 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3135
3136 if (vp9_state->dys_in_use &&
3137 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3138 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3139 tmp_input = vp9_priv_surface->dys_surface_obj;
3140 else
3141 tmp_input = obj_surface;
3142
3143 i965_add_adv_gpe_surface(ctx, gpe_context,
3144 tmp_input,
3145 VP9_BTI_MBENC_LAST_NV12_G9);
3146
3147 i965_add_adv_gpe_surface(ctx, gpe_context,
3148 tmp_input,
3149 VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3150
3151 }
3152
3153 if (mbenc_param->golden_ref_obj) {
3154 obj_surface = mbenc_param->golden_ref_obj;
3155 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3156
3157 if (vp9_state->dys_in_use &&
3158 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3159 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3160 tmp_input = vp9_priv_surface->dys_surface_obj;
3161 else
3162 tmp_input = obj_surface;
3163
3164 i965_add_adv_gpe_surface(ctx, gpe_context,
3165 tmp_input,
3166 VP9_BTI_MBENC_GOLD_NV12_G9);
3167
3168 i965_add_adv_gpe_surface(ctx, gpe_context,
3169 tmp_input,
3170 VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3171
3172 }
3173
3174 if (mbenc_param->alt_ref_obj) {
3175 obj_surface = mbenc_param->alt_ref_obj;
3176 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3177
3178 if (vp9_state->dys_in_use &&
3179 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3180 (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3181 tmp_input = vp9_priv_surface->dys_surface_obj;
3182 else
3183 tmp_input = obj_surface;
3184
3185 i965_add_adv_gpe_surface(ctx, gpe_context,
3186 tmp_input,
3187 VP9_BTI_MBENC_ALTREF_NV12_G9);
3188
3189 i965_add_adv_gpe_surface(ctx, gpe_context,
3190 tmp_input,
3191 VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
3192
3193 }
3194
3195 if (mbenc_param->hme_enabled) {
3196 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3197 mbenc_param->ps4x_memv_data_buffer,
3198 1,
3199 I965_SURFACEFORMAT_R8_UNORM,
3200 VP9_BTI_MBENC_HME_MV_DATA_G9);
3201
3202 i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3203 mbenc_param->ps4x_memv_distortion_buffer,
3204 1,
3205 I965_SURFACEFORMAT_R8_UNORM,
3206 VP9_BTI_MBENC_HME_DISTORTION_G9);
3207 }
3208
3209 if (mbenc_param->segmentation_enabled) {
3210 i965_add_buffer_2d_gpe_surface(ctx,
3211 gpe_context,
3212 mbenc_param->pres_segmentation_map,
3213 1,
3214 I965_SURFACEFORMAT_R8_UNORM,
3215 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3216
3217 }
3218
3219 res_size = 16 * mbenc_param->frame_width_in_mb *
3220 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3221 i965_add_buffer_gpe_surface(ctx,
3222 gpe_context,
3223 mbenc_param->pres_mode_decision_prev,
3224 0,
3225 res_size / 4,
3226 0,
3227 VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3228
3229 i965_add_buffer_gpe_surface(ctx,
3230 gpe_context,
3231 mbenc_param->pres_mode_decision,
3232 0,
3233 res_size / 4,
3234 0,
3235 VP9_BTI_MBENC_MODE_DECISION_G9);
3236
3237 i965_add_buffer_2d_gpe_surface(ctx,
3238 gpe_context,
3239 mbenc_param->pres_output_16x16_inter_modes,
3240 1,
3241 I965_SURFACEFORMAT_R8_UNORM,
3242 VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3243
3244 res_size = 160;
3245
3246 gen9_add_dri_buffer_gpe_surface(ctx,
3247 gpe_context,
3248 mbenc_param->gpe_context_tx->curbe.bo,
3249 0,
3250 ALIGN(res_size, 64),
3251 mbenc_param->gpe_context_tx->curbe.offset,
3252 VP9_BTI_MBENC_TX_CURBE_G9);
3253
3254
3255 break;
3256 }
3257 case VP9_MEDIA_STATE_MBENC_TX: {
3258 obj_surface = mbenc_param->curr_frame_obj;
3259
3260 i965_add_2d_gpe_surface(ctx,
3261 gpe_context,
3262 obj_surface,
3263 0,
3264 1,
3265 I965_SURFACEFORMAT_R8_UNORM,
3266 VP9_BTI_MBENC_CURR_Y_G9);
3267
3268 i965_add_2d_gpe_surface(ctx,
3269 gpe_context,
3270 obj_surface,
3271 1,
3272 1,
3273 I965_SURFACEFORMAT_R16_UINT,
3274 VP9_BTI_MBENC_CURR_UV_G9);
3275
3276 if (mbenc_param->segmentation_enabled) {
3277 i965_add_buffer_2d_gpe_surface(ctx,
3278 gpe_context,
3279 mbenc_param->pres_segmentation_map,
3280 1,
3281 I965_SURFACEFORMAT_R8_UNORM,
3282 VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3283
3284 }
3285
3286 res_size = 16 * mbenc_param->frame_width_in_mb *
3287 mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3288 i965_add_buffer_gpe_surface(ctx,
3289 gpe_context,
3290 mbenc_param->pres_mode_decision,
3291 0,
3292 res_size / 4,
3293 0,
3294 VP9_BTI_MBENC_MODE_DECISION_G9);
3295
3296 res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3297 i965_add_buffer_gpe_surface(ctx,
3298 gpe_context,
3299 mbenc_param->pres_mb_code_surface,
3300 0,
3301 res_size / 4,
3302 0,
3303 VP9_BTI_MBENC_PAK_DATA_G9);
3304
3305 // CU Record
3306 res_size = frame_width_in_sb * frame_height_in_sb *
3307 64 * 16 * sizeof(unsigned int);
3308
3309 i965_add_buffer_gpe_surface(ctx,
3310 gpe_context,
3311 mbenc_param->pres_mb_code_surface,
3312 0,
3313 res_size / 4,
3314 mbenc_param->mb_data_offset,
3315 VP9_BTI_MBENC_CU_RECORDS_G9);
3316 }
3317 default:
3318 break;
3319 }
3320
3321 return;
3322 }
3323
3324 static VAStatus
gen9_vp9_mbenc_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int media_function)3325 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3326 struct encode_state *encode_state,
3327 struct intel_encoder_context *encoder_context,
3328 int media_function)
3329 {
3330 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3331 struct i965_gpe_context *gpe_context, *tx_gpe_context;
3332 struct gpe_media_object_walker_parameter media_object_walker_param;
3333 struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
3334 unsigned int resolution_x, resolution_y;
3335 struct gen9_vp9_state *vp9_state;
3336 VAEncPictureParameterBufferVP9 *pic_param;
3337 struct gen9_vp9_mbenc_curbe_param curbe_param;
3338 struct gen9_vp9_mbenc_surface_param surface_param;
3339 VAStatus va_status = VA_STATUS_SUCCESS;
3340 int mbenc_gpe_index = 0;
3341 struct object_surface *obj_surface;
3342 struct gen9_surface_vp9 *vp9_priv_surface;
3343
3344 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3345
3346 if (!vp9_state || !vp9_state->pic_param)
3347 return VA_STATUS_ERROR_ENCODING_ERROR;
3348
3349 pic_param = vp9_state->pic_param;
3350
3351 switch (media_function) {
3352 case VP9_MEDIA_STATE_MBENC_I_32x32:
3353 mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3354 break;
3355
3356 case VP9_MEDIA_STATE_MBENC_I_16x16:
3357 mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3358 break;
3359
3360 case VP9_MEDIA_STATE_MBENC_P:
3361 mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3362 break;
3363
3364 case VP9_MEDIA_STATE_MBENC_TX:
3365 mbenc_gpe_index = VP9_MBENC_IDX_TX;
3366 break;
3367
3368 default:
3369 va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3370 return va_status;
3371 }
3372
3373 gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3374 tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3375
3376 gen9_gpe_reset_binding_table(ctx, gpe_context);
3377
3378 // Set curbe
3379 if (!vp9_state->mbenc_curbe_set_in_brc_update) {
3380 if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3381 media_function == VP9_MEDIA_STATE_MBENC_P) {
3382 memset(&curbe_param, 0, sizeof(curbe_param));
3383 curbe_param.ppic_param = vp9_state->pic_param;
3384 curbe_param.pseq_param = vp9_state->seq_param;
3385 curbe_param.psegment_param = vp9_state->segment_param;
3386 curbe_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3387 curbe_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3388 curbe_param.last_ref_obj = vp9_state->last_ref_obj;
3389 curbe_param.golden_ref_obj = vp9_state->golden_ref_obj;
3390 curbe_param.alt_ref_obj = vp9_state->alt_ref_obj;
3391 curbe_param.hme_enabled = vp9_state->hme_enabled;
3392 curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
3393 curbe_param.picture_coding_type = vp9_state->picture_coding_type;
3394 curbe_param.media_state_type = media_function;
3395 curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3396
3397 vme_context->pfn_set_curbe_mbenc(ctx,
3398 encode_state,
3399 gpe_context,
3400 encoder_context,
3401 &curbe_param);
3402 }
3403 }
3404
3405 memset(&surface_param, 0, sizeof(surface_param));
3406 surface_param.media_state_type = media_function;
3407 surface_param.picture_coding_type = vp9_state->picture_coding_type;
3408 surface_param.frame_width = vp9_state->frame_width;
3409 surface_param.frame_height = vp9_state->frame_height;
3410 surface_param.frame_width_in_mb = vp9_state->frame_width_in_mb;
3411 surface_param.frame_height_in_mb = vp9_state->frame_height_in_mb;
3412 surface_param.hme_enabled = vp9_state->hme_enabled;
3413 surface_param.segmentation_enabled = pic_param->pic_flags.bits.segmentation_enabled;
3414 surface_param.pres_segmentation_map = &vme_context->mb_segment_map_surface;
3415 surface_param.ps4x_memv_data_buffer = &vme_context->s4x_memv_data_buffer;
3416 surface_param.ps4x_memv_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
3417 surface_param.pres_mode_decision =
3418 &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3419 surface_param.pres_mode_decision_prev =
3420 &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3421 surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3422 surface_param.pres_mbenc_curbe_buffer = NULL;
3423 surface_param.last_ref_obj = vp9_state->last_ref_obj;
3424 surface_param.golden_ref_obj = vp9_state->golden_ref_obj;
3425 surface_param.alt_ref_obj = vp9_state->alt_ref_obj;
3426 surface_param.pres_mb_code_surface = &vme_context->res_mb_code_surface;
3427 surface_param.gpe_context_tx = tx_gpe_context;
3428 surface_param.mb_data_offset = vp9_state->mb_data_offset;
3429
3430 obj_surface = encode_state->reconstructed_object;
3431 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3432 if (vp9_state->dys_in_use &&
3433 (pic_param->frame_width_src != pic_param->frame_height_dst ||
3434 pic_param->frame_height_src != pic_param->frame_height_dst)) {
3435 obj_surface = vp9_priv_surface->dys_surface_obj;
3436 } else
3437 obj_surface = encode_state->input_yuv_object;
3438
3439 surface_param.curr_frame_obj = obj_surface;
3440
3441 vme_context->pfn_send_mbenc_surface(ctx,
3442 encode_state,
3443 gpe_context,
3444 encoder_context,
3445 &surface_param);
3446
3447 if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3448 resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3449 resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3450 } else {
3451 resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3452 resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3453 }
3454
3455 memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3456 kernel_walker_param.resolution_x = resolution_x;
3457 kernel_walker_param.resolution_y = resolution_y;
3458
3459 if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3460 media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3461 kernel_walker_param.use_scoreboard = 1;
3462 kernel_walker_param.no_dependency = 0;
3463 kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3464 } else {
3465 kernel_walker_param.use_scoreboard = 0;
3466 kernel_walker_param.no_dependency = 1;
3467 }
3468
3469 gen8_gpe_setup_interface_data(ctx, gpe_context);
3470
3471 gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3472
3473 gen9_run_kernel_media_object_walker(ctx, encoder_context,
3474 gpe_context,
3475 media_function,
3476 &media_object_walker_param);
3477 return va_status;
3478 }
3479
static void
gen9_init_gpe_context_vp9(VADriverContextP ctx,
                          struct i965_gpe_context *gpe_context,
                          struct vp9_encoder_kernel_parameter *kernel_param)
{
    /*
     * Initialize a GPE context (CURBE/sampler/IDRT/binding-table/VFE sizes)
     * for one VP9 encoding kernel, based on the kernel's declared sizes.
     * NOTE: several vfe_state members are hardware-packed bitfields, so the
     * assign-then-clamp order below is intentional and must be preserved.
     */
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    gpe_context->curbe.length = kernel_param->curbe_size; // in bytes

    /* No sampler state unless the kernel declares a sampler payload. */
    gpe_context->sampler.entry_size = 0;
    gpe_context->sampler.max_entries = 0;

    if (kernel_param->sampler_size) {
        gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
        gpe_context->sampler.max_entries = 1;
    }

    gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
    gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;

    /* Binding table (4 bytes per entry) followed by the surface states. */
    gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
    gpe_context->surface_state_binding_table.binding_table_offset = 0;
    gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
    gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);

    /* Scale thread count with the detected EU count, with a conservative
     * fallback when the EU count is unknown. */
    if (i965->intel.eu_total > 0)
        gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
    else
        gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads

    gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
    gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
    /* Remaining URB space (in registers) divided among URB entries. */
    gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
                                              gpe_context->vfe_state.curbe_allocation_size -
                                              ((gpe_context->idrt.entry_size >> 5) *
                                               gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
    /* CLAMP(min, max, value): keep the entry count in the HW-legal range. */
    gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
    gpe_context->vfe_state.gpgpu_mode = 0;
}
3519
3520 static void
gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context * gpe_context,struct vp9_encoder_scoreboard_parameter * scoreboard_param)3521 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3522 struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3523 {
3524 gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3525 gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3526 gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3527
3528 if (scoreboard_param->walkpat_flag) {
3529 gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3530 gpe_context->vfe_desc5.scoreboard0.type = 1;
3531
3532 gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0;
3533 gpe_context->vfe_desc6.scoreboard1.delta_y0 = -1;
3534
3535 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
3536 gpe_context->vfe_desc6.scoreboard1.delta_y1 = -2;
3537
3538 gpe_context->vfe_desc6.scoreboard1.delta_x2 = -1;
3539 gpe_context->vfe_desc6.scoreboard1.delta_y2 = 3;
3540
3541 gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
3542 gpe_context->vfe_desc6.scoreboard1.delta_y3 = 1;
3543 } else {
3544 // Scoreboard 0
3545 gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
3546 gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
3547
3548 // Scoreboard 1
3549 gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
3550 gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
3551
3552 // Scoreboard 2
3553 gpe_context->vfe_desc6.scoreboard1.delta_x2 = 1;
3554 gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
3555
3556 // Scoreboard 3
3557 gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
3558 gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
3559
3560 // Scoreboard 4
3561 gpe_context->vfe_desc7.scoreboard2.delta_x4 = -1;
3562 gpe_context->vfe_desc7.scoreboard2.delta_y4 = 1;
3563
3564 // Scoreboard 5
3565 gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
3566 gpe_context->vfe_desc7.scoreboard2.delta_y5 = -2;
3567
3568 // Scoreboard 6
3569 gpe_context->vfe_desc7.scoreboard2.delta_x6 = 1;
3570 gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
3571
3572 // Scoreboard 7
3573 gpe_context->vfe_desc7.scoreboard2.delta_x6 = -1;
3574 gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
3575 }
3576 }
3577
3578 #define VP9_VME_REF_WIN 48
3579
static VAStatus
gen9_encode_vp9_check_parameter(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    /*
     * Validate and latch the per-frame encode parameters:
     *  - resolve coded buffer, reconstructed surface and reference surfaces;
     *  - classify the frame (intra vs inter) and de-duplicate references;
     *  - refresh BRC targets/framerate/HRD state when needed;
     *  - derive the 4x/16x scaled dimensions and MB counts;
     *  - decide whether dynamic scaling (DYS) and HME/16xME are used.
     * Returns VA_STATUS_SUCCESS or an appropriate VA error code.
     */
    struct i965_driver_data *i965 = i965_driver_data(ctx); /* used by BUFFER()/SURFACE() */
    struct gen9_vp9_state *vp9_state;
    VAEncPictureParameterBufferVP9 *pic_param;
    VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
    VAEncSequenceParameterBufferVP9 *seq_param;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct gen9_surface_vp9 *vp9_priv_surface;
    bool need_brc_reset = false;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;

    if (!encode_state->pic_param_ext ||
        !encode_state->pic_param_ext->buffer) {
        return VA_STATUS_ERROR_INVALID_PARAMETER;
    }
    pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;

    /* The coded buffer must exist and be backed by a BO. */
    obj_buffer = BUFFER(pic_param->coded_buf);

    if (!obj_buffer ||
        !obj_buffer->buffer_store ||
        !obj_buffer->buffer_store->bo)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    encode_state->coded_buf_object = obj_buffer;

    /* The status report is written into the coded buffer's BO. */
    vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;

    encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);

    if (!encode_state->reconstructed_object ||
        !encode_state->input_yuv_object)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    vp9_state->curr_frame = pic_param->reconstructed_frame;
    vp9_state->ref_frame_flag = 0;
    if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
        pic_param->pic_flags.bits.intra_only) {
        /* this will be regarded as I-frame type */
        vp9_state->picture_coding_type = 0;
        vp9_state->last_ref_obj = NULL;
        vp9_state->golden_ref_obj = NULL;
        vp9_state->alt_ref_obj = NULL;
    } else {
        /* Inter frame: resolve the three possible references; a reference
         * that is missing or invalid is dropped from ref_frame_flag. */
        vp9_state->picture_coding_type = 1;
        vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
                                    pic_param->ref_flags.bits.ref_frame_ctrl_l1;

        obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
        vp9_state->last_ref_obj = obj_surface;
        if (!obj_surface ||
            !obj_surface->bo ||
            !obj_surface->private_data) {
            vp9_state->last_ref_obj = NULL;
            vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
        }

        obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
        vp9_state->golden_ref_obj = obj_surface;
        if (!obj_surface ||
            !obj_surface->bo ||
            !obj_surface->private_data) {
            vp9_state->golden_ref_obj = NULL;
            vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
        }

        obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
        vp9_state->alt_ref_obj = obj_surface;
        if (!obj_surface ||
            !obj_surface->bo ||
            !obj_surface->private_data) {
            vp9_state->alt_ref_obj = NULL;
            vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
        }

        /* remove the duplicated flag and ref frame list */
        if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
            if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
                pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
                vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
                vp9_state->golden_ref_obj = NULL;
            }

            if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
                pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
                vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
                vp9_state->alt_ref_obj = NULL;
            }
        }

        if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
            if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
                pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
                vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
                vp9_state->alt_ref_obj = NULL;
            }
        }

        /* An inter frame with no usable reference cannot be encoded. */
        if (vp9_state->ref_frame_flag == 0)
            return VA_STATUS_ERROR_INVALID_PARAMETER;
    }

    /* Per-segment parameters are carried in the q_matrix buffer and are
     * mandatory when segmentation is enabled. */
    seg_param = NULL;
    if (pic_param->pic_flags.bits.segmentation_enabled) {
        if (!encode_state->q_matrix ||
            !encode_state->q_matrix->buffer) {
            return VA_STATUS_ERROR_INVALID_PARAMETER;
        }
        seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
                    encode_state->q_matrix->buffer;
    }

    /* The sequence parameter is optional; fall back to a driver default. */
    seq_param = NULL;
    if (encode_state->seq_param_ext &&
        encode_state->seq_param_ext->buffer)
        seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;

    if (!seq_param) {
        seq_param = &vp9_state->bogus_seq_param;
    }

    vp9_state->pic_param = pic_param;
    vp9_state->segment_param = seg_param;
    vp9_state->seq_param = seq_param;

    /* The reconstructed surface must be large enough for the output size. */
    obj_surface = encode_state->reconstructed_object;
    if (pic_param->frame_width_dst > obj_surface->orig_width ||
        pic_param->frame_height_dst > obj_surface->orig_height)
        return VA_STATUS_ERROR_INVALID_SURFACE;

    /* src != dst size requires dynamic-scaling support. */
    if (!vp9_state->dys_enabled &&
        ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
         (pic_param->frame_height_src != pic_param->frame_height_dst)))
        return VA_STATUS_ERROR_UNIMPLEMENTED;

    if (vp9_state->brc_enabled) {

        /* A change in the application-supplied bitrate forces a BRC reset. */
        if (encoder_context->rate_control_mode == VA_RC_CBR)
            need_brc_reset = vp9_state->target_bit_rate != encoder_context->brc.bits_per_second[0] ? true : false;
        else if (encoder_context->rate_control_mode == VA_RC_VBR)
            need_brc_reset = vp9_state->max_bit_rate != encoder_context->brc.bits_per_second[0] ? true : false;

        /* NOTE(review): picture_coding_type is 0/1 while KEY_FRAME is the VP9
         * frame-type constant; this comparison relies on KEY_FRAME == 0 —
         * verify against the KEY_FRAME definition. */
        if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME || need_brc_reset) {
            vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;

            if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
                !encoder_context->brc.bits_per_second[0])
                return VA_STATUS_ERROR_INVALID_PARAMETER;

            vp9_state->gop_size = encoder_context->brc.gop_size;
            vp9_state->framerate = encoder_context->brc.framerate[0];
            /* Clamp the framerate to the encoder's supported maximum. */
            if ((vp9_state->framerate.num / vp9_state->framerate.den) > MAX_VP9_ENCODER_FRAMERATE) {
                vp9_state->framerate.num = MAX_VP9_ENCODER_FRAMERATE * vp9_state->framerate.den;
                i965_log_info(ctx, "gen9_encode_vp9_check_parameter: Too high frame rate(num: %d, den: %d), max supported is %d fps.\n",
                              vp9_state->framerate.num, vp9_state->framerate.den, MAX_VP9_ENCODER_FRAMERATE);
            }

            /* CBR (or missing target percentage): min = target = max.
             * VBR: target is a percentage of max; min mirrors target around
             * max (clamped at 0). */
            if (encoder_context->rate_control_mode == VA_RC_CBR ||
                !encoder_context->brc.target_percentage[0]) {
                vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
                vp9_state->max_bit_rate = vp9_state->target_bit_rate;
                vp9_state->min_bit_rate = vp9_state->target_bit_rate;
            } else {
                vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
                vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
                if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
                    vp9_state->min_bit_rate = 0;
                else
                    vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;
            }

            /* HRD buffer: explicit size > window-derived size > 1s of max rate. */
            if (encoder_context->brc.hrd_buffer_size)
                vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
            else if (encoder_context->brc.window_size)
                vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
            else
                vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
            if (encoder_context->brc.hrd_initial_buffer_fullness)
                vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
            else
                vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
        }
    }

    /* Derived dimensions: output size, 4x/16x down-scaled sizes and the
     * corresponding macroblock counts. */
    vp9_state->frame_width = pic_param->frame_width_dst;
    vp9_state->frame_height = pic_param->frame_height_dst;

    vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
    vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);

    vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
    vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);

    vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
    vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;

    vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
    vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
    vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
    vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;

    vp9_state->dys_in_use = 0;
    if (pic_param->frame_width_src != pic_param->frame_width_dst ||
        pic_param->frame_height_src != pic_param->frame_height_dst)
        vp9_state->dys_in_use = 1;
    vp9_state->dys_ref_frame_flag = 0;
    /* check the dys setting. The dys is supported by default. */
    if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
        !pic_param->pic_flags.bits.intra_only) {
        /* A reference whose stored size already matches the current frame
         * does not need dynamic scaling; clear its DYS flag. */
        vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;

        if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
            vp9_state->last_ref_obj) {
            obj_surface = vp9_state->last_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
                vp9_state->frame_height == vp9_priv_surface->frame_height)
                vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
        }
        if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
            vp9_state->golden_ref_obj) {
            obj_surface = vp9_state->golden_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
                vp9_state->frame_height == vp9_priv_surface->frame_height)
                vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
        }
        if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
            vp9_state->alt_ref_obj) {
            obj_surface = vp9_state->alt_ref_obj;
            vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);

            if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
                vp9_state->frame_height == vp9_priv_surface->frame_height)
                vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
        }
        if (vp9_state->dys_ref_frame_flag)
            vp9_state->dys_in_use = 1;
    }

    if (vp9_state->hme_supported) {
        vp9_state->hme_enabled = 1;
    } else {
        vp9_state->hme_enabled = 0;
    }

    if (vp9_state->b16xme_supported) {
        vp9_state->b16xme_enabled = 1;
    } else {
        vp9_state->b16xme_enabled = 0;
    }

    /* disable HME/16xME if the size is too small */
    if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
        vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
        vp9_state->hme_enabled = 0;
        vp9_state->b16xme_enabled = 0;
    }

    if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
        vp9_state->frame_height_16x < VP9_VME_REF_WIN)
        vp9_state->b16xme_enabled = 0;

    /* Intra frames never use motion estimation.
     * NOTE(review): this uses HCP_VP9_KEY_FRAME while the earlier check uses
     * KEY_FRAME — presumably the same value; verify the two constants agree. */
    if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
        pic_param->pic_flags.bits.intra_only) {
        vp9_state->hme_enabled = 0;
        vp9_state->b16xme_enabled = 0;
    }

    vp9_state->mbenc_keyframe_dist_enabled = 0;
    if ((vp9_state->picture_coding_type == KEY_FRAME) &&
        vp9_state->brc_distortion_buffer_supported)
        vp9_state->mbenc_keyframe_dist_enabled = 1;

    return VA_STATUS_SUCCESS;
}
3864
3865 static VAStatus
gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)3866 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
3867 struct encode_state *encode_state,
3868 struct intel_encoder_context *encoder_context)
3869 {
3870 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3871 struct vp9_surface_param surface_param;
3872 struct gen9_vp9_state *vp9_state;
3873 VAEncPictureParameterBufferVP9 *pic_param;
3874 struct object_surface *obj_surface;
3875 struct gen9_surface_vp9 *vp9_surface;
3876 int driver_header_flag = 0;
3877 VAStatus va_status;
3878
3879 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3880
3881 if (!vp9_state || !vp9_state->pic_param)
3882 return VA_STATUS_ERROR_INVALID_PARAMETER;
3883
3884 pic_param = vp9_state->pic_param;
3885
3886 /* this is to check whether the driver should generate the uncompressed header */
3887 driver_header_flag = 1;
3888 if (encode_state->packed_header_data_ext &&
3889 encode_state->packed_header_data_ext[0] &&
3890 pic_param->bit_offset_first_partition_size) {
3891 VAEncPackedHeaderParameterBuffer *param = NULL;
3892
3893 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
3894
3895 if (param->type == VAEncPackedHeaderRawData) {
3896 char *header_data;
3897 unsigned int length_in_bits;
3898
3899 header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
3900 length_in_bits = param->bit_length;
3901 driver_header_flag = 0;
3902
3903 vp9_state->frame_header.bit_offset_first_partition_size =
3904 pic_param->bit_offset_first_partition_size;
3905 vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
3906 vp9_state->alias_insert_data = header_data;
3907
3908 vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
3909 vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
3910 vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
3911 vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
3912 vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
3913 vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
3914 }
3915 }
3916
3917 if (driver_header_flag) {
3918 memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
3919 intel_write_uncompressed_header(encode_state,
3920 VAProfileVP9Profile0,
3921 vme_context->frame_header_data,
3922 &vp9_state->header_length,
3923 &vp9_state->frame_header);
3924 vp9_state->alias_insert_data = vme_context->frame_header_data;
3925 }
3926
3927 va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
3928 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3929 if (va_status != VA_STATUS_SUCCESS)
3930 return va_status;
3931
3932 va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
3933 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3934
3935 if (va_status != VA_STATUS_SUCCESS)
3936 return va_status;
3937
3938 surface_param.frame_width = vp9_state->frame_width;
3939 surface_param.frame_height = vp9_state->frame_height;
3940 va_status = gen9_vp9_init_check_surfaces(ctx,
3941 encode_state->reconstructed_object,
3942 &surface_param);
3943
3944 {
3945 vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
3946
3947 vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
3948 }
3949 if (vp9_state->dys_in_use &&
3950 (pic_param->frame_width_src != pic_param->frame_width_dst ||
3951 pic_param->frame_height_src != pic_param->frame_height_dst)) {
3952 surface_param.frame_width = pic_param->frame_width_dst;
3953 surface_param.frame_height = pic_param->frame_height_dst;
3954 va_status = gen9_vp9_check_dys_surfaces(ctx,
3955 encode_state->reconstructed_object,
3956 &surface_param);
3957
3958 if (va_status)
3959 return va_status;
3960 }
3961
3962 if (vp9_state->dys_ref_frame_flag) {
3963 if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
3964 vp9_state->last_ref_obj) {
3965 obj_surface = vp9_state->last_ref_obj;
3966 surface_param.frame_width = vp9_state->frame_width;
3967 surface_param.frame_height = vp9_state->frame_height;
3968 va_status = gen9_vp9_check_dys_surfaces(ctx,
3969 obj_surface,
3970 &surface_param);
3971
3972 if (va_status)
3973 return va_status;
3974 }
3975 if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
3976 vp9_state->golden_ref_obj) {
3977 obj_surface = vp9_state->golden_ref_obj;
3978 surface_param.frame_width = vp9_state->frame_width;
3979 surface_param.frame_height = vp9_state->frame_height;
3980 va_status = gen9_vp9_check_dys_surfaces(ctx,
3981 obj_surface,
3982 &surface_param);
3983
3984 if (va_status)
3985 return va_status;
3986 }
3987 if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
3988 vp9_state->alt_ref_obj) {
3989 obj_surface = vp9_state->alt_ref_obj;
3990 surface_param.frame_width = vp9_state->frame_width;
3991 surface_param.frame_height = vp9_state->frame_height;
3992 va_status = gen9_vp9_check_dys_surfaces(ctx,
3993 obj_surface,
3994 &surface_param);
3995
3996 if (va_status)
3997 return va_status;
3998 }
3999 }
4000
4001 if (va_status != VA_STATUS_SUCCESS)
4002 return va_status;
4003 /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
4004
4005 return VA_STATUS_SUCCESS;
4006 }
4007
4008 static VAStatus
gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4009 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4010 struct encode_state *encode_state,
4011 struct intel_encoder_context *encoder_context)
4012 {
4013 struct i965_driver_data *i965 = i965_driver_data(ctx);
4014 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4015 struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4016 struct vp9_dys_context *dys_context = &vme_context->dys_context;
4017 struct gpe_dynamic_state_parameter ds_param;
4018 int i;
4019
4020 /*
4021 * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4022 * MBEnc first
4023 */
4024 for (i = 0; i < NUM_VP9_MBENC; i++) {
4025 gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4026 }
4027
4028 /*
4029 * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
4030 * curbe_buffer.
4031 */
4032 ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4033 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4034 mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4035 "mbenc_dys",
4036 ds_param.bo_size,
4037 0x1000);
4038 mbenc_context->mbenc_bo_size = ds_param.bo_size;
4039
4040 ds_param.bo = mbenc_context->mbenc_bo_dys;
4041 ds_param.curbe_offset = 0;
4042 ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4043 for (i = 0; i < NUM_VP9_MBENC; i++) {
4044 ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4045 ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4046
4047 gen8_gpe_context_set_dynamic_buffer(ctx,
4048 &mbenc_context->gpe_contexts[i],
4049 &ds_param);
4050 }
4051
4052 gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4053 gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4054
4055 return VA_STATUS_SUCCESS;
4056 }
4057
4058 static VAStatus
gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4059 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4060 struct encode_state *encode_state,
4061 struct intel_encoder_context *encoder_context)
4062 {
4063 struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4064 struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4065
4066 dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4067 mbenc_context->mbenc_bo_dys = NULL;
4068
4069 return VA_STATUS_SUCCESS;
4070 }
4071
static VAStatus
gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    /*
     * Dispatch the VME kernels for one frame, in dependency order:
     *   DYS -> BRC init/reset -> scaling -> HME (16x then 4x) ->
     *   BRC intra-distortion + update -> MBEnc (I 32x32 + I 16x16, or P) ->
     *   MBEnc TX, then flip the mode-decision ping-pong index.
     */
    struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
    struct gen9_vp9_state *vp9_state;
    int i;

    vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;

    if (!vp9_state || !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    /* Rescale reference frames first when dynamic scaling is active. */
    if (vp9_state->dys_in_use) {
        gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
    }

    /* (Re-)initialize BRC history on the first frame or after a reset. */
    if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
        gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
    }

    /* Key frames must not inherit any previous mode-decision data.
     * NOTE(review): relies on KEY_FRAME == 0 matching picture_coding_type's
     * intra value — verify the constant. */
    if (vp9_state->picture_coding_type == KEY_FRAME) {
        for (i = 0; i < 2; i++)
            i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
    }

    /* Down-scale the input: 4x always when HME is supported; 16x on top of
     * it when 16xME is supported. */
    if (vp9_state->hme_supported) {
        gen9_vp9_scaling_kernel(ctx, encode_state,
                                encoder_context,
                                0);
        if (vp9_state->b16xme_supported) {
            gen9_vp9_scaling_kernel(ctx, encode_state,
                                    encoder_context,
                                    1);
        }
    }

    /* Hierarchical ME (inter frames only): 16x result seeds the 4x pass. */
    if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
        if (vp9_state->b16xme_enabled)
            gen9_vp9_me_kernel(ctx, encode_state,
                               encoder_context,
                               1);

        gen9_vp9_me_kernel(ctx, encode_state,
                           encoder_context,
                           0);
    }

    if (vp9_state->brc_enabled) {
        /* Intra distortion feeds the BRC update on key frames. */
        if (vp9_state->mbenc_keyframe_dist_enabled)
            gen9_vp9_brc_intra_dist_kernel(ctx,
                                           encode_state,
                                           encoder_context);

        gen9_vp9_brc_update_kernel(ctx, encode_state,
                                   encoder_context);
    }

    /* MBEnc: key frames run the 32x32 pass then the 16x16 pass; inter
     * frames run the single P pass. */
    if (vp9_state->picture_coding_type == KEY_FRAME) {
        gen9_vp9_mbenc_kernel(ctx, encode_state,
                              encoder_context,
                              VP9_MEDIA_STATE_MBENC_I_32x32);
        gen9_vp9_mbenc_kernel(ctx, encode_state,
                              encoder_context,
                              VP9_MEDIA_STATE_MBENC_I_16x16);
    } else {
        gen9_vp9_mbenc_kernel(ctx, encode_state,
                              encoder_context,
                              VP9_MEDIA_STATE_MBENC_P);
    }

    /* Transform/quantization pass consumes the MBEnc output. */
    gen9_vp9_mbenc_kernel(ctx, encode_state,
                          encoder_context,
                          VP9_MEDIA_STATE_MBENC_TX);

    /* Flip the mode-decision ping-pong buffer for the next frame. */
    vp9_state->curr_mode_decision_index ^= 1;
    if (vp9_state->brc_enabled) {
        vp9_state->brc_inited = 1;
        vp9_state->brc_reset = 0;
    }

    return VA_STATUS_SUCCESS;
}
4156
4157 static VAStatus
gen9_vme_pipeline_vp9(VADriverContextP ctx,VAProfile profile,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4158 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4159 VAProfile profile,
4160 struct encode_state *encode_state,
4161 struct intel_encoder_context *encoder_context)
4162 {
4163 VAStatus va_status;
4164 struct gen9_vp9_state *vp9_state;
4165
4166 vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4167
4168 if (!vp9_state)
4169 return VA_STATUS_ERROR_INVALID_CONTEXT;
4170
4171 va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4172 if (va_status != VA_STATUS_SUCCESS)
4173 return va_status;
4174
4175 va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4176 encoder_context,
4177 !vp9_state->brc_allocated);
4178
4179 if (va_status != VA_STATUS_SUCCESS)
4180 return va_status;
4181 vp9_state->brc_allocated = 1;
4182
4183 va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4184
4185 if (va_status != VA_STATUS_SUCCESS)
4186 return va_status;
4187
4188 va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4189 if (va_status != VA_STATUS_SUCCESS)
4190 return va_status;
4191
4192 va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4193 if (va_status != VA_STATUS_SUCCESS)
4194 return va_status;
4195
4196 gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4197
4198 return VA_STATUS_SUCCESS;
4199 }
4200
4201 static void
gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context * brc_context)4202 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4203 {
4204 int i;
4205
4206 for (i = 0; i < NUM_VP9_BRC; i++)
4207 gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
4208 }
4209
4210 static void
gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context * scaling_context)4211 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4212 {
4213 int i;
4214
4215 for (i = 0; i < NUM_VP9_SCALING; i++)
4216 gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
4217 }
4218
/* Release the single GPE context used by the ME (motion estimation) kernel. */
static void
gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
{
    gen8_gpe_context_destroy(&me_context->gpe_context);
}
4224
4225 static void
gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context * mbenc_context)4226 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4227 {
4228 int i;
4229
4230 for (i = 0; i < NUM_VP9_MBENC; i++)
4231 gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4232 dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4233 mbenc_context->mbenc_bo_size = 0;
4234 }
4235
/* Release the single GPE context used by the DYS (dynamic scaling) kernel. */
static void
gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
{
    gen8_gpe_context_destroy(&dys_context->gpe_context);
}
4241
4242 static void
gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 * vme_context)4243 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4244 {
4245 gen9_vp9_free_resources(vme_context);
4246 gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4247 gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4248 gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4249 gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4250 gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
4251
4252 return;
4253 }
4254
/* Encoder-context destructor hook: free all VME state and the context itself. */
static void
gen9_vme_context_destroy_vp9(void *context)
{
    struct gen9_encoder_context_vp9 *vme_context = context;

    if (vme_context) {
        gen9_vme_kernel_context_destroy_vp9(vme_context);
        free(vme_context);
    }
}
4269
4270 static void
gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_scaling_context * scaling_context)4271 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4272 struct gen9_encoder_context_vp9 *vme_context,
4273 struct vp9_scaling_context *scaling_context)
4274 {
4275 struct i965_gpe_context *gpe_context = NULL;
4276 struct vp9_encoder_kernel_parameter kernel_param;
4277 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4278 struct i965_kernel scale_kernel;
4279
4280 kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4281 kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4282 kernel_param.sampler_size = 0;
4283
4284 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4285 scoreboard_param.mask = 0xFF;
4286 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4287 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4288 scoreboard_param.walkpat_flag = 0;
4289
4290 gpe_context = &scaling_context->gpe_contexts[0];
4291 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4292 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4293
4294 scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4295 scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4296 scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4297 VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4298
4299 memset(&scale_kernel, 0, sizeof(scale_kernel));
4300
4301 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4302 sizeof(media_vp9_kernels),
4303 INTEL_VP9_ENC_SCALING4X,
4304 0,
4305 &scale_kernel);
4306
4307 gen8_gpe_load_kernels(ctx,
4308 gpe_context,
4309 &scale_kernel,
4310 1);
4311
4312 kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4313 kernel_param.inline_data_size = 0;
4314 kernel_param.sampler_size = 0;
4315
4316 gpe_context = &scaling_context->gpe_contexts[1];
4317 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4318 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4319
4320 memset(&scale_kernel, 0, sizeof(scale_kernel));
4321
4322 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4323 sizeof(media_vp9_kernels),
4324 INTEL_VP9_ENC_SCALING2X,
4325 0,
4326 &scale_kernel);
4327
4328 gen8_gpe_load_kernels(ctx,
4329 gpe_context,
4330 &scale_kernel,
4331 1);
4332
4333 scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4334 scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4335 return;
4336 }
4337
4338 static void
gen9_vme_me_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_me_context * me_context)4339 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4340 struct gen9_encoder_context_vp9 *vme_context,
4341 struct vp9_me_context *me_context)
4342 {
4343 struct i965_gpe_context *gpe_context = NULL;
4344 struct vp9_encoder_kernel_parameter kernel_param;
4345 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4346 struct i965_kernel scale_kernel;
4347
4348 kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4349 kernel_param.inline_data_size = 0;
4350 kernel_param.sampler_size = 0;
4351
4352 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4353 scoreboard_param.mask = 0xFF;
4354 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4355 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4356 scoreboard_param.walkpat_flag = 0;
4357
4358 gpe_context = &me_context->gpe_context;
4359 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4360 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4361
4362 memset(&scale_kernel, 0, sizeof(scale_kernel));
4363
4364 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4365 sizeof(media_vp9_kernels),
4366 INTEL_VP9_ENC_ME,
4367 0,
4368 &scale_kernel);
4369
4370 gen8_gpe_load_kernels(ctx,
4371 gpe_context,
4372 &scale_kernel,
4373 1);
4374
4375 return;
4376 }
4377
4378 static void
gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_mbenc_context * mbenc_context)4379 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4380 struct gen9_encoder_context_vp9 *vme_context,
4381 struct vp9_mbenc_context *mbenc_context)
4382 {
4383 struct i965_gpe_context *gpe_context = NULL;
4384 struct vp9_encoder_kernel_parameter kernel_param;
4385 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4386 int i;
4387 struct i965_kernel scale_kernel;
4388
4389 kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4390 kernel_param.inline_data_size = 0;
4391 kernel_param.sampler_size = 0;
4392
4393 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4394 scoreboard_param.mask = 0xFF;
4395 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4396 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4397
4398 for (i = 0; i < NUM_VP9_MBENC; i++) {
4399 gpe_context = &mbenc_context->gpe_contexts[i];
4400
4401 if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4402 (i == VP9_MBENC_IDX_INTER)) {
4403 scoreboard_param.walkpat_flag = 1;
4404 } else
4405 scoreboard_param.walkpat_flag = 0;
4406
4407 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4408 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4409
4410 memset(&scale_kernel, 0, sizeof(scale_kernel));
4411
4412 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4413 sizeof(media_vp9_kernels),
4414 INTEL_VP9_ENC_MBENC,
4415 i,
4416 &scale_kernel);
4417
4418 gen8_gpe_load_kernels(ctx,
4419 gpe_context,
4420 &scale_kernel,
4421 1);
4422 }
4423 }
4424
4425 static void
gen9_vme_brc_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_brc_context * brc_context)4426 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4427 struct gen9_encoder_context_vp9 *vme_context,
4428 struct vp9_brc_context *brc_context)
4429 {
4430 struct i965_gpe_context *gpe_context = NULL;
4431 struct vp9_encoder_kernel_parameter kernel_param;
4432 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4433 int i;
4434 struct i965_kernel scale_kernel;
4435
4436 kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4437 kernel_param.inline_data_size = 0;
4438 kernel_param.sampler_size = 0;
4439
4440 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4441 scoreboard_param.mask = 0xFF;
4442 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4443 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4444
4445 for (i = 0; i < NUM_VP9_BRC; i++) {
4446 gpe_context = &brc_context->gpe_contexts[i];
4447 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4448 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4449
4450 memset(&scale_kernel, 0, sizeof(scale_kernel));
4451
4452 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4453 sizeof(media_vp9_kernels),
4454 INTEL_VP9_ENC_BRC,
4455 i,
4456 &scale_kernel);
4457
4458 gen8_gpe_load_kernels(ctx,
4459 gpe_context,
4460 &scale_kernel,
4461 1);
4462 }
4463 }
4464
4465 static void
gen9_vme_dys_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_dys_context * dys_context)4466 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4467 struct gen9_encoder_context_vp9 *vme_context,
4468 struct vp9_dys_context *dys_context)
4469 {
4470 struct i965_gpe_context *gpe_context = NULL;
4471 struct vp9_encoder_kernel_parameter kernel_param;
4472 struct vp9_encoder_scoreboard_parameter scoreboard_param;
4473 struct i965_kernel scale_kernel;
4474
4475 kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4476 kernel_param.inline_data_size = 0;
4477 kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4478
4479 memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4480 scoreboard_param.mask = 0xFF;
4481 scoreboard_param.enable = vme_context->use_hw_scoreboard;
4482 scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4483 scoreboard_param.walkpat_flag = 0;
4484
4485 gpe_context = &dys_context->gpe_context;
4486 gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4487 gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4488
4489 memset(&scale_kernel, 0, sizeof(scale_kernel));
4490
4491 intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4492 sizeof(media_vp9_kernels),
4493 INTEL_VP9_ENC_DYS,
4494 0,
4495 &scale_kernel);
4496
4497 gen8_gpe_load_kernels(ctx,
4498 gpe_context,
4499 &scale_kernel,
4500 1);
4501
4502 return;
4503 }
4504
4505 static Bool
gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct gen9_encoder_context_vp9 * vme_context)4506 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4507 struct intel_encoder_context *encoder_context,
4508 struct gen9_encoder_context_vp9 *vme_context)
4509 {
4510 gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4511 gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4512 gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4513 gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4514 gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4515
4516 vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4517 vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4518 vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4519 vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4520
4521 vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4522
4523 vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4524 vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4525 vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4526 vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
4527 return true;
4528 }
4529
4530 static
gen9_vp9_write_compressed_element(char * buffer,int index,int prob,bool value)4531 void gen9_vp9_write_compressed_element(char *buffer,
4532 int index,
4533 int prob,
4534 bool value)
4535 {
4536 struct vp9_compressed_element *base_element, *vp9_element;
4537 base_element = (struct vp9_compressed_element *)buffer;
4538
4539 vp9_element = base_element + (index >> 1);
4540 if (index % 2) {
4541 vp9_element->b_valid = 1;
4542 vp9_element->b_probdiff_select = 1;
4543 vp9_element->b_prob_select = (prob == 252) ? 1 : 0;
4544 vp9_element->b_bin = value;
4545 } else {
4546 vp9_element->a_valid = 1;
4547 vp9_element->a_probdiff_select = 1;
4548 vp9_element->a_prob_select = (prob == 252) ? 1 : 0;
4549 vp9_element->a_bin = value;
4550 }
4551 }
4552
/*
 * Refresh the per-frame probability contexts and rebuild the compressed
 * header stream-in buffer for the PAK stage.
 *
 * On key frames, intra-only frames and error-resilient frames the current
 * frame context is reset to the VP9 defaults (and propagated to the four
 * saved contexts per the reset_frame_context rules); otherwise the context
 * index requested in the picture parameters is used.  The element indices
 * written below (0..2, 3/7/15, 27/820/1613/2406, 3271/3272) are fixed bit
 * positions in the compressed-header layout consumed by the HCP hardware.
 */
static void
intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
                                            struct intel_encoder_context *encoder_context)
{
    struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferVP9 *pic_param;
    struct gen9_vp9_state *vp9_state;
    char *buffer;
    int i;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);

    if (!pak_context || !vp9_state || !vp9_state->pic_param)
        return;

    pic_param = vp9_state->pic_param;
    if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
        (pic_param->pic_flags.bits.intra_only) ||
        pic_param->pic_flags.bits.error_resilient_mode) {
        /* reset current frame_context */
        intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
        if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
            pic_param->pic_flags.bits.error_resilient_mode ||
            (pic_param->pic_flags.bits.reset_frame_context == 3)) {
            /* reset_frame_context == 3: reset all four saved contexts */
            for (i = 0; i < 4; i++)
                memcpy(&vp9_state->vp9_frame_ctx[i],
                       &vp9_state->vp9_current_fc,
                       sizeof(FRAME_CONTEXT));
        } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
            /* reset_frame_context == 2: reset only the selected context */
            i = pic_param->pic_flags.bits.frame_context_idx;
            memcpy(&vp9_state->vp9_frame_ctx[i],
                   &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
        }
        /* reset the frame_ctx_idx = 0 */
        vp9_state->frame_ctx_idx = 0;
    } else {
        vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
    }

    /* Rebuild the compressed-header stream-in buffer from scratch. */
    i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
    buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);

    if (!buffer)
        return;

    /* write tx_size */
    if ((pic_param->luma_ac_qindex == 0) &&
        (pic_param->luma_dc_qindex_delta == 0) &&
        (pic_param->chroma_ac_qindex_delta == 0) &&
        (pic_param->chroma_dc_qindex_delta == 0)) {
        /* lossless flag */
        /* nothing is needed */
        gen9_vp9_write_compressed_element(buffer,
                                          0, 128, 0);
        gen9_vp9_write_compressed_element(buffer,
                                          1, 128, 0);
        gen9_vp9_write_compressed_element(buffer,
                                          2, 128, 0);
    } else {
        /* tx_mode is coded as up to three bins at indices 0..2 */
        if (vp9_state->tx_mode == TX_MODE_SELECT) {
            gen9_vp9_write_compressed_element(buffer,
                                              0, 128, 1);
            gen9_vp9_write_compressed_element(buffer,
                                              1, 128, 1);
            gen9_vp9_write_compressed_element(buffer,
                                              2, 128, 1);
        } else if (vp9_state->tx_mode == ALLOW_32X32) {
            gen9_vp9_write_compressed_element(buffer,
                                              0, 128, 1);
            gen9_vp9_write_compressed_element(buffer,
                                              1, 128, 1);
            gen9_vp9_write_compressed_element(buffer,
                                              2, 128, 0);
        } else {
            unsigned int tx_mode;

            /* Encode the low two bits of the tx_mode value directly. */
            tx_mode = vp9_state->tx_mode;
            gen9_vp9_write_compressed_element(buffer,
                                              0, 128, ((tx_mode) & 2));
            gen9_vp9_write_compressed_element(buffer,
                                              1, 128, ((tx_mode) & 1));
            gen9_vp9_write_compressed_element(buffer,
                                              2, 128, 0);
        }

        if (vp9_state->tx_mode == TX_MODE_SELECT) {
            /* tx-probability update flags (one per tx size) — all "no update" */
            gen9_vp9_write_compressed_element(buffer,
                                              3, 128, 0);

            gen9_vp9_write_compressed_element(buffer,
                                              7, 128, 0);

            gen9_vp9_write_compressed_element(buffer,
                                              15, 128, 0);
        }
    }
    /*Setup all the input&output object*/

    {
        /* update the coeff_update flag */
        /* one "no update" flag per coefficient tx-size group */
        gen9_vp9_write_compressed_element(buffer,
                                          27, 128, 0);
        gen9_vp9_write_compressed_element(buffer,
                                          820, 128, 0);
        gen9_vp9_write_compressed_element(buffer,
                                          1613, 128, 0);
        gen9_vp9_write_compressed_element(buffer,
                                          2406, 128, 0);
    }


    /* Inter (non-key, non-intra-only) frames: code the reference mode. */
    if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only) {
        /* Compound prediction is only allowed when the reference sign
         * biases are not all equal (per the VP9 spec). */
        bool allow_comp = !(
                              (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
                              (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
                          );

        if (allow_comp) {
            if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
                gen9_vp9_write_compressed_element(buffer,
                                                  3271, 128, 1);
                gen9_vp9_write_compressed_element(buffer,
                                                  3272, 128, 1);
            } else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
                gen9_vp9_write_compressed_element(buffer,
                                                  3271, 128, 1);
                gen9_vp9_write_compressed_element(buffer,
                                                  3272, 128, 0);
            } else {
                /* single-reference mode */
                gen9_vp9_write_compressed_element(buffer,
                                                  3271, 128, 0);
                gen9_vp9_write_compressed_element(buffer,
                                                  3272, 128, 0);
            }
        }
    }

    i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
}
4694
4695
/*
 * Emit HCP_PIPE_MODE_SELECT (6 DWs) putting the HCP pipe into encode
 * mode for the codec given in pipe_mode_param, with optional stream-out
 * and Pic Status/Error Report disabled.
 */
static void
gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context,
                              struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (pipe_mode_param->stream_out << 12) |
                  (pipe_mode_param->codec_mode << 5) |
                  (0 << 3) | /* disable Pic Status / Error Report */
                  (pipe_mode_param->stream_out << 2) |
                  HCP_CODEC_SELECT_ENCODE);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* DW4 bit 6 — NOTE(review): purpose not visible here; confirm against
     * the Gen9 PRM description of HCP_PIPE_MODE_SELECT DW4. */
    OUT_BCS_BATCH(batch, (1 << 6));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
4720
/*
 * Emit one HCP_SURFACE_STATE (3 DWs) describing a surface for the HCP
 * pipe: surface id + pitch in DW1, format + Y/Cb offset in DW2.
 * Silently does nothing when hcp_state is NULL.
 */
static void
gen9_vp9_add_surface_state(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           hcp_surface_state *hcp_state)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    if (!hcp_state)
        return;

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
    OUT_BCS_BATCH(batch,
                  (hcp_state->dw1.surface_id << 28) |
                  (hcp_state->dw1.surface_pitch - 1) /* hardware expects pitch - 1 */
                 );
    OUT_BCS_BATCH(batch,
                  (hcp_state->dw2.surface_format << 28) |
                  (hcp_state->dw2.y_cb_offset)
                 );
    ADVANCE_BCS_BATCH(batch);
}
4743
/*
 * Emit HCP_PIPE_BUF_ADDR_STATE (104 DWs) wiring every buffer address the
 * HCP pipe needs for VP9 PAK: reconstructed surface, deblocking/metadata
 * row stores, motion-vector temporal buffers, reference frames, source
 * input, probability and segment-id buffers, and HVD row stores.
 * The DW layout is fixed by hardware — the emission order below must not
 * change.
 */
static void
gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
    struct gen9_vp9_state *vp9_state;
    unsigned int i;
    struct object_surface *obj_surface;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);

    if (!vp9_state || !vp9_state->pic_param)
        return;


    BEGIN_BCS_BATCH(batch, 104);

    OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));

    obj_surface = encode_state->reconstructed_object;

    /* reconstructed obj_surface is already checked. So this is skipped */
    /* DW 1..3 decoded surface */
    OUT_RELOC64(batch,
                obj_surface->bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 4..6 deblocking line */
    OUT_RELOC64(batch,
                pak_context->res_deblocking_filter_line_buffer.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 7..9 deblocking tile line */
    OUT_RELOC64(batch,
                pak_context->res_deblocking_filter_tile_line_buffer.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 10..12 deblocking tile col */
    OUT_RELOC64(batch,
                pak_context->res_deblocking_filter_tile_col_buffer.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 13..15 metadata line */
    OUT_RELOC64(batch,
                pak_context->res_metadata_line_buffer.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 16..18 metadata tile line */
    OUT_RELOC64(batch,
                pak_context->res_metadata_tile_line_buffer.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 19..21 metadata tile col */
    OUT_RELOC64(batch,
                pak_context->res_metadata_tile_col_buffer.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 22..30 SAO is not used for VP9 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 31..33 Current Motion vector temporal buffer */
    OUT_RELOC64(batch,
                pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 34..36 Not used */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Only the first three reference_frame is used for VP9 */
    /* DW 37..52 for reference_frame */
    /* picture_coding_type != 0 means an inter frame: emit up to three
     * references, then pad the remaining slots (of 8) with zeros. */
    i = 0;
    if (vp9_state->picture_coding_type) {
        for (i = 0; i < 3; i++) {

            if (pak_context->reference_surfaces[i].bo) {
                OUT_RELOC64(batch,
                            pak_context->reference_surfaces[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
            } else {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            }
        }
    }

    for (; i < 8; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* DW 53 attribute for the reference frames */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 54..56 for source input */
    OUT_RELOC64(batch,
                pak_context->uncompressed_picture_source.bo,
                I915_GEM_DOMAIN_INSTRUCTION, 0,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 57..59 StreamOut is not used */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 60..62. Not used for encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 63..65. ILDB Not used for encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 66..81 For the collocated motion vector temporal buffer */
    /* The two MV temporal buffers ping-pong via the index's low bit;
     * the previous frame's buffer is the collocated source. */
    if (vp9_state->picture_coding_type) {
        int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
        OUT_RELOC64(batch,
                    pak_context->res_mv_temporal_buffer[prev_index].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, 0,
                    0);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    for (i = 1; i < 8; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    /* DW 82 attribute */
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 83..85 VP9 prob buffer */
    OUT_RELOC64(batch,
                pak_context->res_prob_buffer.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);

    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 86..88 Segment id buffer */
    if (pak_context->res_segmentid_buffer.bo) {
        OUT_RELOC64(batch,
                    pak_context->res_segmentid_buffer.bo,
                    I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                    0);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 89..91 HVD line rowstore buffer */
    OUT_RELOC64(batch,
                pak_context->res_hvd_line_buffer.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 92..94 HVD tile line rowstore buffer */
    OUT_RELOC64(batch,
                pak_context->res_hvd_tile_line_buffer.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                0);
    OUT_BCS_BATCH(batch, i965->intel.mocs_state);

    /* DW 95..97 SAO streamout. Not used for VP9 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* reserved for KBL. 98..100 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* 101..103 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
4957
4958 static void
gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4959 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
4960 struct encode_state *encode_state,
4961 struct intel_encoder_context *encoder_context)
4962 {
4963 struct i965_driver_data *i965 = i965_driver_data(ctx);
4964 struct intel_batchbuffer *batch = encoder_context->base.batch;
4965 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4966 struct gen9_vp9_state *vp9_state;
4967
4968 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4969
4970 /* to do */
4971 BEGIN_BCS_BATCH(batch, 29);
4972
4973 OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
4974
4975 /* indirect bitstream object base */
4976 OUT_BCS_BATCH(batch, 0);
4977 OUT_BCS_BATCH(batch, 0);
4978 OUT_BCS_BATCH(batch, 0);
4979 /* the upper bound of indirect bitstream object */
4980 OUT_BCS_BATCH(batch, 0);
4981 OUT_BCS_BATCH(batch, 0);
4982
4983 /* DW 6: Indirect CU object base address */
4984 OUT_RELOC64(batch,
4985 pak_context->res_mb_code_surface.bo,
4986 I915_GEM_DOMAIN_INSTRUCTION, 0, /* No write domain */
4987 vp9_state->mb_data_offset);
4988 /* default attribute */
4989 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4990
4991 /* DW 9..11, PAK-BSE */
4992 OUT_RELOC64(batch,
4993 pak_context->indirect_pak_bse_object.bo,
4994 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4995 pak_context->indirect_pak_bse_object.offset);
4996 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4997
4998 /* DW 12..13 upper bound */
4999 OUT_RELOC64(batch,
5000 pak_context->indirect_pak_bse_object.bo,
5001 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5002 pak_context->indirect_pak_bse_object.end_offset);
5003
5004 /* DW 14..16 compressed header buffer */
5005 OUT_RELOC64(batch,
5006 pak_context->res_compressed_input_buffer.bo,
5007 I915_GEM_DOMAIN_INSTRUCTION, 0,
5008 0);
5009 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5010
5011 /* DW 17..19 prob counter streamout */
5012 OUT_RELOC64(batch,
5013 pak_context->res_prob_counter_buffer.bo,
5014 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5015 0);
5016 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5017
5018 /* DW 20..22 prob delta streamin */
5019 OUT_RELOC64(batch,
5020 pak_context->res_prob_delta_buffer.bo,
5021 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5022 0);
5023 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5024
5025 /* DW 23..25 Tile record streamout */
5026 OUT_RELOC64(batch,
5027 pak_context->res_tile_record_streamout_buffer.bo,
5028 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5029 0);
5030 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5031
5032 /* DW 26..28 CU record streamout */
5033 OUT_RELOC64(batch,
5034 pak_context->res_cu_stat_streamout_buffer.bo,
5035 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5036 0);
5037 OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5038
5039 ADVANCE_BCS_BATCH(batch);
5040 }
5041
/*
 * Emit HCP_VP9_SEGMENT_STATE (8 DWs) for one segment: the segment id,
 * its reference/skip flags, and the qindex / loop-filter level deltas
 * encoded in sign-magnitude form.  The segment_reference field is forced
 * to 0 on key frames and intra-only frames (no inter references exist).
 */
static void
gen9_pak_vp9_segment_state(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           VAEncSegParamVP9 *seg_param, uint8_t seg_id)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    uint32_t batch_value, tmp;
    VAEncPictureParameterBufferVP9 *pic_param;

    if (!encode_state->pic_param_ext ||
        !encode_state->pic_param_ext->buffer) {
        return;
    }

    pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;

    batch_value = seg_param->seg_flags.bits.segment_reference;
    if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
        pic_param->pic_flags.bits.intra_only)
        batch_value = 0;

    BEGIN_BCS_BATCH(batch, 8);

    OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
    OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
    OUT_BCS_BATCH(batch,
                  (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
                  (batch_value << 1) |
                  (seg_param->seg_flags.bits.segment_reference_skipped << 0)
                 );

    /* DW 3..6 is not used for encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 7 Mode */
    /* qindex delta in bits 0..8, loop-filter level delta in bits 16..22,
     * both converted to sign-magnitude representation. */
    tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
    batch_value = tmp;
    tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
    batch_value |= (tmp << 16);
    OUT_BCS_BATCH(batch, batch_value);

    ADVANCE_BCS_BATCH(batch);

}
5090
/*
 * Build a second-level batch buffer containing a single
 * HCP_INSERT_PAK_OBJECT command that carries the already-packed VP9
 * uncompressed frame header (vp9_state->alias_insert_data), terminated
 * with MI_NOOP + MI_BATCH_BUFFER_END.
 */
static void
intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
                                                 struct intel_encoder_context *encoder_context,
                                                 struct i965_gpe_resource *obj_batch_buffer)
{
    struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
    struct gen9_vp9_state *vp9_state;
    int uncompressed_header_length;
    unsigned int *cmd_ptr;
    unsigned int dw_length, bits_in_last_dw;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);

    if (!pak_context || !vp9_state || !vp9_state->pic_param)
        return;

    /* header_length is treated as a byte count throughout this function */
    uncompressed_header_length = vp9_state->header_length;
    cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);

    if (!cmd_ptr)
        return;

    /* Number of valid bits in the final DWORD of the header payload;
     * a DWORD-aligned header uses all 32 bits of its last DWORD. */
    bits_in_last_dw = uncompressed_header_length % 4;
    bits_in_last_dw *= 8;

    if (bits_in_last_dw == 0)
        bits_in_last_dw = 32;

    /* get the DWORD length of the inserted_data */
    dw_length = ALIGN(uncompressed_header_length, 4) / 4;
    *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;

    *cmd_ptr++ = ((0 << 31) | /* indirect payload */
                  (0 << 16) | /* the start offset in first DW */
                  (0 << 15) |
                  (bits_in_last_dw << 8) | /* bits_in_last_dw */
                  (0 << 4) | /* skip emulation byte count. 0 for VP9 */
                  (0 << 3) | /* emulation flag. 0 for VP9 */
                  (1 << 2) | /* last header flag. */
                  (0 << 1));
    /* Copy the header payload (rounded up to whole DWORDs) inline. */
    memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));

    cmd_ptr += dw_length;

    *cmd_ptr++ = MI_NOOP;
    *cmd_ptr++ = MI_BATCH_BUFFER_END;
    i965_unmap_gpe_resource(obj_batch_buffer);
}
5139
/*
 * Emit the picture-level HCP PAK command sequence for the current PAK pass:
 * pipe mode select, surface states (recon / raw / up to three references),
 * buffer addresses, second-level batches for pic state, segment states,
 * the uncompressed header insertion, and the PAK object batch.
 */
static void
gen9_vp9_pak_picture_level(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    VAEncPictureParameterBufferVP9 *pic_param;
    VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
    struct gen9_vp9_state *vp9_state;
    struct gen9_surface_vp9 *vp9_priv_surface;
    int i;
    struct gen9_hcpe_pipe_mode_select_param mode_param;
    hcp_surface_state hcp_surface;
    struct gpe_mi_batch_buffer_start_parameter second_level_batch;
    int segment_count;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);

    if (!pak_context || !vp9_state || !vp9_state->pic_param)
        return;

    pic_param = vp9_state->pic_param;
    seg_param = vp9_state->segment_param;

    /* One-time per-frame setup: only build the insert-object and pic-state
     * second-level batches on the first PAK pass. */
    if (vp9_state->curr_pak_pass == 0) {
        intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
                                                         &pak_context->res_pak_uncompressed_input_buffer);

        // Check if driver already programmed pic state as part of BRC update kernel programming.
        if (!vp9_state->brc_enabled) {
            intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
                                                     encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
        }
    }

    if (vp9_state->curr_pak_pass == 0) {
        intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
    }

    {
        /* copy the frame_context[frame_idx] into curr_frame_context */
        memcpy(&vp9_state->vp9_current_fc,
               &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
               sizeof(FRAME_CONTEXT));
        {
            uint8_t *prob_ptr;

            prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);

            if (!prob_ptr)
                return;

            /* copy the current fc to vp9_prob buffer */
            memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
            /* Key frames / intra-only frames use the fixed key-frame
             * partition and UV mode probability tables instead. */
            if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
                pic_param->pic_flags.bits.intra_only) {
                FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;

                memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
                       sizeof(vp9_kf_partition_probs));
                memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
                       sizeof(vp9_kf_uv_mode_prob));
            }
            i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
        }
    }

    /* On BRC re-PAK passes, end the batch early if the previous pass
     * already met the target (image-status mask compare). */
    if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
        /* read image status and insert the conditional end cmd */
        /* image ctrl/status is already accessed */
        struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
        struct vp9_encode_status_buffer_internal *status_buffer;

        status_buffer = &vp9_state->status_buffer;
        memset(&mi_cond_end, 0, sizeof(mi_cond_end));
        mi_cond_end.offset = status_buffer->image_status_mask_offset;
        mi_cond_end.bo = status_buffer->bo;
        mi_cond_end.compare_data = 0;
        mi_cond_end.compare_mask_mode_disabled = 1;
        gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
                                                 &mi_cond_end);
    }

    /* codec_mode 1 selects VP9 encode on the HCP pipe; no stream-out. */
    mode_param.codec_mode = 1;
    mode_param.stream_out = 0;
    gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);

    /* reconstructed surface (surface_id 0) */
    memset(&hcp_surface, 0, sizeof(hcp_surface));
    obj_surface = encode_state->reconstructed_object;
    hcp_surface.dw1.surface_id = 0;
    hcp_surface.dw1.surface_pitch = obj_surface->width;
    hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
    hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
    gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
                               &hcp_surface);

    /* Input surface (surface_id 1): when dynamic scaling is active and the
     * src/dst dimensions differ, feed the scaled surface instead of the
     * original input YUV. */
    if (vp9_state->dys_in_use &&
        ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
         (pic_param->frame_height_src != pic_param->frame_height_dst))) {
        vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
        obj_surface = vp9_priv_surface->dys_surface_obj;
    } else {
        obj_surface = encode_state->input_yuv_object;
    }

    hcp_surface.dw1.surface_id = 1;
    hcp_surface.dw1.surface_pitch = obj_surface->width;
    hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
    hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
    gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
                               &hcp_surface);

    /* Inter frames: program surface states for whichever of the
     * last/golden/alt reference surfaces are present (ids 2..4). */
    if (vp9_state->picture_coding_type) {
        /* Add surface for last */
        if (vp9_state->last_ref_obj) {
            obj_surface = vp9_state->last_ref_obj;
            hcp_surface.dw1.surface_id = 2;
            hcp_surface.dw1.surface_pitch = obj_surface->width;
            hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
            hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
            gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
                                       &hcp_surface);
        }
        if (vp9_state->golden_ref_obj) {
            obj_surface = vp9_state->golden_ref_obj;
            hcp_surface.dw1.surface_id = 3;
            hcp_surface.dw1.surface_pitch = obj_surface->width;
            hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
            hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
            gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
                                       &hcp_surface);
        }
        if (vp9_state->alt_ref_obj) {
            obj_surface = vp9_state->alt_ref_obj;
            hcp_surface.dw1.surface_id = 4;
            hcp_surface.dw1.surface_pitch = obj_surface->width;
            hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
            hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
            gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
                                       &hcp_surface);
        }
    }

    gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);

    gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);

    // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
    memset(&second_level_batch, 0, sizeof(second_level_batch));

    /* Each PAK pass reads its own pic-state slot in the BRC-written buffer. */
    if (vp9_state->curr_pak_pass == 0) {
        second_level_batch.offset = 0;
    } else
        second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;

    second_level_batch.is_second_level = 1;
    second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;

    gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* With segmentation enabled all 8 segment states are programmed;
     * otherwise a single zeroed segment state is used. */
    if (pic_param->pic_flags.bits.segmentation_enabled &&
        seg_param)
        segment_count = 8;
    else {
        segment_count = 1;
        memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
        seg_param = &tmp_seg_param;
    }
    for (i = 0; i < segment_count; i++) {
        gen9_pak_vp9_segment_state(ctx, encode_state,
                                   encoder_context,
                                   &seg_param->seg_data[i], i);
    }

    /* Insert the uncompressed header buffer */
    second_level_batch.is_second_level = 1;
    second_level_batch.offset = 0;
    second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;

    gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

    /* PAK_OBJECT */
    second_level_batch.is_second_level = 1;
    second_level_batch.offset = 0;
    second_level_batch.bo = pak_context->res_mb_code_surface.bo;
    gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);

    return;
}
5333
5334 static void
gen9_vp9_read_mfc_status(VADriverContextP ctx,struct intel_encoder_context * encoder_context)5335 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5336 {
5337 struct intel_batchbuffer *batch = encoder_context->base.batch;
5338 struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5339 struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5340 struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5341 //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5342 struct vp9_encode_status_buffer_internal *status_buffer;
5343 struct gen9_vp9_state *vp9_state;
5344
5345 vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5346 if (!vp9_state || !pak_context || !batch)
5347 return;
5348
5349 status_buffer = &(vp9_state->status_buffer);
5350
5351 memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5352 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5353
5354 memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5355 mi_store_reg_mem_param.bo = status_buffer->bo;
5356 mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5357 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5358 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5359
5360 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5361 mi_store_reg_mem_param.offset = 0;
5362 mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5363 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5364
5365 /* Read HCP Image status */
5366 mi_store_reg_mem_param.bo = status_buffer->bo;
5367 mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5368 mi_store_reg_mem_param.mmio_offset =
5369 status_buffer->vp9_image_mask_reg_offset;
5370 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5371
5372 mi_store_reg_mem_param.bo = status_buffer->bo;
5373 mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5374 mi_store_reg_mem_param.mmio_offset =
5375 status_buffer->vp9_image_ctrl_reg_offset;
5376 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5377
5378 mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5379 mi_store_reg_mem_param.offset = 4;
5380 mi_store_reg_mem_param.mmio_offset =
5381 status_buffer->vp9_image_ctrl_reg_offset;
5382 gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5383
5384 gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5385
5386 return;
5387 }
5388
/*
 * Prepare the buffer-object references the PAK stage needs for this frame:
 * reconstructed surface, VP9 reference frames, the (possibly dynamically
 * scaled) input YUV, and the coded output buffer.  Takes a reference on
 * each bo it stores so they outlive the submission.
 */
static VAStatus
gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncPictureParameterBufferVP9 *pic_param;
    struct gen9_vp9_state *vp9_state;
    dri_bo *bo;
    int i;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
    if (!vp9_state ||
        !vp9_state->pic_param)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    pic_param = vp9_state->pic_param;

    /* reconstructed surface: make sure it has a backing NV12 bo */
    /* NOTE(review): return value of i965_check_alloc_surface_bo is ignored
     * here; allocation failure would surface later — confirm acceptable. */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* drop the previous frame's recon bo before adopting the new one */
    dri_bo_unreference(pak_context->reconstructed_object.bo);

    pak_context->reconstructed_object.bo = obj_surface->bo;
    dri_bo_reference(pak_context->reconstructed_object.bo);

    /* set vp9 reference frames: release all previously held references */
    for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
        if (pak_context->reference_surfaces[i].bo)
            dri_bo_unreference(pak_context->reference_surfaces[i].bo);
        pak_context->reference_surfaces[i].bo = NULL;
    }

    /* Three reference frames are enough for VP9 */
    /* References are only taken for inter frames (non-key, not intra-only). */
    if (pic_param->pic_flags.bits.frame_type &&
        !pic_param->pic_flags.bits.intra_only) {
        for (i = 0; i < 3; i++) {
            obj_surface = encode_state->reference_objects[i];
            if (obj_surface && obj_surface->bo) {
                pak_context->reference_surfaces[i].bo = obj_surface->bo;
                dri_bo_reference(obj_surface->bo);
            }
        }
    }

    /* input YUV surface: with dynamic scaling and differing src/dst sizes,
     * use the scaled surface attached to the recon object's private data */
    dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
    pak_context->uncompressed_picture_source.bo = NULL;
    obj_surface = encode_state->reconstructed_object;
    if (vp9_state->dys_in_use &&
        ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
         (pic_param->frame_height_src != pic_param->frame_height_dst))) {
        struct gen9_surface_vp9 *vp9_priv_surface =
            (struct gen9_surface_vp9 *)(obj_surface->private_data);
        obj_surface = vp9_priv_surface->dys_surface_obj;
    } else
        obj_surface = encode_state->input_yuv_object;

    pak_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(pak_context->uncompressed_picture_source.bo);

    /* coded buffer: bitstream starts after the coded-buffer header */
    dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
    pak_context->indirect_pak_bse_object.bo = NULL;
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
    pak_context->indirect_pak_bse_object.bo = bo;
    dri_bo_reference(pak_context->indirect_pak_bse_object.bo);

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    coded_buffer_segment->status_support = 1;
    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
5474
/*
 * Intentional no-op: VP9 BRC is handled by the kernel-based BRC passes in
 * the VME/PAK pipeline, so there is no per-frame BRC preparation to do for
 * the generic mfc_brc_prepare hook.
 */
static void
gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
}
5480
5481 static void
gen9_vp9_pak_context_destroy(void * context)5482 gen9_vp9_pak_context_destroy(void *context)
5483 {
5484 struct gen9_encoder_context_vp9 *pak_context = context;
5485 int i;
5486
5487 dri_bo_unreference(pak_context->reconstructed_object.bo);
5488 pak_context->reconstructed_object.bo = NULL;
5489
5490 dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5491 pak_context->uncompressed_picture_source.bo = NULL;
5492
5493 dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5494 pak_context->indirect_pak_bse_object.bo = NULL;
5495
5496 for (i = 0; i < 8; i++) {
5497 dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5498 pak_context->reference_surfaces[i].bo = NULL;
5499 }
5500
5501 /* vme & pak same the same structure, so don't free the context here */
5502 }
5503
/*
 * Top-level PAK pipeline entry: prepares buffers, then runs num_pak_passes
 * PAK iterations in one atomic BCS batch (multi-pass is used for BRC),
 * reading the PAK status after each pass, and finally records per-frame
 * bookkeeping for the next frame.
 */
static VAStatus
gen9_vp9_pak_pipeline(VADriverContextP ctx,
                      VAProfile profile,
                      struct encode_state *encode_state,
                      struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
    VAStatus va_status;
    struct gen9_vp9_state *vp9_state;
    VAEncPictureParameterBufferVP9 *pic_param;
    int i;

    vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);

    if (!vp9_state || !vp9_state->pic_param || !pak_context)
        return VA_STATUS_ERROR_INVALID_PARAMETER;

    va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);

    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    /* force BSD ring 0 on dual-BSD parts so all passes use one engine */
    if (i965->intel.has_bsd2)
        intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
    else
        intel_batchbuffer_start_atomic_bcs(batch, 0x1000);

    intel_batchbuffer_emit_mi_flush(batch);

    /* padding NOOPs at the head of the batch */
    BEGIN_BCS_BATCH(batch, 64);
    for (i = 0; i < 64; i++)
        OUT_BCS_BATCH(batch, MI_NOOP);

    ADVANCE_BCS_BATCH(batch);

    for (vp9_state->curr_pak_pass = 0;
         vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
         vp9_state->curr_pak_pass++) {

        if (vp9_state->curr_pak_pass == 0) {
            /* Initialize the VP9 Image Ctrl reg for the first pass */
            struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
            struct vp9_encode_status_buffer_internal *status_buffer;

            status_buffer = &(vp9_state->status_buffer);
            memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
            mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
            mi_load_reg_imm.data = 0;
            gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
        }
        gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
        gen9_vp9_read_mfc_status(ctx, encoder_context);
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);

    /* remember this frame's parameters for reference management and the
     * uncompressed header of the next frame */
    pic_param = vp9_state->pic_param;
    vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
    vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
    vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
    vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
    vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
    vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
    vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
    vp9_state->frame_number++;
    /* ping-pong between the two temporal MV buffers */
    vp9_state->curr_mv_temporal_index ^= 1;
    vp9_state->first_frame = 0;

    return VA_STATUS_SUCCESS;
}
5577
5578 Bool
gen9_vp9_vme_context_init(VADriverContextP ctx,struct intel_encoder_context * encoder_context)5579 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5580 {
5581 struct gen9_encoder_context_vp9 *vme_context = NULL;
5582 struct gen9_vp9_state *vp9_state = NULL;
5583
5584 vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5585 vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5586
5587 if (!vme_context || !vp9_state) {
5588 if (vme_context)
5589 free(vme_context);
5590 if (vp9_state)
5591 free(vp9_state);
5592 return false;
5593 }
5594
5595 encoder_context->enc_priv_state = vp9_state;
5596 vme_context->enc_priv_state = vp9_state;
5597
5598 /* Initialize the features that are supported by VP9 */
5599 vme_context->hme_supported = 1;
5600 vme_context->use_hw_scoreboard = 1;
5601 vme_context->use_hw_non_stalling_scoreboard = 1;
5602
5603 vp9_state->tx_mode = TX_MODE_SELECT;
5604 vp9_state->multi_ref_qp_check = 0;
5605 vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5606 vp9_state->num_pak_passes = 1;
5607 vp9_state->hme_supported = vme_context->hme_supported;
5608 vp9_state->b16xme_supported = 1;
5609
5610 if (encoder_context->rate_control_mode != VA_RC_NONE &&
5611 encoder_context->rate_control_mode != VA_RC_CQP) {
5612 vp9_state->brc_enabled = 1;
5613 vp9_state->brc_distortion_buffer_supported = 1;
5614 vp9_state->brc_constant_buffer_supported = 1;
5615 vp9_state->num_pak_passes = 4;
5616 }
5617 vp9_state->dys_enabled = 1; /* this is supported by default */
5618 vp9_state->first_frame = 1;
5619
5620 /* the definition of status buffer offset for VP9 */
5621 {
5622 struct vp9_encode_status_buffer_internal *status_buffer;
5623 uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5624
5625 status_buffer = &vp9_state->status_buffer;
5626 memset(status_buffer, 0,
5627 sizeof(struct vp9_encode_status_buffer_internal));
5628
5629 status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5630 status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5631 status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5632 status_buffer->media_index_offset = base_offset + offsetof(struct vp9_encode_status, media_index);
5633
5634 status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5635 status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5636 status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5637 }
5638
5639 gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5640
5641 encoder_context->vme_context = vme_context;
5642 encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5643 encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
5644
5645 return true;
5646 }
5647
5648 static VAStatus
gen9_vp9_get_coded_status(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct i965_coded_buffer_segment * coded_buf_seg)5649 gen9_vp9_get_coded_status(VADriverContextP ctx,
5650 struct intel_encoder_context *encoder_context,
5651 struct i965_coded_buffer_segment *coded_buf_seg)
5652 {
5653 struct vp9_encode_status *vp9_encode_status;
5654
5655 if (!encoder_context || !coded_buf_seg)
5656 return VA_STATUS_ERROR_INVALID_BUFFER;
5657
5658 vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5659 coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5660
5661 /* One VACodedBufferSegment for VP9 will be added later.
5662 * It will be linked to the next element of coded_buf_seg->base.next
5663 */
5664
5665 return VA_STATUS_SUCCESS;
5666 }
5667
5668 Bool
gen9_vp9_pak_context_init(VADriverContextP ctx,struct intel_encoder_context * encoder_context)5669 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5670 {
5671 /* VME & PAK share the same context */
5672 struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5673
5674 if (!pak_context)
5675 return false;
5676
5677 encoder_context->mfc_context = pak_context;
5678 encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5679 encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5680 encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5681 encoder_context->get_status = gen9_vp9_get_coded_status;
5682 return true;
5683 }
5684