1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Zhao, Yakui <yakui.zhao@intel.com>
26  *
27  */
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <stdbool.h>
32 #include <string.h>
33 #include <math.h>
34 #include <assert.h>
35 #include <va/va.h>
36 
37 #include "intel_batchbuffer.h"
38 #include "intel_driver.h"
39 
40 #include "i965_defines.h"
41 #include "i965_drv_video.h"
42 #include "i965_encoder.h"
43 #include "gen9_vp9_encapi.h"
44 #include "gen9_vp9_encoder.h"
45 #include "gen9_vp9_encoder_kernels.h"
46 #include "vp9_probs.h"
47 #include "gen9_vp9_const_def.h"
48 
/* Encoder limits (presumably maximum frame rate and surface count). */
#define MAX_VP9_ENCODER_FRAMERATE   60
#define MAX_VP9_ENCODER_SURFACES    64

#define MAX_URB_SIZE                4096 /* In register */
#define NUM_KERNELS_PER_GPE_CONTEXT 1

#define VP9_BRC_KBPS                1000

/* BRC kernel mode selectors: each mode owns one distinct bit. */
#define BRC_KERNEL_CBR              (1 << 4)
#define BRC_KERNEL_VBR              (1 << 5)
#define BRC_KERNEL_AVBR             (1 << 6)
#define BRC_KERNEL_CQL              (1 << 7)

/*
 * Base size used for the picture-state buffers; the allocation code
 * multiplies it by 4.
 */
#define VP9_PIC_STATE_BUFFER_SIZE   192
63 
/*
 * One entry of the header table found at the start of the kernel binary.
 *
 * kernel_start_pointer is consumed by shifting it left by 6, i.e. it is
 * the byte offset of the kernel inside the binary divided by 64.
 */
typedef struct _intel_kernel_header_ {
    uint32_t reserved             : 6;
    uint32_t kernel_start_pointer : 26;
} intel_kernel_header;
68 
69 typedef struct _intel_vp9_kernel_header {
70     int nKernelCount;
71     intel_kernel_header PLY_DSCALE;
72     intel_kernel_header VP9_ME_P;
73     intel_kernel_header VP9_Enc_I_32x32;
74     intel_kernel_header VP9_Enc_I_16x16;
75     intel_kernel_header VP9_Enc_P;
76     intel_kernel_header VP9_Enc_TX;
77     intel_kernel_header VP9_DYS;
78 
79     intel_kernel_header VP9BRC_Intra_Distortion;
80     intel_kernel_header VP9BRC_Init;
81     intel_kernel_header VP9BRC_Reset;
82     intel_kernel_header VP9BRC_Update;
83 } intel_vp9_kernel_header;
84 
/* Dynamic-scaling flags, one bit per scaling level so they can be OR-ed. */
#define DYS_1X_FLAG    (1 << 0)
#define DYS_4X_FLAG    (1 << 1)
#define DYS_16X_FLAG   (1 << 2)
88 
/*
 * Frame size handed to the surface-checking helpers; they derive the
 * sizes of the per-surface scaled copies from it.
 */
struct vp9_surface_param {
    uint32_t frame_width;   /* requested frame width */
    uint32_t frame_height;  /* requested frame height */
};
93 
/*
 * Convert a two's-complement integer into a sign-magnitude bit field.
 *
 * @val:          value to encode
 * @sign_bit_pos: total width of the field in bits.  The sign is stored in
 *                bit (sign_bit_pos - 1) and the magnitude in the bits
 *                below it.  Must be in the range [1, 32].
 *
 * The magnitude is truncated to (sign_bit_pos - 1) bits.  A negative value
 * whose truncated magnitude is zero still has the sign bit set.
 *
 * All arithmetic is done in uint32_t so that neither negating INT_MIN nor
 * building the mask for a 32-bit field invokes undefined behaviour.
 *
 * Returns the encoded field.
 */
static uint32_t intel_convert_sign_mag(int val, int sign_bit_pos)
{
    const uint32_t sign_mask = (uint32_t)1 << (sign_bit_pos - 1);
    const uint32_t magnitude_mask = sign_mask - 1;

    if (val < 0) {
        /* unsigned negation is well defined even for INT_MIN */
        uint32_t magnitude = (uint32_t)0 - (uint32_t)val;

        return sign_mask | (magnitude & magnitude_mask);
    }

    return (uint32_t)val & magnitude_mask;
}
105 
106 static bool
intel_vp9_get_kernel_header_and_size(void * pvbinary,int binary_size,INTEL_VP9_ENC_OPERATION operation,int krnstate_idx,struct i965_kernel * ret_kernel)107 intel_vp9_get_kernel_header_and_size(
108     void                             *pvbinary,
109     int                              binary_size,
110     INTEL_VP9_ENC_OPERATION          operation,
111     int                              krnstate_idx,
112     struct i965_kernel               *ret_kernel)
113 {
114     typedef uint32_t BIN_PTR[4];
115 
116     char *bin_start;
117     intel_vp9_kernel_header      *pkh_table;
118     intel_kernel_header          *pcurr_header, *pinvalid_entry, *pnext_header;
119     int next_krnoffset;
120 
121     if (!pvbinary || !ret_kernel)
122         return false;
123 
124     bin_start = (char *)pvbinary;
125     pkh_table = (intel_vp9_kernel_header *)pvbinary;
126     pinvalid_entry = &(pkh_table->VP9BRC_Update) + 1;
127     next_krnoffset = binary_size;
128 
129     if ((operation == INTEL_VP9_ENC_SCALING4X) || (operation == INTEL_VP9_ENC_SCALING2X)) {
130         pcurr_header = &pkh_table->PLY_DSCALE;
131     } else if (operation == INTEL_VP9_ENC_ME) {
132         pcurr_header = &pkh_table->VP9_ME_P;
133     } else if (operation == INTEL_VP9_ENC_MBENC) {
134         pcurr_header = &pkh_table->VP9_Enc_I_32x32;
135     } else if (operation == INTEL_VP9_ENC_DYS) {
136         pcurr_header = &pkh_table->VP9_DYS;
137     } else if (operation == INTEL_VP9_ENC_BRC) {
138         pcurr_header = &pkh_table->VP9BRC_Intra_Distortion;
139     } else {
140         return false;
141     }
142 
143     pcurr_header += krnstate_idx;
144     ret_kernel->bin = (const BIN_PTR *)(bin_start + (pcurr_header->kernel_start_pointer << 6));
145 
146     pnext_header = (pcurr_header + 1);
147     if (pnext_header < pinvalid_entry) {
148         next_krnoffset = pnext_header->kernel_start_pointer << 6;
149     }
150     ret_kernel->size = next_krnoffset - (pcurr_header->kernel_start_pointer << 6);
151 
152     return true;
153 }
154 
155 
156 static void
gen9_free_surfaces_vp9(void ** data)157 gen9_free_surfaces_vp9(void **data)
158 {
159     struct gen9_surface_vp9 *vp9_surface;
160 
161     if (!data || !*data)
162         return;
163 
164     vp9_surface = *data;
165 
166     if (vp9_surface->scaled_4x_surface_obj) {
167         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_4x_surface_id, 1);
168         vp9_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
169         vp9_surface->scaled_4x_surface_obj = NULL;
170     }
171 
172     if (vp9_surface->scaled_16x_surface_obj) {
173         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->scaled_16x_surface_id, 1);
174         vp9_surface->scaled_16x_surface_id = VA_INVALID_SURFACE;
175         vp9_surface->scaled_16x_surface_obj = NULL;
176     }
177 
178     if (vp9_surface->dys_4x_surface_obj) {
179         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
180         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
181         vp9_surface->dys_4x_surface_obj = NULL;
182     }
183 
184     if (vp9_surface->dys_16x_surface_obj) {
185         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
186         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
187         vp9_surface->dys_16x_surface_obj = NULL;
188     }
189 
190     if (vp9_surface->dys_surface_obj) {
191         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
192         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
193         vp9_surface->dys_surface_obj = NULL;
194     }
195 
196     free(vp9_surface);
197 
198     *data = NULL;
199 
200     return;
201 }
202 
203 static VAStatus
gen9_vp9_init_check_surfaces(VADriverContextP ctx,struct object_surface * obj_surface,struct vp9_surface_param * surface_param)204 gen9_vp9_init_check_surfaces(VADriverContextP ctx,
205                              struct object_surface *obj_surface,
206                              struct vp9_surface_param *surface_param)
207 {
208     struct i965_driver_data *i965 = i965_driver_data(ctx);
209     struct gen9_surface_vp9 *vp9_surface;
210     int downscaled_width_4x, downscaled_height_4x;
211     int downscaled_width_16x, downscaled_height_16x;
212 
213     if (!obj_surface || !obj_surface->bo)
214         return VA_STATUS_ERROR_INVALID_SURFACE;
215 
216     if (obj_surface->private_data &&
217         obj_surface->free_private_data != gen9_free_surfaces_vp9) {
218         obj_surface->free_private_data(&obj_surface->private_data);
219         obj_surface->private_data = NULL;
220     }
221 
222     if (obj_surface->private_data) {
223         /* if the frame width/height is already the same as the expected,
224          * it is unncessary to reallocate it.
225          */
226         vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
227         if (vp9_surface->frame_width >= surface_param->frame_width ||
228             vp9_surface->frame_height >= surface_param->frame_height)
229             return VA_STATUS_SUCCESS;
230 
231         obj_surface->free_private_data(&obj_surface->private_data);
232         obj_surface->private_data = NULL;
233         vp9_surface = NULL;
234     }
235 
236     vp9_surface = calloc(1, sizeof(struct gen9_surface_vp9));
237 
238     if (!vp9_surface)
239         return VA_STATUS_ERROR_ALLOCATION_FAILED;
240 
241     vp9_surface->ctx = ctx;
242     obj_surface->private_data = vp9_surface;
243     obj_surface->free_private_data = gen9_free_surfaces_vp9;
244 
245     vp9_surface->frame_width = surface_param->frame_width;
246     vp9_surface->frame_height = surface_param->frame_height;
247 
248     downscaled_width_4x = ALIGN(surface_param->frame_width / 4, 16);
249     downscaled_height_4x = ALIGN(surface_param->frame_height / 4, 16);
250 
251     i965_CreateSurfaces(ctx,
252                         downscaled_width_4x,
253                         downscaled_height_4x,
254                         VA_RT_FORMAT_YUV420,
255                         1,
256                         &vp9_surface->scaled_4x_surface_id);
257 
258     vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id);
259 
260     if (!vp9_surface->scaled_4x_surface_obj) {
261         return VA_STATUS_ERROR_ALLOCATION_FAILED;
262     }
263 
264     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1,
265                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
266 
267     downscaled_width_16x = ALIGN(surface_param->frame_width / 16, 16);
268     downscaled_height_16x = ALIGN(surface_param->frame_height / 16, 16);
269     i965_CreateSurfaces(ctx,
270                         downscaled_width_16x,
271                         downscaled_height_16x,
272                         VA_RT_FORMAT_YUV420,
273                         1,
274                         &vp9_surface->scaled_16x_surface_id);
275     vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id);
276 
277     if (!vp9_surface->scaled_16x_surface_obj) {
278         return VA_STATUS_ERROR_ALLOCATION_FAILED;
279     }
280 
281     i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1,
282                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
283 
284     return VA_STATUS_SUCCESS;
285 }
286 
287 static VAStatus
gen9_vp9_check_dys_surfaces(VADriverContextP ctx,struct object_surface * obj_surface,struct vp9_surface_param * surface_param)288 gen9_vp9_check_dys_surfaces(VADriverContextP ctx,
289                             struct object_surface *obj_surface,
290                             struct vp9_surface_param *surface_param)
291 {
292     struct i965_driver_data *i965 = i965_driver_data(ctx);
293     struct gen9_surface_vp9 *vp9_surface;
294     int dys_width_4x, dys_height_4x;
295     int dys_width_16x, dys_height_16x;
296 
297     /* As this is handled after the surface checking, it is unnecessary
298      * to check the surface bo and vp9_priv_surface again
299      */
300 
301     vp9_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
302 
303     if (!vp9_surface)
304         return VA_STATUS_ERROR_INVALID_SURFACE;
305 
306     /* if the frame_width/height of dys_surface is the same as
307      * the expected, it is unnecessary to allocate it again
308      */
309     if (vp9_surface->dys_frame_width == surface_param->frame_width &&
310         vp9_surface->dys_frame_height == surface_param->frame_height)
311         return VA_STATUS_SUCCESS;
312 
313     if (vp9_surface->dys_4x_surface_obj) {
314         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_4x_surface_id, 1);
315         vp9_surface->dys_4x_surface_id = VA_INVALID_SURFACE;
316         vp9_surface->dys_4x_surface_obj = NULL;
317     }
318 
319     if (vp9_surface->dys_16x_surface_obj) {
320         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_16x_surface_id, 1);
321         vp9_surface->dys_16x_surface_id = VA_INVALID_SURFACE;
322         vp9_surface->dys_16x_surface_obj = NULL;
323     }
324 
325     if (vp9_surface->dys_surface_obj) {
326         i965_DestroySurfaces(vp9_surface->ctx, &vp9_surface->dys_surface_id, 1);
327         vp9_surface->dys_surface_id = VA_INVALID_SURFACE;
328         vp9_surface->dys_surface_obj = NULL;
329     }
330 
331     vp9_surface->dys_frame_width = surface_param->frame_width;
332     vp9_surface->dys_frame_height = surface_param->frame_height;
333 
334     i965_CreateSurfaces(ctx,
335                         surface_param->frame_width,
336                         surface_param->frame_height,
337                         VA_RT_FORMAT_YUV420,
338                         1,
339                         &vp9_surface->dys_surface_id);
340     vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id);
341 
342     if (!vp9_surface->dys_surface_obj) {
343         return VA_STATUS_ERROR_ALLOCATION_FAILED;
344     }
345 
346     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1,
347                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
348 
349     dys_width_4x = ALIGN(surface_param->frame_width / 4, 16);
350     dys_height_4x = ALIGN(surface_param->frame_width / 4, 16);
351 
352     i965_CreateSurfaces(ctx,
353                         dys_width_4x,
354                         dys_height_4x,
355                         VA_RT_FORMAT_YUV420,
356                         1,
357                         &vp9_surface->dys_4x_surface_id);
358 
359     vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id);
360 
361     if (!vp9_surface->dys_4x_surface_obj) {
362         return VA_STATUS_ERROR_ALLOCATION_FAILED;
363     }
364 
365     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1,
366                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
367 
368     dys_width_16x = ALIGN(surface_param->frame_width / 16, 16);
369     dys_height_16x = ALIGN(surface_param->frame_width / 16, 16);
370     i965_CreateSurfaces(ctx,
371                         dys_width_16x,
372                         dys_height_16x,
373                         VA_RT_FORMAT_YUV420,
374                         1,
375                         &vp9_surface->dys_16x_surface_id);
376     vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id);
377 
378     if (!vp9_surface->dys_16x_surface_obj) {
379         return VA_STATUS_ERROR_ALLOCATION_FAILED;
380     }
381 
382     i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1,
383                                 VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
384 
385     return VA_STATUS_SUCCESS;
386 }
387 
388 static VAStatus
gen9_vp9_allocate_resources(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int allocate)389 gen9_vp9_allocate_resources(VADriverContextP ctx,
390                             struct encode_state *encode_state,
391                             struct intel_encoder_context *encoder_context,
392                             int allocate)
393 {
394     struct i965_driver_data *i965 = i965_driver_data(ctx);
395     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
396     struct gen9_vp9_state *vp9_state;
397     int allocate_flag, i;
398     int res_size;
399     uint32_t        frame_width_in_sb, frame_height_in_sb, frame_sb_num;
400     unsigned int width, height;
401 
402     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
403 
404     if (!vp9_state || !vp9_state->pic_param)
405         return VA_STATUS_ERROR_INVALID_PARAMETER;
406 
407     /* the buffer related with BRC is not changed. So it is allocated
408      * based on the input parameter
409      */
410     if (allocate) {
411         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
412         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
413         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
414         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
415         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
416         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
417         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
418         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
419         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
420         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
421         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
422 
423         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
424                                                    &vme_context->res_brc_history_buffer,
425                                                    VP9_BRC_HISTORY_BUFFER_SIZE,
426                                                    "Brc History buffer");
427         if (!allocate_flag)
428             goto failed_allocation;
429         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
430                                                    &vme_context->res_brc_const_data_buffer,
431                                                    VP9_BRC_CONSTANTSURFACE_SIZE,
432                                                    "Brc Constant buffer");
433         if (!allocate_flag)
434             goto failed_allocation;
435 
436         res_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
437                    ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
438         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
439                                                    &vme_context->res_brc_mbenc_curbe_write_buffer,
440                                                    res_size,
441                                                    "Brc Curbe write");
442         if (!allocate_flag)
443             goto failed_allocation;
444 
445         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
446         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
447                                                    &vme_context->res_pic_state_brc_read_buffer,
448                                                    res_size,
449                                                    "Pic State Brc_read");
450         if (!allocate_flag)
451             goto failed_allocation;
452 
453         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
454         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
455                                                    &vme_context->res_pic_state_brc_write_hfw_read_buffer,
456                                                    res_size,
457                                                    "Pic State Brc_write Hfw_Read");
458         if (!allocate_flag)
459             goto failed_allocation;
460 
461         res_size = VP9_PIC_STATE_BUFFER_SIZE * 4;
462         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
463                                                    &vme_context->res_pic_state_hfw_write_buffer,
464                                                    res_size,
465                                                    "Pic State Hfw Write");
466         if (!allocate_flag)
467             goto failed_allocation;
468 
469         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
470         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
471                                                    &vme_context->res_seg_state_brc_read_buffer,
472                                                    res_size,
473                                                    "Segment state brc_read");
474         if (!allocate_flag)
475             goto failed_allocation;
476 
477         res_size = VP9_SEGMENT_STATE_BUFFER_SIZE;
478         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
479                                                    &vme_context->res_seg_state_brc_write_buffer,
480                                                    res_size,
481                                                    "Segment state brc_write");
482         if (!allocate_flag)
483             goto failed_allocation;
484 
485         res_size = VP9_BRC_BITSTREAM_SIZE_BUFFER_SIZE;
486         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
487                                                    &vme_context->res_brc_bitstream_size_buffer,
488                                                    res_size,
489                                                    "Brc bitstream buffer");
490         if (!allocate_flag)
491             goto failed_allocation;
492 
493         res_size = VP9_HFW_BRC_DATA_BUFFER_SIZE;
494         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
495                                                    &vme_context->res_brc_hfw_data_buffer,
496                                                    res_size,
497                                                    "mfw Brc data");
498         if (!allocate_flag)
499             goto failed_allocation;
500 
501         res_size = VP9_BRC_MMDK_PAK_BUFFER_SIZE;
502         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
503                                                    &vme_context->res_brc_mmdk_pak_buffer,
504                                                    res_size,
505                                                    "Brc mmdk_pak");
506         if (!allocate_flag)
507             goto failed_allocation;
508     }
509 
510     /* If the width/height of allocated buffer is greater than the expected,
511      * it is unnecessary to allocate it again
512      */
513     if (vp9_state->res_width >= vp9_state->frame_width &&
514         vp9_state->res_height >= vp9_state->frame_height) {
515 
516         return VA_STATUS_SUCCESS;
517     }
518     frame_width_in_sb = ALIGN(vp9_state->frame_width, 64) / 64;
519     frame_height_in_sb = ALIGN(vp9_state->frame_height, 64) / 64;
520     frame_sb_num  = frame_width_in_sb * frame_height_in_sb;
521 
522     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
523     res_size = frame_width_in_sb * 64;
524     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
525                                                &vme_context->res_hvd_line_buffer,
526                                                res_size,
527                                                "VP9 hvd line line");
528     if (!allocate_flag)
529         goto failed_allocation;
530 
531     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
532     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
533                                                &vme_context->res_hvd_tile_line_buffer,
534                                                res_size,
535                                                "VP9 hvd tile_line line");
536     if (!allocate_flag)
537         goto failed_allocation;
538 
539     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
540     res_size = frame_width_in_sb * 18 * 64;
541     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
542                                                &vme_context->res_deblocking_filter_line_buffer,
543                                                res_size,
544                                                "VP9 deblocking filter line");
545     if (!allocate_flag)
546         goto failed_allocation;
547 
548     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
549     res_size = frame_width_in_sb * 18 * 64;
550     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
551                                                &vme_context->res_deblocking_filter_tile_line_buffer,
552                                                res_size,
553                                                "VP9 deblocking tile line");
554     if (!allocate_flag)
555         goto failed_allocation;
556 
557     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
558     res_size = frame_height_in_sb * 17 * 64;
559     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
560                                                &vme_context->res_deblocking_filter_tile_col_buffer,
561                                                res_size,
562                                                "VP9 deblocking tile col");
563     if (!allocate_flag)
564         goto failed_allocation;
565 
566     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
567     res_size = frame_width_in_sb * 5 * 64;
568     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
569                                                &vme_context->res_metadata_line_buffer,
570                                                res_size,
571                                                "VP9 metadata line");
572     if (!allocate_flag)
573         goto failed_allocation;
574 
575     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
576     res_size = frame_width_in_sb * 5 * 64;
577     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
578                                                &vme_context->res_metadata_tile_line_buffer,
579                                                res_size,
580                                                "VP9 metadata tile line");
581     if (!allocate_flag)
582         goto failed_allocation;
583 
584     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
585     res_size = frame_height_in_sb * 5 * 64;
586     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
587                                                &vme_context->res_metadata_tile_col_buffer,
588                                                res_size,
589                                                "VP9 metadata tile col");
590     if (!allocate_flag)
591         goto failed_allocation;
592 
593     i965_free_gpe_resource(&vme_context->res_prob_buffer);
594     res_size = 2048;
595     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
596                                                &vme_context->res_prob_buffer,
597                                                res_size,
598                                                "VP9 prob");
599     if (!allocate_flag)
600         goto failed_allocation;
601 
602     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
603     res_size = frame_sb_num * 64;
604     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
605                                                &vme_context->res_segmentid_buffer,
606                                                res_size,
607                                                "VP9 segment id");
608     if (!allocate_flag)
609         goto failed_allocation;
610 
611     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
612 
613     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
614     res_size = 29 * 64;
615     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
616                                                &vme_context->res_prob_delta_buffer,
617                                                res_size,
618                                                "VP9 prob delta");
619     if (!allocate_flag)
620         goto failed_allocation;
621 
622     i965_zero_gpe_resource(&vme_context->res_segmentid_buffer);
623 
624     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
625     res_size = 29 * 64;
626     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
627                                                &vme_context->res_prob_delta_buffer,
628                                                res_size,
629                                                "VP9 prob delta");
630     if (!allocate_flag)
631         goto failed_allocation;
632 
633     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
634     res_size = 32 * 64;
635     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
636                                                &vme_context->res_compressed_input_buffer,
637                                                res_size,
638                                                "VP9 compressed_input buffer");
639     if (!allocate_flag)
640         goto failed_allocation;
641 
642     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
643     res_size = 193 * 64;
644     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
645                                                &vme_context->res_prob_counter_buffer,
646                                                res_size,
647                                                "VP9 prob counter");
648     if (!allocate_flag)
649         goto failed_allocation;
650 
651     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
652     res_size = frame_sb_num * 64;
653     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
654                                                &vme_context->res_tile_record_streamout_buffer,
655                                                res_size,
656                                                "VP9 tile record stream_out");
657     if (!allocate_flag)
658         goto failed_allocation;
659 
660     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
661     res_size = frame_sb_num * 64;
662     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
663                                                &vme_context->res_cu_stat_streamout_buffer,
664                                                res_size,
665                                                "VP9 CU stat stream_out");
666     if (!allocate_flag)
667         goto failed_allocation;
668 
669     width = vp9_state->downscaled_width_4x_in_mb * 32;
670     height = vp9_state->downscaled_height_4x_in_mb * 16;
671     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
672     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
673                                                   &vme_context->s4x_memv_data_buffer,
674                                                   width, height,
675                                                   ALIGN(width, 64),
676                                                   "VP9 4x MEMV data");
677     if (!allocate_flag)
678         goto failed_allocation;
679 
680     width = vp9_state->downscaled_width_4x_in_mb * 8;
681     height = vp9_state->downscaled_height_4x_in_mb * 16;
682     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
683     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
684                                                   &vme_context->s4x_memv_distortion_buffer,
685                                                   width, height,
686                                                   ALIGN(width, 64),
687                                                   "VP9 4x MEMV distorion");
688     if (!allocate_flag)
689         goto failed_allocation;
690 
691     width = ALIGN(vp9_state->downscaled_width_16x_in_mb * 32, 64);
692     height = vp9_state->downscaled_height_16x_in_mb * 16;
693     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
694     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
695                                                   &vme_context->s16x_memv_data_buffer,
696                                                   width, height,
697                                                   width,
698                                                   "VP9 16x MEMV data");
699     if (!allocate_flag)
700         goto failed_allocation;
701 
702     width = vp9_state->frame_width_in_mb * 16;
703     height = vp9_state->frame_height_in_mb * 8;
704     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
705     allocate_flag = i965_gpe_allocate_2d_resource(i965->intel.bufmgr,
706                                                   &vme_context->res_output_16x16_inter_modes,
707                                                   width, height,
708                                                   ALIGN(width, 64),
709                                                   "VP9 output inter_mode");
710     if (!allocate_flag)
711         goto failed_allocation;
712 
713     res_size = vp9_state->frame_width_in_mb * vp9_state->frame_height_in_mb *
714                16 * 4;
715     for (i = 0; i < 2; i++) {
716         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
717         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
718                                                    &vme_context->res_mode_decision[i],
719                                                    res_size,
720                                                    "VP9 mode decision");
721         if (!allocate_flag)
722             goto failed_allocation;
723 
724     }
725 
726     res_size = frame_sb_num * 9 * 64;
727     for (i = 0; i < 2; i++) {
728         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
729         allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
730                                                    &vme_context->res_mv_temporal_buffer[i],
731                                                    res_size,
732                                                    "VP9 temporal mv");
733         if (!allocate_flag)
734             goto failed_allocation;
735     }
736 
737     vp9_state->mb_data_offset = ALIGN(frame_sb_num * 16, 4096) + 4096;
738     res_size = vp9_state->mb_data_offset + frame_sb_num * 64 * 64 + 1000;
739     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
740     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
741                                                &vme_context->res_mb_code_surface,
742                                                ALIGN(res_size, 4096),
743                                                "VP9 mb_code surface");
744     if (!allocate_flag)
745         goto failed_allocation;
746 
747     res_size = 128;
748     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
749     allocate_flag = i965_allocate_gpe_resource(i965->intel.bufmgr,
750                                                &vme_context->res_pak_uncompressed_input_buffer,
751                                                ALIGN(res_size, 4096),
752                                                "VP9 pak_uncompressed_input");
753     if (!allocate_flag)
754         goto failed_allocation;
755 
756     if (!vme_context->frame_header_data) {
757         /* allocate 512 bytes for generating the uncompressed header */
758         vme_context->frame_header_data = calloc(1, 512);
759     }
760 
761     vp9_state->res_width = vp9_state->frame_width;
762     vp9_state->res_height = vp9_state->frame_height;
763 
764     return VA_STATUS_SUCCESS;
765 
766 failed_allocation:
767     return VA_STATUS_ERROR_ALLOCATION_FAILED;
768 }
769 
770 static void
gen9_vp9_free_resources(struct gen9_encoder_context_vp9 * vme_context)771 gen9_vp9_free_resources(struct gen9_encoder_context_vp9 *vme_context)
772 {
773     int i;
774     struct gen9_vp9_state *vp9_state = (struct gen9_vp9_state *) vme_context->enc_priv_state;
775 
776     if (vp9_state->brc_enabled) {
777         i965_free_gpe_resource(&vme_context->res_brc_history_buffer);
778         i965_free_gpe_resource(&vme_context->res_brc_const_data_buffer);
779         i965_free_gpe_resource(&vme_context->res_brc_mbenc_curbe_write_buffer);
780         i965_free_gpe_resource(&vme_context->res_pic_state_brc_read_buffer);
781         i965_free_gpe_resource(&vme_context->res_pic_state_brc_write_hfw_read_buffer);
782         i965_free_gpe_resource(&vme_context->res_pic_state_hfw_write_buffer);
783         i965_free_gpe_resource(&vme_context->res_seg_state_brc_read_buffer);
784         i965_free_gpe_resource(&vme_context->res_seg_state_brc_write_buffer);
785         i965_free_gpe_resource(&vme_context->res_brc_bitstream_size_buffer);
786         i965_free_gpe_resource(&vme_context->res_brc_hfw_data_buffer);
787         i965_free_gpe_resource(&vme_context->res_brc_mmdk_pak_buffer);
788     }
789 
790     i965_free_gpe_resource(&vme_context->res_hvd_line_buffer);
791     i965_free_gpe_resource(&vme_context->res_hvd_tile_line_buffer);
792     i965_free_gpe_resource(&vme_context->res_deblocking_filter_line_buffer);
793     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_line_buffer);
794     i965_free_gpe_resource(&vme_context->res_deblocking_filter_tile_col_buffer);
795     i965_free_gpe_resource(&vme_context->res_metadata_line_buffer);
796     i965_free_gpe_resource(&vme_context->res_metadata_tile_line_buffer);
797     i965_free_gpe_resource(&vme_context->res_metadata_tile_col_buffer);
798     i965_free_gpe_resource(&vme_context->res_prob_buffer);
799     i965_free_gpe_resource(&vme_context->res_segmentid_buffer);
800     i965_free_gpe_resource(&vme_context->res_prob_delta_buffer);
801     i965_free_gpe_resource(&vme_context->res_prob_counter_buffer);
802     i965_free_gpe_resource(&vme_context->res_tile_record_streamout_buffer);
803     i965_free_gpe_resource(&vme_context->res_cu_stat_streamout_buffer);
804     i965_free_gpe_resource(&vme_context->s4x_memv_data_buffer);
805     i965_free_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
806     i965_free_gpe_resource(&vme_context->s16x_memv_data_buffer);
807     i965_free_gpe_resource(&vme_context->res_output_16x16_inter_modes);
808     for (i = 0; i < 2; i++) {
809         i965_free_gpe_resource(&vme_context->res_mode_decision[i]);
810     }
811 
812     for (i = 0; i < 2; i++) {
813         i965_free_gpe_resource(&vme_context->res_mv_temporal_buffer[i]);
814     }
815 
816     i965_free_gpe_resource(&vme_context->res_compressed_input_buffer);
817     i965_free_gpe_resource(&vme_context->res_mb_code_surface);
818     i965_free_gpe_resource(&vme_context->res_pak_uncompressed_input_buffer);
819 
820     if (vme_context->frame_header_data) {
821         free(vme_context->frame_header_data);
822         vme_context->frame_header_data = NULL;
823     }
824     return;
825 }
826 
827 static void
gen9_init_media_object_walker_parameter(struct intel_encoder_context * encoder_context,struct gpe_encoder_kernel_walker_parameter * kernel_walker_param,struct gpe_media_object_walker_parameter * walker_param)828 gen9_init_media_object_walker_parameter(struct intel_encoder_context *encoder_context,
829                                         struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
830                                         struct gpe_media_object_walker_parameter *walker_param)
831 {
832     memset(walker_param, 0, sizeof(*walker_param));
833 
834     walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;
835 
836     walker_param->block_resolution.x = kernel_walker_param->resolution_x;
837     walker_param->block_resolution.y = kernel_walker_param->resolution_y;
838 
839     walker_param->global_resolution.x = kernel_walker_param->resolution_x;
840     walker_param->global_resolution.y = kernel_walker_param->resolution_y;
841 
842     walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
843     walker_param->global_outer_loop_stride.y = 0;
844 
845     walker_param->global_inner_loop_unit.x = 0;
846     walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;
847 
848     walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
849     walker_param->global_loop_exec_count = 0xFFFF;  //MAX VALUE
850 
851     if (kernel_walker_param->no_dependency) {
852         walker_param->scoreboard_mask = 0;
853         walker_param->use_scoreboard = 0;
854         // Raster scan walking pattern
855         walker_param->local_outer_loop_stride.x = 0;
856         walker_param->local_outer_loop_stride.y = 1;
857         walker_param->local_inner_loop_unit.x = 1;
858         walker_param->local_inner_loop_unit.y = 0;
859         walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
860         walker_param->local_end.y = 0;
861     } else {
862         walker_param->local_end.x = 0;
863         walker_param->local_end.y = 0;
864 
865         if (kernel_walker_param->walker_degree == VP9_45Z_DEGREE) {
866             // 45z degree
867             walker_param->scoreboard_mask = 0x0F;
868 
869             walker_param->global_loop_exec_count = 0x3FF;
870             walker_param->local_loop_exec_count = 0x3FF;
871 
872             walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
873             walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;
874 
875             walker_param->global_start.x = 0;
876             walker_param->global_start.y = 0;
877 
878             walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
879             walker_param->global_outer_loop_stride.y = 0;
880 
881             walker_param->global_inner_loop_unit.x = 0;
882             walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;
883 
884             walker_param->block_resolution.x = walker_param->global_resolution.x;
885             walker_param->block_resolution.y = walker_param->global_resolution.y;
886 
887             walker_param->local_start.x = 0;
888             walker_param->local_start.y = 0;
889 
890             walker_param->local_outer_loop_stride.x = 1;
891             walker_param->local_outer_loop_stride.y = 0;
892 
893             walker_param->local_inner_loop_unit.x = -1;
894             walker_param->local_inner_loop_unit.y = 4;
895 
896             walker_param->middle_loop_extra_steps = 3;
897             walker_param->mid_loop_unit_x = 0;
898             walker_param->mid_loop_unit_y = 1;
899         } else {
900             // 26 degree
901             walker_param->scoreboard_mask = 0x0F;
902             walker_param->local_outer_loop_stride.x = 1;
903             walker_param->local_outer_loop_stride.y = 0;
904             walker_param->local_inner_loop_unit.x = -2;
905             walker_param->local_inner_loop_unit.y = 1;
906         }
907     }
908 }
909 
910 static void
gen9_run_kernel_media_object(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context,int media_function,struct gpe_media_object_parameter * param)911 gen9_run_kernel_media_object(VADriverContextP ctx,
912                              struct intel_encoder_context *encoder_context,
913                              struct i965_gpe_context *gpe_context,
914                              int media_function,
915                              struct gpe_media_object_parameter *param)
916 {
917     struct intel_batchbuffer *batch = encoder_context->base.batch;
918     struct vp9_encode_status_buffer_internal *status_buffer;
919     struct gen9_vp9_state *vp9_state;
920     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
921 
922     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
923     if (!vp9_state || !batch)
924         return;
925 
926     intel_batchbuffer_start_atomic(batch, 0x1000);
927 
928     status_buffer = &(vp9_state->status_buffer);
929     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
930     mi_store_data_imm.bo = status_buffer->bo;
931     mi_store_data_imm.offset = status_buffer->media_index_offset;
932     mi_store_data_imm.dw0 = media_function;
933     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
934 
935     intel_batchbuffer_emit_mi_flush(batch);
936     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
937     gen8_gpe_media_object(ctx, gpe_context, batch, param);
938     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
939 
940     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
941 
942     intel_batchbuffer_end_atomic(batch);
943 
944     intel_batchbuffer_flush(batch);
945 }
946 
947 static void
gen9_run_kernel_media_object_walker(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context,int media_function,struct gpe_media_object_walker_parameter * param)948 gen9_run_kernel_media_object_walker(VADriverContextP ctx,
949                                     struct intel_encoder_context *encoder_context,
950                                     struct i965_gpe_context *gpe_context,
951                                     int media_function,
952                                     struct gpe_media_object_walker_parameter *param)
953 {
954     struct intel_batchbuffer *batch = encoder_context->base.batch;
955     struct vp9_encode_status_buffer_internal *status_buffer;
956     struct gen9_vp9_state *vp9_state;
957     struct gpe_mi_store_data_imm_parameter mi_store_data_imm;
958 
959     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
960     if (!vp9_state || !batch)
961         return;
962 
963     intel_batchbuffer_start_atomic(batch, 0x1000);
964 
965     intel_batchbuffer_emit_mi_flush(batch);
966 
967     status_buffer = &(vp9_state->status_buffer);
968     memset(&mi_store_data_imm, 0, sizeof(mi_store_data_imm));
969     mi_store_data_imm.bo = status_buffer->bo;
970     mi_store_data_imm.offset = status_buffer->media_index_offset;
971     mi_store_data_imm.dw0 = media_function;
972     gen8_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm);
973 
974     gen9_gpe_pipeline_setup(ctx, gpe_context, batch);
975     gen8_gpe_media_object_walker(ctx, gpe_context, batch, param);
976     gen8_gpe_media_state_flush(ctx, gpe_context, batch);
977 
978     gen9_gpe_pipeline_end(ctx, gpe_context, batch);
979 
980     intel_batchbuffer_end_atomic(batch);
981 
982     intel_batchbuffer_flush(batch);
983 }
984 
985 static
gen9_vp9_set_curbe_brc(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_brc_curbe_param * param)986 void gen9_vp9_set_curbe_brc(VADriverContextP ctx,
987                             struct encode_state *encode_state,
988                             struct i965_gpe_context *gpe_context,
989                             struct intel_encoder_context *encoder_context,
990                             struct gen9_vp9_brc_curbe_param *param)
991 {
992     VAEncSequenceParameterBufferVP9 *seq_param;
993     VAEncPictureParameterBufferVP9  *pic_param;
994     VAEncMiscParameterTypeVP9PerSegmantParam *segment_param;
995     vp9_brc_curbe_data      *cmd;
996     double                  dbps_ratio, dInputBitsPerFrame;
997     struct gen9_vp9_state *vp9_state;
998 
999     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1000 
1001     pic_param      = param->ppic_param;
1002     seq_param      = param->pseq_param;
1003     segment_param  = param->psegment_param;
1004 
1005     cmd = i965_gpe_context_map_curbe(gpe_context);
1006 
1007     if (!cmd)
1008         return;
1009 
1010     memset(cmd, 0, sizeof(vp9_brc_curbe_data));
1011 
1012     if (!vp9_state->dys_enabled) {
1013         cmd->dw0.frame_width  = pic_param->frame_width_src;
1014         cmd->dw0.frame_height = pic_param->frame_height_src;
1015     } else {
1016         cmd->dw0.frame_width  = pic_param->frame_width_dst;
1017         cmd->dw0.frame_height = pic_param->frame_height_dst;
1018     }
1019 
1020     cmd->dw1.frame_type           = vp9_state->picture_coding_type;
1021     cmd->dw1.segmentation_enable  = 0;
1022     cmd->dw1.ref_frame_flags      = vp9_state->ref_frame_flag;
1023     cmd->dw1.num_tlevels          = 1;
1024 
1025     switch (param->media_state_type) {
1026     case VP9_MEDIA_STATE_BRC_INIT_RESET: {
1027         cmd->dw3.max_level_ratiot0 = 0;
1028         cmd->dw3.max_level_ratiot1 = 0;
1029         cmd->dw3.max_level_ratiot2 = 0;
1030         cmd->dw3.max_level_ratiot3 = 0;
1031 
1032         cmd->dw4.profile_level_max_frame    = seq_param->max_frame_width *
1033                                               seq_param->max_frame_height;
1034         cmd->dw5.init_buf_fullness         = vp9_state->init_vbv_buffer_fullness_in_bit;
1035         cmd->dw6.buf_size                  = vp9_state->vbv_buffer_size_in_bit;
1036         cmd->dw7.target_bit_rate           = (vp9_state->target_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1037                                              VP9_BRC_KBPS;
1038         cmd->dw8.max_bit_rate           = (vp9_state->max_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1039                                           VP9_BRC_KBPS;
1040         cmd->dw9.min_bit_rate           = (vp9_state->min_bit_rate  + VP9_BRC_KBPS - 1) / VP9_BRC_KBPS *
1041                                           VP9_BRC_KBPS;
1042         cmd->dw10.frame_ratem           = vp9_state->framerate.num;
1043         cmd->dw11.frame_rated           = vp9_state->framerate.den;
1044 
1045         cmd->dw14.avbr_accuracy         = 30;
1046         cmd->dw14.avbr_convergence      = 150;
1047 
1048         if (encoder_context->rate_control_mode == VA_RC_CBR) {
1049             cmd->dw12.brc_flag    = BRC_KERNEL_CBR;
1050             cmd->dw8.max_bit_rate  = cmd->dw7.target_bit_rate;
1051             cmd->dw9.min_bit_rate  = 0;
1052         } else if (encoder_context->rate_control_mode == VA_RC_VBR) {
1053             cmd->dw12.brc_flag    = BRC_KERNEL_VBR;
1054         } else {
1055             cmd->dw12.brc_flag = BRC_KERNEL_CQL;
1056             cmd->dw16.cq_level = 30;
1057         }
1058         cmd->dw12.gopp = seq_param->intra_period - 1;
1059 
1060         cmd->dw13.init_frame_width   = pic_param->frame_width_src;
1061         cmd->dw13.init_frame_height   = pic_param->frame_height_src;
1062 
1063         cmd->dw15.min_qp          = 1;
1064         cmd->dw15.max_qp          = 255;
1065 
1066         cmd->dw16.cq_level            = 30;
1067 
1068         cmd->dw17.enable_dynamic_scaling = vp9_state->dys_in_use;
1069         cmd->dw17.brc_overshoot_cbr_pct = 150;
1070 
1071         dInputBitsPerFrame = (double)cmd->dw8.max_bit_rate * (double)vp9_state->framerate.den / (double)vp9_state->framerate.num;
1072         dbps_ratio         = dInputBitsPerFrame / ((double)vp9_state->vbv_buffer_size_in_bit / 30.0);
1073         if (dbps_ratio < 0.1)
1074             dbps_ratio = 0.1;
1075         if (dbps_ratio > 3.5)
1076             dbps_ratio = 3.5;
1077 
1078         *param->pbrc_init_reset_buf_size_in_bits  = cmd->dw6.buf_size;
1079         *param->pbrc_init_reset_input_bits_per_frame  = dInputBitsPerFrame;
1080         *param->pbrc_init_current_target_buf_full_in_bits = cmd->dw6.buf_size >> 1;
1081 
1082         cmd->dw18.pframe_deviation_threshold0 = (uint32_t)(-50 * pow(0.90, dbps_ratio));
1083         cmd->dw18.pframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.66, dbps_ratio));
1084         cmd->dw18.pframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.46, dbps_ratio));
1085         cmd->dw18.pframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1086         cmd->dw19.pframe_deviation_threshold4  = (uint32_t)(50 * pow(0.3, dbps_ratio));
1087         cmd->dw19.pframe_deviation_threshold5  = (uint32_t)(50 * pow(0.46, dbps_ratio));
1088         cmd->dw19.pframe_deviation_threshold6  = (uint32_t)(50 * pow(0.7, dbps_ratio));
1089         cmd->dw19.pframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1090 
1091         cmd->dw20.vbr_deviation_threshold0     = (uint32_t)(-50 * pow(0.9, dbps_ratio));
1092         cmd->dw20.vbr_deviation_threshold1     = (uint32_t)(-50 * pow(0.7, dbps_ratio));
1093         cmd->dw20.vbr_deviation_threshold2     = (uint32_t)(-50 * pow(0.5, dbps_ratio));
1094         cmd->dw20.vbr_deviation_threshold3     = (uint32_t)(-50 * pow(0.3, dbps_ratio));
1095         cmd->dw21.vbr_deviation_threshold4     = (uint32_t)(100 * pow(0.4, dbps_ratio));
1096         cmd->dw21.vbr_deviation_threshold5     = (uint32_t)(100 * pow(0.5, dbps_ratio));
1097         cmd->dw21.vbr_deviation_threshold6     = (uint32_t)(100 * pow(0.75, dbps_ratio));
1098         cmd->dw21.vbr_deviation_threshold7     = (uint32_t)(100 * pow(0.9, dbps_ratio));
1099 
1100         cmd->dw22.kframe_deviation_threshold0  = (uint32_t)(-50 * pow(0.8, dbps_ratio));
1101         cmd->dw22.kframe_deviation_threshold1  = (uint32_t)(-50 * pow(0.6, dbps_ratio));
1102         cmd->dw22.kframe_deviation_threshold2  = (uint32_t)(-50 * pow(0.34, dbps_ratio));
1103         cmd->dw22.kframe_deviation_threshold3  = (uint32_t)(-50 * pow(0.2, dbps_ratio));
1104         cmd->dw23.kframe_deviation_threshold4  = (uint32_t)(50 * pow(0.2, dbps_ratio));
1105         cmd->dw23.kframe_deviation_threshold5  = (uint32_t)(50 * pow(0.4, dbps_ratio));
1106         cmd->dw23.kframe_deviation_threshold6  = (uint32_t)(50 * pow(0.66, dbps_ratio));
1107         cmd->dw23.kframe_deviation_threshold7  = (uint32_t)(50 * pow(0.9, dbps_ratio));
1108 
1109         break;
1110     }
1111     case VP9_MEDIA_STATE_BRC_UPDATE: {
1112         cmd->dw15.min_qp          = 1;
1113         cmd->dw15.max_qp          = 255;
1114 
1115         cmd->dw25.frame_number    = param->frame_number;
1116 
1117         // Used in dynamic scaling. set to zero for now
1118         cmd->dw27.hrd_buffer_fullness_upper_limit = 0;
1119         cmd->dw28.hrd_buffer_fullness_lower_limit = 0;
1120 
1121         if (pic_param->pic_flags.bits.segmentation_enabled) {
1122             cmd->dw32.seg_delta_qp0              = segment_param->seg_data[0].segment_qindex_delta;
1123             cmd->dw32.seg_delta_qp1              = segment_param->seg_data[1].segment_qindex_delta;
1124             cmd->dw32.seg_delta_qp2              = segment_param->seg_data[2].segment_qindex_delta;
1125             cmd->dw32.seg_delta_qp3              = segment_param->seg_data[3].segment_qindex_delta;
1126 
1127             cmd->dw33.seg_delta_qp4              = segment_param->seg_data[4].segment_qindex_delta;
1128             cmd->dw33.seg_delta_qp5              = segment_param->seg_data[5].segment_qindex_delta;
1129             cmd->dw33.seg_delta_qp6              = segment_param->seg_data[6].segment_qindex_delta;
1130             cmd->dw33.seg_delta_qp7              = segment_param->seg_data[7].segment_qindex_delta;
1131         }
1132 
1133         //cmd->dw34.temporal_id                = pPicParams->temporal_idi;
1134         cmd->dw34.temporal_id                = 0;
1135         cmd->dw34.multi_ref_qp_check         = param->multi_ref_qp_check;
1136 
1137         cmd->dw35.max_num_pak_passes         = param->brc_num_pak_passes;
1138         cmd->dw35.sync_async                 = 0;
1139         cmd->dw35.mbrc                       = param->mbbrc_enabled;
1140         if (*param->pbrc_init_current_target_buf_full_in_bits >
1141             ((double)(*param->pbrc_init_reset_buf_size_in_bits))) {
1142             *param->pbrc_init_current_target_buf_full_in_bits -=
1143                 (double)(*param->pbrc_init_reset_buf_size_in_bits);
1144             cmd->dw35.overflow = 1;
1145         } else
1146             cmd->dw35.overflow = 0;
1147 
1148         cmd->dw24.target_size                 = (uint32_t)(*param->pbrc_init_current_target_buf_full_in_bits);
1149 
1150         cmd->dw36.segmentation               = pic_param->pic_flags.bits.segmentation_enabled;
1151 
1152         *param->pbrc_init_current_target_buf_full_in_bits += *param->pbrc_init_reset_input_bits_per_frame;
1153 
1154         cmd->dw38.qdelta_ydc  = pic_param->luma_dc_qindex_delta;
1155         cmd->dw38.qdelta_uvdc = pic_param->chroma_dc_qindex_delta;
1156         cmd->dw38.qdelta_uvac = pic_param->chroma_ac_qindex_delta;
1157 
1158         break;
1159     }
1160     case VP9_MEDIA_STATE_ENC_I_FRAME_DIST:
1161         cmd->dw2.intra_mode_disable        = 0;
1162         break;
1163     default:
1164         break;
1165     }
1166 
1167     cmd->dw48.brc_y4x_input_bti                = VP9_BTI_BRC_SRCY4X_G9;
1168     cmd->dw49.brc_vme_coarse_intra_input_bti   = VP9_BTI_BRC_VME_COARSE_INTRA_G9;
1169     cmd->dw50.brc_history_buffer_bti           = VP9_BTI_BRC_HISTORY_G9;
1170     cmd->dw51.brc_const_data_input_bti         = VP9_BTI_BRC_CONSTANT_DATA_G9;
1171     cmd->dw52.brc_distortion_bti               = VP9_BTI_BRC_DISTORTION_G9;
1172     cmd->dw53.brc_mmdk_pak_output_bti          = VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9;
1173     cmd->dw54.brc_enccurbe_input_bti           = VP9_BTI_BRC_MBENC_CURBE_INPUT_G9;
1174     cmd->dw55.brc_enccurbe_output_bti          = VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9;
1175     cmd->dw56.brc_pic_state_input_bti          = VP9_BTI_BRC_PIC_STATE_INPUT_G9;
1176     cmd->dw57.brc_pic_state_output_bti         = VP9_BTI_BRC_PIC_STATE_OUTPUT_G9;
1177     cmd->dw58.brc_seg_state_input_bti          = VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9;
1178     cmd->dw59.brc_seg_state_output_bti         = VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9;
1179     cmd->dw60.brc_bitstream_size_data_bti      = VP9_BTI_BRC_BITSTREAM_SIZE_G9;
1180     cmd->dw61.brc_hfw_data_output_bti          = VP9_BTI_BRC_HFW_DATA_G9;
1181 
1182     i965_gpe_context_unmap_curbe(gpe_context);
1183     return;
1184 }
1185 
1186 static void
gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)1187 gen9_brc_init_reset_add_surfaces_vp9(VADriverContextP ctx,
1188                                      struct encode_state *encode_state,
1189                                      struct intel_encoder_context *encoder_context,
1190                                      struct i965_gpe_context *gpe_context)
1191 {
1192     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1193 
1194     i965_add_buffer_gpe_surface(ctx,
1195                                 gpe_context,
1196                                 &vme_context->res_brc_history_buffer,
1197                                 0,
1198                                 vme_context->res_brc_history_buffer.size,
1199                                 0,
1200                                 VP9_BTI_BRC_HISTORY_G9);
1201 
1202     i965_add_buffer_2d_gpe_surface(ctx,
1203                                    gpe_context,
1204                                    &vme_context->s4x_memv_distortion_buffer,
1205                                    1,
1206                                    I965_SURFACEFORMAT_R8_UNORM,
1207                                    VP9_BTI_BRC_DISTORTION_G9);
1208 }
1209 
1210 /* The function related with BRC */
1211 static VAStatus
gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1212 gen9_vp9_brc_init_reset_kernel(VADriverContextP ctx,
1213                                struct encode_state *encode_state,
1214                                struct intel_encoder_context *encoder_context)
1215 {
1216     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1217     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1218     struct gpe_media_object_parameter media_object_param;
1219     struct i965_gpe_context *gpe_context;
1220     int gpe_index = VP9_BRC_INIT;
1221     int media_function = VP9_MEDIA_STATE_BRC_INIT_RESET;
1222     struct gen9_vp9_brc_curbe_param                brc_initreset_curbe;
1223     VAEncPictureParameterBufferVP9 *pic_param;
1224     struct gen9_vp9_state *vp9_state;
1225 
1226     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1227 
1228     if (!vp9_state || !vp9_state->pic_param)
1229         return VA_STATUS_ERROR_INVALID_PARAMETER;
1230 
1231     pic_param = vp9_state->pic_param;
1232 
1233     if (vp9_state->brc_inited)
1234         gpe_index = VP9_BRC_RESET;
1235 
1236     gpe_context = &brc_context->gpe_contexts[gpe_index];
1237 
1238     gen8_gpe_context_init(ctx, gpe_context);
1239     gen9_gpe_reset_binding_table(ctx, gpe_context);
1240 
1241     brc_initreset_curbe.media_state_type    = media_function;
1242     brc_initreset_curbe.curr_frame          = pic_param->reconstructed_frame;
1243     brc_initreset_curbe.ppic_param          = vp9_state->pic_param;
1244     brc_initreset_curbe.pseq_param          = vp9_state->seq_param;
1245     brc_initreset_curbe.psegment_param      = vp9_state->segment_param;
1246     brc_initreset_curbe.frame_width         = vp9_state->frame_width;
1247     brc_initreset_curbe.frame_height        = vp9_state->frame_height;
1248     brc_initreset_curbe.pbrc_init_current_target_buf_full_in_bits =
1249         &vp9_state->brc_init_current_target_buf_full_in_bits;
1250     brc_initreset_curbe.pbrc_init_reset_buf_size_in_bits =
1251         &vp9_state->brc_init_reset_buf_size_in_bits;
1252     brc_initreset_curbe.pbrc_init_reset_input_bits_per_frame =
1253         &vp9_state->brc_init_reset_input_bits_per_frame;
1254     brc_initreset_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1255     brc_initreset_curbe.initbrc            = !vp9_state->brc_inited;
1256     brc_initreset_curbe.mbbrc_enabled      = 0;
1257     brc_initreset_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1258 
1259     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1260                                    gpe_context,
1261                                    encoder_context,
1262                                    &brc_initreset_curbe);
1263 
1264     gen9_brc_init_reset_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1265     gen8_gpe_setup_interface_data(ctx, gpe_context);
1266 
1267     memset(&media_object_param, 0, sizeof(media_object_param));
1268     gen9_run_kernel_media_object(ctx, encoder_context, gpe_context, media_function, &media_object_param);
1269 
1270     return VA_STATUS_SUCCESS;
1271 }
1272 
1273 static void
gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context)1274 gen9_brc_intra_dist_add_surfaces_vp9(VADriverContextP ctx,
1275                                      struct encode_state *encode_state,
1276                                      struct intel_encoder_context *encoder_context,
1277                                      struct i965_gpe_context *gpe_context)
1278 {
1279     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1280 
1281     struct object_surface *obj_surface;
1282     struct gen9_surface_vp9 *vp9_priv_surface;
1283 
1284     /* sScaled4xSurface surface */
1285     obj_surface = encode_state->reconstructed_object;
1286 
1287     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
1288 
1289     obj_surface = vp9_priv_surface->scaled_4x_surface_obj;
1290     i965_add_2d_gpe_surface(ctx, gpe_context,
1291                             obj_surface,
1292                             0, 1,
1293                             I965_SURFACEFORMAT_R8_UNORM,
1294                             VP9_BTI_BRC_SRCY4X_G9
1295                            );
1296 
1297     i965_add_adv_gpe_surface(ctx, gpe_context,
1298                              obj_surface,
1299                              VP9_BTI_BRC_VME_COARSE_INTRA_G9);
1300 
1301     i965_add_buffer_2d_gpe_surface(ctx,
1302                                    gpe_context,
1303                                    &vme_context->s4x_memv_distortion_buffer,
1304                                    1,
1305                                    I965_SURFACEFORMAT_R8_UNORM,
1306                                    VP9_BTI_BRC_DISTORTION_G9);
1307 
1308     return;
1309 }
1310 
1311 /* The function related with BRC */
1312 static VAStatus
gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1313 gen9_vp9_brc_intra_dist_kernel(VADriverContextP ctx,
1314                                struct encode_state *encode_state,
1315                                struct intel_encoder_context *encoder_context)
1316 {
1317     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1318     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1319     struct i965_gpe_context *gpe_context;
1320     int gpe_index = VP9_BRC_INTRA_DIST;
1321     int media_function = VP9_MEDIA_STATE_ENC_I_FRAME_DIST;
1322     struct gen9_vp9_brc_curbe_param                brc_intra_dist_curbe;
1323     VAEncPictureParameterBufferVP9 *pic_param;
1324     struct gen9_vp9_state *vp9_state;
1325     struct gpe_media_object_walker_parameter media_object_walker_param;
1326     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
1327 
1328     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1329 
1330     if (!vp9_state || !vp9_state->pic_param)
1331         return VA_STATUS_ERROR_INVALID_PARAMETER;
1332 
1333     pic_param = vp9_state->pic_param;
1334 
1335     gpe_context = &brc_context->gpe_contexts[gpe_index];
1336 
1337     gen8_gpe_context_init(ctx, gpe_context);
1338     gen9_gpe_reset_binding_table(ctx, gpe_context);
1339 
1340     brc_intra_dist_curbe.media_state_type    = media_function;
1341     brc_intra_dist_curbe.curr_frame          = pic_param->reconstructed_frame;
1342     brc_intra_dist_curbe.ppic_param          = vp9_state->pic_param;
1343     brc_intra_dist_curbe.pseq_param          = vp9_state->seq_param;
1344     brc_intra_dist_curbe.psegment_param      = vp9_state->segment_param;
1345     brc_intra_dist_curbe.frame_width         = vp9_state->frame_width;
1346     brc_intra_dist_curbe.frame_height        = vp9_state->frame_height;
1347     brc_intra_dist_curbe.pbrc_init_current_target_buf_full_in_bits =
1348         &vp9_state->brc_init_current_target_buf_full_in_bits;
1349     brc_intra_dist_curbe.pbrc_init_reset_buf_size_in_bits =
1350         &vp9_state->brc_init_reset_buf_size_in_bits;
1351     brc_intra_dist_curbe.pbrc_init_reset_input_bits_per_frame =
1352         &vp9_state->brc_init_reset_input_bits_per_frame;
1353     brc_intra_dist_curbe.picture_coding_type  = vp9_state->picture_coding_type;
1354     brc_intra_dist_curbe.initbrc            = !vp9_state->brc_inited;
1355     brc_intra_dist_curbe.mbbrc_enabled      = 0;
1356     brc_intra_dist_curbe.ref_frame_flag      = vp9_state->ref_frame_flag;
1357 
1358     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1359                                    gpe_context,
1360                                    encoder_context,
1361                                    &brc_intra_dist_curbe);
1362 
1363     /* zero distortion buffer */
1364     i965_zero_gpe_resource(&vme_context->s4x_memv_distortion_buffer);
1365 
1366     gen9_brc_intra_dist_add_surfaces_vp9(ctx, encode_state, encoder_context, gpe_context);
1367     gen8_gpe_setup_interface_data(ctx, gpe_context);
1368 
1369     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
1370     kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
1371     kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
1372     kernel_walker_param.no_dependency = 1;
1373 
1374     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
1375 
1376     gen9_run_kernel_media_object_walker(ctx, encoder_context,
1377                                         gpe_context,
1378                                         media_function,
1379                                         &media_object_walker_param);
1380 
1381     return VA_STATUS_SUCCESS;
1382 }
1383 
1384 static void
intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_resource * gpe_resource)1385 intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx,
1386                                          struct encode_state *encode_state,
1387                                          struct intel_encoder_context *encoder_context,
1388                                          struct i965_gpe_resource *gpe_resource)
1389 {
1390     struct gen9_vp9_state *vp9_state;
1391     VAEncPictureParameterBufferVP9 *pic_param;
1392     int frame_width_minus1, frame_height_minus1;
1393     int is_lossless = 0;
1394     int is_intra_only = 0;
1395     unsigned int last_frame_type;
1396     unsigned int ref_flags;
1397     unsigned int use_prev_frame_mvs, adapt_flag;
1398     struct gen9_surface_vp9 *vp9_surface = NULL;
1399     struct object_surface *obj_surface = NULL;
1400     uint32_t scale_h = 0;
1401     uint32_t scale_w = 0;
1402 
1403     char *pdata;
1404     int i, j;
1405     unsigned int *cmd_ptr, cmd_value, tmp;
1406 
1407     pdata = i965_map_gpe_resource(gpe_resource);
1408     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1409 
1410     if (!vp9_state || !vp9_state->pic_param || !pdata)
1411         return;
1412 
1413     pic_param = vp9_state->pic_param;
1414     frame_width_minus1 = ALIGN(pic_param->frame_width_dst, 8) - 1;
1415     frame_height_minus1 = ALIGN(pic_param->frame_height_dst, 8) - 1;
1416     if ((pic_param->luma_ac_qindex == 0) &&
1417         (pic_param->luma_dc_qindex_delta == 0) &&
1418         (pic_param->chroma_ac_qindex_delta == 0) &&
1419         (pic_param->chroma_dc_qindex_delta == 0))
1420         is_lossless = 1;
1421 
1422     if (pic_param->pic_flags.bits.frame_type)
1423         is_intra_only = pic_param->pic_flags.bits.intra_only;
1424 
1425     last_frame_type = vp9_state->vp9_last_frame.frame_type;
1426 
1427     use_prev_frame_mvs = 0;
1428     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) {
1429         last_frame_type = 0;
1430         ref_flags = 0;
1431     } else {
1432         ref_flags = ((pic_param->ref_flags.bits.ref_arf_sign_bias << 9) |
1433                      (pic_param->ref_flags.bits.ref_gf_sign_bias << 8) |
1434                      (pic_param->ref_flags.bits.ref_last_sign_bias << 7)
1435                     );
1436         if (!pic_param->pic_flags.bits.error_resilient_mode &&
1437             (pic_param->frame_width_dst == vp9_state->vp9_last_frame.frame_width) &&
1438             (pic_param->frame_height_dst == vp9_state->vp9_last_frame.frame_height) &&
1439             !pic_param->pic_flags.bits.intra_only &&
1440             vp9_state->vp9_last_frame.show_frame &&
1441             ((vp9_state->vp9_last_frame.frame_type == HCP_VP9_INTER_FRAME) &&
1442              !vp9_state->vp9_last_frame.intra_only)
1443            )
1444             use_prev_frame_mvs = 1;
1445     }
1446     adapt_flag = 0;
1447     if (!pic_param->pic_flags.bits.error_resilient_mode &&
1448         !pic_param->pic_flags.bits.frame_parallel_decoding_mode)
1449         adapt_flag = 1;
1450 
1451     for (i = 0; i < 4; i++) {
1452         uint32_t non_first_pass;
1453         non_first_pass = 1;
1454         if (i == 0)
1455             non_first_pass = 0;
1456 
1457         cmd_ptr = (unsigned int *)(pdata + i * VP9_PIC_STATE_BUFFER_SIZE);
1458 
1459         *cmd_ptr++ = (HCP_VP9_PIC_STATE | (33 - 2));
1460         *cmd_ptr++ = (frame_height_minus1 << 16 |
1461                       frame_width_minus1);
1462         /* dw2 */
1463         *cmd_ptr++ = (0 << 31 |  /* disable segment_in */
1464                       0 << 30 | /* disable segment_out */
1465                       is_lossless << 29 | /* loseless */
1466                       (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_temporal_update) << 28 | /* temporal update */
1467                       (pic_param->pic_flags.bits.segmentation_enabled && pic_param->pic_flags.bits.segmentation_update_map) << 27 | /* temporal update */
1468                       (pic_param->pic_flags.bits.segmentation_enabled << 26) |
1469                       (pic_param->sharpness_level << 23) |
1470                       (pic_param->filter_level << 17) |
1471                       (pic_param->pic_flags.bits.frame_parallel_decoding_mode << 16) |
1472                       (pic_param->pic_flags.bits.error_resilient_mode << 15) |
1473                       (pic_param->pic_flags.bits.refresh_frame_context << 14) |
1474                       (last_frame_type << 13) |
1475                       (vp9_state->tx_mode == TX_MODE_SELECT) << 12 |
1476                       (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) << 11 |
1477                       (use_prev_frame_mvs) << 10 |
1478                       ref_flags |
1479                       (pic_param->pic_flags.bits.mcomp_filter_type << 4) |
1480                       (pic_param->pic_flags.bits.allow_high_precision_mv << 3) |
1481                       (is_intra_only << 2) |
1482                       (adapt_flag << 1) |
1483                       (pic_param->pic_flags.bits.frame_type) << 0);
1484 
1485         *cmd_ptr++ = ((0 << 28) | /* VP9Profile0 */
1486                       (0 << 24) | /* 8-bit depth */
1487                       (0 << 22) | /* only 420 format */
1488                       (0 << 0)  | /* sse statistics */
1489                       (pic_param->log2_tile_rows << 8) |
1490                       (pic_param->log2_tile_columns << 0));
1491 
1492         /* dw4..6 */
1493         if (pic_param->pic_flags.bits.frame_type &&
1494             !pic_param->pic_flags.bits.intra_only) {
1495             for (j = 0; j < 3; j++) {
1496                 obj_surface = encode_state->reference_objects[j];
1497                 scale_w = 0;
1498                 scale_h = 0;
1499                 if (obj_surface && obj_surface->private_data) {
1500                     vp9_surface = obj_surface->private_data;
1501                     scale_w = (vp9_surface->frame_width  << 14) / pic_param->frame_width_dst;
1502                     scale_h = (vp9_surface->frame_height << 14) / pic_param->frame_height_dst;
1503                     *cmd_ptr++ = (scale_w << 16 |
1504                                   scale_h);
1505                 } else
1506                     *cmd_ptr++ = 0;
1507             }
1508         } else {
1509             *cmd_ptr++ = 0;
1510             *cmd_ptr++ = 0;
1511             *cmd_ptr++ = 0;
1512         }
1513         /* dw7..9 */
1514         for (j = 0; j < 3; j++) {
1515             obj_surface = encode_state->reference_objects[j];
1516             vp9_surface = NULL;
1517 
1518             if (obj_surface && obj_surface->private_data) {
1519                 vp9_surface = obj_surface->private_data;
1520                 *cmd_ptr++ = (vp9_surface->frame_height - 1) << 16 |
1521                              (vp9_surface->frame_width - 1);
1522             } else
1523                 *cmd_ptr++ = 0;
1524         }
1525         /* dw10 */
1526         *cmd_ptr++ = 0;
1527         /* dw11 */
1528         *cmd_ptr++ = (1 << 1);
1529         *cmd_ptr++ = 0;
1530 
1531         /* dw13 */
1532         *cmd_ptr++ = ((1 << 25) | /* header insertation for VP9 */
1533                       (0 << 24) | /* tail insertation */
1534                       (pic_param->luma_ac_qindex << 16) |
1535                       0 /* compressed header bin count */);
1536 
1537         /* dw14 */
1538         tmp = intel_convert_sign_mag(pic_param->luma_dc_qindex_delta, 5);
1539         cmd_value = (tmp << 16);
1540         tmp = intel_convert_sign_mag(pic_param->chroma_dc_qindex_delta, 5);
1541         cmd_value |= (tmp << 8);
1542         tmp = intel_convert_sign_mag(pic_param->chroma_ac_qindex_delta, 5);
1543         cmd_value |= tmp;
1544         *cmd_ptr++ = cmd_value;
1545 
1546         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[0], 7);
1547         cmd_value = tmp;
1548         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[1], 7);
1549         cmd_value |= (tmp << 8);
1550         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[2], 7);
1551         cmd_value |= (tmp << 16);
1552         tmp = intel_convert_sign_mag(pic_param->ref_lf_delta[3], 7);
1553         cmd_value |= (tmp << 24);
1554         *cmd_ptr++ = cmd_value;
1555 
1556         /* dw16 */
1557         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[0], 7);
1558         cmd_value = tmp;
1559         tmp = intel_convert_sign_mag(pic_param->mode_lf_delta[1], 7);
1560         cmd_value |= (tmp << 8);
1561         *cmd_ptr++ = cmd_value;
1562 
1563         /* dw17 */
1564         *cmd_ptr++ = vp9_state->frame_header.bit_offset_ref_lf_delta |
1565                      (vp9_state->frame_header.bit_offset_mode_lf_delta << 16);
1566         *cmd_ptr++ = vp9_state->frame_header.bit_offset_qindex |
1567                      (vp9_state->frame_header.bit_offset_lf_level << 16);
1568 
1569         /* dw19 */
1570         *cmd_ptr++ = (1 << 26 | (1 << 25) |
1571                       non_first_pass << 16);
1572         /* dw20 */
1573         *cmd_ptr++ = (1 << 31) | (256);
1574 
1575         /* dw21 */
1576         *cmd_ptr++ = (0 << 31) | 1;
1577 
1578         /* dw22-dw24. Frame_delta_qindex_range */
1579         *cmd_ptr++ = 0;
1580         *cmd_ptr++ = 0;
1581         *cmd_ptr++ = 0;
1582 
1583         /* dw25-26. frame_delta_lf_range */
1584         *cmd_ptr++ = 0;
1585         *cmd_ptr++ = 0;
1586 
1587         /* dw27. frame_delta_lf_min */
1588         *cmd_ptr++ = 0;
1589 
1590         /* dw28..30 */
1591         *cmd_ptr++ = 0;
1592         *cmd_ptr++ = 0;
1593         *cmd_ptr++ = 0;
1594 
1595         /* dw31 */
1596         /* dw31 is for restricting the compressed frames minimum size
1597          * and we don't impose any. */
1598         *cmd_ptr++ = 0;
1599 
1600         /* dw32 */
1601         *cmd_ptr++ = vp9_state->frame_header.bit_offset_first_partition_size;
1602 
1603         *cmd_ptr++ = 0;
1604         *cmd_ptr++ = MI_BATCH_BUFFER_END;
1605     }
1606 
1607     i965_unmap_gpe_resource(gpe_resource);
1608 }
1609 
1610 static void
gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * brc_gpe_context,struct i965_gpe_context * mbenc_gpe_context)1611 gen9_brc_update_add_surfaces_vp9(VADriverContextP ctx,
1612                                  struct encode_state *encode_state,
1613                                  struct intel_encoder_context *encoder_context,
1614                                  struct i965_gpe_context *brc_gpe_context,
1615                                  struct i965_gpe_context *mbenc_gpe_context)
1616 {
1617     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1618 
1619     /* 0. BRC history buffer */
1620     i965_add_buffer_gpe_surface(ctx,
1621                                 brc_gpe_context,
1622                                 &vme_context->res_brc_history_buffer,
1623                                 0,
1624                                 vme_context->res_brc_history_buffer.size,
1625                                 0,
1626                                 VP9_BTI_BRC_HISTORY_G9);
1627 
1628     /* 1. Constant data buffer */
1629     i965_add_buffer_gpe_surface(ctx,
1630                                 brc_gpe_context,
1631                                 &vme_context->res_brc_const_data_buffer,
1632                                 0,
1633                                 vme_context->res_brc_const_data_buffer.size,
1634                                 0,
1635                                 VP9_BTI_BRC_CONSTANT_DATA_G9);
1636 
1637     /* 2. Distortion 2D surface buffer */
1638     i965_add_buffer_2d_gpe_surface(ctx,
1639                                    brc_gpe_context,
1640                                    &vme_context->s4x_memv_distortion_buffer,
1641                                    1,
1642                                    I965_SURFACEFORMAT_R8_UNORM,
1643                                    VP9_BTI_BRC_DISTORTION_G9);
1644 
1645     /* 3. pak buffer */
1646     i965_add_buffer_gpe_surface(ctx,
1647                                 brc_gpe_context,
1648                                 &vme_context->res_brc_mmdk_pak_buffer,
1649                                 0,
1650                                 vme_context->res_brc_mmdk_pak_buffer.size,
1651                                 0,
1652                                 VP9_BTI_BRC_MMDK_PAK_OUTPUT_G9);
1653     /* 4. Mbenc curbe input buffer */
1654     gen9_add_dri_buffer_gpe_surface(ctx,
1655                                     brc_gpe_context,
1656                                     mbenc_gpe_context->curbe.bo,
1657                                     0,
1658                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1659                                     mbenc_gpe_context->curbe.offset,
1660                                     VP9_BTI_BRC_MBENC_CURBE_INPUT_G9);
1661     /* 5. Mbenc curbe output buffer */
1662     gen9_add_dri_buffer_gpe_surface(ctx,
1663                                     brc_gpe_context,
1664                                     mbenc_gpe_context->curbe.bo,
1665                                     0,
1666                                     ALIGN(mbenc_gpe_context->curbe.length, 64),
1667                                     mbenc_gpe_context->curbe.offset,
1668                                     VP9_BTI_BRC_MBENC_CURBE_OUTPUT_G9);
1669 
1670     /* 6. BRC_PIC_STATE read buffer */
1671     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1672                                 &vme_context->res_pic_state_brc_read_buffer,
1673                                 0,
1674                                 vme_context->res_pic_state_brc_read_buffer.size,
1675                                 0,
1676                                 VP9_BTI_BRC_PIC_STATE_INPUT_G9);
1677 
1678     /* 7. BRC_PIC_STATE write buffer */
1679     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1680                                 &vme_context->res_pic_state_brc_write_hfw_read_buffer,
1681                                 0,
1682                                 vme_context->res_pic_state_brc_write_hfw_read_buffer.size,
1683                                 0,
1684                                 VP9_BTI_BRC_PIC_STATE_OUTPUT_G9);
1685 
1686     /* 8. SEGMENT_STATE read buffer */
1687     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1688                                 &vme_context->res_seg_state_brc_read_buffer,
1689                                 0,
1690                                 vme_context->res_seg_state_brc_read_buffer.size,
1691                                 0,
1692                                 VP9_BTI_BRC_SEGMENT_STATE_INPUT_G9);
1693 
1694     /* 9. SEGMENT_STATE write buffer */
1695     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1696                                 &vme_context->res_seg_state_brc_write_buffer,
1697                                 0,
1698                                 vme_context->res_seg_state_brc_write_buffer.size,
1699                                 0,
1700                                 VP9_BTI_BRC_SEGMENT_STATE_OUTPUT_G9);
1701 
1702     /* 10. Bitstream size buffer */
1703     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1704                                 &vme_context->res_brc_bitstream_size_buffer,
1705                                 0,
1706                                 vme_context->res_brc_bitstream_size_buffer.size,
1707                                 0,
1708                                 VP9_BTI_BRC_BITSTREAM_SIZE_G9);
1709 
1710     i965_add_buffer_gpe_surface(ctx, brc_gpe_context,
1711                                 &vme_context->res_brc_hfw_data_buffer,
1712                                 0,
1713                                 vme_context->res_brc_hfw_data_buffer.size,
1714                                 0,
1715                                 VP9_BTI_BRC_HFW_DATA_G9);
1716 
1717     return;
1718 }
1719 
1720 static VAStatus
gen9_vp9_brc_update_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)1721 gen9_vp9_brc_update_kernel(VADriverContextP ctx,
1722                            struct encode_state *encode_state,
1723                            struct intel_encoder_context *encoder_context)
1724 {
1725     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
1726     struct vp9_brc_context *brc_context = &vme_context->brc_context;
1727     struct i965_gpe_context *brc_gpe_context, *mbenc_gpe_context;
1728     int mbenc_index, gpe_index = VP9_BRC_UPDATE;
1729     int media_function = VP9_MEDIA_STATE_BRC_UPDATE;
1730     int mbenc_function;
1731     struct gen9_vp9_brc_curbe_param        brc_update_curbe_param;
1732     VAEncPictureParameterBufferVP9 *pic_param;
1733     struct gen9_vp9_state *vp9_state;
1734     struct gen9_vp9_mbenc_curbe_param    mbenc_curbe_param;
1735     struct gpe_media_object_parameter media_object_param;
1736 
1737     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
1738     if (!vp9_state || !vp9_state->pic_param)
1739         return VA_STATUS_ERROR_INVALID_PARAMETER;
1740 
1741     pic_param = vp9_state->pic_param;
1742     // Setup VP9 MbEnc Curbe
1743     if (vp9_state->picture_coding_type) {
1744         mbenc_function = VP9_MEDIA_STATE_MBENC_P;
1745         mbenc_index = VP9_MBENC_IDX_INTER;
1746     } else {
1747         mbenc_function = VP9_MEDIA_STATE_MBENC_I_32x32;
1748         mbenc_index = VP9_MBENC_IDX_KEY_32x32;
1749     }
1750 
1751     mbenc_gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_index]);
1752 
1753     memset(&mbenc_curbe_param, 0, sizeof(mbenc_curbe_param));
1754 
1755     mbenc_curbe_param.ppic_param             = vp9_state->pic_param;
1756     mbenc_curbe_param.pseq_param             = vp9_state->seq_param;
1757     mbenc_curbe_param.psegment_param         = vp9_state->segment_param;
1758     //mbenc_curbe_param.ppRefList              = &(vp9_state->pRefList[0]);
1759     mbenc_curbe_param.last_ref_obj           = vp9_state->last_ref_obj;
1760     mbenc_curbe_param.golden_ref_obj         = vp9_state->golden_ref_obj;
1761     mbenc_curbe_param.alt_ref_obj            = vp9_state->alt_ref_obj;
1762     mbenc_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1763     mbenc_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1764     mbenc_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1765     mbenc_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1766     mbenc_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1767     mbenc_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1768     mbenc_curbe_param.media_state_type       = mbenc_function;
1769 
1770     vme_context->pfn_set_curbe_mbenc(ctx, encode_state,
1771                                      mbenc_gpe_context,
1772                                      encoder_context,
1773                                      &mbenc_curbe_param);
1774 
1775     vp9_state->mbenc_curbe_set_in_brc_update = true;
1776 
1777     brc_gpe_context = &brc_context->gpe_contexts[gpe_index];
1778 
1779     gen8_gpe_context_init(ctx, brc_gpe_context);
1780     gen9_gpe_reset_binding_table(ctx, brc_gpe_context);
1781 
1782     memset(&brc_update_curbe_param, 0, sizeof(brc_update_curbe_param));
1783 
1784     // Setup BRC Update Curbe
1785     brc_update_curbe_param.media_state_type       = media_function;
1786     brc_update_curbe_param.curr_frame               = pic_param->reconstructed_frame;
1787     brc_update_curbe_param.ppic_param             = vp9_state->pic_param;
1788     brc_update_curbe_param.pseq_param             = vp9_state->seq_param;
1789     brc_update_curbe_param.psegment_param         = vp9_state->segment_param;
1790     brc_update_curbe_param.picture_coding_type    = vp9_state->picture_coding_type;
1791     brc_update_curbe_param.frame_width_in_mb      = ALIGN(vp9_state->frame_width, 16) / 16;
1792     brc_update_curbe_param.frame_height_in_mb     = ALIGN(vp9_state->frame_height, 16) / 16;
1793     brc_update_curbe_param.hme_enabled            = vp9_state->hme_enabled;
1794     brc_update_curbe_param.b_used_ref             = 1;
1795     brc_update_curbe_param.frame_number           = vp9_state->frame_number;
1796     brc_update_curbe_param.ref_frame_flag         = vp9_state->ref_frame_flag;
1797     brc_update_curbe_param.mbbrc_enabled          = 0;
1798     brc_update_curbe_param.multi_ref_qp_check     = vp9_state->multi_ref_qp_check;
1799     brc_update_curbe_param.brc_num_pak_passes     = vp9_state->num_pak_passes;
1800 
1801     brc_update_curbe_param.pbrc_init_current_target_buf_full_in_bits =
1802         &vp9_state->brc_init_current_target_buf_full_in_bits;
1803     brc_update_curbe_param.pbrc_init_reset_buf_size_in_bits =
1804         &vp9_state->brc_init_reset_buf_size_in_bits;
1805     brc_update_curbe_param.pbrc_init_reset_input_bits_per_frame =
1806         &vp9_state->brc_init_reset_input_bits_per_frame;
1807 
1808     vme_context->pfn_set_curbe_brc(ctx, encode_state,
1809                                    brc_gpe_context,
1810                                    encoder_context,
1811                                    &brc_update_curbe_param);
1812 
1813 
1814     // Check if the constant data surface is present
1815     if (vp9_state->brc_constant_buffer_supported) {
1816         char *brc_const_buffer;
1817         brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer);
1818 
1819         if (!brc_const_buffer)
1820             return VA_STATUS_ERROR_OPERATION_FAILED;
1821 
1822         if (vp9_state->picture_coding_type)
1823             memcpy(brc_const_buffer, vp9_brc_const_data_p_g9,
1824                    sizeof(vp9_brc_const_data_p_g9));
1825         else
1826             memcpy(brc_const_buffer, vp9_brc_const_data_i_g9,
1827                    sizeof(vp9_brc_const_data_i_g9));
1828 
1829         i965_unmap_gpe_resource(&vme_context->res_brc_const_data_buffer);
1830     }
1831 
1832     if (pic_param->pic_flags.bits.segmentation_enabled) {
1833         //reallocate the vme_state->mb_segment_map_surface
1834         /* this will be added later */
1835     }
1836 
1837     {
1838         pic_param->filter_level = 0;
1839         // clear the filter level value in picParams ebfore programming pic state, as this value will be determined and updated by BRC.
1840         intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
1841                                                  encoder_context, &vme_context->res_pic_state_brc_read_buffer);
1842     }
1843 
1844     gen9_brc_update_add_surfaces_vp9(ctx, encode_state,
1845                                      encoder_context,
1846                                      brc_gpe_context,
1847                                      mbenc_gpe_context);
1848 
1849     gen8_gpe_setup_interface_data(ctx, brc_gpe_context);
1850     memset(&media_object_param, 0, sizeof(media_object_param));
1851     gen9_run_kernel_media_object(ctx, encoder_context,
1852                                  brc_gpe_context,
1853                                  media_function,
1854                                  &media_object_param);
1855     return VA_STATUS_SUCCESS;
1856 }
1857 
1858 static
gen9_vp9_set_curbe_me(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_me_curbe_param * param)1859 void gen9_vp9_set_curbe_me(VADriverContextP ctx,
1860                            struct encode_state *encode_state,
1861                            struct i965_gpe_context *gpe_context,
1862                            struct intel_encoder_context *encoder_context,
1863                            struct gen9_vp9_me_curbe_param *param)
1864 {
1865     vp9_me_curbe_data        *me_cmd;
1866     int enc_media_state;
1867     int                                       me_mode;
1868     unsigned int                                       width, height;
1869     uint32_t                                  l0_ref_frames;
1870     uint32_t                                  scale_factor;
1871 
1872     if (param->b16xme_enabled) {
1873         if (param->use_16x_me)
1874             me_mode = VP9_ENC_ME16X_BEFORE_ME4X;
1875         else
1876             me_mode = VP9_ENC_ME4X_AFTER_ME16X;
1877     } else {
1878         me_mode = VP9_ENC_ME4X_ONLY;
1879     }
1880 
1881     if (me_mode == VP9_ENC_ME16X_BEFORE_ME4X)
1882         scale_factor = 16;
1883     else
1884         scale_factor = 4;
1885 
1886     if (param->use_16x_me)
1887         enc_media_state = VP9_MEDIA_STATE_16X_ME;
1888     else
1889         enc_media_state = VP9_MEDIA_STATE_4X_ME;
1890 
1891     me_cmd = i965_gpe_context_map_curbe(gpe_context);
1892 
1893     if (!me_cmd)
1894         return;
1895 
1896     memset(me_cmd, 0, sizeof(vp9_me_curbe_data));
1897 
1898     me_cmd->dw1.max_num_mvs           = 0x10;
1899     me_cmd->dw1.bi_weight             = 0x00;
1900 
1901     me_cmd->dw2.max_num_su            = 0x39;
1902     me_cmd->dw2.max_len_sp            = 0x39;
1903 
1904     me_cmd->dw3.sub_mb_part_mask       = 0x77;
1905     me_cmd->dw3.inter_sad             = 0x00;
1906     me_cmd->dw3.intra_sad            = 0x00;
1907     me_cmd->dw3.bme_disable_fbr      = 0x01;
1908     me_cmd->dw3.sub_pel_mode         = 0x03;
1909 
1910     width = param->frame_width / scale_factor;
1911     height = param->frame_height / scale_factor;
1912 
1913     me_cmd->dw4.picture_width        = ALIGN(width, 16) / 16;
1914     me_cmd->dw4.picture_height_minus1       = ALIGN(height, 16) / 16 - 1;
1915 
1916     me_cmd->dw5.ref_width            = 0x30;
1917     me_cmd->dw5.ref_height           = 0x28;
1918 
1919     if (enc_media_state == VP9_MEDIA_STATE_4X_ME)
1920         me_cmd->dw6.write_distortions = 0x01;
1921 
1922     me_cmd->dw6.use_mv_from_prev_step   = me_mode == VP9_ENC_ME4X_AFTER_ME16X ? 1 : 0;
1923     me_cmd->dw6.super_combine_dist    = 0x5;
1924     me_cmd->dw6.max_vmvr              = 0x7fc;
1925 
1926     l0_ref_frames = (param->ref_frame_flag & 0x01) +
1927                     !!(param->ref_frame_flag & 0x02) +
1928                     !!(param->ref_frame_flag & 0x04);
1929     me_cmd->dw13.num_ref_idx_l0_minus1 = (l0_ref_frames > 0) ? l0_ref_frames - 1 : 0;
1930     me_cmd->dw13.num_ref_idx_l1_minus1 =  0;
1931 
1932     me_cmd->dw14.l0_ref_pic_polarity_bits = 0;
1933     me_cmd->dw14.l1_ref_pic_polarity_bits = 0;
1934 
1935     me_cmd->dw15.mv_shift_factor        = 0x02;
1936 
1937     {
1938         memcpy((void *)((char *)me_cmd + 64),
1939                vp9_diamond_ime_search_path_delta,
1940                sizeof(vp9_diamond_ime_search_path_delta));
1941     }
1942 
1943 
1944     me_cmd->dw32._4x_memv_output_data_surf_index     = VP9_BTI_ME_MV_DATA_SURFACE;
1945     me_cmd->dw33._16x_32x_memv_input_data_surf_index = VP9_BTI_16XME_MV_DATA_SURFACE;
1946     me_cmd->dw34._4x_me_output_dist_surf_index       = VP9_BTI_ME_DISTORTION_SURFACE;
1947     me_cmd->dw35._4x_me_output_brc_dist_surf_index   = VP9_BTI_ME_BRC_DISTORTION_SURFACE;
1948     me_cmd->dw36.vme_fwd_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L0;
1949     me_cmd->dw37.vme_bdw_inter_pred_surf_index       = VP9_BTI_ME_CURR_PIC_L1;
1950 
1951     i965_gpe_context_unmap_curbe(gpe_context);
1952 }
1953 
1954 static void
gen9_vp9_send_me_surface(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_me_surface_param * param)1955 gen9_vp9_send_me_surface(VADriverContextP ctx,
1956                          struct encode_state *encode_state,
1957                          struct i965_gpe_context *gpe_context,
1958                          struct intel_encoder_context *encoder_context,
1959                          struct gen9_vp9_me_surface_param *param)
1960 {
1961     struct i965_driver_data *i965 = i965_driver_data(ctx);
1962     struct object_surface *obj_surface;
1963     struct gen9_surface_vp9 *vp9_priv_surface;
1964     struct object_surface *input_surface;
1965     struct i965_gpe_resource *gpe_resource;
1966     int ref_bti;
1967 
1968     obj_surface = SURFACE(param->curr_pic);
1969 
1970     if (!obj_surface || !obj_surface->private_data)
1971         return;
1972 
1973     vp9_priv_surface = obj_surface->private_data;
1974     if (param->use_16x_me) {
1975         gpe_resource = param->pres_16x_memv_data_buffer;
1976     } else {
1977         gpe_resource = param->pres_4x_memv_data_buffer;
1978     }
1979 
1980     i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1981                                    gpe_resource,
1982                                    1,
1983                                    I965_SURFACEFORMAT_R8_UNORM,
1984                                    VP9_BTI_ME_MV_DATA_SURFACE);
1985 
1986     if (param->b16xme_enabled) {
1987         gpe_resource = param->pres_16x_memv_data_buffer;
1988         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1989                                        gpe_resource,
1990                                        1,
1991                                        I965_SURFACEFORMAT_R8_UNORM,
1992                                        VP9_BTI_16XME_MV_DATA_SURFACE);
1993     }
1994 
1995     if (!param->use_16x_me) {
1996         gpe_resource = param->pres_me_brc_distortion_buffer;
1997 
1998         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
1999                                        gpe_resource,
2000                                        1,
2001                                        I965_SURFACEFORMAT_R8_UNORM,
2002                                        VP9_BTI_ME_BRC_DISTORTION_SURFACE);
2003 
2004         gpe_resource = param->pres_me_distortion_buffer;
2005 
2006         i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
2007                                        gpe_resource,
2008                                        1,
2009                                        I965_SURFACEFORMAT_R8_UNORM,
2010                                        VP9_BTI_ME_DISTORTION_SURFACE);
2011     }
2012 
2013     if (param->use_16x_me)
2014         input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2015     else
2016         input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2017 
2018     i965_add_adv_gpe_surface(ctx, gpe_context,
2019                              input_surface,
2020                              VP9_BTI_ME_CURR_PIC_L0);
2021 
2022     ref_bti = VP9_BTI_ME_CURR_PIC_L0 + 1;
2023 
2024 
2025     if (param->last_ref_pic) {
2026         obj_surface = param->last_ref_pic;
2027         vp9_priv_surface = obj_surface->private_data;
2028 
2029         if (param->use_16x_me)
2030             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2031         else
2032             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2033 
2034         if (param->dys_enabled &&
2035             ((vp9_priv_surface->frame_width != param->frame_width) ||
2036              (vp9_priv_surface->frame_height != param->frame_height))) {
2037             if (param->use_16x_me)
2038                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2039             else
2040                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2041         }
2042         i965_add_adv_gpe_surface(ctx, gpe_context,
2043                                  input_surface,
2044                                  ref_bti);
2045         i965_add_adv_gpe_surface(ctx, gpe_context,
2046                                  input_surface,
2047                                  ref_bti + 1);
2048         ref_bti += 2;
2049     }
2050 
2051     if (param->golden_ref_pic) {
2052         obj_surface = param->golden_ref_pic;
2053         vp9_priv_surface = obj_surface->private_data;
2054 
2055         if (param->use_16x_me)
2056             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2057         else
2058             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2059 
2060         if (param->dys_enabled &&
2061             ((vp9_priv_surface->frame_width != param->frame_width) ||
2062              (vp9_priv_surface->frame_height != param->frame_height))) {
2063             if (param->use_16x_me)
2064                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2065             else
2066                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2067         }
2068 
2069         i965_add_adv_gpe_surface(ctx, gpe_context,
2070                                  input_surface,
2071                                  ref_bti);
2072         i965_add_adv_gpe_surface(ctx, gpe_context,
2073                                  input_surface,
2074                                  ref_bti + 1);
2075         ref_bti += 2;
2076     }
2077 
2078     if (param->alt_ref_pic) {
2079         obj_surface = param->alt_ref_pic;
2080         vp9_priv_surface = obj_surface->private_data;
2081 
2082         if (param->use_16x_me)
2083             input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2084         else
2085             input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2086 
2087         if (param->dys_enabled &&
2088             ((vp9_priv_surface->frame_width != param->frame_width) ||
2089              (vp9_priv_surface->frame_height != param->frame_height))) {
2090             if (param->use_16x_me)
2091                 input_surface = vp9_priv_surface->dys_16x_surface_obj;
2092             else
2093                 input_surface = vp9_priv_surface->dys_4x_surface_obj;
2094         }
2095         i965_add_adv_gpe_surface(ctx, gpe_context,
2096                                  input_surface,
2097                                  ref_bti);
2098         i965_add_adv_gpe_surface(ctx, gpe_context,
2099                                  input_surface,
2100                                  ref_bti + 1);
2101         ref_bti += 2;
2102     }
2103 
2104     return;
2105 }
2106 
2107 static
gen9_me_add_surfaces_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct i965_gpe_context * gpe_context,int use_16x_me)2108 void gen9_me_add_surfaces_vp9(VADriverContextP ctx,
2109                               struct encode_state *encode_state,
2110                               struct intel_encoder_context *encoder_context,
2111                               struct i965_gpe_context *gpe_context,
2112                               int use_16x_me)
2113 {
2114     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2115     struct gen9_vp9_me_surface_param  me_surface_param;
2116     struct gen9_vp9_state *vp9_state;
2117 
2118     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
2119 
2120     /* sScaled4xSurface surface */
2121     memset(&me_surface_param, 0, sizeof(me_surface_param));
2122     me_surface_param.last_ref_pic = vp9_state->last_ref_obj;
2123     me_surface_param.golden_ref_pic = vp9_state->golden_ref_obj;
2124     me_surface_param.alt_ref_pic = vp9_state->alt_ref_obj;
2125     me_surface_param.curr_pic = vp9_state->curr_frame;
2126     me_surface_param.pres_4x_memv_data_buffer  = &vme_context->s4x_memv_data_buffer;
2127     me_surface_param.pres_16x_memv_data_buffer = &vme_context->s16x_memv_data_buffer;
2128     me_surface_param.pres_me_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2129     me_surface_param.pres_me_brc_distortion_buffer = &vme_context->s4x_memv_distortion_buffer;
2130 
2131     if (use_16x_me) {
2132         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_16x_in_mb;
2133         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_16x_in_mb;
2134     } else {
2135         me_surface_param.downscaled_width_in_mb = vp9_state->downscaled_width_4x_in_mb;
2136         me_surface_param.downscaled_height_in_mb = vp9_state->downscaled_height_4x_in_mb;
2137     }
2138     me_surface_param.frame_width  = vp9_state->frame_width;
2139     me_surface_param.frame_height  = vp9_state->frame_height;
2140 
2141     me_surface_param.use_16x_me = use_16x_me;
2142     me_surface_param.b16xme_enabled = vp9_state->b16xme_enabled;
2143     me_surface_param.dys_enabled = vp9_state->dys_in_use;
2144 
2145     vme_context->pfn_send_me_surface(ctx, encode_state,
2146                                      gpe_context,
2147                                      encoder_context,
2148                                      &me_surface_param);
2149     return;
2150 }
2151 
2152 static VAStatus
gen9_vp9_me_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int use_16x_me)2153 gen9_vp9_me_kernel(VADriverContextP ctx,
2154                    struct encode_state *encode_state,
2155                    struct intel_encoder_context *encoder_context,
2156                    int use_16x_me)
2157 {
2158     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2159     struct i965_gpe_context *gpe_context;
2160     int media_function;
2161     struct gen9_vp9_me_curbe_param me_curbe_param;
2162     struct gen9_vp9_state *vp9_state;
2163     struct gpe_media_object_walker_parameter media_object_walker_param;
2164     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2165 
2166     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2167     if (!vp9_state || !vp9_state->pic_param)
2168         return VA_STATUS_ERROR_INVALID_PARAMETER;
2169 
2170     if (use_16x_me)
2171         media_function = VP9_MEDIA_STATE_16X_ME;
2172     else
2173         media_function = VP9_MEDIA_STATE_4X_ME;
2174 
2175     gpe_context = &(vme_context->me_context.gpe_context);
2176 
2177     gen8_gpe_context_init(ctx, gpe_context);
2178     gen9_gpe_reset_binding_table(ctx, gpe_context);
2179 
2180     memset(&me_curbe_param, 0, sizeof(me_curbe_param));
2181     me_curbe_param.ppic_param = vp9_state->pic_param;
2182     me_curbe_param.pseq_param = vp9_state->seq_param;
2183     me_curbe_param.frame_width = vp9_state->frame_width;
2184     me_curbe_param.frame_height = vp9_state->frame_height;
2185     me_curbe_param.ref_frame_flag = vp9_state->ref_frame_flag;
2186     me_curbe_param.use_16x_me = use_16x_me;
2187     me_curbe_param.b16xme_enabled = vp9_state->b16xme_enabled;
2188     vme_context->pfn_set_curbe_me(ctx, encode_state,
2189                                   gpe_context,
2190                                   encoder_context,
2191                                   &me_curbe_param);
2192 
2193     gen9_me_add_surfaces_vp9(ctx, encode_state,
2194                              encoder_context,
2195                              gpe_context,
2196                              use_16x_me);
2197 
2198     gen8_gpe_setup_interface_data(ctx, gpe_context);
2199 
2200     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2201     if (use_16x_me) {
2202         kernel_walker_param.resolution_x = vp9_state->downscaled_width_16x_in_mb;
2203         kernel_walker_param.resolution_y = vp9_state->downscaled_height_16x_in_mb;
2204     } else {
2205         kernel_walker_param.resolution_x = vp9_state->downscaled_width_4x_in_mb;
2206         kernel_walker_param.resolution_y = vp9_state->downscaled_height_4x_in_mb;
2207     }
2208     kernel_walker_param.no_dependency = 1;
2209 
2210     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2211 
2212     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2213                                         gpe_context,
2214                                         media_function,
2215                                         &media_object_walker_param);
2216 
2217     return VA_STATUS_SUCCESS;
2218 }
2219 
2220 static void
gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_scaling_curbe_param * curbe_param)2221 gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx,
2222                               struct encode_state *encode_state,
2223                               struct i965_gpe_context *gpe_context,
2224                               struct intel_encoder_context *encoder_context,
2225                               struct gen9_vp9_scaling_curbe_param *curbe_param)
2226 {
2227     vp9_scaling4x_curbe_data_cm *curbe_cmd;
2228 
2229     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2230 
2231     if (!curbe_cmd)
2232         return;
2233 
2234     memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm));
2235 
2236     curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width;
2237     curbe_cmd->dw0.input_picture_height = curbe_param->input_picture_height;
2238 
2239     curbe_cmd->dw1.input_y_bti = VP9_BTI_SCALING_FRAME_SRC_Y;
2240     curbe_cmd->dw2.output_y_bti = VP9_BTI_SCALING_FRAME_DST_Y;
2241 
2242 
2243     curbe_cmd->dw6.enable_mb_variance_output = 0;
2244     curbe_cmd->dw6.enable_mb_pixel_average_output = 0;
2245     curbe_cmd->dw6.enable_blk8x8_stat_output = 0;
2246 
2247     if (curbe_param->mb_variance_output_enabled ||
2248         curbe_param->mb_pixel_average_output_enabled) {
2249         curbe_cmd->dw10.mbv_proc_stat_bti = VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
2250     }
2251 
2252     i965_gpe_context_unmap_curbe(gpe_context);
2253     return;
2254 }
2255 
2256 static void
gen9_vp9_send_scaling_surface(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_scaling_surface_param * scaling_surface_param)2257 gen9_vp9_send_scaling_surface(VADriverContextP ctx,
2258                               struct encode_state *encode_state,
2259                               struct i965_gpe_context *gpe_context,
2260                               struct intel_encoder_context *encoder_context,
2261                               struct gen9_vp9_scaling_surface_param *scaling_surface_param)
2262 {
2263     vp9_bti_scaling_offset *scaling_bti;
2264     unsigned int surface_format;
2265 
2266     scaling_bti = scaling_surface_param->p_scaling_bti;
2267 
2268     if (scaling_surface_param->scaling_out_use_32unorm_surf_fmt)
2269         surface_format = I965_SURFACEFORMAT_R32_UNORM;
2270     else if (scaling_surface_param->scaling_out_use_16unorm_surf_fmt)
2271         surface_format = I965_SURFACEFORMAT_R16_UNORM;
2272     else
2273         surface_format = I965_SURFACEFORMAT_R8_UNORM;
2274 
2275     i965_add_2d_gpe_surface(ctx, gpe_context,
2276                             scaling_surface_param->input_surface,
2277                             0, 1, surface_format,
2278                             scaling_bti->scaling_frame_src_y);
2279 
2280     i965_add_2d_gpe_surface(ctx, gpe_context,
2281                             scaling_surface_param->output_surface,
2282                             0, 1, surface_format,
2283                             scaling_bti->scaling_frame_dst_y);
2284 
2285 
2286     return;
2287 }
2288 
2289 static VAStatus
gen9_vp9_scaling_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int use_16x_scaling)2290 gen9_vp9_scaling_kernel(VADriverContextP ctx,
2291                         struct encode_state *encode_state,
2292                         struct intel_encoder_context *encoder_context,
2293                         int use_16x_scaling)
2294 {
2295     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2296     struct i965_gpe_context *gpe_context;
2297     int media_function;
2298     struct gen9_vp9_scaling_curbe_param scaling_curbe_param;
2299     struct gen9_vp9_scaling_surface_param scaling_surface_param;
2300     struct gen9_vp9_state *vp9_state;
2301     VAEncPictureParameterBufferVP9  *pic_param;
2302     struct gpe_media_object_walker_parameter media_object_walker_param;
2303     struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
2304     struct object_surface *obj_surface;
2305     struct object_surface *input_surface, *output_surface;
2306     struct gen9_surface_vp9 *vp9_priv_surface;
2307     unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
2308     unsigned int input_frame_width, input_frame_height;
2309     unsigned int output_frame_width, output_frame_height;
2310 
2311     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2312     if (!vp9_state || !vp9_state->pic_param)
2313         return VA_STATUS_ERROR_INVALID_PARAMETER;
2314 
2315     pic_param = vp9_state->pic_param;
2316 
2317     if (use_16x_scaling)
2318         media_function = VP9_MEDIA_STATE_16X_SCALING;
2319     else
2320         media_function = VP9_MEDIA_STATE_4X_SCALING;
2321 
2322     gpe_context = &(vme_context->scaling_context.gpe_contexts[0]);
2323 
2324     gen8_gpe_context_init(ctx, gpe_context);
2325     gen9_gpe_reset_binding_table(ctx, gpe_context);
2326 
2327     obj_surface = encode_state->reconstructed_object;
2328     vp9_priv_surface = obj_surface->private_data;
2329 
2330     if (use_16x_scaling) {
2331         downscaled_width_in_mb      = vp9_state->downscaled_width_16x_in_mb;
2332         downscaled_height_in_mb      = vp9_state->downscaled_height_16x_in_mb;
2333 
2334         input_surface               = vp9_priv_surface->scaled_4x_surface_obj;
2335         input_frame_width           = vp9_state->frame_width_4x;
2336         input_frame_height          = vp9_state->frame_height_4x;
2337 
2338         output_surface              = vp9_priv_surface->scaled_16x_surface_obj;
2339         output_frame_width          = vp9_state->frame_width_16x;
2340         output_frame_height         = vp9_state->frame_height_16x;
2341     } else {
2342         downscaled_width_in_mb      = vp9_state->downscaled_width_4x_in_mb;
2343         downscaled_height_in_mb      = vp9_state->downscaled_height_4x_in_mb;
2344 
2345         if (vp9_state->dys_in_use &&
2346             ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2347              (pic_param->frame_height_src != pic_param->frame_height_dst)))
2348             input_surface               = vp9_priv_surface->dys_surface_obj;
2349         else
2350             input_surface               = encode_state->input_yuv_object;
2351 
2352         input_frame_width           = vp9_state->frame_width;
2353         input_frame_height          = vp9_state->frame_height;
2354 
2355         output_surface              = vp9_priv_surface->scaled_4x_surface_obj;
2356         output_frame_width          = vp9_state->frame_width_4x;
2357         output_frame_height         = vp9_state->frame_height_4x;
2358     }
2359 
2360     memset(&scaling_curbe_param, 0, sizeof(scaling_curbe_param));
2361 
2362     scaling_curbe_param.input_picture_width  = input_frame_width;
2363     scaling_curbe_param.input_picture_height = input_frame_height;
2364 
2365     scaling_curbe_param.use_16x_scaling = use_16x_scaling;
2366     scaling_curbe_param.use_32x_scaling = 0;
2367 
2368     if (use_16x_scaling)
2369         scaling_curbe_param.mb_variance_output_enabled = 0;
2370     else
2371         scaling_curbe_param.mb_variance_output_enabled = vp9_state->adaptive_transform_decision_enabled;
2372 
2373     scaling_curbe_param.blk8x8_stat_enabled = 0;
2374 
2375     vme_context->pfn_set_curbe_scaling(ctx, encode_state,
2376                                        gpe_context,
2377                                        encoder_context,
2378                                        &scaling_curbe_param);
2379 
2380     memset(&scaling_surface_param, 0, sizeof(scaling_surface_param));
2381     scaling_surface_param.p_scaling_bti = (void *)(&vme_context->scaling_context.scaling_4x_bti);
2382     scaling_surface_param.input_surface                      = input_surface;
2383     scaling_surface_param.input_frame_width                  = input_frame_width;
2384     scaling_surface_param.input_frame_height                 = input_frame_height;
2385 
2386     scaling_surface_param.output_surface                     = output_surface;
2387     scaling_surface_param.output_frame_width                 = output_frame_width;
2388     scaling_surface_param.output_frame_height                = output_frame_height;
2389     scaling_surface_param.scaling_out_use_16unorm_surf_fmt   = 0;
2390     scaling_surface_param.scaling_out_use_32unorm_surf_fmt   = 1;
2391 
2392     vme_context->pfn_send_scaling_surface(ctx, encode_state,
2393                                           gpe_context,
2394                                           encoder_context,
2395                                           &scaling_surface_param);
2396 
2397     gen8_gpe_setup_interface_data(ctx, gpe_context);
2398 
2399     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2400     /* the scaling is based on 8x8 blk level */
2401     kernel_walker_param.resolution_x = downscaled_width_in_mb * 2;
2402     kernel_walker_param.resolution_y = downscaled_height_in_mb * 2;
2403     kernel_walker_param.no_dependency = 1;
2404 
2405     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2406 
2407     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2408                                         gpe_context,
2409                                         media_function,
2410                                         &media_object_walker_param);
2411 
2412     return VA_STATUS_SUCCESS;
2413 }
2414 
2415 static void
gen9_vp9_dys_set_sampler_state(struct i965_gpe_context * gpe_context)2416 gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context)
2417 {
2418     struct gen9_sampler_8x8_avs                *sampler_cmd;
2419 
2420     if (!gpe_context)
2421         return;
2422 
2423     dri_bo_map(gpe_context->sampler.bo, 1);
2424 
2425     if (!gpe_context->sampler.bo->virtual)
2426         return;
2427 
2428     sampler_cmd = (struct gen9_sampler_8x8_avs *)
2429                   (gpe_context->sampler.bo->virtual + gpe_context->sampler.offset);
2430 
2431     memset(sampler_cmd, 0, sizeof(struct gen9_sampler_8x8_avs));
2432 
2433     sampler_cmd->dw0.r3c_coefficient                      = 15;
2434     sampler_cmd->dw0.r3x_coefficient                      = 6;
2435     sampler_cmd->dw0.strong_edge_threshold                = 8;
2436     sampler_cmd->dw0.weak_edge_threshold                  = 1;
2437     sampler_cmd->dw0.gain_factor                          = 32;
2438 
2439     sampler_cmd->dw2.r5c_coefficient                     = 3;
2440     sampler_cmd->dw2.r5cx_coefficient                    = 8;
2441     sampler_cmd->dw2.r5x_coefficient                     = 9;
2442     sampler_cmd->dw2.strong_edge_weight                  = 6;
2443     sampler_cmd->dw2.regular_weight                      = 3;
2444     sampler_cmd->dw2.non_edge_weight                     = 2;
2445     sampler_cmd->dw2.global_noise_estimation             = 255;
2446 
2447     sampler_cmd->dw3.enable_8tap_adaptive_filter         = 0;
2448     sampler_cmd->dw3.cos_alpha                           = 79;
2449     sampler_cmd->dw3.sin_alpha                           = 101;
2450 
2451     sampler_cmd->dw5.diamond_du                           = 0;
2452     sampler_cmd->dw5.hs_margin                            = 3;
2453     sampler_cmd->dw5.diamond_alpha                        = 100;
2454 
2455     sampler_cmd->dw7.inv_margin_vyl                       = 3300;
2456 
2457     sampler_cmd->dw8.inv_margin_vyu                       = 1600;
2458 
2459     sampler_cmd->dw10.y_slope2                            = 24;
2460     sampler_cmd->dw10.s0l                                 = 1792;
2461 
2462     sampler_cmd->dw12.y_slope1                            = 24;
2463 
2464     sampler_cmd->dw14.s0u                                = 256;
2465 
2466     sampler_cmd->dw15.s2u                                = 1792;
2467     sampler_cmd->dw15.s1u                                = 0;
2468 
2469     memcpy(sampler_cmd->coefficients,
2470            &gen9_vp9_avs_coeffs[0],
2471            17 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2472 
2473     sampler_cmd->dw152.default_sharpness_level     = 255;
2474     sampler_cmd->dw152.max_derivative_4_pixels     = 7;
2475     sampler_cmd->dw152.max_derivative_8_pixels     = 20;
2476     sampler_cmd->dw152.transition_area_with_4_pixels    = 4;
2477     sampler_cmd->dw152.transition_area_with_8_pixels    = 5;
2478 
2479     sampler_cmd->dw153.bypass_x_adaptive_filtering  = 1;
2480     sampler_cmd->dw153.bypass_y_adaptive_filtering  = 1;
2481     sampler_cmd->dw153.adaptive_filter_for_all_channel = 0;
2482 
2483     memcpy(sampler_cmd->extra_coefficients,
2484            &gen9_vp9_avs_coeffs[17 * 8],
2485            15 * sizeof(struct gen8_sampler_8x8_avs_coefficients));
2486 
2487     dri_bo_unmap(gpe_context->sampler.bo);
2488 }
2489 
2490 static void
gen9_vp9_set_curbe_dys(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_dys_curbe_param * curbe_param)2491 gen9_vp9_set_curbe_dys(VADriverContextP ctx,
2492                        struct encode_state *encode_state,
2493                        struct i965_gpe_context *gpe_context,
2494                        struct intel_encoder_context *encoder_context,
2495                        struct gen9_vp9_dys_curbe_param *curbe_param)
2496 {
2497     vp9_dys_curbe_data  *curbe_cmd;
2498 
2499     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2500 
2501     if (!curbe_cmd)
2502         return;
2503 
2504     memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data));
2505 
2506     curbe_cmd->dw0.input_frame_width    = curbe_param->input_width;
2507     curbe_cmd->dw0.input_frame_height   = curbe_param->input_height;
2508 
2509     curbe_cmd->dw1.output_frame_width   = curbe_param->output_width;
2510     curbe_cmd->dw1.output_frame_height  = curbe_param->output_height;
2511 
2512     curbe_cmd->dw2.delta_u                 = 1.0f / curbe_param->output_width;
2513     curbe_cmd->dw3.delta_v                 = 1.0f / curbe_param->output_height;
2514 
2515     curbe_cmd->dw16.input_frame_nv12_bti  = VP9_BTI_DYS_INPUT_NV12;
2516     curbe_cmd->dw17.output_frame_y_bti    = VP9_BTI_DYS_OUTPUT_Y;
2517     curbe_cmd->dw18.avs_sample_idx            = 0;
2518 
2519     i965_gpe_context_unmap_curbe(gpe_context);
2520 }
2521 
2522 static void
gen9_vp9_send_dys_surface(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_dys_surface_param * surface_param)2523 gen9_vp9_send_dys_surface(VADriverContextP ctx,
2524                           struct encode_state *encode_state,
2525                           struct i965_gpe_context *gpe_context,
2526                           struct intel_encoder_context *encoder_context,
2527                           struct gen9_vp9_dys_surface_param *surface_param)
2528 {
2529 
2530     if (surface_param->input_frame)
2531         i965_add_adv_gpe_surface(ctx,
2532                                  gpe_context,
2533                                  surface_param->input_frame,
2534                                  VP9_BTI_DYS_INPUT_NV12);
2535 
2536     if (surface_param->output_frame) {
2537         i965_add_2d_gpe_surface(ctx,
2538                                 gpe_context,
2539                                 surface_param->output_frame,
2540                                 0,
2541                                 1,
2542                                 I965_SURFACEFORMAT_R8_UNORM,
2543                                 VP9_BTI_DYS_OUTPUT_Y);
2544 
2545         i965_add_2d_gpe_surface(ctx,
2546                                 gpe_context,
2547                                 surface_param->output_frame,
2548                                 1,
2549                                 1,
2550                                 I965_SURFACEFORMAT_R16_UINT,
2551                                 VP9_BTI_DYS_OUTPUT_UV);
2552     }
2553 
2554     return;
2555 }
2556 
2557 static VAStatus
gen9_vp9_dys_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,gen9_vp9_dys_kernel_param * dys_kernel_param)2558 gen9_vp9_dys_kernel(VADriverContextP ctx,
2559                     struct encode_state *encode_state,
2560                     struct intel_encoder_context *encoder_context,
2561                     gen9_vp9_dys_kernel_param *dys_kernel_param)
2562 {
2563     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
2564     struct i965_gpe_context *gpe_context;
2565     int media_function;
2566     struct gen9_vp9_dys_curbe_param                 curbe_param;
2567     struct gen9_vp9_dys_surface_param               surface_param;
2568     struct gpe_media_object_walker_parameter        media_object_walker_param;
2569     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
2570     unsigned int                                    resolution_x, resolution_y;
2571 
2572     media_function = VP9_MEDIA_STATE_DYS;
2573     gpe_context = &vme_context->dys_context.gpe_context;
2574 
2575     //gen8_gpe_context_init(ctx, gpe_context);
2576     gen9_gpe_reset_binding_table(ctx, gpe_context);
2577 
2578     /* sampler state is configured only when initializing the GPE context */
2579 
2580     memset(&curbe_param, 0, sizeof(curbe_param));
2581     curbe_param.input_width   = dys_kernel_param->input_width;
2582     curbe_param.input_height  = dys_kernel_param->input_height;
2583     curbe_param.output_width = dys_kernel_param->output_width;
2584     curbe_param.output_height = dys_kernel_param->output_height;
2585     vme_context->pfn_set_curbe_dys(ctx, encode_state,
2586                                    gpe_context,
2587                                    encoder_context,
2588                                    &curbe_param);
2589 
2590     // Add surface states
2591     memset(&surface_param, 0, sizeof(surface_param));
2592     surface_param.input_frame = dys_kernel_param->input_surface;
2593     surface_param.output_frame = dys_kernel_param->output_surface;
2594     surface_param.vert_line_stride = 0;
2595     surface_param.vert_line_stride_offset = 0;
2596 
2597     vme_context->pfn_send_dys_surface(ctx,
2598                                       encode_state,
2599                                       gpe_context,
2600                                       encoder_context,
2601                                       &surface_param);
2602 
2603     resolution_x = ALIGN(dys_kernel_param->output_width, 16) / 16;
2604     resolution_y = ALIGN(dys_kernel_param->output_height, 16) / 16;
2605 
2606     gen8_gpe_setup_interface_data(ctx, gpe_context);
2607 
2608     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
2609     kernel_walker_param.resolution_x = resolution_x;
2610     kernel_walker_param.resolution_y = resolution_y;
2611     kernel_walker_param.no_dependency = 1;
2612 
2613     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
2614 
2615     gen9_run_kernel_media_object_walker(ctx, encoder_context,
2616                                         gpe_context,
2617                                         media_function,
2618                                         &media_object_walker_param);
2619 
2620     return VA_STATUS_SUCCESS;
2621 }
2622 
2623 static VAStatus
gen9_vp9_run_dys_refframes(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)2624 gen9_vp9_run_dys_refframes(VADriverContextP ctx,
2625                            struct encode_state *encode_state,
2626                            struct intel_encoder_context *encoder_context)
2627 {
2628     struct gen9_vp9_state *vp9_state;
2629     VAEncPictureParameterBufferVP9  *pic_param;
2630     gen9_vp9_dys_kernel_param dys_kernel_param;
2631     struct object_surface *obj_surface;
2632     struct object_surface *input_surface, *output_surface;
2633     struct gen9_surface_vp9 *vp9_priv_surface;
2634 
2635     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2636 
2637     if (!vp9_state || !vp9_state->pic_param)
2638         return VA_STATUS_ERROR_INVALID_PARAMETER;
2639 
2640     pic_param = vp9_state->pic_param;
2641 
2642     if ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
2643         (pic_param->frame_height_src != pic_param->frame_height_dst)) {
2644         input_surface = encode_state->input_yuv_object;
2645         obj_surface = encode_state->reconstructed_object;
2646         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2647         output_surface = vp9_priv_surface->dys_surface_obj;
2648 
2649         memset(&dys_kernel_param, 0, sizeof(dys_kernel_param));
2650         dys_kernel_param.input_width = pic_param->frame_width_src;
2651         dys_kernel_param.input_height = pic_param->frame_height_src;
2652         dys_kernel_param.input_surface = input_surface;
2653         dys_kernel_param.output_width = pic_param->frame_width_dst;
2654         dys_kernel_param.output_height = pic_param->frame_height_dst;
2655         dys_kernel_param.output_surface = output_surface;
2656         gen9_vp9_dys_kernel(ctx, encode_state,
2657                             encoder_context,
2658                             &dys_kernel_param);
2659     }
2660 
2661     if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
2662         vp9_state->last_ref_obj) {
2663         obj_surface = vp9_state->last_ref_obj;
2664         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2665 
2666         input_surface = obj_surface;
2667         output_surface = vp9_priv_surface->dys_surface_obj;
2668 
2669         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2670         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2671         dys_kernel_param.input_surface = input_surface;
2672 
2673         dys_kernel_param.output_width = pic_param->frame_width_dst;
2674         dys_kernel_param.output_height = pic_param->frame_height_dst;
2675         dys_kernel_param.output_surface = output_surface;
2676 
2677         gen9_vp9_dys_kernel(ctx, encode_state,
2678                             encoder_context,
2679                             &dys_kernel_param);
2680 
2681         if (vp9_state->hme_enabled) {
2682             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2683             dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2684             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2685 
2686             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2687             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2688             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2689 
2690             gen9_vp9_dys_kernel(ctx, encode_state,
2691                                 encoder_context,
2692                                 &dys_kernel_param);
2693 
2694             /* Does it really need to do the 16x HME if the
2695              * resolution is different?
2696              * Maybe it should be restricted
2697              */
2698             if (vp9_state->b16xme_enabled) {
2699                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2700                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2701                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2702 
2703                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2704                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2705                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2706 
2707                 gen9_vp9_dys_kernel(ctx, encode_state,
2708                                     encoder_context,
2709                                     &dys_kernel_param);
2710             }
2711         }
2712     }
2713 
2714     if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
2715         vp9_state->golden_ref_obj) {
2716         obj_surface = vp9_state->golden_ref_obj;
2717         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2718 
2719         input_surface = obj_surface;
2720         output_surface = vp9_priv_surface->dys_surface_obj;
2721 
2722         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2723         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2724         dys_kernel_param.input_surface = input_surface;
2725 
2726         dys_kernel_param.output_width = pic_param->frame_width_dst;
2727         dys_kernel_param.output_height = pic_param->frame_height_dst;
2728         dys_kernel_param.output_surface = output_surface;
2729 
2730         gen9_vp9_dys_kernel(ctx, encode_state,
2731                             encoder_context,
2732                             &dys_kernel_param);
2733 
2734         if (vp9_state->hme_enabled) {
2735             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2736             dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2737             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2738 
2739             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2740             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2741             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2742 
2743             gen9_vp9_dys_kernel(ctx, encode_state,
2744                                 encoder_context,
2745                                 &dys_kernel_param);
2746 
2747             /* Does it really need to do the 16x HME if the
2748              * resolution is different?
2749              * Maybe it should be restricted
2750              */
2751             if (vp9_state->b16xme_enabled) {
2752                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2753                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2754                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2755 
2756                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2757                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2758                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2759 
2760                 gen9_vp9_dys_kernel(ctx, encode_state,
2761                                     encoder_context,
2762                                     &dys_kernel_param);
2763             }
2764         }
2765     }
2766 
2767     if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
2768         vp9_state->alt_ref_obj) {
2769         obj_surface = vp9_state->alt_ref_obj;
2770         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2771 
2772         input_surface = obj_surface;
2773         output_surface = vp9_priv_surface->dys_surface_obj;
2774 
2775         dys_kernel_param.input_width = vp9_priv_surface->frame_width;
2776         dys_kernel_param.input_height = vp9_priv_surface->frame_height;
2777         dys_kernel_param.input_surface = input_surface;
2778 
2779         dys_kernel_param.output_width = pic_param->frame_width_dst;
2780         dys_kernel_param.output_height = pic_param->frame_height_dst;
2781         dys_kernel_param.output_surface = output_surface;
2782 
2783         gen9_vp9_dys_kernel(ctx, encode_state,
2784                             encoder_context,
2785                             &dys_kernel_param);
2786 
2787         if (vp9_state->hme_enabled) {
2788             dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 4), 16);
2789             dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 4), 16);
2790             dys_kernel_param.input_surface = vp9_priv_surface->scaled_4x_surface_obj;
2791 
2792             dys_kernel_param.output_width = vp9_state->frame_width_4x;
2793             dys_kernel_param.output_height = vp9_state->frame_height_4x;
2794             dys_kernel_param.output_surface = vp9_priv_surface->dys_4x_surface_obj;
2795 
2796             gen9_vp9_dys_kernel(ctx, encode_state,
2797                                 encoder_context,
2798                                 &dys_kernel_param);
2799 
2800             /* Does it really need to do the 16x HME if the
2801              * resolution is different?
2802              * Maybe it should be restricted
2803              */
2804             if (vp9_state->b16xme_enabled) {
2805                 dys_kernel_param.input_width = ALIGN((vp9_priv_surface->frame_width / 16), 16);
2806                 dys_kernel_param.input_height = ALIGN((vp9_priv_surface->frame_height / 16), 16);
2807                 dys_kernel_param.input_surface = vp9_priv_surface->scaled_16x_surface_obj;
2808 
2809                 dys_kernel_param.output_width = vp9_state->frame_width_16x;
2810                 dys_kernel_param.output_height = vp9_state->frame_height_16x;
2811                 dys_kernel_param.output_surface = vp9_priv_surface->dys_16x_surface_obj;
2812 
2813                 gen9_vp9_dys_kernel(ctx, encode_state,
2814                                     encoder_context,
2815                                     &dys_kernel_param);
2816             }
2817         }
2818     }
2819 
2820     return VA_STATUS_SUCCESS;
2821 }
2822 
2823 static void
gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_mbenc_curbe_param * curbe_param)2824 gen9_vp9_set_curbe_mbenc(VADriverContextP ctx,
2825                          struct encode_state *encode_state,
2826                          struct i965_gpe_context *gpe_context,
2827                          struct intel_encoder_context *encoder_context,
2828                          struct gen9_vp9_mbenc_curbe_param *curbe_param)
2829 {
2830     struct gen9_vp9_state *vp9_state;
2831     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
2832     vp9_mbenc_curbe_data  *curbe_cmd;
2833     VAEncPictureParameterBufferVP9  *pic_param;
2834     int i, segment_count;
2835     int seg_qindex;
2836     struct object_surface *obj_surface;
2837     struct gen9_surface_vp9 *vp9_priv_surface;
2838 
2839     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
2840 
2841     if (!vp9_state || !vp9_state->pic_param)
2842         return;
2843 
2844     pic_param = curbe_param->ppic_param;
2845     seg_param = curbe_param->psegment_param;
2846 
2847     if (!seg_param) {
2848         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
2849         seg_param = &tmp_seg_param;
2850     }
2851 
2852     curbe_cmd = i965_gpe_context_map_curbe(gpe_context);
2853 
2854     if (!curbe_cmd)
2855         return;
2856 
2857     memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data));
2858 
2859     if (vp9_state->dys_in_use) {
2860         curbe_cmd->dw0.frame_width = pic_param->frame_width_dst;
2861         curbe_cmd->dw0.frame_height = pic_param->frame_height_dst;
2862     } else {
2863         curbe_cmd->dw0.frame_width = pic_param->frame_width_src;
2864         curbe_cmd->dw0.frame_height = pic_param->frame_height_src;
2865     }
2866 
2867     curbe_cmd->dw1.frame_type = curbe_param->picture_coding_type;
2868 
2869     curbe_cmd->dw1.segmentation_enable = pic_param->pic_flags.bits.segmentation_enabled;
2870     if (pic_param->pic_flags.bits.segmentation_enabled)
2871         segment_count = 8;
2872     else
2873         segment_count = 1;
2874 
2875     curbe_cmd->dw1.ref_frame_flags = curbe_param->ref_frame_flag;
2876 
2877     //right now set them to normal settings
2878     if (curbe_param->picture_coding_type) {
2879         switch (vp9_state->target_usage) {
2880         case INTEL_ENC_VP9_TU_QUALITY:
2881             curbe_cmd->dw1.min_16for32_check    = 0x00;
2882             curbe_cmd->dw2.multi_pred           = 0x02;
2883             curbe_cmd->dw2.len_sp               = 0x39;
2884             curbe_cmd->dw2.search_x             = 0x30;
2885             curbe_cmd->dw2.search_y             = 0x28;
2886             curbe_cmd->dw3.min_ref_for32_check = 0x01;
2887             curbe_cmd->dw4.skip16_threshold     = 0x000A;
2888             curbe_cmd->dw4.disable_mr_threshold = 0x000C;
2889 
2890             memcpy(&curbe_cmd->dw16,
2891                    vp9_diamond_ime_search_path_delta,
2892                    14 * sizeof(unsigned int));
2893             break;
2894         case INTEL_ENC_VP9_TU_PERFORMANCE:
2895             curbe_cmd->dw1.min_16for32_check    = 0x02;
2896             curbe_cmd->dw2.multi_pred           = 0x00;
2897             curbe_cmd->dw2.len_sp               = 0x10;
2898             curbe_cmd->dw2.search_x             = 0x20;
2899             curbe_cmd->dw2.search_y             = 0x20;
2900             curbe_cmd->dw3.min_ref_for32_check = 0x03;
2901             curbe_cmd->dw4.skip16_threshold     = 0x0014;
2902             curbe_cmd->dw4.disable_mr_threshold = 0x0016;
2903 
2904             memcpy(&curbe_cmd->dw16,
2905                    vp9_fullspiral_ime_search_path_delta,
2906                    14 * sizeof(unsigned int));
2907 
2908             break;
2909         default:  // normal settings
2910             curbe_cmd->dw1.min_16for32_check     = 0x01;
2911             curbe_cmd->dw2.multi_pred           = 0x00;
2912             curbe_cmd->dw2.len_sp               = 0x19;
2913             curbe_cmd->dw2.search_x             = 0x30;
2914             curbe_cmd->dw2.search_y             = 0x28;
2915             curbe_cmd->dw3.min_ref_for32_check = 0x02;
2916             curbe_cmd->dw4.skip16_threshold     = 0x000F;
2917             curbe_cmd->dw4.disable_mr_threshold = 0x0011;
2918 
2919             memcpy(&curbe_cmd->dw16,
2920                    vp9_diamond_ime_search_path_delta,
2921                    14 * sizeof(unsigned int));
2922             break;
2923         }
2924 
2925         curbe_cmd->dw3.hme_enabled               = curbe_param->hme_enabled;
2926         curbe_cmd->dw3.multi_ref_qp_check         = curbe_param->multi_ref_qp_check;
2927         // co-located predictor must be disabled when dynamic scaling is enabled
2928         curbe_cmd->dw3.disable_temp_pred    = vp9_state->dys_in_use;
2929     }
2930 
2931     curbe_cmd->dw5.inter_round = 0;
2932     curbe_cmd->dw5.intra_round = 4;
2933     curbe_cmd->dw5.frame_qpindex = pic_param->luma_ac_qindex;
2934 
2935     for (i = 0; i < segment_count; i++) {
2936         seg_qindex = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta
2937                      + seg_param->seg_data[i].segment_qindex_delta;
2938 
2939         seg_qindex = CLAMP(0, 255, seg_qindex);
2940 
2941         if (curbe_param->picture_coding_type)
2942             memcpy(&curbe_cmd->segments[i],
2943                    &intel_vp9_costlut_p[seg_qindex * 16],
2944                    16 * sizeof(unsigned int));
2945         else
2946             memcpy(&curbe_cmd->segments[i],
2947                    &intel_vp9_costlut_key[seg_qindex * 16],
2948                    16 * sizeof(unsigned int));
2949     }
2950 
2951     if (curbe_param->picture_coding_type) {
2952         if (curbe_cmd->dw3.multi_ref_qp_check) {
2953             if (curbe_param->ref_frame_flag & 0x01) {
2954                 obj_surface = curbe_param->last_ref_obj;
2955                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2956                 curbe_cmd->dw8.last_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2957             }
2958 
2959             if (curbe_param->ref_frame_flag & 0x02) {
2960                 obj_surface = curbe_param->golden_ref_obj;
2961                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2962                 curbe_cmd->dw8.golden_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2963             }
2964 
2965             if (curbe_param->ref_frame_flag & 0x04) {
2966                 obj_surface = curbe_param->alt_ref_obj;
2967                 vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
2968                 curbe_cmd->dw9.alt_ref_qp = vp9_quant_dc[vp9_priv_surface->qp_value];
2969             }
2970         }
2971     }
2972     curbe_cmd->dw160.enc_curr_y_surf_bti           = VP9_BTI_MBENC_CURR_Y_G9;
2973     curbe_cmd->dw162.enc_curr_nv12_surf_bti        = VP9_BTI_MBENC_CURR_NV12_G9;
2974     curbe_cmd->dw166.segmentation_map_bti          = VP9_BTI_MBENC_SEGMENTATION_MAP_G9;
2975     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
2976     curbe_cmd->dw167.tx_curbe_bti                = VP9_BTI_MBENC_TX_CURBE_G9;
2977     curbe_cmd->dw168.hme_mvdata_bti             = VP9_BTI_MBENC_HME_MV_DATA_G9;
2978     curbe_cmd->dw169.hme_distortion_bti          = VP9_BTI_MBENC_HME_DISTORTION_G9;
2979     curbe_cmd->dw171.mode_decision_prev_bti      = VP9_BTI_MBENC_MODE_DECISION_PREV_G9;
2980     curbe_cmd->dw172.mode_decision_bti           = VP9_BTI_MBENC_MODE_DECISION_G9;
2981     curbe_cmd->dw173.output_16x16_inter_modes_bti = VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9;
2982     curbe_cmd->dw174.cu_record_bti               = VP9_BTI_MBENC_CU_RECORDS_G9;
2983     curbe_cmd->dw175.pak_data_bti                = VP9_BTI_MBENC_PAK_DATA_G9;
2984 
2985     i965_gpe_context_unmap_curbe(gpe_context);
2986     return;
2987 }
2988 
2989 static void
gen9_vp9_send_mbenc_surface(VADriverContextP ctx,struct encode_state * encode_state,struct i965_gpe_context * gpe_context,struct intel_encoder_context * encoder_context,struct gen9_vp9_mbenc_surface_param * mbenc_param)2990 gen9_vp9_send_mbenc_surface(VADriverContextP ctx,
2991                             struct encode_state *encode_state,
2992                             struct i965_gpe_context *gpe_context,
2993                             struct intel_encoder_context *encoder_context,
2994                             struct gen9_vp9_mbenc_surface_param *mbenc_param)
2995 {
2996     struct gen9_vp9_state *vp9_state;
2997     unsigned int            res_size;
2998     unsigned int            frame_width_in_sb, frame_height_in_sb;
2999     struct object_surface   *obj_surface, *tmp_input;
3000     struct gen9_surface_vp9 *vp9_priv_surface;
3001     int media_function;
3002 
3003     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3004 
3005     if (!vp9_state || !vp9_state->pic_param)
3006         return;
3007 
3008     frame_width_in_sb = ALIGN(mbenc_param->frame_width, 64) / 64;
3009     frame_height_in_sb = ALIGN(mbenc_param->frame_height, 64) / 64;
3010     media_function = mbenc_param->media_state_type;
3011 
3012     switch (media_function) {
3013     case VP9_MEDIA_STATE_MBENC_I_32x32: {
3014         obj_surface = mbenc_param->curr_frame_obj;
3015 
3016         i965_add_2d_gpe_surface(ctx,
3017                                 gpe_context,
3018                                 obj_surface,
3019                                 0,
3020                                 1,
3021                                 I965_SURFACEFORMAT_R8_UNORM,
3022                                 VP9_BTI_MBENC_CURR_Y_G9);
3023 
3024         i965_add_2d_gpe_surface(ctx,
3025                                 gpe_context,
3026                                 obj_surface,
3027                                 1,
3028                                 1,
3029                                 I965_SURFACEFORMAT_R16_UINT,
3030                                 VP9_BTI_MBENC_CURR_UV_G9);
3031 
3032 
3033         if (mbenc_param->segmentation_enabled) {
3034             i965_add_buffer_2d_gpe_surface(ctx,
3035                                            gpe_context,
3036                                            mbenc_param->pres_segmentation_map,
3037                                            1,
3038                                            I965_SURFACEFORMAT_R8_UNORM,
3039                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3040 
3041         }
3042 
3043         res_size = 16 * mbenc_param->frame_width_in_mb *
3044                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3045         i965_add_buffer_gpe_surface(ctx,
3046                                     gpe_context,
3047                                     mbenc_param->pres_mode_decision,
3048                                     0,
3049                                     res_size / 4,
3050                                     0,
3051                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3052 
3053         break;
3054     }
3055     case VP9_MEDIA_STATE_MBENC_I_16x16: {
3056         obj_surface = mbenc_param->curr_frame_obj;
3057 
3058         i965_add_2d_gpe_surface(ctx,
3059                                 gpe_context,
3060                                 obj_surface,
3061                                 0,
3062                                 1,
3063                                 I965_SURFACEFORMAT_R8_UNORM,
3064                                 VP9_BTI_MBENC_CURR_Y_G9);
3065 
3066         i965_add_2d_gpe_surface(ctx,
3067                                 gpe_context,
3068                                 obj_surface,
3069                                 1,
3070                                 1,
3071                                 I965_SURFACEFORMAT_R16_UINT,
3072                                 VP9_BTI_MBENC_CURR_UV_G9);
3073 
3074         i965_add_adv_gpe_surface(ctx, gpe_context,
3075                                  obj_surface,
3076                                  VP9_BTI_MBENC_CURR_NV12_G9);
3077 
3078         if (mbenc_param->segmentation_enabled) {
3079             i965_add_buffer_2d_gpe_surface(ctx,
3080                                            gpe_context,
3081                                            mbenc_param->pres_segmentation_map,
3082                                            1,
3083                                            I965_SURFACEFORMAT_R8_UNORM,
3084                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3085 
3086         }
3087 
3088         res_size = 16 * mbenc_param->frame_width_in_mb *
3089                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3090         i965_add_buffer_gpe_surface(ctx,
3091                                     gpe_context,
3092                                     mbenc_param->pres_mode_decision,
3093                                     0,
3094                                     res_size / 4,
3095                                     0,
3096                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3097 
3098         res_size = 160;
3099 
3100         gen9_add_dri_buffer_gpe_surface(ctx,
3101                                         gpe_context,
3102                                         mbenc_param->gpe_context_tx->curbe.bo,
3103                                         0,
3104                                         ALIGN(res_size, 64),
3105                                         mbenc_param->gpe_context_tx->curbe.offset,
3106                                         VP9_BTI_MBENC_TX_CURBE_G9);
3107 
3108         break;
3109     }
3110     case VP9_MEDIA_STATE_MBENC_P: {
3111         obj_surface = mbenc_param->curr_frame_obj;
3112 
3113         i965_add_2d_gpe_surface(ctx,
3114                                 gpe_context,
3115                                 obj_surface,
3116                                 0,
3117                                 1,
3118                                 I965_SURFACEFORMAT_R8_UNORM,
3119                                 VP9_BTI_MBENC_CURR_Y_G9);
3120 
3121         i965_add_2d_gpe_surface(ctx, gpe_context,
3122                                 obj_surface,
3123                                 1,
3124                                 1,
3125                                 I965_SURFACEFORMAT_R16_UINT,
3126                                 VP9_BTI_MBENC_CURR_UV_G9);
3127 
3128         i965_add_adv_gpe_surface(ctx, gpe_context,
3129                                  obj_surface,
3130                                  VP9_BTI_MBENC_CURR_NV12_G9);
3131 
3132         if (mbenc_param->last_ref_obj) {
3133             obj_surface = mbenc_param->last_ref_obj;
3134             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3135 
3136             if (vp9_state->dys_in_use &&
3137                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3138                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3139                 tmp_input = vp9_priv_surface->dys_surface_obj;
3140             else
3141                 tmp_input = obj_surface;
3142 
3143             i965_add_adv_gpe_surface(ctx, gpe_context,
3144                                      tmp_input,
3145                                      VP9_BTI_MBENC_LAST_NV12_G9);
3146 
3147             i965_add_adv_gpe_surface(ctx, gpe_context,
3148                                      tmp_input,
3149                                      VP9_BTI_MBENC_LAST_NV12_G9 + 1);
3150 
3151         }
3152 
3153         if (mbenc_param->golden_ref_obj) {
3154             obj_surface = mbenc_param->golden_ref_obj;
3155             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3156 
3157             if (vp9_state->dys_in_use &&
3158                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3159                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3160                 tmp_input = vp9_priv_surface->dys_surface_obj;
3161             else
3162                 tmp_input = obj_surface;
3163 
3164             i965_add_adv_gpe_surface(ctx, gpe_context,
3165                                      tmp_input,
3166                                      VP9_BTI_MBENC_GOLD_NV12_G9);
3167 
3168             i965_add_adv_gpe_surface(ctx, gpe_context,
3169                                      tmp_input,
3170                                      VP9_BTI_MBENC_GOLD_NV12_G9 + 1);
3171 
3172         }
3173 
3174         if (mbenc_param->alt_ref_obj) {
3175             obj_surface = mbenc_param->alt_ref_obj;
3176             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3177 
3178             if (vp9_state->dys_in_use &&
3179                 ((vp9_priv_surface->frame_width != vp9_state->frame_width) ||
3180                  (vp9_priv_surface->frame_height != vp9_state->frame_height)))
3181                 tmp_input = vp9_priv_surface->dys_surface_obj;
3182             else
3183                 tmp_input = obj_surface;
3184 
3185             i965_add_adv_gpe_surface(ctx, gpe_context,
3186                                      tmp_input,
3187                                      VP9_BTI_MBENC_ALTREF_NV12_G9);
3188 
3189             i965_add_adv_gpe_surface(ctx, gpe_context,
3190                                      tmp_input,
3191                                      VP9_BTI_MBENC_ALTREF_NV12_G9 + 1);
3192 
3193         }
3194 
3195         if (mbenc_param->hme_enabled) {
3196             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3197                                            mbenc_param->ps4x_memv_data_buffer,
3198                                            1,
3199                                            I965_SURFACEFORMAT_R8_UNORM,
3200                                            VP9_BTI_MBENC_HME_MV_DATA_G9);
3201 
3202             i965_add_buffer_2d_gpe_surface(ctx, gpe_context,
3203                                            mbenc_param->ps4x_memv_distortion_buffer,
3204                                            1,
3205                                            I965_SURFACEFORMAT_R8_UNORM,
3206                                            VP9_BTI_MBENC_HME_DISTORTION_G9);
3207         }
3208 
3209         if (mbenc_param->segmentation_enabled) {
3210             i965_add_buffer_2d_gpe_surface(ctx,
3211                                            gpe_context,
3212                                            mbenc_param->pres_segmentation_map,
3213                                            1,
3214                                            I965_SURFACEFORMAT_R8_UNORM,
3215                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3216 
3217         }
3218 
3219         res_size = 16 * mbenc_param->frame_width_in_mb *
3220                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3221         i965_add_buffer_gpe_surface(ctx,
3222                                     gpe_context,
3223                                     mbenc_param->pres_mode_decision_prev,
3224                                     0,
3225                                     res_size / 4,
3226                                     0,
3227                                     VP9_BTI_MBENC_MODE_DECISION_PREV_G9);
3228 
3229         i965_add_buffer_gpe_surface(ctx,
3230                                     gpe_context,
3231                                     mbenc_param->pres_mode_decision,
3232                                     0,
3233                                     res_size / 4,
3234                                     0,
3235                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3236 
3237         i965_add_buffer_2d_gpe_surface(ctx,
3238                                        gpe_context,
3239                                        mbenc_param->pres_output_16x16_inter_modes,
3240                                        1,
3241                                        I965_SURFACEFORMAT_R8_UNORM,
3242                                        VP9_BTI_MBENC_OUT_16x16_INTER_MODES_G9);
3243 
3244         res_size = 160;
3245 
3246         gen9_add_dri_buffer_gpe_surface(ctx,
3247                                         gpe_context,
3248                                         mbenc_param->gpe_context_tx->curbe.bo,
3249                                         0,
3250                                         ALIGN(res_size, 64),
3251                                         mbenc_param->gpe_context_tx->curbe.offset,
3252                                         VP9_BTI_MBENC_TX_CURBE_G9);
3253 
3254 
3255         break;
3256     }
3257     case VP9_MEDIA_STATE_MBENC_TX: {
3258         obj_surface = mbenc_param->curr_frame_obj;
3259 
3260         i965_add_2d_gpe_surface(ctx,
3261                                 gpe_context,
3262                                 obj_surface,
3263                                 0,
3264                                 1,
3265                                 I965_SURFACEFORMAT_R8_UNORM,
3266                                 VP9_BTI_MBENC_CURR_Y_G9);
3267 
3268         i965_add_2d_gpe_surface(ctx,
3269                                 gpe_context,
3270                                 obj_surface,
3271                                 1,
3272                                 1,
3273                                 I965_SURFACEFORMAT_R16_UINT,
3274                                 VP9_BTI_MBENC_CURR_UV_G9);
3275 
3276         if (mbenc_param->segmentation_enabled) {
3277             i965_add_buffer_2d_gpe_surface(ctx,
3278                                            gpe_context,
3279                                            mbenc_param->pres_segmentation_map,
3280                                            1,
3281                                            I965_SURFACEFORMAT_R8_UNORM,
3282                                            VP9_BTI_MBENC_SEGMENTATION_MAP_G9);
3283 
3284         }
3285 
3286         res_size = 16 * mbenc_param->frame_width_in_mb *
3287                    mbenc_param->frame_height_in_mb * sizeof(unsigned int);
3288         i965_add_buffer_gpe_surface(ctx,
3289                                     gpe_context,
3290                                     mbenc_param->pres_mode_decision,
3291                                     0,
3292                                     res_size / 4,
3293                                     0,
3294                                     VP9_BTI_MBENC_MODE_DECISION_G9);
3295 
3296         res_size = frame_width_in_sb * frame_height_in_sb * 4 * sizeof(unsigned int);
3297         i965_add_buffer_gpe_surface(ctx,
3298                                     gpe_context,
3299                                     mbenc_param->pres_mb_code_surface,
3300                                     0,
3301                                     res_size / 4,
3302                                     0,
3303                                     VP9_BTI_MBENC_PAK_DATA_G9);
3304 
3305         // CU Record
3306         res_size = frame_width_in_sb * frame_height_in_sb *
3307                    64 * 16 * sizeof(unsigned int);
3308 
3309         i965_add_buffer_gpe_surface(ctx,
3310                                     gpe_context,
3311                                     mbenc_param->pres_mb_code_surface,
3312                                     0,
3313                                     res_size / 4,
3314                                     mbenc_param->mb_data_offset,
3315                                     VP9_BTI_MBENC_CU_RECORDS_G9);
3316     }
3317     default:
3318         break;
3319     }
3320 
3321     return;
3322 }
3323 
3324 static VAStatus
gen9_vp9_mbenc_kernel(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,int media_function)3325 gen9_vp9_mbenc_kernel(VADriverContextP ctx,
3326                       struct encode_state *encode_state,
3327                       struct intel_encoder_context *encoder_context,
3328                       int media_function)
3329 {
3330     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3331     struct i965_gpe_context *gpe_context, *tx_gpe_context;
3332     struct gpe_media_object_walker_parameter        media_object_walker_param;
3333     struct gpe_encoder_kernel_walker_parameter      kernel_walker_param;
3334     unsigned int    resolution_x, resolution_y;
3335     struct gen9_vp9_state *vp9_state;
3336     VAEncPictureParameterBufferVP9  *pic_param;
3337     struct gen9_vp9_mbenc_curbe_param               curbe_param;
3338     struct gen9_vp9_mbenc_surface_param             surface_param;
3339     VAStatus    va_status = VA_STATUS_SUCCESS;
3340     int mbenc_gpe_index = 0;
3341     struct object_surface *obj_surface;
3342     struct gen9_surface_vp9 *vp9_priv_surface;
3343 
3344     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3345 
3346     if (!vp9_state || !vp9_state->pic_param)
3347         return VA_STATUS_ERROR_ENCODING_ERROR;
3348 
3349     pic_param = vp9_state->pic_param;
3350 
3351     switch (media_function) {
3352     case VP9_MEDIA_STATE_MBENC_I_32x32:
3353         mbenc_gpe_index = VP9_MBENC_IDX_KEY_32x32;
3354         break;
3355 
3356     case VP9_MEDIA_STATE_MBENC_I_16x16:
3357         mbenc_gpe_index = VP9_MBENC_IDX_KEY_16x16;
3358         break;
3359 
3360     case VP9_MEDIA_STATE_MBENC_P:
3361         mbenc_gpe_index = VP9_MBENC_IDX_INTER;
3362         break;
3363 
3364     case VP9_MEDIA_STATE_MBENC_TX:
3365         mbenc_gpe_index = VP9_MBENC_IDX_TX;
3366         break;
3367 
3368     default:
3369         va_status = VA_STATUS_ERROR_OPERATION_FAILED;
3370         return va_status;
3371     }
3372 
3373     gpe_context = &(vme_context->mbenc_context.gpe_contexts[mbenc_gpe_index]);
3374     tx_gpe_context = &(vme_context->mbenc_context.gpe_contexts[VP9_MBENC_IDX_TX]);
3375 
3376     gen9_gpe_reset_binding_table(ctx, gpe_context);
3377 
3378     // Set curbe
3379     if (!vp9_state->mbenc_curbe_set_in_brc_update) {
3380         if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32 ||
3381             media_function == VP9_MEDIA_STATE_MBENC_P) {
3382             memset(&curbe_param, 0, sizeof(curbe_param));
3383             curbe_param.ppic_param            = vp9_state->pic_param;
3384             curbe_param.pseq_param            = vp9_state->seq_param;
3385             curbe_param.psegment_param        = vp9_state->segment_param;
3386             curbe_param.frame_width_in_mb     = vp9_state->frame_width_in_mb;
3387             curbe_param.frame_height_in_mb    = vp9_state->frame_height_in_mb;
3388             curbe_param.last_ref_obj          = vp9_state->last_ref_obj;
3389             curbe_param.golden_ref_obj        = vp9_state->golden_ref_obj;
3390             curbe_param.alt_ref_obj           = vp9_state->alt_ref_obj;
3391             curbe_param.hme_enabled           = vp9_state->hme_enabled;
3392             curbe_param.ref_frame_flag        = vp9_state->ref_frame_flag;
3393             curbe_param.picture_coding_type   = vp9_state->picture_coding_type;
3394             curbe_param.media_state_type      = media_function;
3395             curbe_param.mbenc_curbe_set_in_brc_update = vp9_state->mbenc_curbe_set_in_brc_update;
3396 
3397             vme_context->pfn_set_curbe_mbenc(ctx,
3398                                              encode_state,
3399                                              gpe_context,
3400                                              encoder_context,
3401                                              &curbe_param);
3402         }
3403     }
3404 
3405     memset(&surface_param, 0, sizeof(surface_param));
3406     surface_param.media_state_type             = media_function;
3407     surface_param.picture_coding_type          = vp9_state->picture_coding_type;
3408     surface_param.frame_width                  = vp9_state->frame_width;
3409     surface_param.frame_height                 = vp9_state->frame_height;
3410     surface_param.frame_width_in_mb            = vp9_state->frame_width_in_mb;
3411     surface_param.frame_height_in_mb           = vp9_state->frame_height_in_mb;
3412     surface_param.hme_enabled                  = vp9_state->hme_enabled;
3413     surface_param.segmentation_enabled         = pic_param->pic_flags.bits.segmentation_enabled;
3414     surface_param.pres_segmentation_map        = &vme_context->mb_segment_map_surface;
3415     surface_param.ps4x_memv_data_buffer        = &vme_context->s4x_memv_data_buffer;
3416     surface_param.ps4x_memv_distortion_buffer  = &vme_context->s4x_memv_distortion_buffer;
3417     surface_param.pres_mode_decision           =
3418         &vme_context->res_mode_decision[vp9_state->curr_mode_decision_index];
3419     surface_param.pres_mode_decision_prev      =
3420         &vme_context->res_mode_decision[!vp9_state->curr_mode_decision_index];
3421     surface_param.pres_output_16x16_inter_modes = &vme_context->res_output_16x16_inter_modes;
3422     surface_param.pres_mbenc_curbe_buffer      = NULL;
3423     surface_param.last_ref_obj               = vp9_state->last_ref_obj;
3424     surface_param.golden_ref_obj             = vp9_state->golden_ref_obj;
3425     surface_param.alt_ref_obj                  = vp9_state->alt_ref_obj;
3426     surface_param.pres_mb_code_surface         = &vme_context->res_mb_code_surface;
3427     surface_param.gpe_context_tx               = tx_gpe_context;
3428     surface_param.mb_data_offset             = vp9_state->mb_data_offset;
3429 
3430     obj_surface = encode_state->reconstructed_object;
3431     vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3432     if (vp9_state->dys_in_use &&
3433         (pic_param->frame_width_src != pic_param->frame_height_dst ||
3434          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3435         obj_surface = vp9_priv_surface->dys_surface_obj;
3436     } else
3437         obj_surface = encode_state->input_yuv_object;
3438 
3439     surface_param.curr_frame_obj             = obj_surface;
3440 
3441     vme_context->pfn_send_mbenc_surface(ctx,
3442                                         encode_state,
3443                                         gpe_context,
3444                                         encoder_context,
3445                                         &surface_param);
3446 
3447     if (media_function == VP9_MEDIA_STATE_MBENC_I_32x32) {
3448         resolution_x = ALIGN(vp9_state->frame_width, 32) / 32;
3449         resolution_y = ALIGN(vp9_state->frame_height, 32) / 32;
3450     } else {
3451         resolution_x = ALIGN(vp9_state->frame_width, 16) / 16;
3452         resolution_y = ALIGN(vp9_state->frame_height, 16) / 16;
3453     }
3454 
3455     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
3456     kernel_walker_param.resolution_x = resolution_x;
3457     kernel_walker_param.resolution_y = resolution_y;
3458 
3459     if (media_function == VP9_MEDIA_STATE_MBENC_P ||
3460         media_function == VP9_MEDIA_STATE_MBENC_I_16x16) {
3461         kernel_walker_param.use_scoreboard = 1;
3462         kernel_walker_param.no_dependency = 0;
3463         kernel_walker_param.walker_degree = VP9_45Z_DEGREE;
3464     } else {
3465         kernel_walker_param.use_scoreboard = 0;
3466         kernel_walker_param.no_dependency = 1;
3467     }
3468 
3469     gen8_gpe_setup_interface_data(ctx, gpe_context);
3470 
3471     gen9_init_media_object_walker_parameter(encoder_context, &kernel_walker_param, &media_object_walker_param);
3472 
3473     gen9_run_kernel_media_object_walker(ctx, encoder_context,
3474                                         gpe_context,
3475                                         media_function,
3476                                         &media_object_walker_param);
3477     return va_status;
3478 }
3479 
3480 static void
gen9_init_gpe_context_vp9(VADriverContextP ctx,struct i965_gpe_context * gpe_context,struct vp9_encoder_kernel_parameter * kernel_param)3481 gen9_init_gpe_context_vp9(VADriverContextP ctx,
3482                           struct i965_gpe_context *gpe_context,
3483                           struct vp9_encoder_kernel_parameter *kernel_param)
3484 {
3485     struct i965_driver_data *i965 = i965_driver_data(ctx);
3486 
3487     gpe_context->curbe.length = kernel_param->curbe_size; // in bytes
3488 
3489     gpe_context->sampler.entry_size = 0;
3490     gpe_context->sampler.max_entries = 0;
3491 
3492     if (kernel_param->sampler_size) {
3493         gpe_context->sampler.entry_size = ALIGN(kernel_param->sampler_size, 64);
3494         gpe_context->sampler.max_entries = 1;
3495     }
3496 
3497     gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); // 8 dws, 1 register
3498     gpe_context->idrt.max_entries = NUM_KERNELS_PER_GPE_CONTEXT;
3499 
3500     gpe_context->surface_state_binding_table.max_entries = MAX_VP9_ENCODER_SURFACES;
3501     gpe_context->surface_state_binding_table.binding_table_offset = 0;
3502     gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64);
3503     gpe_context->surface_state_binding_table.length = ALIGN(MAX_VP9_ENCODER_SURFACES * 4, 64) + ALIGN(MAX_VP9_ENCODER_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
3504 
3505     if (i965->intel.eu_total > 0)
3506         gpe_context->vfe_state.max_num_threads = 6 * i965->intel.eu_total;
3507     else
3508         gpe_context->vfe_state.max_num_threads = 112; // 16 EU * 7 threads
3509 
3510     gpe_context->vfe_state.curbe_allocation_size = MAX(1, ALIGN(gpe_context->curbe.length, 32) >> 5); // in registers
3511     gpe_context->vfe_state.urb_entry_size = MAX(1, ALIGN(kernel_param->inline_data_size, 32) >> 5); // in registers
3512     gpe_context->vfe_state.num_urb_entries = (MAX_URB_SIZE -
3513                                               gpe_context->vfe_state.curbe_allocation_size -
3514                                               ((gpe_context->idrt.entry_size >> 5) *
3515                                                gpe_context->idrt.max_entries)) / gpe_context->vfe_state.urb_entry_size;
3516     gpe_context->vfe_state.num_urb_entries = CLAMP(1, 127, gpe_context->vfe_state.num_urb_entries);
3517     gpe_context->vfe_state.gpgpu_mode = 0;
3518 }
3519 
3520 static void
gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context * gpe_context,struct vp9_encoder_scoreboard_parameter * scoreboard_param)3521 gen9_init_vfe_scoreboard_vp9(struct i965_gpe_context *gpe_context,
3522                              struct vp9_encoder_scoreboard_parameter *scoreboard_param)
3523 {
3524     gpe_context->vfe_desc5.scoreboard0.mask = scoreboard_param->mask;
3525     gpe_context->vfe_desc5.scoreboard0.type = scoreboard_param->type;
3526     gpe_context->vfe_desc5.scoreboard0.enable = scoreboard_param->enable;
3527 
3528     if (scoreboard_param->walkpat_flag) {
3529         gpe_context->vfe_desc5.scoreboard0.mask = 0x0F;
3530         gpe_context->vfe_desc5.scoreboard0.type = 1;
3531 
3532         gpe_context->vfe_desc6.scoreboard1.delta_x0 = 0;
3533         gpe_context->vfe_desc6.scoreboard1.delta_y0 = -1;
3534 
3535         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
3536         gpe_context->vfe_desc6.scoreboard1.delta_y1 = -2;
3537 
3538         gpe_context->vfe_desc6.scoreboard1.delta_x2 = -1;
3539         gpe_context->vfe_desc6.scoreboard1.delta_y2 = 3;
3540 
3541         gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
3542         gpe_context->vfe_desc6.scoreboard1.delta_y3 = 1;
3543     } else {
3544         // Scoreboard 0
3545         gpe_context->vfe_desc6.scoreboard1.delta_x0 = -1;
3546         gpe_context->vfe_desc6.scoreboard1.delta_y0 = 0;
3547 
3548         // Scoreboard 1
3549         gpe_context->vfe_desc6.scoreboard1.delta_x1 = 0;
3550         gpe_context->vfe_desc6.scoreboard1.delta_y1 = -1;
3551 
3552         // Scoreboard 2
3553         gpe_context->vfe_desc6.scoreboard1.delta_x2 = 1;
3554         gpe_context->vfe_desc6.scoreboard1.delta_y2 = -1;
3555 
3556         // Scoreboard 3
3557         gpe_context->vfe_desc6.scoreboard1.delta_x3 = -1;
3558         gpe_context->vfe_desc6.scoreboard1.delta_y3 = -1;
3559 
3560         // Scoreboard 4
3561         gpe_context->vfe_desc7.scoreboard2.delta_x4 = -1;
3562         gpe_context->vfe_desc7.scoreboard2.delta_y4 = 1;
3563 
3564         // Scoreboard 5
3565         gpe_context->vfe_desc7.scoreboard2.delta_x5 = 0;
3566         gpe_context->vfe_desc7.scoreboard2.delta_y5 = -2;
3567 
3568         // Scoreboard 6
3569         gpe_context->vfe_desc7.scoreboard2.delta_x6 = 1;
3570         gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
3571 
3572         // Scoreboard 7
3573         gpe_context->vfe_desc7.scoreboard2.delta_x6 = -1;
3574         gpe_context->vfe_desc7.scoreboard2.delta_y6 = -2;
3575     }
3576 }
3577 
3578 #define VP9_VME_REF_WIN       48
3579 
3580 static VAStatus
gen9_encode_vp9_check_parameter(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)3581 gen9_encode_vp9_check_parameter(VADriverContextP ctx,
3582                                 struct encode_state *encode_state,
3583                                 struct intel_encoder_context *encoder_context)
3584 {
3585     struct i965_driver_data *i965 = i965_driver_data(ctx);
3586     struct gen9_vp9_state *vp9_state;
3587     VAEncPictureParameterBufferVP9  *pic_param;
3588     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param;
3589     VAEncSequenceParameterBufferVP9 *seq_param;
3590     struct object_surface *obj_surface;
3591     struct object_buffer *obj_buffer;
3592     struct gen9_surface_vp9 *vp9_priv_surface;
3593     bool need_brc_reset = false;
3594 
3595     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3596 
3597     if (!encode_state->pic_param_ext ||
3598         !encode_state->pic_param_ext->buffer) {
3599         return VA_STATUS_ERROR_INVALID_PARAMETER;
3600     }
3601     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
3602 
3603     obj_buffer = BUFFER(pic_param->coded_buf);
3604 
3605     if (!obj_buffer ||
3606         !obj_buffer->buffer_store ||
3607         !obj_buffer->buffer_store->bo)
3608         return VA_STATUS_ERROR_INVALID_PARAMETER;
3609 
3610     encode_state->coded_buf_object = obj_buffer;
3611 
3612     vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo;
3613 
3614     encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame);
3615 
3616     if (!encode_state->reconstructed_object ||
3617         !encode_state->input_yuv_object)
3618         return VA_STATUS_ERROR_INVALID_PARAMETER;
3619 
3620     vp9_state->curr_frame = pic_param->reconstructed_frame;
3621     vp9_state->ref_frame_flag = 0;
3622     if (pic_param->pic_flags.bits.frame_type == KEY_FRAME ||
3623         pic_param->pic_flags.bits.intra_only) {
3624         /* this will be regarded as I-frame type */
3625         vp9_state->picture_coding_type = 0;
3626         vp9_state->last_ref_obj = NULL;
3627         vp9_state->golden_ref_obj = NULL;
3628         vp9_state->alt_ref_obj = NULL;
3629     } else {
3630         vp9_state->picture_coding_type = 1;
3631         vp9_state->ref_frame_flag = pic_param->ref_flags.bits.ref_frame_ctrl_l0 |
3632                                     pic_param->ref_flags.bits.ref_frame_ctrl_l1;
3633 
3634         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx]);
3635         vp9_state->last_ref_obj = obj_surface;
3636         if (!obj_surface ||
3637             !obj_surface->bo ||
3638             !obj_surface->private_data) {
3639             vp9_state->last_ref_obj = NULL;
3640             vp9_state->ref_frame_flag &= ~(VP9_LAST_REF);
3641         }
3642 
3643         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]);
3644         vp9_state->golden_ref_obj = obj_surface;
3645         if (!obj_surface ||
3646             !obj_surface->bo ||
3647             !obj_surface->private_data) {
3648             vp9_state->golden_ref_obj = NULL;
3649             vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3650         }
3651 
3652         obj_surface = SURFACE(pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]);
3653         vp9_state->alt_ref_obj = obj_surface;
3654         if (!obj_surface ||
3655             !obj_surface->bo ||
3656             !obj_surface->private_data) {
3657             vp9_state->alt_ref_obj = NULL;
3658             vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3659         }
3660 
3661         /* remove the duplicated flag and ref frame list */
3662         if (vp9_state->ref_frame_flag & VP9_LAST_REF) {
3663             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3664                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx]) {
3665                 vp9_state->ref_frame_flag &= ~(VP9_GOLDEN_REF);
3666                 vp9_state->golden_ref_obj = NULL;
3667             }
3668 
3669             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_last_idx] ==
3670                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3671                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3672                 vp9_state->alt_ref_obj = NULL;
3673             }
3674         }
3675 
3676         if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) {
3677             if (pic_param->reference_frames[pic_param->ref_flags.bits.ref_gf_idx] ==
3678                 pic_param->reference_frames[pic_param->ref_flags.bits.ref_arf_idx]) {
3679                 vp9_state->ref_frame_flag &= ~(VP9_ALT_REF);
3680                 vp9_state->alt_ref_obj = NULL;
3681             }
3682         }
3683 
3684         if (vp9_state->ref_frame_flag == 0)
3685             return VA_STATUS_ERROR_INVALID_PARAMETER;
3686     }
3687 
3688     seg_param = NULL;
3689     if (pic_param->pic_flags.bits.segmentation_enabled) {
3690         if (!encode_state->q_matrix ||
3691             !encode_state->q_matrix->buffer) {
3692             return VA_STATUS_ERROR_INVALID_PARAMETER;
3693         }
3694         seg_param = (VAEncMiscParameterTypeVP9PerSegmantParam *)
3695                     encode_state->q_matrix->buffer;
3696     }
3697 
3698     seq_param = NULL;
3699     if (encode_state->seq_param_ext &&
3700         encode_state->seq_param_ext->buffer)
3701         seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer;
3702 
3703     if (!seq_param) {
3704         seq_param = &vp9_state->bogus_seq_param;
3705     }
3706 
3707     vp9_state->pic_param = pic_param;
3708     vp9_state->segment_param = seg_param;
3709     vp9_state->seq_param = seq_param;
3710 
3711     obj_surface = encode_state->reconstructed_object;
3712     if (pic_param->frame_width_dst > obj_surface->orig_width ||
3713         pic_param->frame_height_dst > obj_surface->orig_height)
3714         return VA_STATUS_ERROR_INVALID_SURFACE;
3715 
3716     if (!vp9_state->dys_enabled &&
3717         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
3718          (pic_param->frame_height_src != pic_param->frame_height_dst)))
3719         return VA_STATUS_ERROR_UNIMPLEMENTED;
3720 
3721     if (vp9_state->brc_enabled) {
3722 
3723         if (encoder_context->rate_control_mode == VA_RC_CBR)
3724             need_brc_reset = vp9_state->target_bit_rate != encoder_context->brc.bits_per_second[0] ? true : false;
3725         else if (encoder_context->rate_control_mode == VA_RC_VBR)
3726             need_brc_reset = vp9_state->max_bit_rate != encoder_context->brc.bits_per_second[0] ? true : false;
3727 
3728         if (vp9_state->first_frame || vp9_state->picture_coding_type == KEY_FRAME || need_brc_reset) {
3729             vp9_state->brc_reset = encoder_context->brc.need_reset || vp9_state->first_frame;
3730 
3731             if (!encoder_context->brc.framerate[0].num || !encoder_context->brc.framerate[0].den ||
3732                 !encoder_context->brc.bits_per_second[0])
3733                 return VA_STATUS_ERROR_INVALID_PARAMETER;
3734 
3735             vp9_state->gop_size = encoder_context->brc.gop_size;
3736             vp9_state->framerate = encoder_context->brc.framerate[0];
3737             if ((vp9_state->framerate.num / vp9_state->framerate.den) > MAX_VP9_ENCODER_FRAMERATE) {
3738                 vp9_state->framerate.num = MAX_VP9_ENCODER_FRAMERATE * vp9_state->framerate.den;
3739                 i965_log_info(ctx, "gen9_encode_vp9_check_parameter: Too high frame rate(num: %d, den: %d), max supported is %d fps.\n",
3740                               vp9_state->framerate.num, vp9_state->framerate.den, MAX_VP9_ENCODER_FRAMERATE);
3741             }
3742 
3743             if (encoder_context->rate_control_mode == VA_RC_CBR ||
3744                 !encoder_context->brc.target_percentage[0]) {
3745                 vp9_state->target_bit_rate = encoder_context->brc.bits_per_second[0];
3746                 vp9_state->max_bit_rate = vp9_state->target_bit_rate;
3747                 vp9_state->min_bit_rate = vp9_state->target_bit_rate;
3748             } else {
3749                 vp9_state->max_bit_rate = encoder_context->brc.bits_per_second[0];
3750                 vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100;
3751                 if (2 * vp9_state->target_bit_rate < vp9_state->max_bit_rate)
3752                     vp9_state->min_bit_rate = 0;
3753                 else
3754                     vp9_state->min_bit_rate = 2 * vp9_state->target_bit_rate - vp9_state->max_bit_rate;
3755             }
3756 
3757             if (encoder_context->brc.hrd_buffer_size)
3758                 vp9_state->vbv_buffer_size_in_bit = encoder_context->brc.hrd_buffer_size;
3759             else if (encoder_context->brc.window_size)
3760                 vp9_state->vbv_buffer_size_in_bit = (uint64_t)vp9_state->max_bit_rate * encoder_context->brc.window_size / 1000;
3761             else
3762                 vp9_state->vbv_buffer_size_in_bit = vp9_state->max_bit_rate;
3763             if (encoder_context->brc.hrd_initial_buffer_fullness)
3764                 vp9_state->init_vbv_buffer_fullness_in_bit = encoder_context->brc.hrd_initial_buffer_fullness;
3765             else
3766                 vp9_state->init_vbv_buffer_fullness_in_bit = vp9_state->vbv_buffer_size_in_bit / 2;
3767         }
3768     }
3769 
3770     vp9_state->frame_width = pic_param->frame_width_dst;
3771     vp9_state->frame_height = pic_param->frame_height_dst;
3772 
3773     vp9_state->frame_width_4x = ALIGN(vp9_state->frame_width / 4, 16);
3774     vp9_state->frame_height_4x = ALIGN(vp9_state->frame_height / 4, 16);
3775 
3776     vp9_state->frame_width_16x = ALIGN(vp9_state->frame_width / 16, 16);
3777     vp9_state->frame_height_16x = ALIGN(vp9_state->frame_height / 16, 16);
3778 
3779     vp9_state->frame_width_in_mb = ALIGN(vp9_state->frame_width, 16) / 16;
3780     vp9_state->frame_height_in_mb = ALIGN(vp9_state->frame_height, 16) / 16;
3781 
3782     vp9_state->downscaled_width_4x_in_mb = vp9_state->frame_width_4x / 16;
3783     vp9_state->downscaled_height_4x_in_mb = vp9_state->frame_height_4x / 16;
3784     vp9_state->downscaled_width_16x_in_mb = vp9_state->frame_width_16x / 16;
3785     vp9_state->downscaled_height_16x_in_mb = vp9_state->frame_height_16x / 16;
3786 
3787     vp9_state->dys_in_use = 0;
3788     if (pic_param->frame_width_src != pic_param->frame_width_dst ||
3789         pic_param->frame_height_src != pic_param->frame_height_dst)
3790         vp9_state->dys_in_use = 1;
3791     vp9_state->dys_ref_frame_flag = 0;
3792     /* check the dys setting. The dys is supported by default. */
3793     if (pic_param->pic_flags.bits.frame_type != KEY_FRAME &&
3794         !pic_param->pic_flags.bits.intra_only) {
3795         vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag;
3796 
3797         if ((vp9_state->ref_frame_flag & VP9_LAST_REF) &&
3798             vp9_state->last_ref_obj) {
3799             obj_surface = vp9_state->last_ref_obj;
3800             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3801 
3802             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3803                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3804                 vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF);
3805         }
3806         if ((vp9_state->ref_frame_flag & VP9_GOLDEN_REF) &&
3807             vp9_state->golden_ref_obj) {
3808             obj_surface = vp9_state->golden_ref_obj;
3809             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3810 
3811             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3812                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3813                 vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF);
3814         }
3815         if ((vp9_state->ref_frame_flag & VP9_ALT_REF) &&
3816             vp9_state->alt_ref_obj) {
3817             obj_surface = vp9_state->alt_ref_obj;
3818             vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
3819 
3820             if (vp9_state->frame_width == vp9_priv_surface->frame_width &&
3821                 vp9_state->frame_height == vp9_priv_surface->frame_height)
3822                 vp9_state->dys_ref_frame_flag &= ~(VP9_ALT_REF);
3823         }
3824         if (vp9_state->dys_ref_frame_flag)
3825             vp9_state->dys_in_use = 1;
3826     }
3827 
3828     if (vp9_state->hme_supported) {
3829         vp9_state->hme_enabled = 1;
3830     } else {
3831         vp9_state->hme_enabled = 0;
3832     }
3833 
3834     if (vp9_state->b16xme_supported) {
3835         vp9_state->b16xme_enabled = 1;
3836     } else {
3837         vp9_state->b16xme_enabled = 0;
3838     }
3839 
3840     /* disable HME/16xME if the size is too small */
3841     if (vp9_state->frame_width_4x <= VP9_VME_REF_WIN ||
3842         vp9_state->frame_height_4x <= VP9_VME_REF_WIN) {
3843         vp9_state->hme_enabled = 0;
3844         vp9_state->b16xme_enabled = 0;
3845     }
3846 
3847     if (vp9_state->frame_width_16x < VP9_VME_REF_WIN ||
3848         vp9_state->frame_height_16x < VP9_VME_REF_WIN)
3849         vp9_state->b16xme_enabled = 0;
3850 
3851     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
3852         pic_param->pic_flags.bits.intra_only) {
3853         vp9_state->hme_enabled = 0;
3854         vp9_state->b16xme_enabled = 0;
3855     }
3856 
3857     vp9_state->mbenc_keyframe_dist_enabled = 0;
3858     if ((vp9_state->picture_coding_type == KEY_FRAME) &&
3859         vp9_state->brc_distortion_buffer_supported)
3860         vp9_state->mbenc_keyframe_dist_enabled = 1;
3861 
3862     return VA_STATUS_SUCCESS;
3863 }
3864 
3865 static VAStatus
gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)3866 gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx,
3867                                 struct encode_state *encode_state,
3868                                 struct intel_encoder_context *encoder_context)
3869 {
3870     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
3871     struct vp9_surface_param surface_param;
3872     struct gen9_vp9_state *vp9_state;
3873     VAEncPictureParameterBufferVP9  *pic_param;
3874     struct object_surface *obj_surface;
3875     struct gen9_surface_vp9 *vp9_surface;
3876     int driver_header_flag = 0;
3877     VAStatus va_status;
3878 
3879     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
3880 
3881     if (!vp9_state || !vp9_state->pic_param)
3882         return VA_STATUS_ERROR_INVALID_PARAMETER;
3883 
3884     pic_param = vp9_state->pic_param;
3885 
3886     /* this is to check whether the driver should generate the uncompressed header */
3887     driver_header_flag = 1;
3888     if (encode_state->packed_header_data_ext &&
3889         encode_state->packed_header_data_ext[0] &&
3890         pic_param->bit_offset_first_partition_size) {
3891         VAEncPackedHeaderParameterBuffer *param = NULL;
3892 
3893         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_params_ext[0]->buffer;
3894 
3895         if (param->type == VAEncPackedHeaderRawData) {
3896             char *header_data;
3897             unsigned int length_in_bits;
3898 
3899             header_data = (char *)encode_state->packed_header_data_ext[0]->buffer;
3900             length_in_bits = param->bit_length;
3901             driver_header_flag = 0;
3902 
3903             vp9_state->frame_header.bit_offset_first_partition_size =
3904                 pic_param->bit_offset_first_partition_size;
3905             vp9_state->header_length = ALIGN(length_in_bits, 8) >> 3;
3906             vp9_state->alias_insert_data = header_data;
3907 
3908             vp9_state->frame_header.bit_offset_ref_lf_delta = pic_param->bit_offset_ref_lf_delta;
3909             vp9_state->frame_header.bit_offset_mode_lf_delta = pic_param->bit_offset_mode_lf_delta;
3910             vp9_state->frame_header.bit_offset_lf_level = pic_param->bit_offset_lf_level;
3911             vp9_state->frame_header.bit_offset_qindex = pic_param->bit_offset_qindex;
3912             vp9_state->frame_header.bit_offset_segmentation = pic_param->bit_offset_segmentation;
3913             vp9_state->frame_header.bit_size_segmentation = pic_param->bit_size_segmentation;
3914         }
3915     }
3916 
3917     if (driver_header_flag) {
3918         memset(&vp9_state->frame_header, 0, sizeof(vp9_state->frame_header));
3919         intel_write_uncompressed_header(encode_state,
3920                                         VAProfileVP9Profile0,
3921                                         vme_context->frame_header_data,
3922                                         &vp9_state->header_length,
3923                                         &vp9_state->frame_header);
3924         vp9_state->alias_insert_data = vme_context->frame_header_data;
3925     }
3926 
3927     va_status = i965_check_alloc_surface_bo(ctx, encode_state->input_yuv_object,
3928                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3929     if (va_status != VA_STATUS_SUCCESS)
3930         return va_status;
3931 
3932     va_status = i965_check_alloc_surface_bo(ctx, encode_state->reconstructed_object,
3933                                             1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
3934 
3935     if (va_status != VA_STATUS_SUCCESS)
3936         return va_status;
3937 
3938     surface_param.frame_width = vp9_state->frame_width;
3939     surface_param.frame_height = vp9_state->frame_height;
3940     va_status = gen9_vp9_init_check_surfaces(ctx,
3941                                              encode_state->reconstructed_object,
3942                                              &surface_param);
3943 
3944     {
3945         vp9_surface = (struct gen9_surface_vp9*)encode_state->reconstructed_object;
3946 
3947         vp9_surface->qp_value = pic_param->luma_ac_qindex + pic_param->luma_dc_qindex_delta;
3948     }
3949     if (vp9_state->dys_in_use &&
3950         (pic_param->frame_width_src != pic_param->frame_width_dst ||
3951          pic_param->frame_height_src != pic_param->frame_height_dst)) {
3952         surface_param.frame_width = pic_param->frame_width_dst;
3953         surface_param.frame_height = pic_param->frame_height_dst;
3954         va_status = gen9_vp9_check_dys_surfaces(ctx,
3955                                                 encode_state->reconstructed_object,
3956                                                 &surface_param);
3957 
3958         if (va_status)
3959             return va_status;
3960     }
3961 
3962     if (vp9_state->dys_ref_frame_flag) {
3963         if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) &&
3964             vp9_state->last_ref_obj) {
3965             obj_surface = vp9_state->last_ref_obj;
3966             surface_param.frame_width = vp9_state->frame_width;
3967             surface_param.frame_height = vp9_state->frame_height;
3968             va_status = gen9_vp9_check_dys_surfaces(ctx,
3969                                                     obj_surface,
3970                                                     &surface_param);
3971 
3972             if (va_status)
3973                 return va_status;
3974         }
3975         if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) &&
3976             vp9_state->golden_ref_obj) {
3977             obj_surface = vp9_state->golden_ref_obj;
3978             surface_param.frame_width = vp9_state->frame_width;
3979             surface_param.frame_height = vp9_state->frame_height;
3980             va_status = gen9_vp9_check_dys_surfaces(ctx,
3981                                                     obj_surface,
3982                                                     &surface_param);
3983 
3984             if (va_status)
3985                 return va_status;
3986         }
3987         if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) &&
3988             vp9_state->alt_ref_obj) {
3989             obj_surface = vp9_state->alt_ref_obj;
3990             surface_param.frame_width = vp9_state->frame_width;
3991             surface_param.frame_height = vp9_state->frame_height;
3992             va_status = gen9_vp9_check_dys_surfaces(ctx,
3993                                                     obj_surface,
3994                                                     &surface_param);
3995 
3996             if (va_status)
3997                 return va_status;
3998         }
3999     }
4000 
4001     if (va_status != VA_STATUS_SUCCESS)
4002         return va_status;
4003     /* check the corresponding ref_frame_flag && dys_ref_frame_flag */
4004 
4005     return VA_STATUS_SUCCESS;
4006 }
4007 
4008 static VAStatus
gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4009 gen9_vme_gpe_kernel_init_vp9(VADriverContextP ctx,
4010                              struct encode_state *encode_state,
4011                              struct intel_encoder_context *encoder_context)
4012 {
4013     struct i965_driver_data *i965 = i965_driver_data(ctx);
4014     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4015     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4016     struct vp9_dys_context *dys_context = &vme_context->dys_context;
4017     struct gpe_dynamic_state_parameter ds_param;
4018     int i;
4019 
4020     /*
4021      * BRC will update MBEnc curbe data buffer, so initialize GPE context for
4022      * MBEnc first
4023      */
4024     for (i = 0; i < NUM_VP9_MBENC; i++) {
4025         gen8_gpe_context_init(ctx, &mbenc_context->gpe_contexts[i]);
4026     }
4027 
4028     /*
4029      * VP9_MBENC_XXX uses the same dynamic state buffer as they share the same
4030      * curbe_buffer.
4031      */
4032     ds_param.bo_size = ALIGN(sizeof(vp9_mbenc_curbe_data), 64) + 128 +
4033                        ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * NUM_VP9_MBENC;
4034     mbenc_context->mbenc_bo_dys = dri_bo_alloc(i965->intel.bufmgr,
4035                                                "mbenc_dys",
4036                                                ds_param.bo_size,
4037                                                0x1000);
4038     mbenc_context->mbenc_bo_size = ds_param.bo_size;
4039 
4040     ds_param.bo = mbenc_context->mbenc_bo_dys;
4041     ds_param.curbe_offset = 0;
4042     ds_param.sampler_offset = ALIGN(sizeof(vp9_mbenc_curbe_data), 64);
4043     for (i = 0; i < NUM_VP9_MBENC; i++) {
4044         ds_param.idrt_offset = ds_param.sampler_offset + 128 +
4045                                ALIGN(sizeof(struct gen8_interface_descriptor_data), 64) * i;
4046 
4047         gen8_gpe_context_set_dynamic_buffer(ctx,
4048                                             &mbenc_context->gpe_contexts[i],
4049                                             &ds_param);
4050     }
4051 
4052     gen8_gpe_context_init(ctx, &dys_context->gpe_context);
4053     gen9_vp9_dys_set_sampler_state(&dys_context->gpe_context);
4054 
4055     return VA_STATUS_SUCCESS;
4056 }
4057 
4058 static VAStatus
gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4059 gen9_vme_gpe_kernel_final_vp9(VADriverContextP ctx,
4060                               struct encode_state *encode_state,
4061                               struct intel_encoder_context *encoder_context)
4062 {
4063     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4064     struct vp9_mbenc_context *mbenc_context = &vme_context->mbenc_context;
4065 
4066     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4067     mbenc_context->mbenc_bo_dys = NULL;
4068 
4069     return VA_STATUS_SUCCESS;
4070 }
4071 
4072 static VAStatus
gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4073 gen9_vme_gpe_kernel_run_vp9(VADriverContextP ctx,
4074                             struct encode_state *encode_state,
4075                             struct intel_encoder_context *encoder_context)
4076 {
4077     struct gen9_encoder_context_vp9 *vme_context = encoder_context->vme_context;
4078     struct gen9_vp9_state *vp9_state;
4079     int i;
4080 
4081     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4082 
4083     if (!vp9_state || !vp9_state->pic_param)
4084         return VA_STATUS_ERROR_INVALID_PARAMETER;
4085 
4086     if (vp9_state->dys_in_use) {
4087         gen9_vp9_run_dys_refframes(ctx, encode_state, encoder_context);
4088     }
4089 
4090     if (vp9_state->brc_enabled && (vp9_state->brc_reset || !vp9_state->brc_inited)) {
4091         gen9_vp9_brc_init_reset_kernel(ctx, encode_state, encoder_context);
4092     }
4093 
4094     if (vp9_state->picture_coding_type == KEY_FRAME) {
4095         for (i = 0; i < 2; i++)
4096             i965_zero_gpe_resource(&vme_context->res_mode_decision[i]);
4097     }
4098 
4099     if (vp9_state->hme_supported) {
4100         gen9_vp9_scaling_kernel(ctx, encode_state,
4101                                 encoder_context,
4102                                 0);
4103         if (vp9_state->b16xme_supported) {
4104             gen9_vp9_scaling_kernel(ctx, encode_state,
4105                                     encoder_context,
4106                                     1);
4107         }
4108     }
4109 
4110     if (vp9_state->picture_coding_type && vp9_state->hme_enabled) {
4111         if (vp9_state->b16xme_enabled)
4112             gen9_vp9_me_kernel(ctx, encode_state,
4113                                encoder_context,
4114                                1);
4115 
4116         gen9_vp9_me_kernel(ctx, encode_state,
4117                            encoder_context,
4118                            0);
4119     }
4120 
4121     if (vp9_state->brc_enabled) {
4122         if (vp9_state->mbenc_keyframe_dist_enabled)
4123             gen9_vp9_brc_intra_dist_kernel(ctx,
4124                                            encode_state,
4125                                            encoder_context);
4126 
4127         gen9_vp9_brc_update_kernel(ctx, encode_state,
4128                                    encoder_context);
4129     }
4130 
4131     if (vp9_state->picture_coding_type == KEY_FRAME) {
4132         gen9_vp9_mbenc_kernel(ctx, encode_state,
4133                               encoder_context,
4134                               VP9_MEDIA_STATE_MBENC_I_32x32);
4135         gen9_vp9_mbenc_kernel(ctx, encode_state,
4136                               encoder_context,
4137                               VP9_MEDIA_STATE_MBENC_I_16x16);
4138     } else {
4139         gen9_vp9_mbenc_kernel(ctx, encode_state,
4140                               encoder_context,
4141                               VP9_MEDIA_STATE_MBENC_P);
4142     }
4143 
4144     gen9_vp9_mbenc_kernel(ctx, encode_state,
4145                           encoder_context,
4146                           VP9_MEDIA_STATE_MBENC_TX);
4147 
4148     vp9_state->curr_mode_decision_index ^= 1;
4149     if (vp9_state->brc_enabled) {
4150         vp9_state->brc_inited = 1;
4151         vp9_state->brc_reset = 0;
4152     }
4153 
4154     return VA_STATUS_SUCCESS;
4155 }
4156 
4157 static VAStatus
gen9_vme_pipeline_vp9(VADriverContextP ctx,VAProfile profile,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4158 gen9_vme_pipeline_vp9(VADriverContextP ctx,
4159                       VAProfile profile,
4160                       struct encode_state *encode_state,
4161                       struct intel_encoder_context *encoder_context)
4162 {
4163     VAStatus va_status;
4164     struct gen9_vp9_state *vp9_state;
4165 
4166     vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state;
4167 
4168     if (!vp9_state)
4169         return VA_STATUS_ERROR_INVALID_CONTEXT;
4170 
4171     va_status = gen9_encode_vp9_check_parameter(ctx, encode_state, encoder_context);
4172     if (va_status != VA_STATUS_SUCCESS)
4173         return va_status;
4174 
4175     va_status = gen9_vp9_allocate_resources(ctx, encode_state,
4176                                             encoder_context,
4177                                             !vp9_state->brc_allocated);
4178 
4179     if (va_status != VA_STATUS_SUCCESS)
4180         return va_status;
4181     vp9_state->brc_allocated = 1;
4182 
4183     va_status = gen9_vme_gpe_kernel_prepare_vp9(ctx, encode_state, encoder_context);
4184 
4185     if (va_status != VA_STATUS_SUCCESS)
4186         return va_status;
4187 
4188     va_status = gen9_vme_gpe_kernel_init_vp9(ctx, encode_state, encoder_context);
4189     if (va_status != VA_STATUS_SUCCESS)
4190         return va_status;
4191 
4192     va_status = gen9_vme_gpe_kernel_run_vp9(ctx, encode_state, encoder_context);
4193     if (va_status != VA_STATUS_SUCCESS)
4194         return va_status;
4195 
4196     gen9_vme_gpe_kernel_final_vp9(ctx, encode_state, encoder_context);
4197 
4198     return VA_STATUS_SUCCESS;
4199 }
4200 
4201 static void
gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context * brc_context)4202 gen9_vme_brc_context_destroy_vp9(struct vp9_brc_context *brc_context)
4203 {
4204     int i;
4205 
4206     for (i = 0; i < NUM_VP9_BRC; i++)
4207         gen8_gpe_context_destroy(&brc_context->gpe_contexts[i]);
4208 }
4209 
4210 static void
gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context * scaling_context)4211 gen9_vme_scaling_context_destroy_vp9(struct vp9_scaling_context *scaling_context)
4212 {
4213     int i;
4214 
4215     for (i = 0; i < NUM_VP9_SCALING; i++)
4216         gen8_gpe_context_destroy(&scaling_context->gpe_contexts[i]);
4217 }
4218 
4219 static void
gen9_vme_me_context_destroy_vp9(struct vp9_me_context * me_context)4220 gen9_vme_me_context_destroy_vp9(struct vp9_me_context *me_context)
4221 {
4222     gen8_gpe_context_destroy(&me_context->gpe_context);
4223 }
4224 
4225 static void
gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context * mbenc_context)4226 gen9_vme_mbenc_context_destroy_vp9(struct vp9_mbenc_context *mbenc_context)
4227 {
4228     int i;
4229 
4230     for (i = 0; i < NUM_VP9_MBENC; i++)
4231         gen8_gpe_context_destroy(&mbenc_context->gpe_contexts[i]);
4232     dri_bo_unreference(mbenc_context->mbenc_bo_dys);
4233     mbenc_context->mbenc_bo_size = 0;
4234 }
4235 
4236 static void
gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context * dys_context)4237 gen9_vme_dys_context_destroy_vp9(struct vp9_dys_context *dys_context)
4238 {
4239     gen8_gpe_context_destroy(&dys_context->gpe_context);
4240 }
4241 
4242 static void
gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 * vme_context)4243 gen9_vme_kernel_context_destroy_vp9(struct gen9_encoder_context_vp9 *vme_context)
4244 {
4245     gen9_vp9_free_resources(vme_context);
4246     gen9_vme_scaling_context_destroy_vp9(&vme_context->scaling_context);
4247     gen9_vme_me_context_destroy_vp9(&vme_context->me_context);
4248     gen9_vme_mbenc_context_destroy_vp9(&vme_context->mbenc_context);
4249     gen9_vme_brc_context_destroy_vp9(&vme_context->brc_context);
4250     gen9_vme_dys_context_destroy_vp9(&vme_context->dys_context);
4251 
4252     return;
4253 }
4254 
/*
 * Destroys a VP9 encoder context passed as an opaque pointer: tears down
 * its kernel contexts and frees the structure. A NULL pointer is ignored.
 */
static void
gen9_vme_context_destroy_vp9(void *context)
{
    struct gen9_encoder_context_vp9 *enc_ctx = context;

    if (enc_ctx) {
        gen9_vme_kernel_context_destroy_vp9(enc_ctx);
        free(enc_ctx);
    }
}
4269 
4270 static void
gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_scaling_context * scaling_context)4271 gen9_vme_scaling_context_init_vp9(VADriverContextP ctx,
4272                                   struct gen9_encoder_context_vp9 *vme_context,
4273                                   struct vp9_scaling_context *scaling_context)
4274 {
4275     struct i965_gpe_context *gpe_context = NULL;
4276     struct vp9_encoder_kernel_parameter kernel_param;
4277     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4278     struct i965_kernel scale_kernel;
4279 
4280     kernel_param.curbe_size = sizeof(vp9_scaling4x_curbe_data_cm);
4281     kernel_param.inline_data_size = sizeof(vp9_scaling4x_inline_data_cm);
4282     kernel_param.sampler_size = 0;
4283 
4284     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4285     scoreboard_param.mask = 0xFF;
4286     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4287     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4288     scoreboard_param.walkpat_flag = 0;
4289 
4290     gpe_context = &scaling_context->gpe_contexts[0];
4291     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4292     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4293 
4294     scaling_context->scaling_4x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4295     scaling_context->scaling_4x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4296     scaling_context->scaling_4x_bti.scaling_frame_mbv_proc_stat_dst =
4297         VP9_BTI_SCALING_FRAME_MBVPROCSTATS_DST_CM;
4298 
4299     memset(&scale_kernel, 0, sizeof(scale_kernel));
4300 
4301     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4302                                          sizeof(media_vp9_kernels),
4303                                          INTEL_VP9_ENC_SCALING4X,
4304                                          0,
4305                                          &scale_kernel);
4306 
4307     gen8_gpe_load_kernels(ctx,
4308                           gpe_context,
4309                           &scale_kernel,
4310                           1);
4311 
4312     kernel_param.curbe_size = sizeof(vp9_scaling2x_curbe_data_cm);
4313     kernel_param.inline_data_size = 0;
4314     kernel_param.sampler_size = 0;
4315 
4316     gpe_context = &scaling_context->gpe_contexts[1];
4317     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4318     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4319 
4320     memset(&scale_kernel, 0, sizeof(scale_kernel));
4321 
4322     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4323                                          sizeof(media_vp9_kernels),
4324                                          INTEL_VP9_ENC_SCALING2X,
4325                                          0,
4326                                          &scale_kernel);
4327 
4328     gen8_gpe_load_kernels(ctx,
4329                           gpe_context,
4330                           &scale_kernel,
4331                           1);
4332 
4333     scaling_context->scaling_2x_bti.scaling_frame_src_y = VP9_BTI_SCALING_FRAME_SRC_Y;
4334     scaling_context->scaling_2x_bti.scaling_frame_dst_y = VP9_BTI_SCALING_FRAME_DST_Y;
4335     return;
4336 }
4337 
4338 static void
gen9_vme_me_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_me_context * me_context)4339 gen9_vme_me_context_init_vp9(VADriverContextP ctx,
4340                              struct gen9_encoder_context_vp9 *vme_context,
4341                              struct vp9_me_context *me_context)
4342 {
4343     struct i965_gpe_context *gpe_context = NULL;
4344     struct vp9_encoder_kernel_parameter kernel_param;
4345     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4346     struct i965_kernel scale_kernel;
4347 
4348     kernel_param.curbe_size = sizeof(vp9_me_curbe_data);
4349     kernel_param.inline_data_size = 0;
4350     kernel_param.sampler_size = 0;
4351 
4352     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4353     scoreboard_param.mask = 0xFF;
4354     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4355     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4356     scoreboard_param.walkpat_flag = 0;
4357 
4358     gpe_context = &me_context->gpe_context;
4359     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4360     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4361 
4362     memset(&scale_kernel, 0, sizeof(scale_kernel));
4363 
4364     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4365                                          sizeof(media_vp9_kernels),
4366                                          INTEL_VP9_ENC_ME,
4367                                          0,
4368                                          &scale_kernel);
4369 
4370     gen8_gpe_load_kernels(ctx,
4371                           gpe_context,
4372                           &scale_kernel,
4373                           1);
4374 
4375     return;
4376 }
4377 
4378 static void
gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_mbenc_context * mbenc_context)4379 gen9_vme_mbenc_context_init_vp9(VADriverContextP ctx,
4380                                 struct gen9_encoder_context_vp9 *vme_context,
4381                                 struct vp9_mbenc_context *mbenc_context)
4382 {
4383     struct i965_gpe_context *gpe_context = NULL;
4384     struct vp9_encoder_kernel_parameter kernel_param;
4385     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4386     int i;
4387     struct i965_kernel scale_kernel;
4388 
4389     kernel_param.curbe_size = sizeof(vp9_mbenc_curbe_data);
4390     kernel_param.inline_data_size = 0;
4391     kernel_param.sampler_size = 0;
4392 
4393     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4394     scoreboard_param.mask = 0xFF;
4395     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4396     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4397 
4398     for (i = 0; i < NUM_VP9_MBENC; i++) {
4399         gpe_context = &mbenc_context->gpe_contexts[i];
4400 
4401         if ((i == VP9_MBENC_IDX_KEY_16x16) ||
4402             (i == VP9_MBENC_IDX_INTER)) {
4403             scoreboard_param.walkpat_flag = 1;
4404         } else
4405             scoreboard_param.walkpat_flag = 0;
4406 
4407         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4408         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4409 
4410         memset(&scale_kernel, 0, sizeof(scale_kernel));
4411 
4412         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4413                                              sizeof(media_vp9_kernels),
4414                                              INTEL_VP9_ENC_MBENC,
4415                                              i,
4416                                              &scale_kernel);
4417 
4418         gen8_gpe_load_kernels(ctx,
4419                               gpe_context,
4420                               &scale_kernel,
4421                               1);
4422     }
4423 }
4424 
4425 static void
gen9_vme_brc_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_brc_context * brc_context)4426 gen9_vme_brc_context_init_vp9(VADriverContextP ctx,
4427                               struct gen9_encoder_context_vp9 *vme_context,
4428                               struct vp9_brc_context *brc_context)
4429 {
4430     struct i965_gpe_context *gpe_context = NULL;
4431     struct vp9_encoder_kernel_parameter kernel_param;
4432     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4433     int i;
4434     struct i965_kernel scale_kernel;
4435 
4436     kernel_param.curbe_size = sizeof(vp9_brc_curbe_data);
4437     kernel_param.inline_data_size = 0;
4438     kernel_param.sampler_size = 0;
4439 
4440     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4441     scoreboard_param.mask = 0xFF;
4442     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4443     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4444 
4445     for (i = 0; i < NUM_VP9_BRC; i++) {
4446         gpe_context = &brc_context->gpe_contexts[i];
4447         gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4448         gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4449 
4450         memset(&scale_kernel, 0, sizeof(scale_kernel));
4451 
4452         intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4453                                              sizeof(media_vp9_kernels),
4454                                              INTEL_VP9_ENC_BRC,
4455                                              i,
4456                                              &scale_kernel);
4457 
4458         gen8_gpe_load_kernels(ctx,
4459                               gpe_context,
4460                               &scale_kernel,
4461                               1);
4462     }
4463 }
4464 
4465 static void
gen9_vme_dys_context_init_vp9(VADriverContextP ctx,struct gen9_encoder_context_vp9 * vme_context,struct vp9_dys_context * dys_context)4466 gen9_vme_dys_context_init_vp9(VADriverContextP ctx,
4467                               struct gen9_encoder_context_vp9 *vme_context,
4468                               struct vp9_dys_context *dys_context)
4469 {
4470     struct i965_gpe_context *gpe_context = NULL;
4471     struct vp9_encoder_kernel_parameter kernel_param;
4472     struct vp9_encoder_scoreboard_parameter scoreboard_param;
4473     struct i965_kernel scale_kernel;
4474 
4475     kernel_param.curbe_size = sizeof(vp9_dys_curbe_data);
4476     kernel_param.inline_data_size = 0;
4477     kernel_param.sampler_size = sizeof(struct gen9_sampler_8x8_avs);
4478 
4479     memset(&scoreboard_param, 0, sizeof(scoreboard_param));
4480     scoreboard_param.mask = 0xFF;
4481     scoreboard_param.enable = vme_context->use_hw_scoreboard;
4482     scoreboard_param.type = vme_context->use_hw_non_stalling_scoreboard;
4483     scoreboard_param.walkpat_flag = 0;
4484 
4485     gpe_context = &dys_context->gpe_context;
4486     gen9_init_gpe_context_vp9(ctx, gpe_context, &kernel_param);
4487     gen9_init_vfe_scoreboard_vp9(gpe_context, &scoreboard_param);
4488 
4489     memset(&scale_kernel, 0, sizeof(scale_kernel));
4490 
4491     intel_vp9_get_kernel_header_and_size((void *)media_vp9_kernels,
4492                                          sizeof(media_vp9_kernels),
4493                                          INTEL_VP9_ENC_DYS,
4494                                          0,
4495                                          &scale_kernel);
4496 
4497     gen8_gpe_load_kernels(ctx,
4498                           gpe_context,
4499                           &scale_kernel,
4500                           1);
4501 
4502     return;
4503 }
4504 
4505 static Bool
gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct gen9_encoder_context_vp9 * vme_context)4506 gen9_vme_kernels_context_init_vp9(VADriverContextP ctx,
4507                                   struct intel_encoder_context *encoder_context,
4508                                   struct gen9_encoder_context_vp9 *vme_context)
4509 {
4510     gen9_vme_scaling_context_init_vp9(ctx, vme_context, &vme_context->scaling_context);
4511     gen9_vme_me_context_init_vp9(ctx, vme_context, &vme_context->me_context);
4512     gen9_vme_mbenc_context_init_vp9(ctx, vme_context, &vme_context->mbenc_context);
4513     gen9_vme_dys_context_init_vp9(ctx, vme_context, &vme_context->dys_context);
4514     gen9_vme_brc_context_init_vp9(ctx, vme_context, &vme_context->brc_context);
4515 
4516     vme_context->pfn_set_curbe_brc = gen9_vp9_set_curbe_brc;
4517     vme_context->pfn_set_curbe_me = gen9_vp9_set_curbe_me;
4518     vme_context->pfn_send_me_surface = gen9_vp9_send_me_surface;
4519     vme_context->pfn_send_scaling_surface = gen9_vp9_send_scaling_surface;
4520 
4521     vme_context->pfn_set_curbe_scaling = gen9_vp9_set_curbe_scaling_cm;
4522 
4523     vme_context->pfn_send_dys_surface = gen9_vp9_send_dys_surface;
4524     vme_context->pfn_set_curbe_dys = gen9_vp9_set_curbe_dys;
4525     vme_context->pfn_set_curbe_mbenc = gen9_vp9_set_curbe_mbenc;
4526     vme_context->pfn_send_mbenc_surface = gen9_vp9_send_mbenc_surface;
4527     return true;
4528 }
4529 
4530 static
gen9_vp9_write_compressed_element(char * buffer,int index,int prob,bool value)4531 void gen9_vp9_write_compressed_element(char *buffer,
4532                                        int index,
4533                                        int prob,
4534                                        bool value)
4535 {
4536     struct vp9_compressed_element *base_element, *vp9_element;
4537     base_element = (struct vp9_compressed_element *)buffer;
4538 
4539     vp9_element = base_element + (index >> 1);
4540     if (index % 2) {
4541         vp9_element->b_valid = 1;
4542         vp9_element->b_probdiff_select = 1;
4543         vp9_element->b_prob_select = (prob == 252) ? 1 : 0;
4544         vp9_element->b_bin = value;
4545     } else {
4546         vp9_element->a_valid = 1;
4547         vp9_element->a_probdiff_select = 1;
4548         vp9_element->a_prob_select = (prob == 252) ? 1 : 0;
4549         vp9_element->a_bin = value;
4550     }
4551 }
4552 
4553 static void
intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,struct intel_encoder_context * encoder_context)4554 intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx,
4555                                             struct intel_encoder_context *encoder_context)
4556 {
4557     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4558     VAEncPictureParameterBufferVP9 *pic_param;
4559     struct gen9_vp9_state *vp9_state;
4560     char *buffer;
4561     int i;
4562 
4563     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4564 
4565     if (!pak_context || !vp9_state || !vp9_state->pic_param)
4566         return;
4567 
4568     pic_param = vp9_state->pic_param;
4569     if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4570         (pic_param->pic_flags.bits.intra_only) ||
4571         pic_param->pic_flags.bits.error_resilient_mode) {
4572         /* reset current frame_context */
4573         intel_init_default_vp9_probs(&vp9_state->vp9_current_fc);
4574         if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
4575             pic_param->pic_flags.bits.error_resilient_mode ||
4576             (pic_param->pic_flags.bits.reset_frame_context == 3)) {
4577             for (i = 0; i < 4; i++)
4578                 memcpy(&vp9_state->vp9_frame_ctx[i],
4579                        &vp9_state->vp9_current_fc,
4580                        sizeof(FRAME_CONTEXT));
4581         } else if (pic_param->pic_flags.bits.reset_frame_context == 2) {
4582             i = pic_param->pic_flags.bits.frame_context_idx;
4583             memcpy(&vp9_state->vp9_frame_ctx[i],
4584                    &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
4585         }
4586         /* reset the frame_ctx_idx = 0 */
4587         vp9_state->frame_ctx_idx = 0;
4588     } else {
4589         vp9_state->frame_ctx_idx = pic_param->pic_flags.bits.frame_context_idx;
4590     }
4591 
4592     i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer);
4593     buffer = i965_map_gpe_resource(&pak_context->res_compressed_input_buffer);
4594 
4595     if (!buffer)
4596         return;
4597 
4598     /* write tx_size */
4599     if ((pic_param->luma_ac_qindex == 0) &&
4600         (pic_param->luma_dc_qindex_delta == 0) &&
4601         (pic_param->chroma_ac_qindex_delta == 0) &&
4602         (pic_param->chroma_dc_qindex_delta == 0)) {
4603         /* lossless flag */
4604         /* nothing is needed */
4605         gen9_vp9_write_compressed_element(buffer,
4606                                           0, 128, 0);
4607         gen9_vp9_write_compressed_element(buffer,
4608                                           1, 128, 0);
4609         gen9_vp9_write_compressed_element(buffer,
4610                                           2, 128, 0);
4611     } else {
4612         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4613             gen9_vp9_write_compressed_element(buffer,
4614                                               0, 128, 1);
4615             gen9_vp9_write_compressed_element(buffer,
4616                                               1, 128, 1);
4617             gen9_vp9_write_compressed_element(buffer,
4618                                               2, 128, 1);
4619         } else if (vp9_state->tx_mode == ALLOW_32X32) {
4620             gen9_vp9_write_compressed_element(buffer,
4621                                               0, 128, 1);
4622             gen9_vp9_write_compressed_element(buffer,
4623                                               1, 128, 1);
4624             gen9_vp9_write_compressed_element(buffer,
4625                                               2, 128, 0);
4626         } else {
4627             unsigned int tx_mode;
4628 
4629             tx_mode = vp9_state->tx_mode;
4630             gen9_vp9_write_compressed_element(buffer,
4631                                               0, 128, ((tx_mode) & 2));
4632             gen9_vp9_write_compressed_element(buffer,
4633                                               1, 128, ((tx_mode) & 1));
4634             gen9_vp9_write_compressed_element(buffer,
4635                                               2, 128, 0);
4636         }
4637 
4638         if (vp9_state->tx_mode == TX_MODE_SELECT) {
4639 
4640             gen9_vp9_write_compressed_element(buffer,
4641                                               3, 128, 0);
4642 
4643             gen9_vp9_write_compressed_element(buffer,
4644                                               7, 128, 0);
4645 
4646             gen9_vp9_write_compressed_element(buffer,
4647                                               15, 128, 0);
4648         }
4649     }
4650     /*Setup all the input&output object*/
4651 
4652     {
4653         /* update the coeff_update flag */
4654         gen9_vp9_write_compressed_element(buffer,
4655                                           27, 128, 0);
4656         gen9_vp9_write_compressed_element(buffer,
4657                                           820, 128, 0);
4658         gen9_vp9_write_compressed_element(buffer,
4659                                           1613, 128, 0);
4660         gen9_vp9_write_compressed_element(buffer,
4661                                           2406, 128, 0);
4662     }
4663 
4664 
4665     if (pic_param->pic_flags.bits.frame_type && !pic_param->pic_flags.bits.intra_only) {
4666         bool allow_comp = !(
4667                               (pic_param->ref_flags.bits.ref_last_sign_bias && pic_param->ref_flags.bits.ref_gf_sign_bias && pic_param->ref_flags.bits.ref_arf_sign_bias) ||
4668                               (!pic_param->ref_flags.bits.ref_last_sign_bias && !pic_param->ref_flags.bits.ref_gf_sign_bias && !pic_param->ref_flags.bits.ref_arf_sign_bias)
4669                           );
4670 
4671         if (allow_comp) {
4672             if (pic_param->pic_flags.bits.comp_prediction_mode == REFERENCE_MODE_SELECT) {
4673                 gen9_vp9_write_compressed_element(buffer,
4674                                                   3271, 128, 1);
4675                 gen9_vp9_write_compressed_element(buffer,
4676                                                   3272, 128, 1);
4677             } else if (pic_param->pic_flags.bits.comp_prediction_mode == COMPOUND_REFERENCE) {
4678                 gen9_vp9_write_compressed_element(buffer,
4679                                                   3271, 128, 1);
4680                 gen9_vp9_write_compressed_element(buffer,
4681                                                   3272, 128, 0);
4682             } else {
4683 
4684                 gen9_vp9_write_compressed_element(buffer,
4685                                                   3271, 128, 0);
4686                 gen9_vp9_write_compressed_element(buffer,
4687                                                   3272, 128, 0);
4688             }
4689         }
4690     }
4691 
4692     i965_unmap_gpe_resource(&pak_context->res_compressed_input_buffer);
4693 }
4694 
4695 
4696 static void
gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,struct gen9_hcpe_pipe_mode_select_param * pipe_mode_param)4697 gen9_pak_vp9_pipe_mode_select(VADriverContextP ctx,
4698                               struct encode_state *encode_state,
4699                               struct intel_encoder_context *encoder_context,
4700                               struct gen9_hcpe_pipe_mode_select_param *pipe_mode_param)
4701 {
4702     struct intel_batchbuffer *batch = encoder_context->base.batch;
4703 
4704     BEGIN_BCS_BATCH(batch, 6);
4705 
4706     OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (6 - 2));
4707     OUT_BCS_BATCH(batch,
4708                   (pipe_mode_param->stream_out << 12) |
4709                   (pipe_mode_param->codec_mode << 5) |
4710                   (0 << 3) | /* disable Pic Status / Error Report */
4711                   (pipe_mode_param->stream_out << 2) |
4712                   HCP_CODEC_SELECT_ENCODE);
4713     OUT_BCS_BATCH(batch, 0);
4714     OUT_BCS_BATCH(batch, 0);
4715     OUT_BCS_BATCH(batch, (1 << 6));
4716     OUT_BCS_BATCH(batch, 0);
4717 
4718     ADVANCE_BCS_BATCH(batch);
4719 }
4720 
4721 static void
gen9_vp9_add_surface_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,hcp_surface_state * hcp_state)4722 gen9_vp9_add_surface_state(VADriverContextP ctx,
4723                            struct encode_state *encode_state,
4724                            struct intel_encoder_context *encoder_context,
4725                            hcp_surface_state *hcp_state)
4726 {
4727     struct intel_batchbuffer *batch = encoder_context->base.batch;
4728     if (!hcp_state)
4729         return;
4730 
4731     BEGIN_BCS_BATCH(batch, 3);
4732     OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
4733     OUT_BCS_BATCH(batch,
4734                   (hcp_state->dw1.surface_id << 28) |
4735                   (hcp_state->dw1.surface_pitch - 1)
4736                  );
4737     OUT_BCS_BATCH(batch,
4738                   (hcp_state->dw2.surface_format << 28) |
4739                   (hcp_state->dw2.y_cb_offset)
4740                  );
4741     ADVANCE_BCS_BATCH(batch);
4742 }
4743 
4744 static void
gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4745 gen9_pak_vp9_pipe_buf_addr_state(VADriverContextP ctx,
4746                                  struct encode_state *encode_state,
4747                                  struct intel_encoder_context *encoder_context)
4748 {
4749     struct i965_driver_data *i965 = i965_driver_data(ctx);
4750     struct intel_batchbuffer *batch = encoder_context->base.batch;
4751     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4752     struct gen9_vp9_state *vp9_state;
4753     unsigned int i;
4754     struct object_surface *obj_surface;
4755 
4756     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4757 
4758     if (!vp9_state || !vp9_state->pic_param)
4759         return;
4760 
4761 
4762     BEGIN_BCS_BATCH(batch, 104);
4763 
4764     OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (104 - 2));
4765 
4766     obj_surface = encode_state->reconstructed_object;
4767 
4768     /* reconstructed obj_surface is already checked. So this is skipped */
4769     /* DW 1..3 decoded surface */
4770     OUT_RELOC64(batch,
4771                 obj_surface->bo,
4772                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4773                 0);
4774     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4775 
4776     /* DW 4..6 deblocking line */
4777     OUT_RELOC64(batch,
4778                 pak_context->res_deblocking_filter_line_buffer.bo,
4779                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4780                 0);
4781     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4782 
4783     /* DW 7..9 deblocking tile line */
4784     OUT_RELOC64(batch,
4785                 pak_context->res_deblocking_filter_tile_line_buffer.bo,
4786                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4787                 0);
4788     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4789 
4790     /* DW 10..12 deblocking tile col */
4791     OUT_RELOC64(batch,
4792                 pak_context->res_deblocking_filter_tile_col_buffer.bo,
4793                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4794                 0);
4795     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4796 
4797     /* DW 13..15 metadata line */
4798     OUT_RELOC64(batch,
4799                 pak_context->res_metadata_line_buffer.bo,
4800                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4801                 0);
4802     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4803 
4804     /* DW 16..18 metadata tile line */
4805     OUT_RELOC64(batch,
4806                 pak_context->res_metadata_tile_line_buffer.bo,
4807                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4808                 0);
4809     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4810 
4811     /* DW 19..21 metadata tile col */
4812     OUT_RELOC64(batch,
4813                 pak_context->res_metadata_tile_col_buffer.bo,
4814                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4815                 0);
4816     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4817 
4818     /* DW 22..30 SAO is not used for VP9 */
4819     OUT_BCS_BATCH(batch, 0);
4820     OUT_BCS_BATCH(batch, 0);
4821     OUT_BCS_BATCH(batch, 0);
4822     OUT_BCS_BATCH(batch, 0);
4823     OUT_BCS_BATCH(batch, 0);
4824     OUT_BCS_BATCH(batch, 0);
4825     OUT_BCS_BATCH(batch, 0);
4826     OUT_BCS_BATCH(batch, 0);
4827     OUT_BCS_BATCH(batch, 0);
4828 
4829     /* DW 31..33 Current Motion vector temporal buffer */
4830     OUT_RELOC64(batch,
4831                 pak_context->res_mv_temporal_buffer[vp9_state->curr_mv_temporal_index].bo,
4832                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4833                 0);
4834     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4835 
4836     /* DW 34..36 Not used */
4837     OUT_BCS_BATCH(batch, 0);
4838     OUT_BCS_BATCH(batch, 0);
4839     OUT_BCS_BATCH(batch, 0);
4840 
4841     /* Only the first three reference_frame is used for VP9 */
4842     /* DW 37..52 for reference_frame */
4843     i = 0;
4844     if (vp9_state->picture_coding_type) {
4845         for (i = 0; i < 3; i++) {
4846 
4847             if (pak_context->reference_surfaces[i].bo) {
4848                 OUT_RELOC64(batch,
4849                             pak_context->reference_surfaces[i].bo,
4850                             I915_GEM_DOMAIN_INSTRUCTION, 0,
4851                             0);
4852             } else {
4853                 OUT_BCS_BATCH(batch, 0);
4854                 OUT_BCS_BATCH(batch, 0);
4855             }
4856         }
4857     }
4858 
4859     for (; i < 8; i++) {
4860         OUT_BCS_BATCH(batch, 0);
4861         OUT_BCS_BATCH(batch, 0);
4862     }
4863 
4864     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4865 
4866     /* DW 54..56 for source input */
4867     OUT_RELOC64(batch,
4868                 pak_context->uncompressed_picture_source.bo,
4869                 I915_GEM_DOMAIN_INSTRUCTION, 0,
4870                 0);
4871     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4872 
4873     /* DW 57..59 StreamOut is not used */
4874     OUT_BCS_BATCH(batch, 0);
4875     OUT_BCS_BATCH(batch, 0);
4876     OUT_BCS_BATCH(batch, 0);
4877 
4878     /* DW 60..62. Not used for encoder */
4879     OUT_BCS_BATCH(batch, 0);
4880     OUT_BCS_BATCH(batch, 0);
4881     OUT_BCS_BATCH(batch, 0);
4882 
4883     /* DW 63..65. ILDB Not used for encoder */
4884     OUT_BCS_BATCH(batch, 0);
4885     OUT_BCS_BATCH(batch, 0);
4886     OUT_BCS_BATCH(batch, 0);
4887 
4888     /* DW 66..81 For the collocated motion vector temporal buffer */
4889     if (vp9_state->picture_coding_type) {
4890         int prev_index = vp9_state->curr_mv_temporal_index ^ 0x01;
4891         OUT_RELOC64(batch,
4892                     pak_context->res_mv_temporal_buffer[prev_index].bo,
4893                     I915_GEM_DOMAIN_INSTRUCTION, 0,
4894                     0);
4895     } else {
4896         OUT_BCS_BATCH(batch, 0);
4897         OUT_BCS_BATCH(batch, 0);
4898     }
4899 
4900     for (i = 1; i < 8; i++) {
4901         OUT_BCS_BATCH(batch, 0);
4902         OUT_BCS_BATCH(batch, 0);
4903     }
4904     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4905 
4906     /* DW 83..85 VP9 prob buffer */
4907     OUT_RELOC64(batch,
4908                 pak_context->res_prob_buffer.bo,
4909                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4910                 0);
4911 
4912     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4913 
4914     /* DW 86..88 Segment id buffer */
4915     if (pak_context->res_segmentid_buffer.bo) {
4916         OUT_RELOC64(batch,
4917                     pak_context->res_segmentid_buffer.bo,
4918                     I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4919                     0);
4920     } else {
4921         OUT_BCS_BATCH(batch, 0);
4922         OUT_BCS_BATCH(batch, 0);
4923     }
4924     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4925 
4926     /* DW 89..91 HVD line rowstore buffer */
4927     OUT_RELOC64(batch,
4928                 pak_context->res_hvd_line_buffer.bo,
4929                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4930                 0);
4931     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4932 
4933     /* DW 92..94 HVD tile line rowstore buffer */
4934     OUT_RELOC64(batch,
4935                 pak_context->res_hvd_tile_line_buffer.bo,
4936                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4937                 0);
4938     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4939 
4940     /* DW 95..97 SAO streamout. Not used for VP9 */
4941     OUT_BCS_BATCH(batch, 0);
4942     OUT_BCS_BATCH(batch, 0);
4943     OUT_BCS_BATCH(batch, 0);
4944 
4945     /* reserved for KBL. 98..100 */
4946     OUT_BCS_BATCH(batch, 0);
4947     OUT_BCS_BATCH(batch, 0);
4948     OUT_BCS_BATCH(batch, 0);
4949 
4950     /* 101..103 */
4951     OUT_BCS_BATCH(batch, 0);
4952     OUT_BCS_BATCH(batch, 0);
4953     OUT_BCS_BATCH(batch, 0);
4954 
4955     ADVANCE_BCS_BATCH(batch);
4956 }
4957 
4958 static void
gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)4959 gen9_pak_vp9_ind_obj_base_addr_state(VADriverContextP ctx,
4960                                      struct encode_state *encode_state,
4961                                      struct intel_encoder_context *encoder_context)
4962 {
4963     struct i965_driver_data *i965 = i965_driver_data(ctx);
4964     struct intel_batchbuffer *batch = encoder_context->base.batch;
4965     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
4966     struct gen9_vp9_state *vp9_state;
4967 
4968     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
4969 
4970     /* to do */
4971     BEGIN_BCS_BATCH(batch, 29);
4972 
4973     OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (29 - 2));
4974 
4975     /* indirect bitstream object base */
4976     OUT_BCS_BATCH(batch, 0);
4977     OUT_BCS_BATCH(batch, 0);
4978     OUT_BCS_BATCH(batch, 0);
4979     /* the upper bound of indirect bitstream object */
4980     OUT_BCS_BATCH(batch, 0);
4981     OUT_BCS_BATCH(batch, 0);
4982 
4983     /* DW 6: Indirect CU object base address */
4984     OUT_RELOC64(batch,
4985                 pak_context->res_mb_code_surface.bo,
4986                 I915_GEM_DOMAIN_INSTRUCTION, 0,   /* No write domain */
4987                 vp9_state->mb_data_offset);
4988     /* default attribute */
4989     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4990 
4991     /* DW 9..11, PAK-BSE */
4992     OUT_RELOC64(batch,
4993                 pak_context->indirect_pak_bse_object.bo,
4994                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
4995                 pak_context->indirect_pak_bse_object.offset);
4996     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
4997 
4998     /* DW 12..13 upper bound */
4999     OUT_RELOC64(batch,
5000                 pak_context->indirect_pak_bse_object.bo,
5001                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5002                 pak_context->indirect_pak_bse_object.end_offset);
5003 
5004     /* DW 14..16 compressed header buffer */
5005     OUT_RELOC64(batch,
5006                 pak_context->res_compressed_input_buffer.bo,
5007                 I915_GEM_DOMAIN_INSTRUCTION, 0,
5008                 0);
5009     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5010 
5011     /* DW 17..19 prob counter streamout */
5012     OUT_RELOC64(batch,
5013                 pak_context->res_prob_counter_buffer.bo,
5014                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5015                 0);
5016     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5017 
5018     /* DW 20..22 prob delta streamin */
5019     OUT_RELOC64(batch,
5020                 pak_context->res_prob_delta_buffer.bo,
5021                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5022                 0);
5023     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5024 
5025     /* DW 23..25 Tile record streamout */
5026     OUT_RELOC64(batch,
5027                 pak_context->res_tile_record_streamout_buffer.bo,
5028                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5029                 0);
5030     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5031 
5032     /* DW 26..28 CU record streamout */
5033     OUT_RELOC64(batch,
5034                 pak_context->res_cu_stat_streamout_buffer.bo,
5035                 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
5036                 0);
5037     OUT_BCS_BATCH(batch, i965->intel.mocs_state);
5038 
5039     ADVANCE_BCS_BATCH(batch);
5040 }
5041 
5042 static void
gen9_pak_vp9_segment_state(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context,VAEncSegParamVP9 * seg_param,uint8_t seg_id)5043 gen9_pak_vp9_segment_state(VADriverContextP ctx,
5044                            struct encode_state *encode_state,
5045                            struct intel_encoder_context *encoder_context,
5046                            VAEncSegParamVP9 *seg_param, uint8_t seg_id)
5047 {
5048     struct intel_batchbuffer *batch = encoder_context->base.batch;
5049     uint32_t batch_value, tmp;
5050     VAEncPictureParameterBufferVP9 *pic_param;
5051 
5052     if (!encode_state->pic_param_ext ||
5053         !encode_state->pic_param_ext->buffer) {
5054         return;
5055     }
5056 
5057     pic_param = (VAEncPictureParameterBufferVP9 *)encode_state->pic_param_ext->buffer;
5058 
5059     batch_value = seg_param->seg_flags.bits.segment_reference;
5060     if (pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME ||
5061         pic_param->pic_flags.bits.intra_only)
5062         batch_value = 0;
5063 
5064     BEGIN_BCS_BATCH(batch, 8);
5065 
5066     OUT_BCS_BATCH(batch, HCP_VP9_SEGMENT_STATE | (8 - 2));
5067     OUT_BCS_BATCH(batch, seg_id << 0); /* DW 1 - SegmentID */
5068     OUT_BCS_BATCH(batch,
5069                   (seg_param->seg_flags.bits.segment_reference_enabled << 3) |
5070                   (batch_value << 1) |
5071                   (seg_param->seg_flags.bits.segment_reference_skipped << 0)
5072                  );
5073 
5074     /* DW 3..6 is not used for encoder */
5075     OUT_BCS_BATCH(batch, 0);
5076     OUT_BCS_BATCH(batch, 0);
5077     OUT_BCS_BATCH(batch, 0);
5078     OUT_BCS_BATCH(batch, 0);
5079 
5080     /* DW 7 Mode */
5081     tmp = intel_convert_sign_mag(seg_param->segment_qindex_delta, 9);
5082     batch_value = tmp;
5083     tmp = intel_convert_sign_mag(seg_param->segment_lf_level_delta, 7);
5084     batch_value |= (tmp << 16);
5085     OUT_BCS_BATCH(batch, batch_value);
5086 
5087     ADVANCE_BCS_BATCH(batch);
5088 
5089 }
5090 
5091 static void
intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct i965_gpe_resource * obj_batch_buffer)5092 intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx,
5093                                                  struct intel_encoder_context *encoder_context,
5094                                                  struct i965_gpe_resource *obj_batch_buffer)
5095 {
5096     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5097     struct gen9_vp9_state *vp9_state;
5098     int uncompressed_header_length;
5099     unsigned int *cmd_ptr;
5100     unsigned int dw_length, bits_in_last_dw;
5101 
5102     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5103 
5104     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5105         return;
5106 
5107     uncompressed_header_length = vp9_state->header_length;
5108     cmd_ptr = i965_map_gpe_resource(obj_batch_buffer);
5109 
5110     if (!cmd_ptr)
5111         return;
5112 
5113     bits_in_last_dw = uncompressed_header_length % 4;
5114     bits_in_last_dw *= 8;
5115 
5116     if (bits_in_last_dw == 0)
5117         bits_in_last_dw = 32;
5118 
5119     /* get the DWORD length of the inserted_data */
5120     dw_length = ALIGN(uncompressed_header_length, 4) / 4;
5121     *cmd_ptr++ = HCP_INSERT_PAK_OBJECT | dw_length;
5122 
5123     *cmd_ptr++ = ((0 << 31) | /* indirect payload */
5124                   (0 << 16) | /* the start offset in first DW */
5125                   (0 << 15) |
5126                   (bits_in_last_dw << 8) | /* bits_in_last_dw */
5127                   (0 << 4) |  /* skip emulation byte count. 0 for VP9 */
5128                   (0 << 3) |  /* emulation flag. 0 for VP9 */
5129                   (1 << 2) |  /* last header flag. */
5130                   (0 << 1));
5131     memcpy(cmd_ptr, vp9_state->alias_insert_data, dw_length * sizeof(unsigned int));
5132 
5133     cmd_ptr += dw_length;
5134 
5135     *cmd_ptr++ = MI_NOOP;
5136     *cmd_ptr++ = MI_BATCH_BUFFER_END;
5137     i965_unmap_gpe_resource(obj_batch_buffer);
5138 }
5139 
5140 static void
gen9_vp9_pak_picture_level(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)5141 gen9_vp9_pak_picture_level(VADriverContextP ctx,
5142                            struct encode_state *encode_state,
5143                            struct intel_encoder_context *encoder_context)
5144 {
5145     struct intel_batchbuffer *batch = encoder_context->base.batch;
5146     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5147     struct object_surface *obj_surface;
5148     VAEncPictureParameterBufferVP9 *pic_param;
5149     VAEncMiscParameterTypeVP9PerSegmantParam *seg_param, tmp_seg_param;
5150     struct gen9_vp9_state *vp9_state;
5151     struct gen9_surface_vp9 *vp9_priv_surface;
5152     int i;
5153     struct gen9_hcpe_pipe_mode_select_param mode_param;
5154     hcp_surface_state hcp_surface;
5155     struct gpe_mi_batch_buffer_start_parameter second_level_batch;
5156     int segment_count;
5157 
5158     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5159 
5160     if (!pak_context || !vp9_state || !vp9_state->pic_param)
5161         return;
5162 
5163     pic_param = vp9_state->pic_param;
5164     seg_param = vp9_state->segment_param;
5165 
5166     if (vp9_state->curr_pak_pass == 0) {
5167         intel_vp9enc_construct_pak_insertobj_batchbuffer(ctx, encoder_context,
5168                                                          &pak_context->res_pak_uncompressed_input_buffer);
5169 
5170         // Check if driver already programmed pic state as part of BRC update kernel programming.
5171         if (!vp9_state->brc_enabled) {
5172             intel_vp9enc_construct_picstate_batchbuf(ctx, encode_state,
5173                                                      encoder_context, &pak_context->res_pic_state_brc_write_hfw_read_buffer);
5174         }
5175     }
5176 
5177     if (vp9_state->curr_pak_pass == 0) {
5178         intel_vp9enc_refresh_frame_internal_buffers(ctx, encoder_context);
5179     }
5180 
5181     {
5182         /* copy the frame_context[frame_idx] into curr_frame_context */
5183         memcpy(&vp9_state->vp9_current_fc,
5184                &(vp9_state->vp9_frame_ctx[vp9_state->frame_ctx_idx]),
5185                sizeof(FRAME_CONTEXT));
5186         {
5187             uint8_t *prob_ptr;
5188 
5189             prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer);
5190 
5191             if (!prob_ptr)
5192                 return;
5193 
5194             /* copy the current fc to vp9_prob buffer */
5195             memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT));
5196             if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) ||
5197                 pic_param->pic_flags.bits.intra_only) {
5198                 FRAME_CONTEXT *frame_ptr = (FRAME_CONTEXT *)prob_ptr;
5199 
5200                 memcpy(frame_ptr->partition_prob, vp9_kf_partition_probs,
5201                        sizeof(vp9_kf_partition_probs));
5202                 memcpy(frame_ptr->uv_mode_prob, vp9_kf_uv_mode_prob,
5203                        sizeof(vp9_kf_uv_mode_prob));
5204             }
5205             i965_unmap_gpe_resource(&pak_context->res_prob_buffer);
5206         }
5207     }
5208 
5209     if (vp9_state->brc_enabled && vp9_state->curr_pak_pass) {
5210         /* read image status and insert the conditional end cmd */
5211         /* image ctrl/status is already accessed */
5212         struct gpe_mi_conditional_batch_buffer_end_parameter mi_cond_end;
5213         struct vp9_encode_status_buffer_internal *status_buffer;
5214 
5215         status_buffer = &vp9_state->status_buffer;
5216         memset(&mi_cond_end, 0, sizeof(mi_cond_end));
5217         mi_cond_end.offset = status_buffer->image_status_mask_offset;
5218         mi_cond_end.bo = status_buffer->bo;
5219         mi_cond_end.compare_data = 0;
5220         mi_cond_end.compare_mask_mode_disabled = 1;
5221         gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch,
5222                                                  &mi_cond_end);
5223     }
5224 
5225     mode_param.codec_mode = 1;
5226     mode_param.stream_out = 0;
5227     gen9_pak_vp9_pipe_mode_select(ctx, encode_state, encoder_context, &mode_param);
5228 
5229     /* reconstructed surface */
5230     memset(&hcp_surface, 0, sizeof(hcp_surface));
5231     obj_surface = encode_state->reconstructed_object;
5232     hcp_surface.dw1.surface_id = 0;
5233     hcp_surface.dw1.surface_pitch = obj_surface->width;
5234     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5235     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5236     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5237                                &hcp_surface);
5238 
5239     /* Input surface */
5240     if (vp9_state->dys_in_use &&
5241         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5242          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5243         vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data);
5244         obj_surface = vp9_priv_surface->dys_surface_obj;
5245     } else {
5246         obj_surface = encode_state->input_yuv_object;
5247     }
5248 
5249     hcp_surface.dw1.surface_id = 1;
5250     hcp_surface.dw1.surface_pitch = obj_surface->width;
5251     hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5252     hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5253     gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5254                                &hcp_surface);
5255 
5256     if (vp9_state->picture_coding_type) {
5257         /* Add surface for last */
5258         if (vp9_state->last_ref_obj) {
5259             obj_surface = vp9_state->last_ref_obj;
5260             hcp_surface.dw1.surface_id = 2;
5261             hcp_surface.dw1.surface_pitch = obj_surface->width;
5262             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5263             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5264             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5265                                        &hcp_surface);
5266         }
5267         if (vp9_state->golden_ref_obj) {
5268             obj_surface = vp9_state->golden_ref_obj;
5269             hcp_surface.dw1.surface_id = 3;
5270             hcp_surface.dw1.surface_pitch = obj_surface->width;
5271             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5272             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5273             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5274                                        &hcp_surface);
5275         }
5276         if (vp9_state->alt_ref_obj) {
5277             obj_surface = vp9_state->alt_ref_obj;
5278             hcp_surface.dw1.surface_id = 4;
5279             hcp_surface.dw1.surface_pitch = obj_surface->width;
5280             hcp_surface.dw2.surface_format = SURFACE_FORMAT_PLANAR_420_8;
5281             hcp_surface.dw2.y_cb_offset = obj_surface->y_cb_offset;
5282             gen9_vp9_add_surface_state(ctx, encode_state, encoder_context,
5283                                        &hcp_surface);
5284         }
5285     }
5286 
5287     gen9_pak_vp9_pipe_buf_addr_state(ctx, encode_state, encoder_context);
5288 
5289     gen9_pak_vp9_ind_obj_base_addr_state(ctx, encode_state, encoder_context);
5290 
5291     // Using picstate zero with updated QP and LF deltas by HuC for repak, irrespective of how many Pak passes were run in multi-pass mode.
5292     memset(&second_level_batch, 0, sizeof(second_level_batch));
5293 
5294     if (vp9_state->curr_pak_pass == 0) {
5295         second_level_batch.offset = 0;
5296     } else
5297         second_level_batch.offset = vp9_state->curr_pak_pass * VP9_PIC_STATE_BUFFER_SIZE;
5298 
5299     second_level_batch.is_second_level = 1;
5300     second_level_batch.bo = pak_context->res_pic_state_brc_write_hfw_read_buffer.bo;
5301 
5302     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5303 
5304     if (pic_param->pic_flags.bits.segmentation_enabled &&
5305         seg_param)
5306         segment_count = 8;
5307     else {
5308         segment_count = 1;
5309         memset(&tmp_seg_param, 0, sizeof(tmp_seg_param));
5310         seg_param = &tmp_seg_param;
5311     }
5312     for (i = 0; i < segment_count; i++) {
5313         gen9_pak_vp9_segment_state(ctx, encode_state,
5314                                    encoder_context,
5315                                    &seg_param->seg_data[i], i);
5316     }
5317 
5318     /* Insert the uncompressed header buffer */
5319     second_level_batch.is_second_level = 1;
5320     second_level_batch.offset = 0;
5321     second_level_batch.bo = pak_context->res_pak_uncompressed_input_buffer.bo;
5322 
5323     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5324 
5325     /* PAK_OBJECT */
5326     second_level_batch.is_second_level = 1;
5327     second_level_batch.offset = 0;
5328     second_level_batch.bo = pak_context->res_mb_code_surface.bo;
5329     gen8_gpe_mi_batch_buffer_start(ctx, batch, &second_level_batch);
5330 
5331     return;
5332 }
5333 
5334 static void
gen9_vp9_read_mfc_status(VADriverContextP ctx,struct intel_encoder_context * encoder_context)5335 gen9_vp9_read_mfc_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5336 {
5337     struct intel_batchbuffer *batch = encoder_context->base.batch;
5338     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5339     struct gpe_mi_store_register_mem_parameter mi_store_reg_mem_param;
5340     struct gpe_mi_flush_dw_parameter mi_flush_dw_param;
5341     //struct gpe_mi_copy_mem_parameter mi_copy_mem_param;
5342     struct vp9_encode_status_buffer_internal *status_buffer;
5343     struct gen9_vp9_state *vp9_state;
5344 
5345     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5346     if (!vp9_state || !pak_context || !batch)
5347         return;
5348 
5349     status_buffer = &(vp9_state->status_buffer);
5350 
5351     memset(&mi_flush_dw_param, 0, sizeof(mi_flush_dw_param));
5352     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5353 
5354     memset(&mi_store_reg_mem_param, 0, sizeof(mi_store_reg_mem_param));
5355     mi_store_reg_mem_param.bo = status_buffer->bo;
5356     mi_store_reg_mem_param.offset = status_buffer->bs_byte_count_offset;
5357     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5358     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5359 
5360     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5361     mi_store_reg_mem_param.offset = 0;
5362     mi_store_reg_mem_param.mmio_offset = status_buffer->vp9_bs_frame_reg_offset;
5363     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5364 
5365     /* Read HCP Image status */
5366     mi_store_reg_mem_param.bo = status_buffer->bo;
5367     mi_store_reg_mem_param.offset = status_buffer->image_status_mask_offset;
5368     mi_store_reg_mem_param.mmio_offset =
5369         status_buffer->vp9_image_mask_reg_offset;
5370     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5371 
5372     mi_store_reg_mem_param.bo = status_buffer->bo;
5373     mi_store_reg_mem_param.offset = status_buffer->image_status_ctrl_offset;
5374     mi_store_reg_mem_param.mmio_offset =
5375         status_buffer->vp9_image_ctrl_reg_offset;
5376     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5377 
5378     mi_store_reg_mem_param.bo = pak_context->res_brc_bitstream_size_buffer.bo;
5379     mi_store_reg_mem_param.offset = 4;
5380     mi_store_reg_mem_param.mmio_offset =
5381         status_buffer->vp9_image_ctrl_reg_offset;
5382     gen8_gpe_mi_store_register_mem(ctx, batch, &mi_store_reg_mem_param);
5383 
5384     gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_param);
5385 
5386     return;
5387 }
5388 
5389 static VAStatus
gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)5390 gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx,
5391                               struct encode_state *encode_state,
5392                               struct intel_encoder_context *encoder_context)
5393 {
5394     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5395     struct object_surface *obj_surface;
5396     struct object_buffer *obj_buffer;
5397     struct i965_coded_buffer_segment *coded_buffer_segment;
5398     VAEncPictureParameterBufferVP9 *pic_param;
5399     struct gen9_vp9_state *vp9_state;
5400     dri_bo *bo;
5401     int i;
5402 
5403     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5404     if (!vp9_state ||
5405         !vp9_state->pic_param)
5406         return VA_STATUS_ERROR_INVALID_PARAMETER;
5407 
5408     pic_param = vp9_state->pic_param;
5409 
5410     /* reconstructed surface */
5411     obj_surface = encode_state->reconstructed_object;
5412     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
5413 
5414     dri_bo_unreference(pak_context->reconstructed_object.bo);
5415 
5416     pak_context->reconstructed_object.bo = obj_surface->bo;
5417     dri_bo_reference(pak_context->reconstructed_object.bo);
5418 
5419     /* set vp9 reference frames */
5420     for (i = 0; i < ARRAY_ELEMS(pak_context->reference_surfaces); i++) {
5421         if (pak_context->reference_surfaces[i].bo)
5422             dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5423         pak_context->reference_surfaces[i].bo = NULL;
5424     }
5425 
5426     /* Three reference frames are enough for VP9 */
5427     if (pic_param->pic_flags.bits.frame_type &&
5428         !pic_param->pic_flags.bits.intra_only) {
5429         for (i = 0; i < 3; i++) {
5430             obj_surface = encode_state->reference_objects[i];
5431             if (obj_surface && obj_surface->bo) {
5432                 pak_context->reference_surfaces[i].bo = obj_surface->bo;
5433                 dri_bo_reference(obj_surface->bo);
5434             }
5435         }
5436     }
5437 
5438     /* input YUV surface */
5439     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5440     pak_context->uncompressed_picture_source.bo = NULL;
5441     obj_surface = encode_state->reconstructed_object;
5442     if (vp9_state->dys_in_use &&
5443         ((pic_param->frame_width_src != pic_param->frame_width_dst) ||
5444          (pic_param->frame_height_src != pic_param->frame_height_dst))) {
5445         struct gen9_surface_vp9 *vp9_priv_surface =
5446             (struct gen9_surface_vp9 *)(obj_surface->private_data);
5447         obj_surface = vp9_priv_surface->dys_surface_obj;
5448     } else
5449         obj_surface = encode_state->input_yuv_object;
5450 
5451     pak_context->uncompressed_picture_source.bo = obj_surface->bo;
5452     dri_bo_reference(pak_context->uncompressed_picture_source.bo);
5453 
5454     /* coded buffer */
5455     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5456     pak_context->indirect_pak_bse_object.bo = NULL;
5457     obj_buffer = encode_state->coded_buf_object;
5458     bo = obj_buffer->buffer_store->bo;
5459     pak_context->indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
5460     pak_context->indirect_pak_bse_object.end_offset = ALIGN((obj_buffer->size_element - 0x1000), 0x1000);
5461     pak_context->indirect_pak_bse_object.bo = bo;
5462     dri_bo_reference(pak_context->indirect_pak_bse_object.bo);
5463 
5464     /* set the internal flag to 0 to indicate the coded size is unknown */
5465     dri_bo_map(bo, 1);
5466     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
5467     coded_buffer_segment->mapped = 0;
5468     coded_buffer_segment->codec = encoder_context->codec;
5469     coded_buffer_segment->status_support = 1;
5470     dri_bo_unmap(bo);
5471 
5472     return VA_STATUS_SUCCESS;
5473 }
5474 
/*
 * BRC preparation hook of the VP9 PAK stage.
 *
 * Installed as encoder_context->mfc_brc_prepare by
 * gen9_vp9_pak_context_init(); it deliberately does nothing.
 */
static void
gen9_vp9_pak_brc_prepare(struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    /* Both parameters are unused */
    (void)encode_state;
    (void)encoder_context;
}
5480 
5481 static void
gen9_vp9_pak_context_destroy(void * context)5482 gen9_vp9_pak_context_destroy(void *context)
5483 {
5484     struct gen9_encoder_context_vp9 *pak_context = context;
5485     int i;
5486 
5487     dri_bo_unreference(pak_context->reconstructed_object.bo);
5488     pak_context->reconstructed_object.bo = NULL;
5489 
5490     dri_bo_unreference(pak_context->uncompressed_picture_source.bo);
5491     pak_context->uncompressed_picture_source.bo = NULL;
5492 
5493     dri_bo_unreference(pak_context->indirect_pak_bse_object.bo);
5494     pak_context->indirect_pak_bse_object.bo = NULL;
5495 
5496     for (i = 0; i < 8; i++) {
5497         dri_bo_unreference(pak_context->reference_surfaces[i].bo);
5498         pak_context->reference_surfaces[i].bo = NULL;
5499     }
5500 
5501     /* vme & pak same the same structure, so don't free the context here */
5502 }
5503 
5504 static VAStatus
gen9_vp9_pak_pipeline(VADriverContextP ctx,VAProfile profile,struct encode_state * encode_state,struct intel_encoder_context * encoder_context)5505 gen9_vp9_pak_pipeline(VADriverContextP ctx,
5506                       VAProfile profile,
5507                       struct encode_state *encode_state,
5508                       struct intel_encoder_context *encoder_context)
5509 {
5510     struct i965_driver_data *i965 = i965_driver_data(ctx);
5511     struct intel_batchbuffer *batch = encoder_context->base.batch;
5512     struct gen9_encoder_context_vp9 *pak_context = encoder_context->mfc_context;
5513     VAStatus va_status;
5514     struct gen9_vp9_state *vp9_state;
5515     VAEncPictureParameterBufferVP9 *pic_param;
5516     int i;
5517 
5518     vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state);
5519 
5520     if (!vp9_state || !vp9_state->pic_param || !pak_context)
5521         return VA_STATUS_ERROR_INVALID_PARAMETER;
5522 
5523     va_status = gen9_vp9_pak_pipeline_prepare(ctx, encode_state, encoder_context);
5524 
5525     if (va_status != VA_STATUS_SUCCESS)
5526         return va_status;
5527 
5528     if (i965->intel.has_bsd2)
5529         intel_batchbuffer_start_atomic_bcs_override(batch, 0x1000, BSD_RING0);
5530     else
5531         intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
5532 
5533     intel_batchbuffer_emit_mi_flush(batch);
5534 
5535     BEGIN_BCS_BATCH(batch, 64);
5536     for (i = 0; i < 64; i++)
5537         OUT_BCS_BATCH(batch, MI_NOOP);
5538 
5539     ADVANCE_BCS_BATCH(batch);
5540 
5541     for (vp9_state->curr_pak_pass = 0;
5542          vp9_state->curr_pak_pass < vp9_state->num_pak_passes;
5543          vp9_state->curr_pak_pass++) {
5544 
5545         if (vp9_state->curr_pak_pass == 0) {
5546             /* Initialize the VP9 Image Ctrl reg for the first pass */
5547             struct gpe_mi_load_register_imm_parameter mi_load_reg_imm;
5548             struct vp9_encode_status_buffer_internal *status_buffer;
5549 
5550             status_buffer = &(vp9_state->status_buffer);
5551             memset(&mi_load_reg_imm, 0, sizeof(mi_load_reg_imm));
5552             mi_load_reg_imm.mmio_offset = status_buffer->vp9_image_ctrl_reg_offset;
5553             mi_load_reg_imm.data = 0;
5554             gen8_gpe_mi_load_register_imm(ctx, batch, &mi_load_reg_imm);
5555         }
5556         gen9_vp9_pak_picture_level(ctx, encode_state, encoder_context);
5557         gen9_vp9_read_mfc_status(ctx, encoder_context);
5558     }
5559 
5560     intel_batchbuffer_end_atomic(batch);
5561     intel_batchbuffer_flush(batch);
5562 
5563     pic_param = vp9_state->pic_param;
5564     vp9_state->vp9_last_frame.frame_width = pic_param->frame_width_dst;
5565     vp9_state->vp9_last_frame.frame_height = pic_param->frame_height_dst;
5566     vp9_state->vp9_last_frame.frame_type = pic_param->pic_flags.bits.frame_type;
5567     vp9_state->vp9_last_frame.show_frame = pic_param->pic_flags.bits.show_frame;
5568     vp9_state->vp9_last_frame.refresh_frame_context = pic_param->pic_flags.bits.refresh_frame_context;
5569     vp9_state->vp9_last_frame.frame_context_idx = pic_param->pic_flags.bits.frame_context_idx;
5570     vp9_state->vp9_last_frame.intra_only = pic_param->pic_flags.bits.intra_only;
5571     vp9_state->frame_number++;
5572     vp9_state->curr_mv_temporal_index ^= 1;
5573     vp9_state->first_frame = 0;
5574 
5575     return VA_STATUS_SUCCESS;
5576 }
5577 
5578 Bool
gen9_vp9_vme_context_init(VADriverContextP ctx,struct intel_encoder_context * encoder_context)5579 gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5580 {
5581     struct gen9_encoder_context_vp9 *vme_context = NULL;
5582     struct gen9_vp9_state *vp9_state = NULL;
5583 
5584     vme_context = calloc(1, sizeof(struct gen9_encoder_context_vp9));
5585     vp9_state = calloc(1, sizeof(struct gen9_vp9_state));
5586 
5587     if (!vme_context || !vp9_state) {
5588         if (vme_context)
5589             free(vme_context);
5590         if (vp9_state)
5591             free(vp9_state);
5592         return false;
5593     }
5594 
5595     encoder_context->enc_priv_state = vp9_state;
5596     vme_context->enc_priv_state = vp9_state;
5597 
5598     /* Initialize the features that are supported by VP9 */
5599     vme_context->hme_supported = 1;
5600     vme_context->use_hw_scoreboard = 1;
5601     vme_context->use_hw_non_stalling_scoreboard = 1;
5602 
5603     vp9_state->tx_mode = TX_MODE_SELECT;
5604     vp9_state->multi_ref_qp_check = 0;
5605     vp9_state->target_usage = INTEL_ENC_VP9_TU_NORMAL;
5606     vp9_state->num_pak_passes = 1;
5607     vp9_state->hme_supported = vme_context->hme_supported;
5608     vp9_state->b16xme_supported = 1;
5609 
5610     if (encoder_context->rate_control_mode != VA_RC_NONE &&
5611         encoder_context->rate_control_mode != VA_RC_CQP) {
5612         vp9_state->brc_enabled = 1;
5613         vp9_state->brc_distortion_buffer_supported = 1;
5614         vp9_state->brc_constant_buffer_supported = 1;
5615         vp9_state->num_pak_passes = 4;
5616     }
5617     vp9_state->dys_enabled = 1; /* this is supported by default */
5618     vp9_state->first_frame = 1;
5619 
5620     /* the definition of status buffer offset for VP9 */
5621     {
5622         struct vp9_encode_status_buffer_internal *status_buffer;
5623         uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
5624 
5625         status_buffer = &vp9_state->status_buffer;
5626         memset(status_buffer, 0,
5627                sizeof(struct vp9_encode_status_buffer_internal));
5628 
5629         status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count);
5630         status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask);
5631         status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl);
5632         status_buffer->media_index_offset       = base_offset + offsetof(struct vp9_encode_status, media_index);
5633 
5634         status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0;
5635         status_buffer->vp9_image_mask_reg_offset = 0x1E9F0;
5636         status_buffer->vp9_image_ctrl_reg_offset = 0x1E9F4;
5637     }
5638 
5639     gen9_vme_kernels_context_init_vp9(ctx, encoder_context, vme_context);
5640 
5641     encoder_context->vme_context = vme_context;
5642     encoder_context->vme_pipeline = gen9_vme_pipeline_vp9;
5643     encoder_context->vme_context_destroy = gen9_vme_context_destroy_vp9;
5644 
5645     return true;
5646 }
5647 
5648 static VAStatus
gen9_vp9_get_coded_status(VADriverContextP ctx,struct intel_encoder_context * encoder_context,struct i965_coded_buffer_segment * coded_buf_seg)5649 gen9_vp9_get_coded_status(VADriverContextP ctx,
5650                           struct intel_encoder_context *encoder_context,
5651                           struct i965_coded_buffer_segment *coded_buf_seg)
5652 {
5653     struct vp9_encode_status *vp9_encode_status;
5654 
5655     if (!encoder_context || !coded_buf_seg)
5656         return VA_STATUS_ERROR_INVALID_BUFFER;
5657 
5658     vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data;
5659     coded_buf_seg->base.size = vp9_encode_status->bs_byte_count;
5660 
5661     /* One VACodedBufferSegment for VP9 will be added later.
5662      * It will be linked to the next element of coded_buf_seg->base.next
5663      */
5664 
5665     return VA_STATUS_SUCCESS;
5666 }
5667 
5668 Bool
gen9_vp9_pak_context_init(VADriverContextP ctx,struct intel_encoder_context * encoder_context)5669 gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
5670 {
5671     /* VME & PAK share the same context */
5672     struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context;
5673 
5674     if (!pak_context)
5675         return false;
5676 
5677     encoder_context->mfc_context = pak_context;
5678     encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy;
5679     encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline;
5680     encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare;
5681     encoder_context->get_status = gen9_vp9_get_coded_status;
5682     return true;
5683 }
5684