1 /*
 * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *    Zhao Yakui <yakui.zhao@intel.com>
28  *
29  */
30 
/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40 
41 #include <va/va_drmcommon.h>
42 
43 #include "media_drv_defines.h"
44 
45 #include "media_drv_util.h"
46 #include "media_drv_driver.h"
47 #include "media_drv_render.h"
48 #include "media_drv_surface.h"
49 #include "media_drv_init.h"
50 
51 #include "media_drv_batchbuffer.h"
52 #include "media_render_common.h"
53 
54 /* Programs for Gen9 */
55 static const uint32_t sf_kernel_static_gen9[][4] ={
56 
57 };
58 static const uint32_t ps_kernel_static_gen9[][4] = {
59 #include "shaders/render/exa_wm_src_affine.g9b"
60 #include "shaders/render/exa_wm_src_sample_planar.g9b"
61 #include "shaders/render/exa_wm_yuv_color_balance.g9b"
62 #include "shaders/render/exa_wm_yuv_rgb.g9b"
63 #include "shaders/render/exa_wm_write.g9b"
64 };
65 
66 static const uint32_t ps_subpic_kernel_static_gen9[][4] = {
67 #include "shaders/render/exa_wm_src_affine.g9b"
68 #include "shaders/render/exa_wm_src_sample_argb.g9b"
69 #include "shaders/render/exa_wm_write.g9b"
70 };
71 
72 
/* Bytes reserved for one surface-state slot in the surface-state buffer. */
#define RENDER_SURFACE_STATE_PADDED_SIZE       (sizeof(struct gen8_surface_state))

/*
 * Byte offset of surface-state slot `index`.  The argument is parenthesized
 * so that compound expressions such as `i + 1` expand correctly.
 */
#define RENDER_SURFACE_STATE_OFFSET(index)     (RENDER_SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table is placed right after the last surface-state slot. */
#define RENDER_BINDING_TABLE_OFFSET            RENDER_SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

/*
 * Default display-attribute values.  gen9_render_upload_constants() skips
 * the colour-balance step when all four attributes equal these defaults, and
 * normalizes contrast and saturation by dividing by their default.
 */
#define DEFAULT_BRIGHTNESS      0
#define DEFAULT_CONTRAST        50
#define DEFAULT_HUE             0
#define DEFAULT_SATURATION      50

/* Two-bit mask at bits 9:8 (NOTE(review): users are outside this chunk). */
#define GEN9_PIPELINE_SELECTION_MASK        (0x3 << 8)

/* NOTE(review): presumably the SBE "active component" encoding for XYZW. */
#define GEN9_SBE_ACTIVE_COMPONENT_XYZW          3
86 
87 static struct media_render_kernel render_kernels_gen9[] = {
88     {
89         "SF",
90         SF_KERNEL,
91         sf_kernel_static_gen9,
92         sizeof(sf_kernel_static_gen9),
93         NULL
94     },
95     {
96         "PS",
97         PS_KERNEL,
98         ps_kernel_static_gen9,
99         sizeof(ps_kernel_static_gen9),
100         NULL
101     },
102 
103     {
104         "PS_SUBPIC",
105         PS_SUBPIC_KERNEL,
106         ps_subpic_kernel_static_gen9,
107         sizeof(ps_subpic_kernel_static_gen9),
108         NULL
109     }
110 };
111 
/*
 * URB partitioning constants: entry count and entry size per stage.
 * NOTE(review): the units of the *_ENTRY_SIZE values are not visible in this
 * file section; confirm against the code that programs the URB.
 */

/* Vertex shader */
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

/* Geometry shader (no entries) */
#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

/* Clipper (no entries) */
#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

/* Strips and fans */
#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

/* Constant (CS) section */
#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4
126 
/*
 * YUV -> RGB conversion tables, one per supported source colour standard.
 * Each table is 3 rows by 4 columns of floats and is copied verbatim into the
 * shader constant buffer by gen9_render_upload_constants().
 *
 * The fourth column holds -16/255 and -128/255 (rounded).
 * NOTE(review): presumably each row produces one of R, G, B from (Y, U, V)
 * and the fourth column is the per-channel input bias; confirm against the
 * exa_wm_yuv_rgb shader.
 */
static float yuv_to_rgb_bt601[3][4] = {
    { 1.164,  0,      1.596,  -0.06275, },
    { 1.164, -0.392, -0.813,  -0.50196, },
    { 1.164,  2.017,  0,      -0.50196, },
};

static float yuv_to_rgb_bt709[3][4] = {
    { 1.164,  0,      1.793,  -0.06275, },
    { 1.164, -0.213, -0.533,  -0.50196, },
    { 1.164,  2.112,  0,      -0.50196, },
};

static float yuv_to_rgb_smpte_240[3][4] = {
    { 1.164,  0,      1.794,  -0.06275, },
    { 1.164, -0.258, -0.5425, -0.50196, },
    { 1.164,  2.078,  0,      -0.50196, },
};
144 
145 
146 static void
gen9_render_set_surface_tiling(struct gen8_surface_state * ss,uint32_t tiling)147 gen9_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
148 {
149    switch (tiling) {
150    case I915_TILING_NONE:
151       ss->ss0.tiled_surface = 0;
152       ss->ss0.tile_walk = 0;
153       break;
154    case I915_TILING_X:
155       ss->ss0.tiled_surface = 1;
156       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
157       break;
158    case I915_TILING_Y:
159       ss->ss0.tiled_surface = 1;
160       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
161       break;
162    }
163 }
164 
165 /* Set "Shader Channel Select" for GEN9+ */
166 static void
gen9_render_set_surface_scs(struct gen8_surface_state * ss)167 gen9_render_set_surface_scs(struct gen8_surface_state *ss)
168 {
169     ss->ss7.shader_chanel_select_r = RENDER_HSW_SCS_RED;
170     ss->ss7.shader_chanel_select_g = RENDER_HSW_SCS_GREEN;
171     ss->ss7.shader_chanel_select_b = RENDER_HSW_SCS_BLUE;
172     ss->ss7.shader_chanel_select_a = RENDER_HSW_SCS_ALPHA;
173 }
174 
175 static void
gen9_render_set_surface_state(struct gen8_surface_state * ss,dri_bo * bo,unsigned long offset,int width,int height,int pitch,int format,unsigned int flags)176 gen9_render_set_surface_state(
177     struct gen8_surface_state *ss,
178     dri_bo                    *bo,
179     unsigned long              offset,
180     int                        width,
181     int                        height,
182     int                        pitch,
183     int                        format,
184     unsigned int               flags
185 )
186 {
187     unsigned int tiling;
188     unsigned int swizzle;
189 
190     memset(ss, 0, sizeof(*ss));
191 
192     switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
193     case VA_BOTTOM_FIELD:
194         ss->ss0.vert_line_stride_ofs = 1;
195         /* fall-through */
196     case VA_TOP_FIELD:
197         ss->ss0.vert_line_stride = 1;
198         height /= 2;
199         break;
200     }
201 
202     ss->ss0.surface_type = I965_SURFACE_2D;
203     ss->ss0.surface_format = format;
204 
205     ss->ss8.base_addr = bo->offset + offset;
206 
207     ss->ss2.width = width - 1;
208     ss->ss2.height = height - 1;
209 
210     ss->ss3.pitch = pitch - 1;
211 
212     /* Always set 1(align 4 mode) per B-spec */
213     ss->ss0.vertical_alignment = 1;
214     ss->ss0.horizontal_alignment = 1;
215 
216     dri_bo_get_tiling(bo, &tiling, &swizzle);
217     gen9_render_set_surface_tiling(ss, tiling);
218 }
219 
220 static void
gen9_render_src_surface_state(VADriverContextP ctx,int index,dri_bo * region,unsigned long offset,int w,int h,int pitch,int format,unsigned int flags)221 gen9_render_src_surface_state(
222     VADriverContextP ctx,
223     int              index,
224     dri_bo          *region,
225     unsigned long    offset,
226     int              w,
227     int              h,
228     int              pitch,
229     int              format,
230     unsigned int     flags
231 )
232 {
233     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
234     struct media_render_state *render_state = &drv_ctx->render_state;
235     void *ss;
236     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
237 
238     assert(index < MAX_RENDER_SURFACES);
239 
240     dri_bo_map(ss_bo, 1);
241     assert(ss_bo->virtual);
242     ss = (char *)ss_bo->virtual + RENDER_SURFACE_STATE_OFFSET(index);
243 
244     gen9_render_set_surface_state(ss,
245                                   region, offset,
246                                   w, h,
247                                   pitch, format, flags);
248     gen9_render_set_surface_scs(ss);
249     dri_bo_emit_reloc(ss_bo,
250                       I915_GEM_DOMAIN_SAMPLER, 0,
251                       offset,
252                       RENDER_SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
253                       region);
254 
255     ((unsigned int *)((char *)ss_bo->virtual + RENDER_BINDING_TABLE_OFFSET))[index] = RENDER_SURFACE_STATE_OFFSET(index);
256     dri_bo_unmap(ss_bo);
257     render_state->wm.sampler_count++;
258 }
259 
260 static void
gen9_render_src_surfaces_state(VADriverContextP ctx,struct object_surface * obj_surface,unsigned int flags)261 gen9_render_src_surfaces_state(
262     VADriverContextP ctx,
263     struct object_surface *obj_surface,
264     unsigned int     flags
265 )
266 {
267     int region_pitch;
268     int rw, rh;
269     dri_bo *region;
270 
271     region_pitch = obj_surface->width;
272     rw = obj_surface->orig_width;
273     rh = obj_surface->orig_height;
274     region = obj_surface->bo;
275 
276     gen9_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
277     gen9_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
278 
279     if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
280         gen9_render_src_surface_state(ctx, 3, region,
281                                       region_pitch * obj_surface->y_cb_offset,
282                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
283                                       I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
284         gen9_render_src_surface_state(ctx, 4, region,
285                                       region_pitch * obj_surface->y_cb_offset,
286                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
287                                       I965_SURFACEFORMAT_R8G8_UNORM, flags);
288     } else {
289         gen9_render_src_surface_state(ctx, 3, region,
290                                       region_pitch * obj_surface->y_cb_offset,
291                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
292                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
293         gen9_render_src_surface_state(ctx, 4, region,
294                                       region_pitch * obj_surface->y_cb_offset,
295                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
296                                       I965_SURFACEFORMAT_R8_UNORM, flags);
297         gen9_render_src_surface_state(ctx, 5, region,
298                                       region_pitch * obj_surface->y_cr_offset,
299                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
300                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
301         gen9_render_src_surface_state(ctx, 6, region,
302                                       region_pitch * obj_surface->y_cr_offset,
303                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
304                                       I965_SURFACEFORMAT_R8_UNORM, flags);
305     }
306 }
307 
308 static void
gen9_subpic_render_src_surfaces_state(VADriverContextP ctx,struct object_surface * obj_surface)309 gen9_subpic_render_src_surfaces_state(VADriverContextP ctx,
310                                       struct object_surface *obj_surface)
311 {
312     dri_bo *subpic_region;
313     unsigned int index = obj_surface->subpic_render_idx;
314     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
315     struct object_image *obj_image = obj_subpic->obj_image;
316 
317     assert(obj_surface);
318     assert(obj_surface->bo);
319     subpic_region = obj_image->bo;
320     /*subpicture surface*/
321     gen9_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
322     gen9_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
323 }
324 
325 static void
gen9_render_dest_surface_state(VADriverContextP ctx,int index)326 gen9_render_dest_surface_state(VADriverContextP ctx, int index)
327 {
328     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
329     struct media_render_state *render_state = &drv_ctx->render_state;
330     struct region *dest_region = render_state->draw_region;
331     void *ss;
332     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
333     int format;
334     assert(index < MAX_RENDER_SURFACES);
335 
336     if (dest_region->cpp == 2) {
337 	format = I965_SURFACEFORMAT_B5G6R5_UNORM;
338     } else {
339 	format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
340     }
341 
342     dri_bo_map(ss_bo, 1);
343     assert(ss_bo->virtual);
344     ss = (char *)ss_bo->virtual + RENDER_SURFACE_STATE_OFFSET(index);
345 
346     gen9_render_set_surface_state(ss,
347                                   dest_region->bo, 0,
348                                   dest_region->width, dest_region->height,
349                                   dest_region->pitch, format, 0);
350     gen9_render_set_surface_scs(ss);
351     dri_bo_emit_reloc(ss_bo,
352                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
353                       0,
354                       RENDER_SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
355                       dest_region->bo);
356 
357     ((unsigned int *)((char *)ss_bo->virtual + RENDER_BINDING_TABLE_OFFSET))[index] = RENDER_SURFACE_STATE_OFFSET(index);
358     dri_bo_unmap(ss_bo);
359 }
360 
361 static void
i965_fill_vertex_buffer(VADriverContextP ctx,float tex_coords[4],float vid_coords[4])362 i965_fill_vertex_buffer(
363     VADriverContextP ctx,
364     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
365     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
366 )
367 {
368     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
369     float vb[12];
370 
371     enum { X1, Y1, X2, Y2 };
372 
373     static const unsigned int g_rotation_indices[][6] = {
374         [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
375         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
376         [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
377         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
378     };
379 
380     const unsigned int * const rotation_indices =
381         g_rotation_indices[drv_ctx->rotation_attrib->value];
382 
383     vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
384     vb[1]  = tex_coords[rotation_indices[1]];
385     vb[2]  = vid_coords[X2];
386     vb[3]  = vid_coords[Y2];
387 
388     vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
389     vb[5]  = tex_coords[rotation_indices[3]];
390     vb[6]  = vid_coords[X1];
391     vb[7]  = vid_coords[Y2];
392 
393     vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
394     vb[9]  = tex_coords[rotation_indices[5]];
395     vb[10] = vid_coords[X1];
396     vb[11] = vid_coords[Y1];
397 
398     dri_bo_subdata(drv_ctx->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
399 }
400 
401 static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * output_rect)402 i965_subpic_render_upload_vertex(VADriverContextP ctx,
403                                  struct object_surface *obj_surface,
404                                  const VARectangle *output_rect)
405 {
406     unsigned int index = obj_surface->subpic_render_idx;
407     struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
408     float tex_coords[4], vid_coords[4];
409     VARectangle dst_rect;
410 
411     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
412         dst_rect = obj_subpic->dst_rect;
413     else {
414         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
415         const float sy  = (float)output_rect->height / obj_surface->orig_height;
416         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
417         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
418         dst_rect.width  = sx * obj_subpic->dst_rect.width;
419         dst_rect.height = sy * obj_subpic->dst_rect.height;
420     }
421 
422     tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
423     tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
424     tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
425     tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
426 
427     vid_coords[0] = dst_rect.x;
428     vid_coords[1] = dst_rect.y;
429     vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
430     vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
431 
432     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
433 }
434 
435 static void
i965_render_upload_vertex(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect)436 i965_render_upload_vertex(
437     VADriverContextP   ctx,
438     struct object_surface *obj_surface,
439     const VARectangle *src_rect,
440     const VARectangle *dst_rect
441 )
442 {
443     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
444     struct media_render_state *render_state = &drv_ctx->render_state;
445     struct region *dest_region = render_state->draw_region;
446     float tex_coords[4], vid_coords[4];
447     int width, height;
448 
449     width  = obj_surface->orig_width;
450     height = obj_surface->orig_height;
451 
452     tex_coords[0] = (float)src_rect->x / width;
453     tex_coords[1] = (float)src_rect->y / height;
454     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
455     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
456 
457     vid_coords[0] = dest_region->x + dst_rect->x;
458     vid_coords[1] = dest_region->y + dst_rect->y;
459     vid_coords[2] = vid_coords[0] + dst_rect->width;
460     vid_coords[3] = vid_coords[1] + dst_rect->height;
461 
462     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
463 }
464 
465 static void
i965_render_drawing_rectangle(VADriverContextP ctx)466 i965_render_drawing_rectangle(VADriverContextP ctx)
467 {
468     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
469     struct media_render_state *render_state = &drv_ctx->render_state;
470     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
471     struct region *dest_region = render_state->draw_region;
472 
473     BEGIN_BATCH(batch, 4);
474     OUT_BATCH(batch, RCMD_DRAWING_RECTANGLE | 2);
475     OUT_BATCH(batch, 0x00000000);
476     OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
477     OUT_BATCH(batch, 0x00000000);
478     ADVANCE_BATCH(batch);
479 }
480 
481 static void
i965_render_upload_image_palette(VADriverContextP ctx,struct object_image * obj_image,unsigned int alpha)482 i965_render_upload_image_palette(
483     VADriverContextP ctx,
484     struct object_image *obj_image,
485     unsigned int     alpha
486 )
487 {
488     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
489     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
490     unsigned int i;
491 
492     assert(obj_image);
493 
494     if (!obj_image)
495         return;
496 
497     if (obj_image->image.num_palette_entries == 0)
498         return;
499 
500     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
501     OUT_BATCH(batch, RCMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
502     /*fill palette*/
503     //int32_t out[16]; //0-23:color 23-31:alpha
504     for (i = 0; i < obj_image->image.num_palette_entries; i++)
505         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
506     ADVANCE_BATCH(batch);
507 }
508 
509 static void
gen9_clear_dest_region(VADriverContextP ctx)510 gen9_clear_dest_region(VADriverContextP ctx)
511 {
512     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
513     struct media_render_state *render_state = &drv_ctx->render_state;
514     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
515     struct region *dest_region = render_state->draw_region;
516     unsigned int blt_cmd, br13;
517     int pitch;
518 
519     blt_cmd = GEN8_XY_COLOR_BLT_CMD;
520     br13 = 0xf0 << 16;
521     pitch = dest_region->pitch;
522 
523     if (dest_region->cpp == 4) {
524         br13 |= BR13_8888;
525         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
526     } else {
527         assert(dest_region->cpp == 2);
528         br13 |= BR13_565;
529     }
530 
531     if (dest_region->tiling != I915_TILING_NONE) {
532         blt_cmd |= XY_COLOR_BLT_DST_TILED;
533         pitch /= 4;
534     }
535 
536     br13 |= pitch;
537 
538     media_batchbuffer_start_atomic_blt(batch, 24);
539     __BEGIN_BATCH(batch, 7, I915_EXEC_BLT);
540 
541     OUT_BATCH(batch, blt_cmd);
542     OUT_BATCH(batch, br13);
543     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
544     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
545               (dest_region->x + dest_region->width));
546     OUT_RELOC(batch, dest_region->bo,
547               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
548               0);
549     OUT_BATCH(batch, 0x0);
550     OUT_BATCH(batch, 0x0);
551     ADVANCE_BATCH(batch);
552     media_batchbuffer_end_atomic(batch);
553 }
554 
555 
556 /*
557  * for GEN8
558  */
559 #define ALIGNMENT       64
560 
561 static void
gen9_render_initialize(VADriverContextP ctx)562 gen9_render_initialize(VADriverContextP ctx)
563 {
564     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
565     struct media_render_state *render_state = &drv_ctx->render_state;
566     dri_bo *bo;
567     int size;
568     unsigned int end_offset;
569 
570     /* VERTEX BUFFER */
571     dri_bo_unreference(render_state->vb.vertex_buffer);
572     bo = dri_bo_alloc(drv_ctx->drv_data.bufmgr,
573                       "vertex buffer",
574                       4096,
575                       4096);
576     assert(bo);
577     render_state->vb.vertex_buffer = bo;
578 
579     /* WM */
580     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
581     bo = dri_bo_alloc(drv_ctx->drv_data.bufmgr,
582                       "surface state & binding table",
583                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
584                       4096);
585     assert(bo);
586     render_state->wm.surface_state_binding_table_bo = bo;
587 
588     render_state->curbe_size = 256;
589 
590     render_state->wm.sampler_count = 0;
591 
592     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
593 
594     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
595 
596     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
597 
598     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
599 			16 * sizeof(struct gen8_blend_state_rt);
600 
601     render_state->sf_clip_size = 1024;
602 
603     render_state->scissor_size = 1024;
604 
605     size = ALIGN(render_state->curbe_size, ALIGNMENT) +
606         ALIGN(render_state->sampler_size, ALIGNMENT) +
607         ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
608         ALIGN(render_state->cc_state_size, ALIGNMENT) +
609         ALIGN(render_state->blend_state_size, ALIGNMENT) +
610         ALIGN(render_state->sf_clip_size, ALIGNMENT) +
611         ALIGN(render_state->scissor_size, ALIGNMENT);
612 
613     dri_bo_unreference(render_state->dynamic_state.bo);
614     bo = dri_bo_alloc(drv_ctx->drv_data.bufmgr,
615                       "dynamic_state",
616                       size,
617                       4096);
618 
619     render_state->dynamic_state.bo = bo;
620 
621     end_offset = 0;
622     render_state->dynamic_state.end_offset = 0;
623 
624     /* Constant buffer offset */
625     render_state->curbe_offset = end_offset;
626     end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
627 
628     /* Sampler_state  */
629     render_state->sampler_offset = end_offset;
630     end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
631 
632     /* CC_VIEWPORT_state  */
633     render_state->cc_viewport_offset = end_offset;
634     end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
635 
636     /* CC_STATE_state  */
637     render_state->cc_state_offset = end_offset;
638     end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
639 
640     /* Blend_state  */
641     render_state->blend_state_offset = end_offset;
642     end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
643 
644     /* SF_CLIP_state  */
645     render_state->sf_clip_offset = end_offset;
646     end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
647 
648     /* SCISSOR_state  */
649     render_state->scissor_offset = end_offset;
650     end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
651 
652     /* update the end offset of dynamic_state */
653     render_state->dynamic_state.end_offset = end_offset;
654 
655 }
656 
657 static void
gen9_render_sampler(VADriverContextP ctx)658 gen9_render_sampler(VADriverContextP ctx)
659 {
660     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
661     struct media_render_state *render_state = &drv_ctx->render_state;
662     struct gen8_sampler_state *sampler_state;
663     int i;
664     unsigned char *cc_ptr;
665 
666     assert(render_state->wm.sampler_count > 0);
667     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
668 
669     dri_bo_map(render_state->dynamic_state.bo, 1);
670     assert(render_state->dynamic_state.bo->virtual);
671 
672     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
673 			render_state->sampler_offset;
674 
675     sampler_state = (struct gen8_sampler_state *) cc_ptr;
676 
677     for (i = 0; i < render_state->wm.sampler_count; i++) {
678         memset(sampler_state, 0, sizeof(*sampler_state));
679         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
680         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
681         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
682         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
683         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
684         sampler_state++;
685     }
686 
687     dri_bo_unmap(render_state->dynamic_state.bo);
688 }
689 
690 static void
gen9_render_blend_state(VADriverContextP ctx)691 gen9_render_blend_state(VADriverContextP ctx)
692 {
693     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
694     struct media_render_state *render_state = &drv_ctx->render_state;
695     struct gen8_global_blend_state *global_blend_state;
696     struct gen8_blend_state_rt *blend_state;
697     unsigned char *cc_ptr;
698 
699     dri_bo_map(render_state->dynamic_state.bo, 1);
700     assert(render_state->dynamic_state.bo->virtual);
701 
702     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
703 			render_state->blend_state_offset;
704 
705     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
706 
707     memset(global_blend_state, 0, render_state->blend_state_size);
708     /* Global blend state + blend_state for Render Target */
709     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
710     blend_state->blend1.logic_op_enable = 1;
711     blend_state->blend1.logic_op_func = 0xc;
712     blend_state->blend1.pre_blend_clamp_enable = 1;
713 
714     dri_bo_unmap(render_state->dynamic_state.bo);
715 }
716 
717 
718 static void
gen9_render_cc_viewport(VADriverContextP ctx)719 gen9_render_cc_viewport(VADriverContextP ctx)
720 {
721     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
722     struct media_render_state *render_state = &drv_ctx->render_state;
723     struct i965_cc_viewport *cc_viewport;
724     unsigned char *cc_ptr;
725 
726     dri_bo_map(render_state->dynamic_state.bo, 1);
727     assert(render_state->dynamic_state.bo->virtual);
728 
729     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
730 			render_state->cc_viewport_offset;
731 
732     cc_viewport = (struct i965_cc_viewport *) cc_ptr;
733 
734     memset(cc_viewport, 0, sizeof(*cc_viewport));
735 
736     cc_viewport->min_depth = -1.e35;
737     cc_viewport->max_depth = 1.e35;
738 
739     dri_bo_unmap(render_state->dynamic_state.bo);
740 }
741 
742 static void
gen9_render_color_calc_state(VADriverContextP ctx)743 gen9_render_color_calc_state(VADriverContextP ctx)
744 {
745     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
746     struct media_render_state *render_state = &drv_ctx->render_state;
747     struct gen6_color_calc_state *color_calc_state;
748     unsigned char *cc_ptr;
749 
750     dri_bo_map(render_state->dynamic_state.bo, 1);
751     assert(render_state->dynamic_state.bo->virtual);
752 
753     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
754 			render_state->cc_state_offset;
755 
756     color_calc_state = (struct gen6_color_calc_state *) cc_ptr;
757 
758     memset(color_calc_state, 0, sizeof(*color_calc_state));
759     color_calc_state->constant_r = 1.0;
760     color_calc_state->constant_g = 0.0;
761     color_calc_state->constant_b = 1.0;
762     color_calc_state->constant_a = 1.0;
763     dri_bo_unmap(render_state->dynamic_state.bo);
764 }
765 
766 #define PI  3.1415926
767 
768 static void
gen9_render_upload_constants(VADriverContextP ctx,struct object_surface * obj_surface,unsigned int flags)769 gen9_render_upload_constants(VADriverContextP ctx,
770                              struct object_surface *obj_surface,
771                              unsigned int flags)
772 {
773     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
774     struct media_render_state *render_state = &drv_ctx->render_state;
775     unsigned short *constant_buffer;
776     unsigned char *cc_ptr;
777     float *color_balance_base;
778     float contrast = (float)drv_ctx->contrast_attrib->value / DEFAULT_CONTRAST;
779     float brightness = (float)drv_ctx->brightness_attrib->value / 255; /* YUV is float in the shader */
780     float hue = (float)drv_ctx->hue_attrib->value / 180 * PI;
781     float saturation = (float)drv_ctx->saturation_attrib->value / DEFAULT_SATURATION;
782     float *yuv_to_rgb;
783     unsigned int color_flag;
784 
785     dri_bo_map(render_state->dynamic_state.bo, 1);
786     assert(render_state->dynamic_state.bo->virtual);
787 
788     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
789 			render_state->curbe_offset;
790 
791     constant_buffer = (unsigned short *) cc_ptr;
792 
793     if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
794         assert(obj_surface->fourcc == VA_FOURCC('Y', '8', '0', '0'));
795 
796         *constant_buffer = 2;
797     } else {
798         if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2'))
799             *constant_buffer = 1;
800         else
801             *constant_buffer = 0;
802     }
803 
804     if (drv_ctx->contrast_attrib->value == DEFAULT_CONTRAST &&
805         drv_ctx->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
806         drv_ctx->hue_attrib->value == DEFAULT_HUE &&
807         drv_ctx->saturation_attrib->value == DEFAULT_SATURATION)
808         constant_buffer[1] = 1; /* skip color balance transformation */
809     else
810         constant_buffer[1] = 0;
811 
812     color_balance_base = (float *)constant_buffer + 4;
813     *color_balance_base++ = contrast;
814     *color_balance_base++ = brightness;
815     *color_balance_base++ = cos(hue) * contrast * saturation;
816     *color_balance_base++ = sin(hue) * contrast * saturation;
817 
818     color_flag = flags & VA_SRC_COLOR_MASK;
819     yuv_to_rgb = (float *)constant_buffer + 8;
820     if (color_flag == VA_SRC_BT709)
821         memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
822     else if (color_flag == VA_SRC_SMPTE_240)
823         memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
824     else
825         memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));
826 
827     dri_bo_unmap(render_state->dynamic_state.bo);
828 }
829 
830 static void
gen9_render_setup_states(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect,unsigned int flags)831 gen9_render_setup_states(
832     VADriverContextP   ctx,
833     struct object_surface *obj_surface,
834     const VARectangle *src_rect,
835     const VARectangle *dst_rect,
836     unsigned int       flags
837 )
838 {
839     gen9_render_dest_surface_state(ctx, 0);
840     gen9_render_src_surfaces_state(ctx, obj_surface, flags);
841     gen9_render_sampler(ctx);
842     gen9_render_cc_viewport(ctx);
843     gen9_render_color_calc_state(ctx);
844     gen9_render_blend_state(ctx);
845     gen9_render_upload_constants(ctx, obj_surface, flags);
846     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
847 }
848 
849 static void
gen9_emit_state_base_address(VADriverContextP ctx)850 gen9_emit_state_base_address(VADriverContextP ctx)
851 {
852     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
853     struct media_render_state *render_state = &drv_ctx->render_state;
854     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
855 
856     BEGIN_BATCH(batch, 19);
857     OUT_BATCH(batch, RCMD_STATE_BASE_ADDRESS | (19 - 2));
858     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
859 	OUT_BATCH(batch, 0);
860 	OUT_BATCH(batch, 0);
861 	/*DW4 */
862     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
863 	OUT_BATCH(batch, 0);
864 
865 	/*DW6*/
866     /* Dynamic state base address */
867     OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
868 		0, BASE_ADDRESS_MODIFY);
869     OUT_BATCH(batch, 0);
870 
871 	/*DW8*/
872     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
873     OUT_BATCH(batch, 0);
874 
875 	/*DW10 */
876     /* Instruction base address */
877     OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
878     OUT_BATCH(batch, 0);
879 
880 	/*DW12 */
881     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
882     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
883     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
884     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
885 
886     /* the bindless surface state address */
887     OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
888     OUT_BATCH(batch, 0);
889     OUT_BATCH(batch, 0xFFFF0000);
890     ADVANCE_BATCH(batch);
891 }
892 
893 static void
gen9_emit_cc_state_pointers(VADriverContextP ctx)894 gen9_emit_cc_state_pointers(VADriverContextP ctx)
895 {
896     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
897     struct media_render_state *render_state = &drv_ctx->render_state;
898     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
899 
900     BEGIN_BATCH(batch, 2);
901     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
902     OUT_BATCH(batch, (render_state->cc_state_offset + 1));
903     ADVANCE_BATCH(batch);
904 
905     BEGIN_BATCH(batch, 2);
906     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
907     OUT_BATCH(batch, (render_state->blend_state_offset + 1));
908     ADVANCE_BATCH(batch);
909 
910 }
911 
912 static void
gen9_emit_vertices(VADriverContextP ctx)913 gen9_emit_vertices(VADriverContextP ctx)
914 {
915     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
916     struct media_render_state *render_state = &drv_ctx->render_state;
917     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
918 
919     BEGIN_BATCH(batch, 5);
920     OUT_BATCH(batch, RCMD_VERTEX_BUFFERS | (5 - 2));
921     OUT_BATCH(batch,
922               (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
923 	      (0 << GEN8_VB0_MOCS_SHIFT) |
924               GEN7_VB0_ADDRESS_MODIFYENABLE |
925               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
926     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
927     OUT_BATCH(batch, 0);
928     OUT_BATCH(batch, 12 * 4);
929     ADVANCE_BATCH(batch);
930 
931     /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */
932     BEGIN_BATCH(batch, 2);
933     OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
934     OUT_BATCH(batch,
935               _3DPRIM_RECTLIST);
936     ADVANCE_BATCH(batch);
937 
938     OUT_BATCH(batch, GEN7_3DSTATE_VF | (2 - 2));
939     OUT_BATCH(batch, 0);
940 
941     OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
942     OUT_BATCH(batch, 0);
943     OUT_BATCH(batch, 0);
944 
945     OUT_BATCH(batch, GEN8_3DSTATE_VF_SGVS | (2 - 2));
946     OUT_BATCH(batch, 0);
947 
948     BEGIN_BATCH(batch, 7);
949     OUT_BATCH(batch, RCMD_3DPRIMITIVE | (7 - 2));
950     OUT_BATCH(batch,
951               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
952     OUT_BATCH(batch, 3); /* vertex count per instance */
953     OUT_BATCH(batch, 0); /* start vertex offset */
954     OUT_BATCH(batch, 1); /* single instance */
955     OUT_BATCH(batch, 0); /* start instance location */
956     OUT_BATCH(batch, 0);
957     ADVANCE_BATCH(batch);
958 }
959 
960 static void
gen9_emit_vertex_element_state(VADriverContextP ctx)961 gen9_emit_vertex_element_state(VADriverContextP ctx)
962 {
963     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
964     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
965     int i;
966 
967     /*
968      * The VUE layout
969      * dword 0-3: pad (0, 0, 0. 0)
970      * dword 4-7: position (x, y, 1.0, 1.0),
971      * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
972      */
973 
974     /* Set up our vertex elements, sourced from the single vertex buffer. */
975     OUT_BATCH(batch, RCMD_VERTEX_ELEMENTS | (7 - 2));
976 
977     /* Element state 0. These are 4 dwords of 0 required for the VUE format.
978      * We don't really know or care what they do.
979      */
980 
981     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
982               GEN8_VE0_VALID |
983               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
984               (0 << VE0_OFFSET_SHIFT));
985     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
986               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
987               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
988               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
989 
990     /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
991     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
992               GEN8_VE0_VALID |
993               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
994               (8 << VE0_OFFSET_SHIFT));
995     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
996 	      (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
997               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
998               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
999 
1000     /* offset 0: u,v -> {U, V, 1.0, 1.0} */
1001     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
1002               GEN8_VE0_VALID |
1003               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
1004               (0 << VE0_OFFSET_SHIFT));
1005     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
1006               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
1007               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
1008               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
1009 
1010     /* Disable instancing for all vertex elements. */
1011     for (i = 0; i < 3; i++) {
1012         OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
1013         OUT_BATCH(batch, i);
1014         OUT_BATCH(batch, 0);
1015     }
1016 }
1017 
1018 static void
gen9_emit_vs_state(VADriverContextP ctx)1019 gen9_emit_vs_state(VADriverContextP ctx)
1020 {
1021     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1022     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1023 
1024     /* disable VS constant buffer */
1025     BEGIN_BATCH(batch, 11);
1026     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
1027     OUT_BATCH(batch, 0);
1028     OUT_BATCH(batch, 0);
1029     /* CS Buffer 0 */
1030     OUT_BATCH(batch, 0);
1031     OUT_BATCH(batch, 0);
1032     /* CS Buffer 1 */
1033     OUT_BATCH(batch, 0);
1034     OUT_BATCH(batch, 0);
1035     /* CS Buffer 2 */
1036     OUT_BATCH(batch, 0);
1037     OUT_BATCH(batch, 0);
1038     /* CS Buffer 3 */
1039     OUT_BATCH(batch, 0);
1040     OUT_BATCH(batch, 0);
1041     ADVANCE_BATCH(batch);
1042 
1043     BEGIN_BATCH(batch, 9);
1044     OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
1045     OUT_BATCH(batch, 0); /* without VS kernel */
1046     OUT_BATCH(batch, 0);
1047     /* VS shader dispatch flag */
1048     OUT_BATCH(batch, 0);
1049     OUT_BATCH(batch, 0);
1050     OUT_BATCH(batch, 0);
1051     /* DW6. VS shader GRF and URB buffer definition */
1052     OUT_BATCH(batch, 0);
1053     OUT_BATCH(batch, 0); /* pass-through */
1054     OUT_BATCH(batch, 0);
1055     ADVANCE_BATCH(batch);
1056 
1057     BEGIN_BATCH(batch, 2);
1058     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
1059     OUT_BATCH(batch, 0);
1060     ADVANCE_BATCH(batch);
1061 
1062     BEGIN_BATCH(batch, 2);
1063     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
1064     OUT_BATCH(batch, 0);
1065     ADVANCE_BATCH(batch);
1066 
1067 }
1068 
1069 /*
1070  * URB layout on GEN8
1071  * ----------------------------------------
1072  * | PS Push Constants (8KB) | VS entries |
1073  * ----------------------------------------
1074  */
1075 static void
gen9_emit_urb(VADriverContextP ctx)1076 gen9_emit_urb(VADriverContextP ctx)
1077 {
1078     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1079     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1080     unsigned int num_urb_entries = 64;
1081 
1082     /* The minimum urb entries is 64 */
1083 
1084     BEGIN_BATCH(batch, 2);
1085     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
1086     OUT_BATCH(batch, 0);
1087     ADVANCE_BATCH(batch);
1088 
1089     BEGIN_BATCH(batch, 2);
1090     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
1091     OUT_BATCH(batch, 0);
1092     ADVANCE_BATCH(batch);
1093 
1094     BEGIN_BATCH(batch, 2);
1095     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
1096     OUT_BATCH(batch, 0);
1097     ADVANCE_BATCH(batch);
1098 
1099     BEGIN_BATCH(batch, 2);
1100     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
1101     OUT_BATCH(batch, 0);
1102     ADVANCE_BATCH(batch);
1103 
1104     /* Size is 8Kbs and base address is 0Kb */
1105     BEGIN_BATCH(batch, 2);
1106     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
1107     /* Size is 8Kbs and base address is 0Kb */
1108     OUT_BATCH(batch,
1109 		(0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
1110 		(8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
1111     ADVANCE_BATCH(batch);
1112 
1113     BEGIN_BATCH(batch, 2);
1114     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
1115     OUT_BATCH(batch,
1116               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
1117               (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
1118               (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1119    ADVANCE_BATCH(batch);
1120 
1121    BEGIN_BATCH(batch, 2);
1122    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
1123    OUT_BATCH(batch,
1124              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1125              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1126    ADVANCE_BATCH(batch);
1127 
1128    BEGIN_BATCH(batch, 2);
1129    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
1130    OUT_BATCH(batch,
1131              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1132              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1133    ADVANCE_BATCH(batch);
1134 
1135    BEGIN_BATCH(batch, 2);
1136    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
1137    OUT_BATCH(batch,
1138              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1139              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1140    ADVANCE_BATCH(batch);
1141 }
1142 
1143 static void
gen9_emit_bypass_state(VADriverContextP ctx)1144 gen9_emit_bypass_state(VADriverContextP ctx)
1145 {
1146     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1147     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1148 
1149     /* bypass GS */
1150     BEGIN_BATCH(batch, 11);
1151     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
1152     OUT_BATCH(batch, 0);
1153     OUT_BATCH(batch, 0);
1154     OUT_BATCH(batch, 0);
1155     OUT_BATCH(batch, 0);
1156     OUT_BATCH(batch, 0);
1157     OUT_BATCH(batch, 0);
1158     OUT_BATCH(batch, 0);
1159     OUT_BATCH(batch, 0);
1160     OUT_BATCH(batch, 0);
1161     OUT_BATCH(batch, 0);
1162     ADVANCE_BATCH(batch);
1163 
1164     BEGIN_BATCH(batch, 10);
1165     OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
1166     /* GS shader address */
1167     OUT_BATCH(batch, 0); /* without GS kernel */
1168     OUT_BATCH(batch, 0);
1169     /* DW3. GS shader dispatch flag */
1170     OUT_BATCH(batch, 0);
1171     OUT_BATCH(batch, 0);
1172     OUT_BATCH(batch, 0);
1173     /* DW6. GS shader GRF and URB offset/length */
1174     OUT_BATCH(batch, 0);
1175     OUT_BATCH(batch, 0); /* pass-through */
1176     OUT_BATCH(batch, 0);
1177     OUT_BATCH(batch, 0);
1178     ADVANCE_BATCH(batch);
1179 
1180     BEGIN_BATCH(batch, 2);
1181     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
1182     OUT_BATCH(batch, 0);
1183     ADVANCE_BATCH(batch);
1184 
1185     BEGIN_BATCH(batch, 2);
1186     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
1187     OUT_BATCH(batch, 0);
1188     ADVANCE_BATCH(batch);
1189 
1190     /* disable HS */
1191     BEGIN_BATCH(batch, 11);
1192     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
1193     OUT_BATCH(batch, 0);
1194     OUT_BATCH(batch, 0);
1195     OUT_BATCH(batch, 0);
1196     OUT_BATCH(batch, 0);
1197     OUT_BATCH(batch, 0);
1198     OUT_BATCH(batch, 0);
1199     OUT_BATCH(batch, 0);
1200     OUT_BATCH(batch, 0);
1201     OUT_BATCH(batch, 0);
1202     OUT_BATCH(batch, 0);
1203     ADVANCE_BATCH(batch);
1204 
1205     BEGIN_BATCH(batch, 9);
1206     OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
1207     OUT_BATCH(batch, 0);
1208     /*DW2. HS pass-through */
1209     OUT_BATCH(batch, 0);
1210     /*DW3. HS shader address */
1211     OUT_BATCH(batch, 0);
1212     OUT_BATCH(batch, 0);
1213     /*DW5. HS shader flag. URB offset/length and so on */
1214     OUT_BATCH(batch, 0);
1215     OUT_BATCH(batch, 0);
1216     OUT_BATCH(batch, 0);
1217     OUT_BATCH(batch, 0);
1218     ADVANCE_BATCH(batch);
1219 
1220     BEGIN_BATCH(batch, 2);
1221     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
1222     OUT_BATCH(batch, 0);
1223     ADVANCE_BATCH(batch);
1224 
1225     BEGIN_BATCH(batch, 2);
1226     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
1227     OUT_BATCH(batch, 0);
1228     ADVANCE_BATCH(batch);
1229 
1230     /* Disable TE */
1231     BEGIN_BATCH(batch, 4);
1232     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
1233     OUT_BATCH(batch, 0);
1234     OUT_BATCH(batch, 0);
1235     OUT_BATCH(batch, 0);
1236     ADVANCE_BATCH(batch);
1237 
1238     /* Disable DS */
1239     BEGIN_BATCH(batch, 11);
1240     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
1241     OUT_BATCH(batch, 0);
1242     OUT_BATCH(batch, 0);
1243     OUT_BATCH(batch, 0);
1244     OUT_BATCH(batch, 0);
1245     OUT_BATCH(batch, 0);
1246     OUT_BATCH(batch, 0);
1247     OUT_BATCH(batch, 0);
1248     OUT_BATCH(batch, 0);
1249     OUT_BATCH(batch, 0);
1250     OUT_BATCH(batch, 0);
1251     ADVANCE_BATCH(batch);
1252 
1253     BEGIN_BATCH(batch, 11);
1254     OUT_BATCH(batch, GEN7_3DSTATE_DS | (11 - 2));
1255     /* DW1. DS shader pointer */
1256     OUT_BATCH(batch, 0);
1257     OUT_BATCH(batch, 0);
1258     /* DW3-5. DS shader dispatch flag.*/
1259     OUT_BATCH(batch, 0);
1260     OUT_BATCH(batch, 0);
1261     OUT_BATCH(batch, 0);
1262     /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
1263     OUT_BATCH(batch, 0);
1264     OUT_BATCH(batch, 0);
1265     /* DW8. DS shader output URB */
1266     OUT_BATCH(batch, 0);
1267 
1268     /* Dual-patch kernel start pointer */
1269     OUT_BATCH(batch, 0);
1270     OUT_BATCH(batch, 0);
1271     ADVANCE_BATCH(batch);
1272 
1273     BEGIN_BATCH(batch, 2);
1274     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
1275     OUT_BATCH(batch, 0);
1276     ADVANCE_BATCH(batch);
1277 
1278     BEGIN_BATCH(batch, 2);
1279     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
1280     OUT_BATCH(batch, 0);
1281     ADVANCE_BATCH(batch);
1282 
1283     /* Disable STREAMOUT */
1284     BEGIN_BATCH(batch, 5);
1285     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
1286     OUT_BATCH(batch, 0);
1287     OUT_BATCH(batch, 0);
1288     OUT_BATCH(batch, 0);
1289     OUT_BATCH(batch, 0);
1290     ADVANCE_BATCH(batch);
1291 }
1292 
1293 static void
gen9_emit_invarient_states(VADriverContextP ctx)1294 gen9_emit_invarient_states(VADriverContextP ctx)
1295 {
1296     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1297     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1298 
1299     BEGIN_BATCH(batch, 1);
1300     OUT_BATCH(batch, RCMD_PIPELINE_SELECT | PIPELINE_SELECT_3D |
1301                      GEN9_PIPELINE_SELECTION_MASK);
1302     ADVANCE_BATCH(batch);
1303 
1304     BEGIN_BATCH(batch, 2);
1305     OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
1306     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1307               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1308     ADVANCE_BATCH(batch);
1309 
1310     /* Update 3D Multisample pattern */
1311     BEGIN_BATCH(batch, 9);
1312     OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
1313     OUT_BATCH(batch, 0);
1314     OUT_BATCH(batch, 0);
1315     OUT_BATCH(batch, 0);
1316     OUT_BATCH(batch, 0);
1317     OUT_BATCH(batch, 0);
1318     OUT_BATCH(batch, 0);
1319     OUT_BATCH(batch, 0);
1320     OUT_BATCH(batch, 0);
1321     ADVANCE_BATCH(batch);
1322 
1323 
1324     BEGIN_BATCH(batch, 2);
1325     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1326     OUT_BATCH(batch, 1);
1327     ADVANCE_BATCH(batch);
1328 
1329     /* Set system instruction pointer */
1330     BEGIN_BATCH(batch, 3);
1331     OUT_BATCH(batch, RCMD_STATE_SIP | 0);
1332     OUT_BATCH(batch, 0);
1333     OUT_BATCH(batch, 0);
1334     ADVANCE_BATCH(batch);
1335 }
1336 
1337 static void
gen9_emit_clip_state(VADriverContextP ctx)1338 gen9_emit_clip_state(VADriverContextP ctx)
1339 {
1340     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1341     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1342 
1343     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1344     OUT_BATCH(batch, 0);
1345     OUT_BATCH(batch, 0); /* pass-through */
1346     OUT_BATCH(batch, 0);
1347 }
1348 
1349 static void
gen9_emit_sf_state(VADriverContextP ctx)1350 gen9_emit_sf_state(VADriverContextP ctx)
1351 {
1352     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1353     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1354 
1355     BEGIN_BATCH(batch, 5);
1356     OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
1357     OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
1358     OUT_BATCH(batch, 0);
1359     OUT_BATCH(batch, 0);
1360     OUT_BATCH(batch, 0);
1361     ADVANCE_BATCH(batch);
1362 
1363 
1364     BEGIN_BATCH(batch, 6);
1365     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (6 - 2));
1366     OUT_BATCH(batch,
1367 	      (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
1368 	      (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
1369               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
1370               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
1371               (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
1372     OUT_BATCH(batch, 0);
1373     OUT_BATCH(batch, 0);
1374     OUT_BATCH(batch, GEN9_SBE_ACTIVE_COMPONENT_XYZW);
1375     OUT_BATCH(batch, 0);
1376     ADVANCE_BATCH(batch);
1377 
1378     /* SBE for backend setup */
1379     BEGIN_BATCH(batch, 11);
1380     OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
1381     OUT_BATCH(batch, 0);
1382     OUT_BATCH(batch, 0);
1383     OUT_BATCH(batch, 0);
1384     OUT_BATCH(batch, 0);
1385     OUT_BATCH(batch, 0);
1386     OUT_BATCH(batch, 0);
1387     OUT_BATCH(batch, 0);
1388     OUT_BATCH(batch, 0);
1389     OUT_BATCH(batch, 0);
1390     OUT_BATCH(batch, 0);
1391     ADVANCE_BATCH(batch);
1392 
1393     BEGIN_BATCH(batch, 4);
1394     OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
1395     OUT_BATCH(batch, 0);
1396     OUT_BATCH(batch, 0);
1397     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
1398     ADVANCE_BATCH(batch);
1399 }
1400 
1401 static void
gen9_emit_wm_state(VADriverContextP ctx,int kernel)1402 gen9_emit_wm_state(VADriverContextP ctx, int kernel)
1403 {
1404     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1405     struct media_render_state *render_state = &drv_ctx->render_state;
1406     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1407     unsigned int num_samples = 0;
1408     unsigned int max_threads;
1409 
1410     max_threads = render_state->max_wm_threads - 2;
1411 
1412     BEGIN_BATCH(batch, 2);
1413     OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
1414     OUT_BATCH(batch,
1415               (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
1416     ADVANCE_BATCH(batch);
1417 
1418     if (kernel == PS_KERNEL) {
1419 	BEGIN_BATCH(batch, 2);
1420 	OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
1421 	OUT_BATCH(batch,
1422 		GEN8_PS_BLEND_HAS_WRITEABLE_RT);
1423 	ADVANCE_BATCH(batch);
1424     } else if (kernel == PS_SUBPIC_KERNEL) {
1425 	BEGIN_BATCH(batch, 2);
1426 	OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
1427 	OUT_BATCH(batch,
1428 		(GEN8_PS_BLEND_HAS_WRITEABLE_RT |
1429 		 GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
1430 		 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
1431 		 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
1432 		 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
1433 		 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
1434 	ADVANCE_BATCH(batch);
1435     }
1436 
1437     BEGIN_BATCH(batch, 2);
1438     OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
1439     OUT_BATCH(batch,
1440               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
1441     ADVANCE_BATCH(batch);
1442 
1443     BEGIN_BATCH(batch, 11);
1444     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
1445     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
1446     OUT_BATCH(batch, 0);
1447     /*DW3-4. Constant buffer 0 */
1448     OUT_BATCH(batch, render_state->curbe_offset);
1449     OUT_BATCH(batch, 0);
1450 
1451     /*DW5-10. Constant buffer 1-3 */
1452     OUT_BATCH(batch, 0);
1453     OUT_BATCH(batch, 0);
1454     OUT_BATCH(batch, 0);
1455     OUT_BATCH(batch, 0);
1456     OUT_BATCH(batch, 0);
1457     OUT_BATCH(batch, 0);
1458     ADVANCE_BATCH(batch);
1459 
1460     BEGIN_BATCH(batch, 12);
1461     OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
1462     /* PS shader address */
1463     OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);
1464 
1465     OUT_BATCH(batch, 0);
1466     /* DW3. PS shader flag .Binding table cnt/sample cnt */
1467     OUT_BATCH(batch,
1468               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
1469               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
1470               GEN7_PS_VECTOR_MASK_ENABLE);
1471     /* DW4-5. Scatch space */
1472     OUT_BATCH(batch, 0); /* scratch space base offset */
1473     OUT_BATCH(batch, 0);
1474     /* DW6. PS shader threads. */
1475     OUT_BATCH(batch,
1476               ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
1477               GEN7_PS_PUSH_CONSTANT_ENABLE |
1478               GEN7_PS_16_DISPATCH_ENABLE);
1479     /* DW7. PS shader GRF */
1480     OUT_BATCH(batch,
1481               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
1482     OUT_BATCH(batch, 0); /* kernel 1 pointer */
1483     OUT_BATCH(batch, 0);
1484     OUT_BATCH(batch, 0); /* kernel 2 pointer */
1485     OUT_BATCH(batch, 0);
1486     ADVANCE_BATCH(batch);
1487 
1488     BEGIN_BATCH(batch, 2);
1489     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
1490     OUT_BATCH(batch, RENDER_BINDING_TABLE_OFFSET);
1491     ADVANCE_BATCH(batch);
1492 }
1493 
1494 static void
gen9_emit_depth_buffer_state(VADriverContextP ctx)1495 gen9_emit_depth_buffer_state(VADriverContextP ctx)
1496 {
1497     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1498     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1499 
1500     BEGIN_BATCH(batch, 8);
1501     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
1502     OUT_BATCH(batch,
1503               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
1504               (I965_SURFACE_NULL << 29));
1505     /* DW2-3. Depth Buffer Address */
1506     OUT_BATCH(batch, 0);
1507     OUT_BATCH(batch, 0);
1508     /* DW4-7. Surface structure */
1509     OUT_BATCH(batch, 0);
1510     OUT_BATCH(batch, 0);
1511     OUT_BATCH(batch, 0);
1512     OUT_BATCH(batch, 0);
1513     ADVANCE_BATCH(batch);
1514 
1515     /* Update the Hier Depth buffer */
1516     BEGIN_BATCH(batch, 5);
1517     OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
1518     OUT_BATCH(batch, 0);
1519     OUT_BATCH(batch, 0);
1520     OUT_BATCH(batch, 0);
1521     OUT_BATCH(batch, 0);
1522     ADVANCE_BATCH(batch);
1523 
1524     /* Update the stencil buffer */
1525     BEGIN_BATCH(batch, 5);
1526     OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
1527     OUT_BATCH(batch, 0);
1528     OUT_BATCH(batch, 0);
1529     OUT_BATCH(batch, 0);
1530     OUT_BATCH(batch, 0);
1531     ADVANCE_BATCH(batch);
1532 
1533     BEGIN_BATCH(batch, 3);
1534     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
1535     OUT_BATCH(batch, 0);
1536     OUT_BATCH(batch, 0);
1537     ADVANCE_BATCH(batch);
1538 }
1539 
1540 static void
gen9_emit_depth_stencil_state(VADriverContextP ctx)1541 gen9_emit_depth_stencil_state(VADriverContextP ctx)
1542 {
1543     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1544     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1545 
1546     BEGIN_BATCH(batch, 3);
1547     OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
1548     OUT_BATCH(batch, 0);
1549     OUT_BATCH(batch, 0);
1550     ADVANCE_BATCH(batch);
1551 }
1552 
1553 static void
gen9_emit_wm_hz_op(VADriverContextP ctx)1554 gen9_emit_wm_hz_op(VADriverContextP ctx)
1555 {
1556     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1557     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1558 
1559     BEGIN_BATCH(batch, 5);
1560     OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
1561     OUT_BATCH(batch, 0);
1562     OUT_BATCH(batch, 0);
1563     OUT_BATCH(batch, 0);
1564     OUT_BATCH(batch, 0);
1565     ADVANCE_BATCH(batch);
1566 }
1567 
1568 static void
gen9_emit_viewport_state_pointers(VADriverContextP ctx)1569 gen9_emit_viewport_state_pointers(VADriverContextP ctx)
1570 {
1571     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1572     struct media_render_state *render_state = &drv_ctx->render_state;
1573     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1574 
1575     BEGIN_BATCH(batch, 2);
1576     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
1577     OUT_BATCH(batch, render_state->cc_viewport_offset);
1578     ADVANCE_BATCH(batch);
1579 
1580     BEGIN_BATCH(batch, 2);
1581     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
1582     OUT_BATCH(batch, 0);
1583     ADVANCE_BATCH(batch);
1584 }
1585 
1586 static void
gen9_emit_sampler_state_pointers(VADriverContextP ctx)1587 gen9_emit_sampler_state_pointers(VADriverContextP ctx)
1588 {
1589     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1590     struct media_render_state *render_state = &drv_ctx->render_state;
1591     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1592 
1593     BEGIN_BATCH(batch, 2);
1594     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
1595     OUT_BATCH(batch, render_state->sampler_offset);
1596     ADVANCE_BATCH(batch);
1597 }
1598 
1599 
1600 static void
gen9_emit_drawing_rectangle(VADriverContextP ctx)1601 gen9_emit_drawing_rectangle(VADriverContextP ctx)
1602 {
1603     i965_render_drawing_rectangle(ctx);
1604 }
1605 
1606 static void
gen9_render_emit_states(VADriverContextP ctx,int kernel)1607 gen9_render_emit_states(VADriverContextP ctx, int kernel)
1608 {
1609     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1610     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1611 
1612     media_batchbuffer_start_atomic(batch, 0x1000);
1613     media_batchbuffer_emit_mi_flush(batch);
1614     gen9_emit_invarient_states(ctx);
1615     gen9_emit_state_base_address(ctx);
1616     gen9_emit_viewport_state_pointers(ctx);
1617     gen9_emit_urb(ctx);
1618     gen9_emit_cc_state_pointers(ctx);
1619     gen9_emit_sampler_state_pointers(ctx);
1620     gen9_emit_wm_hz_op(ctx);
1621     gen9_emit_bypass_state(ctx);
1622     gen9_emit_vs_state(ctx);
1623     gen9_emit_clip_state(ctx);
1624     gen9_emit_sf_state(ctx);
1625     gen9_emit_depth_stencil_state(ctx);
1626     gen9_emit_wm_state(ctx, kernel);
1627     gen9_emit_depth_buffer_state(ctx);
1628     gen9_emit_drawing_rectangle(ctx);
1629     gen9_emit_vertex_element_state(ctx);
1630     gen9_emit_vertices(ctx);
1631     media_batchbuffer_end_atomic(batch);
1632 }
1633 
1634 static void
gen9_render_put_surface(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect,unsigned int flags)1635 gen9_render_put_surface(
1636     VADriverContextP   ctx,
1637     struct object_surface *obj_surface,
1638     const VARectangle *src_rect,
1639     const VARectangle *dst_rect,
1640     unsigned int       flags
1641 )
1642 {
1643     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1644     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1645 
1646     gen9_render_initialize(ctx);
1647     gen9_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
1648     gen9_clear_dest_region(ctx);
1649     gen9_render_emit_states(ctx, PS_KERNEL);
1650     media_batchbuffer_flush(batch);
1651 }
1652 
1653 static void
gen9_subpicture_render_blend_state(VADriverContextP ctx)1654 gen9_subpicture_render_blend_state(VADriverContextP ctx)
1655 {
1656     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1657     struct media_render_state *render_state = &drv_ctx->render_state;
1658     struct gen8_global_blend_state *global_blend_state;
1659     struct gen8_blend_state_rt *blend_state;
1660     unsigned char *cc_ptr;
1661 
1662     dri_bo_map(render_state->dynamic_state.bo, 1);
1663     assert(render_state->dynamic_state.bo->virtual);
1664 
1665     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1666 			render_state->blend_state_offset;
1667 
1668     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
1669 
1670     memset(global_blend_state, 0, render_state->blend_state_size);
1671     /* Global blend state + blend_state for Render Target */
1672     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
1673     blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
1674     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1675     blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1676     blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
1677     blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1678     blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1679     blend_state->blend0.colorbuf_blend = 1;
1680     blend_state->blend1.post_blend_clamp_enable = 1;
1681     blend_state->blend1.pre_blend_clamp_enable = 1;
1682     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
1683 
1684     dri_bo_unmap(render_state->dynamic_state.bo);
1685 }
1686 
1687 static void
gen9_subpic_render_upload_constants(VADriverContextP ctx,struct object_surface * obj_surface)1688 gen9_subpic_render_upload_constants(VADriverContextP ctx,
1689                                     struct object_surface *obj_surface)
1690 {
1691     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1692     struct media_render_state *render_state = &drv_ctx->render_state;
1693     float *constant_buffer;
1694     float global_alpha = 1.0;
1695     unsigned int index = obj_surface->subpic_render_idx;
1696     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1697     unsigned char *cc_ptr;
1698 
1699     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1700         global_alpha = obj_subpic->global_alpha;
1701     }
1702 
1703 
1704     dri_bo_map(render_state->dynamic_state.bo, 1);
1705     assert(render_state->dynamic_state.bo->virtual);
1706 
1707     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1708 				render_state->curbe_offset;
1709 
1710     constant_buffer = (float *) cc_ptr;
1711     *constant_buffer = global_alpha;
1712 
1713     dri_bo_unmap(render_state->dynamic_state.bo);
1714 }
1715 
1716 static void
gen9_subpicture_render_setup_states(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect)1717 gen9_subpicture_render_setup_states(
1718     VADriverContextP   ctx,
1719     struct object_surface *obj_surface,
1720     const VARectangle *src_rect,
1721     const VARectangle *dst_rect
1722 )
1723 {
1724     gen9_render_dest_surface_state(ctx, 0);
1725     gen9_subpic_render_src_surfaces_state(ctx, obj_surface);
1726     gen9_render_sampler(ctx);
1727     gen9_render_cc_viewport(ctx);
1728     gen9_render_color_calc_state(ctx);
1729     gen9_subpicture_render_blend_state(ctx);
1730     gen9_subpic_render_upload_constants(ctx, obj_surface);
1731     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
1732 }
1733 
1734 static void
gen9_render_put_subpicture(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect)1735 gen9_render_put_subpicture(
1736     VADriverContextP   ctx,
1737     struct object_surface *obj_surface,
1738     const VARectangle *src_rect,
1739     const VARectangle *dst_rect
1740 )
1741 {
1742     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1743     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1744     unsigned int index = obj_surface->subpic_render_idx;
1745     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1746 
1747     assert(obj_subpic);
1748     gen9_render_initialize(ctx);
1749     gen9_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
1750     gen9_render_emit_states(ctx, PS_SUBPIC_KERNEL);
1751     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1752     media_batchbuffer_flush(batch);
1753 }
1754 
1755 static void
gen9_render_terminate(VADriverContextP ctx)1756 gen9_render_terminate(VADriverContextP ctx)
1757 {
1758     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1759     struct media_render_state *render_state = &drv_ctx->render_state;
1760 
1761     dri_bo_unreference(render_state->vb.vertex_buffer);
1762     render_state->vb.vertex_buffer = NULL;
1763 
1764     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1765     render_state->wm.surface_state_binding_table_bo = NULL;
1766 
1767     if (render_state->instruction_state.bo) {
1768         dri_bo_unreference(render_state->instruction_state.bo);
1769         render_state->instruction_state.bo = NULL;
1770     }
1771 
1772     if (render_state->dynamic_state.bo) {
1773         dri_bo_unreference(render_state->dynamic_state.bo);
1774         render_state->dynamic_state.bo = NULL;
1775     }
1776 
1777     if (render_state->indirect_state.bo) {
1778         dri_bo_unreference(render_state->indirect_state.bo);
1779         render_state->indirect_state.bo = NULL;
1780     }
1781 
1782     if (render_state->draw_region) {
1783         dri_bo_unreference(render_state->draw_region->bo);
1784         free(render_state->draw_region);
1785         render_state->draw_region = NULL;
1786     }
1787 }
1788 
1789 bool
media_drv_gen9_render_init(VADriverContextP ctx)1790 media_drv_gen9_render_init(VADriverContextP ctx)
1791 {
1792     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1793     struct media_render_state *render_state = &drv_ctx->render_state;
1794     int i, kernel_size;
1795     unsigned int kernel_offset, end_offset;
1796     unsigned char *kernel_ptr;
1797     struct media_render_kernel *kernel;
1798 
1799     render_state->render_put_surface = gen9_render_put_surface;
1800     render_state->render_put_subpicture = gen9_render_put_subpicture;
1801     render_state->render_terminate = gen9_render_terminate;
1802     render_state->max_wm_threads = 64;
1803 
1804     memcpy(render_state->render_kernels, render_kernels_gen9,
1805 			sizeof(render_state->render_kernels));
1806 
1807     kernel_size = 4096;
1808 
1809     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1810         kernel = &render_state->render_kernels[i];
1811 
1812         if (!kernel->size)
1813             continue;
1814 
1815         kernel_size += ALIGN(kernel->size, ALIGNMENT);
1816     }
1817 
1818     render_state->instruction_state.bo = dri_bo_alloc(drv_ctx->drv_data.bufmgr,
1819                                   "kernel shader",
1820                                   kernel_size,
1821                                   0x1000);
1822     if (render_state->instruction_state.bo == NULL) {
1823         return false;
1824     }
1825 
1826     assert(render_state->instruction_state.bo);
1827 
1828     render_state->instruction_state.bo_size = kernel_size;
1829     render_state->instruction_state.end_offset = 0;
1830     end_offset = 0;
1831 
1832     dri_bo_map(render_state->instruction_state.bo, 1);
1833     kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
1834     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
1835         kernel = &render_state->render_kernels[i];
1836         kernel_offset = end_offset;
1837         kernel->kernel_offset = kernel_offset;
1838 
1839         if (!kernel->size)
1840             continue;
1841 
1842         memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1843 
1844         end_offset += ALIGN(kernel->size, ALIGNMENT);
1845     }
1846 
1847     render_state->instruction_state.end_offset = end_offset;
1848 
1849     dri_bo_unmap(render_state->instruction_state.bo);
1850 
1851     return true;
1852 }
1853