1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Keith Packard <keithp@keithp.com>
26  *    Xiang Haihao <haihao.xiang@intel.com>
27  *    Zhao Yakui <yakui.zhao@intel.com>
28  *
29  */
30 
31 /*
32  * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
33  */
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <assert.h>
39 #include <math.h>
40 
41 #include <va/va_drmcommon.h>
42 
43 #include "media_drv_defines.h"
44 
45 #include "media_drv_util.h"
46 #include "media_drv_driver.h"
47 #include "media_drv_render.h"
48 #include "media_drv_surface.h"
49 #include "media_drv_init.h"
50 
51 #include "media_drv_batchbuffer.h"
52 #include "media_render_common.h"
53 
54 #define SF_KERNEL_NUM_GRF       16
55 #define SF_MAX_THREADS          1
56 
57 #define PS_KERNEL_NUM_GRF       48
58 #define PS_MAX_THREADS          32
59 
60 /* Programs for Gen8 */
61 static const uint32_t sf_kernel_static_gen8[][4] ={
62 
63 };
64 static const uint32_t ps_kernel_static_gen8[][4] = {
65 #include "shaders/render/exa_wm_src_affine.g8b"
66 #include "shaders/render/exa_wm_src_sample_planar.g8b"
67 #include "shaders/render/exa_wm_yuv_color_balance.g8b"
68 #include "shaders/render/exa_wm_yuv_rgb.g8b"
69 #include "shaders/render/exa_wm_write.g8b"
70 };
71 
72 static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
73 #include "shaders/render/exa_wm_src_affine.g8b"
74 #include "shaders/render/exa_wm_src_sample_argb.g8b"
75 #include "shaders/render/exa_wm_write.g8b"
76 };
77 
78 
/* Bytes reserved for one surface-state entry in the surface-state/binding-table bo. */
#define RENDER_SURFACE_STATE_PADDED_SIZE       (sizeof(struct gen8_surface_state))

/*
 * Byte offset of surface-state entry `index`.  The argument is parenthesized
 * so that expression arguments (e.g. `base + 1`) scale correctly.
 */
#define RENDER_SURFACE_STATE_OFFSET(index)     (RENDER_SURFACE_STATE_PADDED_SIZE * (index))
/* The binding table starts right after the last surface-state entry. */
#define RENDER_BINDING_TABLE_OFFSET            RENDER_SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
83 
84 #define DEFAULT_BRIGHTNESS      0
85 #define DEFAULT_CONTRAST        10
86 #define DEFAULT_HUE             0
87 #define DEFAULT_SATURATION      10
88 
89 static struct media_render_kernel render_kernels_gen8[] = {
90     {
91         "PS",
92         PS_KERNEL,
93         ps_kernel_static_gen8,
94         sizeof(ps_kernel_static_gen8),
95         NULL
96     },
97 
98     {
99         "PS_SUBPIC",
100         PS_SUBPIC_KERNEL,
101         ps_subpic_kernel_static_gen8,
102         sizeof(ps_subpic_kernel_static_gen8),
103         NULL
104     }
105 };
106 
107 #define URB_VS_ENTRIES	      8
108 #define URB_VS_ENTRY_SIZE     1
109 
110 #define URB_GS_ENTRIES	      0
111 #define URB_GS_ENTRY_SIZE     0
112 
113 #define URB_CLIP_ENTRIES      0
114 #define URB_CLIP_ENTRY_SIZE   0
115 
116 #define URB_SF_ENTRIES	      1
117 #define URB_SF_ENTRY_SIZE     2
118 
119 #define URB_CS_ENTRIES	      4
120 #define URB_CS_ENTRY_SIZE     4
121 
122 static float yuv_to_rgb_bt601[3][4] = {
123 {1.164,        0,        1.596,        -0.06275,},
124 {1.164,        -0.392,   -0.813,       -0.50196,},
125 {1.164,        2.017,    0,            -0.50196,},
126 };
127 
128 static float yuv_to_rgb_bt709[3][4] = {
129 {1.164,        0,        1.793,        -0.06275,},
130 {1.164,        -0.213,   -0.533,       -0.50196,},
131 {1.164,        2.112,    0,            -0.50196,},
132 };
133 
134 static float yuv_to_rgb_smpte_240[3][4] = {
135 {1.164,        0,        1.794,        -0.06275,},
136 {1.164,        -0.258,   -0.5425,      -0.50196,},
137 {1.164,        2.078,    0,            -0.50196,},
138 };
139 
140 static void
gen8_render_set_surface_tiling(struct gen8_surface_state * ss,uint32_t tiling)141 gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
142 {
143    switch (tiling) {
144    case I915_TILING_NONE:
145       ss->ss0.tiled_surface = 0;
146       ss->ss0.tile_walk = 0;
147       break;
148    case I915_TILING_X:
149       ss->ss0.tiled_surface = 1;
150       ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
151       break;
152    case I915_TILING_Y:
153       ss->ss0.tiled_surface = 1;
154       ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
155       break;
156    }
157 }
158 
159 /* Set "Shader Channel Select" for GEN8+ */
160 void
gen8_render_set_surface_scs(struct gen8_surface_state * ss)161 gen8_render_set_surface_scs(struct gen8_surface_state *ss)
162 {
163     ss->ss7.shader_chanel_select_r = RENDER_HSW_SCS_RED;
164     ss->ss7.shader_chanel_select_g = RENDER_HSW_SCS_GREEN;
165     ss->ss7.shader_chanel_select_b = RENDER_HSW_SCS_BLUE;
166     ss->ss7.shader_chanel_select_a = RENDER_HSW_SCS_ALPHA;
167 }
168 
169 static void
gen8_render_set_surface_state(struct gen8_surface_state * ss,dri_bo * bo,unsigned long offset,int width,int height,int pitch,int format,unsigned int flags)170 gen8_render_set_surface_state(
171     struct gen8_surface_state *ss,
172     dri_bo                    *bo,
173     unsigned long              offset,
174     int                        width,
175     int                        height,
176     int                        pitch,
177     int                        format,
178     unsigned int               flags
179 )
180 {
181     unsigned int tiling;
182     unsigned int swizzle;
183 
184     memset(ss, 0, sizeof(*ss));
185 
186 
187     ss->ss0.surface_type = I965_SURFACE_2D;
188     ss->ss0.surface_format = format;
189 
190     ss->ss8.base_addr = bo->offset + offset;
191 
192     ss->ss2.width = width - 1;
193     ss->ss2.height = height - 1;
194 
195     ss->ss3.pitch = pitch - 1;
196 
197     /* Always set 1(align 4 mode) per B-spec */
198     ss->ss0.vertical_alignment = 1;
199     ss->ss0.horizontal_alignment = 1;
200 
201     dri_bo_get_tiling(bo, &tiling, &swizzle);
202     gen8_render_set_surface_tiling(ss, tiling);
203 }
204 
205 static void
gen8_render_src_surface_state(VADriverContextP ctx,int index,dri_bo * region,unsigned long offset,int w,int h,int pitch,int format,unsigned int flags)206 gen8_render_src_surface_state(
207     VADriverContextP ctx,
208     int              index,
209     dri_bo          *region,
210     unsigned long    offset,
211     int              w,
212     int              h,
213     int              pitch,
214     int              format,
215     unsigned int     flags
216 )
217 {
218     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
219     struct media_render_state *render_state = &drv_ctx->render_state;
220     void *ss;
221     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
222 
223     assert(index < MAX_RENDER_SURFACES);
224 
225     dri_bo_map(ss_bo, 1);
226     assert(ss_bo->virtual);
227     ss = (char *)ss_bo->virtual + RENDER_SURFACE_STATE_OFFSET(index);
228 
229     gen8_render_set_surface_state(ss,
230                                   region, offset,
231                                   w, h,
232                                   pitch, format, flags);
233     gen8_render_set_surface_scs(ss);
234     dri_bo_emit_reloc(ss_bo,
235                       I915_GEM_DOMAIN_SAMPLER, 0,
236                       offset,
237                       RENDER_SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
238                       region);
239 
240     ((unsigned int *)((char *)ss_bo->virtual + RENDER_BINDING_TABLE_OFFSET))[index] = RENDER_SURFACE_STATE_OFFSET(index);
241     dri_bo_unmap(ss_bo);
242     render_state->wm.sampler_count++;
243 }
244 
245 static void
gen8_render_src_surfaces_state(VADriverContextP ctx,struct object_surface * obj_surface,unsigned int flags)246 gen8_render_src_surfaces_state(
247     VADriverContextP ctx,
248     struct object_surface *obj_surface,
249     unsigned int     flags
250 )
251 {
252     int region_pitch;
253     int rw, rh;
254     dri_bo *region;
255 
256     region_pitch = obj_surface->width;
257     rw = obj_surface->orig_width;
258     rh = obj_surface->orig_height;
259     region = obj_surface->bo;
260 
261     gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
262     gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
263 
264     if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
265         return;
266 
267     if (obj_surface->fourcc == VA_FOURCC_NV12) {
268         gen8_render_src_surface_state(ctx, 3, region,
269                                       region_pitch * obj_surface->y_cb_offset,
270                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
271                                       I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
272         gen8_render_src_surface_state(ctx, 4, region,
273                                       region_pitch * obj_surface->y_cb_offset,
274                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
275                                       I965_SURFACEFORMAT_R8G8_UNORM, flags);
276     } else {
277         gen8_render_src_surface_state(ctx, 3, region,
278                                       region_pitch * obj_surface->y_cb_offset,
279                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
280                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
281         gen8_render_src_surface_state(ctx, 4, region,
282                                       region_pitch * obj_surface->y_cb_offset,
283                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
284                                       I965_SURFACEFORMAT_R8_UNORM, flags);
285         gen8_render_src_surface_state(ctx, 5, region,
286                                       region_pitch * obj_surface->y_cr_offset,
287                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
288                                       I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
289         gen8_render_src_surface_state(ctx, 6, region,
290                                       region_pitch * obj_surface->y_cr_offset,
291                                       obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
292                                       I965_SURFACEFORMAT_R8_UNORM, flags);
293     }
294 }
295 
296 static void
gen8_subpic_render_src_surfaces_state(VADriverContextP ctx,struct object_surface * obj_surface)297 gen8_subpic_render_src_surfaces_state(VADriverContextP ctx,
298                                       struct object_surface *obj_surface)
299 {
300     dri_bo *subpic_region;
301     unsigned int index = obj_surface->subpic_render_idx;
302     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
303     struct object_image *obj_image = obj_subpic->obj_image;
304 
305     assert(obj_surface);
306     assert(obj_surface->bo);
307     subpic_region = obj_image->bo;
308     /*subpicture surface*/
309     gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
310     gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
311 }
312 
313 static void
gen8_render_dest_surface_state(VADriverContextP ctx,int index)314 gen8_render_dest_surface_state(VADriverContextP ctx, int index)
315 {
316     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
317     struct media_render_state *render_state = &drv_ctx->render_state;
318     struct region *dest_region = render_state->draw_region;
319     void *ss;
320     dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
321     int format;
322     assert(index < MAX_RENDER_SURFACES);
323 
324     if (dest_region->cpp == 2) {
325 	format = I965_SURFACEFORMAT_B5G6R5_UNORM;
326     } else {
327 	format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
328     }
329 
330     dri_bo_map(ss_bo, 1);
331     assert(ss_bo->virtual);
332     ss = (char *)ss_bo->virtual + RENDER_SURFACE_STATE_OFFSET(index);
333 
334     gen8_render_set_surface_state(ss,
335                                   dest_region->bo, 0,
336                                   dest_region->width, dest_region->height,
337                                   dest_region->pitch, format, 0);
338     gen8_render_set_surface_scs(ss);
339     dri_bo_emit_reloc(ss_bo,
340                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
341                       0,
342                       RENDER_SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
343                       dest_region->bo);
344 
345     ((unsigned int *)((char *)ss_bo->virtual + RENDER_BINDING_TABLE_OFFSET))[index] = RENDER_SURFACE_STATE_OFFSET(index);
346     dri_bo_unmap(ss_bo);
347 }
348 
349 static void
i965_fill_vertex_buffer(VADriverContextP ctx,float tex_coords[4],float vid_coords[4])350 i965_fill_vertex_buffer(
351     VADriverContextP ctx,
352     float tex_coords[4], /* [(u1,v1);(u2,v2)] */
353     float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
354 )
355 {
356     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
357     float vb[12];
358 
359     enum { X1, Y1, X2, Y2 };
360 
361     static const unsigned int g_rotation_indices[][6] = {
362         [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
363         [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
364         [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
365         [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
366     };
367 
368     const unsigned int * const rotation_indices =
369         g_rotation_indices[drv_ctx->rotation_attrib->value];
370 
371     vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
372     vb[1]  = tex_coords[rotation_indices[1]];
373     vb[2]  = vid_coords[X2];
374     vb[3]  = vid_coords[Y2];
375 
376     vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
377     vb[5]  = tex_coords[rotation_indices[3]];
378     vb[6]  = vid_coords[X1];
379     vb[7]  = vid_coords[Y2];
380 
381     vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
382     vb[9]  = tex_coords[rotation_indices[5]];
383     vb[10] = vid_coords[X1];
384     vb[11] = vid_coords[Y1];
385 
386     dri_bo_subdata(drv_ctx->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
387 }
388 
389 static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * output_rect)390 i965_subpic_render_upload_vertex(VADriverContextP ctx,
391                                  struct object_surface *obj_surface,
392                                  const VARectangle *output_rect)
393 {
394     unsigned int index = obj_surface->subpic_render_idx;
395     struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
396     float tex_coords[4], vid_coords[4];
397     VARectangle dst_rect;
398 
399     if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
400         dst_rect = obj_subpic->dst_rect;
401     else {
402         const float sx  = (float)output_rect->width  / obj_surface->orig_width;
403         const float sy  = (float)output_rect->height / obj_surface->orig_height;
404         dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
405         dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
406         dst_rect.width  = sx * obj_subpic->dst_rect.width;
407         dst_rect.height = sy * obj_subpic->dst_rect.height;
408     }
409 
410     tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
411     tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
412     tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
413     tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;
414 
415     vid_coords[0] = dst_rect.x;
416     vid_coords[1] = dst_rect.y;
417     vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
418     vid_coords[3] = (float)(dst_rect.y + dst_rect.height);
419 
420     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
421 }
422 
423 static void
i965_render_upload_vertex(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect)424 i965_render_upload_vertex(
425     VADriverContextP   ctx,
426     struct object_surface *obj_surface,
427     const VARectangle *src_rect,
428     const VARectangle *dst_rect
429 )
430 {
431     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
432     struct media_render_state *render_state = &drv_ctx->render_state;
433     struct region *dest_region = render_state->draw_region;
434     float tex_coords[4], vid_coords[4];
435     int width, height;
436 
437     width  = obj_surface->orig_width;
438     height = obj_surface->orig_height;
439 
440     tex_coords[0] = (float)src_rect->x / width;
441     tex_coords[1] = (float)src_rect->y / height;
442     tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
443     tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;
444 
445     vid_coords[0] = dest_region->x + dst_rect->x;
446     vid_coords[1] = dest_region->y + dst_rect->y;
447     vid_coords[2] = vid_coords[0] + dst_rect->width;
448     vid_coords[3] = vid_coords[1] + dst_rect->height;
449 
450     i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
451 }
452 
453 static void
i965_render_drawing_rectangle(VADriverContextP ctx)454 i965_render_drawing_rectangle(VADriverContextP ctx)
455 {
456     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
457     struct media_render_state *render_state = &drv_ctx->render_state;
458     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
459     struct region *dest_region = render_state->draw_region;
460 
461     BEGIN_BATCH(batch, 4);
462     OUT_BATCH(batch, RCMD_DRAWING_RECTANGLE | 2);
463     OUT_BATCH(batch, 0x00000000);
464     OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
465     OUT_BATCH(batch, 0x00000000);
466     ADVANCE_BATCH(batch);
467 }
468 
469 
470 static void
gen8_clear_dest_region(VADriverContextP ctx)471 gen8_clear_dest_region(VADriverContextP ctx)
472 {
473     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
474     struct media_render_state *render_state = &drv_ctx->render_state;
475     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
476     struct region *dest_region = render_state->draw_region;
477     unsigned int blt_cmd, br13;
478     int pitch;
479 
480     blt_cmd = GEN8_XY_COLOR_BLT_CMD;
481     br13 = 0xf0 << 16;
482     pitch = dest_region->pitch;
483 
484     if (dest_region->cpp == 4) {
485         br13 |= BR13_8888;
486         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
487     } else {
488         assert(dest_region->cpp == 2);
489         br13 |= BR13_565;
490     }
491 
492     if (dest_region->tiling != I915_TILING_NONE) {
493         blt_cmd |= XY_COLOR_BLT_DST_TILED;
494         pitch /= 4;
495     }
496 
497     br13 |= pitch;
498 
499     media_batchbuffer_start_atomic_blt(batch, 24);
500     __BEGIN_BATCH(batch, 7, I915_EXEC_BLT);
501 
502     OUT_BATCH(batch, blt_cmd);
503     OUT_BATCH(batch, br13);
504     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
505     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
506               (dest_region->x + dest_region->width));
507     OUT_RELOC(batch, dest_region->bo,
508               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
509               0);
510     OUT_BATCH(batch, 0x0);
511     OUT_BATCH(batch, 0x0);
512     ADVANCE_BATCH(batch);
513     media_batchbuffer_end_atomic(batch);
514 }
515 
516 
517 /*
518  * for GEN8
519  */
520 #define ALIGNMENT       64
521 
522 static void
gen8_render_initialize(VADriverContextP ctx)523 gen8_render_initialize(VADriverContextP ctx)
524 {
525     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
526     struct media_render_state *render_state = &drv_ctx->render_state;
527     dri_bo *bo;
528     int size;
529     unsigned int end_offset;
530 
531     /* VERTEX BUFFER */
532     dri_bo_unreference(render_state->vb.vertex_buffer);
533     bo = dri_bo_alloc(drv_ctx->drv_data.bufmgr,
534                       "vertex buffer",
535                       4096,
536                       4096);
537     assert(bo);
538     render_state->vb.vertex_buffer = bo;
539 
540     /* WM */
541     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
542     bo = dri_bo_alloc(drv_ctx->drv_data.bufmgr,
543                       "surface state & binding table",
544                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
545                       4096);
546     assert(bo);
547     render_state->wm.surface_state_binding_table_bo = bo;
548 
549     render_state->curbe_size = 256;
550 
551     render_state->wm.sampler_count = 0;
552 
553     render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);
554 
555     render_state->cc_state_size = sizeof(struct gen6_color_calc_state);
556 
557     render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);
558 
559     render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
560 			16 * sizeof(struct gen8_blend_state_rt);
561 
562     render_state->sf_clip_size = 1024;
563 
564     render_state->scissor_size = 1024;
565 
566     size = ALIGN(render_state->curbe_size, ALIGNMENT) +
567         ALIGN(render_state->sampler_size, ALIGNMENT) +
568         ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
569         ALIGN(render_state->cc_state_size, ALIGNMENT) +
570         ALIGN(render_state->blend_state_size, ALIGNMENT) +
571         ALIGN(render_state->sf_clip_size, ALIGNMENT) +
572         ALIGN(render_state->scissor_size, ALIGNMENT);
573 
574     dri_bo_unreference(render_state->dynamic_state.bo);
575     bo = dri_bo_alloc(drv_ctx->drv_data.bufmgr,
576                       "dynamic_state",
577                       size,
578                       4096);
579 
580     render_state->dynamic_state.bo = bo;
581 
582     end_offset = 0;
583     render_state->dynamic_state.end_offset = 0;
584 
585     /* Constant buffer offset */
586     render_state->curbe_offset = end_offset;
587     end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);
588 
589     /* Sampler_state  */
590     render_state->sampler_offset = end_offset;
591     end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);
592 
593     /* CC_VIEWPORT_state  */
594     render_state->cc_viewport_offset = end_offset;
595     end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);
596 
597     /* CC_STATE_state  */
598     render_state->cc_state_offset = end_offset;
599     end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);
600 
601     /* Blend_state  */
602     render_state->blend_state_offset = end_offset;
603     end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);
604 
605     /* SF_CLIP_state  */
606     render_state->sf_clip_offset = end_offset;
607     end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);
608 
609     /* SCISSOR_state  */
610     render_state->scissor_offset = end_offset;
611     end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);
612 
613     /* update the end offset of dynamic_state */
614     render_state->dynamic_state.end_offset = end_offset;
615 
616 }
617 
618 static void
gen8_render_sampler(VADriverContextP ctx)619 gen8_render_sampler(VADriverContextP ctx)
620 {
621     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
622     struct media_render_state *render_state = &drv_ctx->render_state;
623     struct gen8_sampler_state *sampler_state;
624     int i;
625     unsigned char *cc_ptr;
626 
627     assert(render_state->wm.sampler_count > 0);
628     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
629 
630     dri_bo_map(render_state->dynamic_state.bo, 1);
631     assert(render_state->dynamic_state.bo->virtual);
632 
633     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
634 			render_state->sampler_offset;
635 
636     sampler_state = (struct gen8_sampler_state *) cc_ptr;
637 
638     for (i = 0; i < render_state->wm.sampler_count; i++) {
639         memset(sampler_state, 0, sizeof(*sampler_state));
640         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
641         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
642         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
643         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
644         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
645         sampler_state++;
646     }
647 
648     dri_bo_unmap(render_state->dynamic_state.bo);
649 }
650 
651 static void
gen8_render_blend_state(VADriverContextP ctx)652 gen8_render_blend_state(VADriverContextP ctx)
653 {
654     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
655     struct media_render_state *render_state = &drv_ctx->render_state;
656     struct gen8_global_blend_state *global_blend_state;
657     struct gen8_blend_state_rt *blend_state;
658     unsigned char *cc_ptr;
659 
660     dri_bo_map(render_state->dynamic_state.bo, 1);
661     assert(render_state->dynamic_state.bo->virtual);
662 
663     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
664 			render_state->blend_state_offset;
665 
666     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
667 
668     memset(global_blend_state, 0, render_state->blend_state_size);
669     /* Global blend state + blend_state for Render Target */
670     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
671     blend_state->blend1.logic_op_enable = 1;
672     blend_state->blend1.logic_op_func = 0xc;
673     blend_state->blend1.pre_blend_clamp_enable = 1;
674 
675     dri_bo_unmap(render_state->dynamic_state.bo);
676 }
677 
678 
679 static void
gen8_render_cc_viewport(VADriverContextP ctx)680 gen8_render_cc_viewport(VADriverContextP ctx)
681 {
682     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
683     struct media_render_state *render_state = &drv_ctx->render_state;
684     struct i965_cc_viewport *cc_viewport;
685     unsigned char *cc_ptr;
686 
687     dri_bo_map(render_state->dynamic_state.bo, 1);
688     assert(render_state->dynamic_state.bo->virtual);
689 
690     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
691 			render_state->cc_viewport_offset;
692 
693     cc_viewport = (struct i965_cc_viewport *) cc_ptr;
694 
695     memset(cc_viewport, 0, sizeof(*cc_viewport));
696 
697     cc_viewport->min_depth = -1.e35;
698     cc_viewport->max_depth = 1.e35;
699 
700     dri_bo_unmap(render_state->dynamic_state.bo);
701 }
702 
703 static void
gen8_render_color_calc_state(VADriverContextP ctx)704 gen8_render_color_calc_state(VADriverContextP ctx)
705 {
706     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
707     struct media_render_state *render_state = &drv_ctx->render_state;
708     struct gen6_color_calc_state *color_calc_state;
709     unsigned char *cc_ptr;
710 
711     dri_bo_map(render_state->dynamic_state.bo, 1);
712     assert(render_state->dynamic_state.bo->virtual);
713 
714     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
715 			render_state->cc_state_offset;
716 
717     color_calc_state = (struct gen6_color_calc_state *) cc_ptr;
718 
719     memset(color_calc_state, 0, sizeof(*color_calc_state));
720     color_calc_state->constant_r = 1.0;
721     color_calc_state->constant_g = 0.0;
722     color_calc_state->constant_b = 1.0;
723     color_calc_state->constant_a = 1.0;
724     dri_bo_unmap(render_state->dynamic_state.bo);
725 }
726 
727 #define PI  3.1415926
728 
729 static void
gen8_render_upload_constants(VADriverContextP ctx,struct object_surface * obj_surface,unsigned int flags)730 gen8_render_upload_constants(VADriverContextP ctx,
731                              struct object_surface *obj_surface,
732                              unsigned int flags)
733 {
734     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
735     struct media_render_state *render_state = &drv_ctx->render_state;
736     unsigned short *constant_buffer;
737     unsigned char *cc_ptr;
738     float *color_balance_base;
739     float contrast = (float)drv_ctx->contrast_attrib->value / DEFAULT_CONTRAST;
740     float brightness = (float)drv_ctx->brightness_attrib->value / 255; /* YUV is float in the shader */
741     float hue = (float)drv_ctx->hue_attrib->value / 180 * PI;
742     float saturation = (float)drv_ctx->saturation_attrib->value / DEFAULT_SATURATION;
743     float *yuv_to_rgb;
744     unsigned int color_flag;
745 
746     dri_bo_map(render_state->dynamic_state.bo, 1);
747     assert(render_state->dynamic_state.bo->virtual);
748 
749     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
750 			render_state->curbe_offset;
751 
752     constant_buffer = (unsigned short *) cc_ptr;
753 
754     if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
755         assert(obj_surface->fourcc == VA_FOURCC_Y800);
756 
757         *constant_buffer = 2;
758     } else {
759         if (obj_surface->fourcc == VA_FOURCC_NV12)
760             *constant_buffer = 1;
761         else
762             *constant_buffer = 0;
763     }
764 
765     if (drv_ctx->contrast_attrib->value == DEFAULT_CONTRAST &&
766         drv_ctx->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
767         drv_ctx->hue_attrib->value == DEFAULT_HUE &&
768         drv_ctx->saturation_attrib->value == DEFAULT_SATURATION)
769         constant_buffer[1] = 1; /* skip color balance transformation */
770     else
771         constant_buffer[1] = 0;
772 
773     color_balance_base = (float *)constant_buffer + 4;
774     *color_balance_base++ = contrast;
775     *color_balance_base++ = brightness;
776     *color_balance_base++ = cos(hue) * contrast * saturation;
777     *color_balance_base++ = sin(hue) * contrast * saturation;
778 
779     color_flag = flags & VA_SRC_COLOR_MASK;
780     yuv_to_rgb = (float *)constant_buffer + 8;
781     if (color_flag == VA_SRC_BT709)
782         memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
783     else if (color_flag == VA_SRC_SMPTE_240)
784         memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
785     else
786         memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));
787 
788     dri_bo_unmap(render_state->dynamic_state.bo);
789 }
790 
791 static void
gen8_render_setup_states(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect,unsigned int flags)792 gen8_render_setup_states(
793     VADriverContextP   ctx,
794     struct object_surface *obj_surface,
795     const VARectangle *src_rect,
796     const VARectangle *dst_rect,
797     unsigned int       flags
798 )
799 {
800     gen8_render_dest_surface_state(ctx, 0);
801     gen8_render_src_surfaces_state(ctx, obj_surface, flags);
802     gen8_render_sampler(ctx);
803     gen8_render_cc_viewport(ctx);
804     gen8_render_color_calc_state(ctx);
805     gen8_render_blend_state(ctx);
806     gen8_render_upload_constants(ctx, obj_surface, flags);
807     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
808 }
809 
810 static void
gen8_emit_state_base_address(VADriverContextP ctx)811 gen8_emit_state_base_address(VADriverContextP ctx)
812 {
813     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
814     struct media_render_state *render_state = &drv_ctx->render_state;
815     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
816 
817     BEGIN_BATCH(batch, 16);
818     OUT_BATCH(batch, RCMD_STATE_BASE_ADDRESS | (16 - 2));
819     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
820 	OUT_BATCH(batch, 0);
821 	OUT_BATCH(batch, 0);
822 	/*DW4 */
823     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
824 	OUT_BATCH(batch, 0);
825 
826 	/*DW6*/
827     /* Dynamic state base address */
828     OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
829 		0, BASE_ADDRESS_MODIFY);
830     OUT_BATCH(batch, 0);
831 
832 	/*DW8*/
833     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
834     OUT_BATCH(batch, 0);
835 
836 	/*DW10 */
837     /* Instruction base address */
838     OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
839     OUT_BATCH(batch, 0);
840 
841 	/*DW12 */
842     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
843     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
844     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
845     OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
846     ADVANCE_BATCH(batch);
847 }
848 
849 static void
gen8_emit_cc_state_pointers(VADriverContextP ctx)850 gen8_emit_cc_state_pointers(VADriverContextP ctx)
851 {
852     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
853     struct media_render_state *render_state = &drv_ctx->render_state;
854     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
855 
856     BEGIN_BATCH(batch, 2);
857     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
858     OUT_BATCH(batch, (render_state->cc_state_offset + 1));
859     ADVANCE_BATCH(batch);
860 
861     BEGIN_BATCH(batch, 2);
862     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
863     OUT_BATCH(batch, (render_state->blend_state_offset + 1));
864     ADVANCE_BATCH(batch);
865 
866 }
867 
868 static void
gen8_emit_vertices(VADriverContextP ctx)869 gen8_emit_vertices(VADriverContextP ctx)
870 {
871     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
872     struct media_render_state *render_state = &drv_ctx->render_state;
873     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
874 
875     BEGIN_BATCH(batch, 5);
876     OUT_BATCH(batch, RCMD_VERTEX_BUFFERS | (5 - 2));
877     OUT_BATCH(batch,
878               (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
879 	      (0 << GEN8_VB0_MOCS_SHIFT) |
880               GEN7_VB0_ADDRESS_MODIFYENABLE |
881               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
882     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
883     OUT_BATCH(batch, 0);
884     OUT_BATCH(batch, 12 * 4);
885     ADVANCE_BATCH(batch);
886 
887     /* Topology in 3D primitive is overrided by VF_TOPOLOGY command */
888     BEGIN_BATCH(batch, 2);
889     OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
890     OUT_BATCH(batch,
891               _3DPRIM_RECTLIST);
892     ADVANCE_BATCH(batch);
893 
894     BEGIN_BATCH(batch, 7);
895     OUT_BATCH(batch, RCMD_3DPRIMITIVE | (7 - 2));
896     OUT_BATCH(batch,
897               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
898     OUT_BATCH(batch, 3); /* vertex count per instance */
899     OUT_BATCH(batch, 0); /* start vertex offset */
900     OUT_BATCH(batch, 1); /* single instance */
901     OUT_BATCH(batch, 0); /* start instance location */
902     OUT_BATCH(batch, 0);
903     ADVANCE_BATCH(batch);
904 }
905 
906 static void
gen8_emit_vertex_element_state(VADriverContextP ctx)907 gen8_emit_vertex_element_state(VADriverContextP ctx)
908 {
909     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
910     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
911     int i;
912     /*
913      * The VUE layout
914      * dword 0-3: pad (0, 0, 0. 0)
915      * dword 4-7: position (x, y, 1.0, 1.0),
916      * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
917      */
918 
919     /* Set up our vertex elements, sourced from the single vertex buffer. */
920     OUT_BATCH(batch, RCMD_VERTEX_ELEMENTS | (7 - 2));
921 
922     /* Element state 0. These are 4 dwords of 0 required for the VUE format.
923      * We don't really know or care what they do.
924      */
925 
926     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
927               GEN8_VE0_VALID |
928               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
929               (0 << VE0_OFFSET_SHIFT));
930     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
931               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
932               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
933               (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));
934 
935     /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
936     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
937               GEN8_VE0_VALID |
938               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
939               (8 << VE0_OFFSET_SHIFT));
940     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
941 	      (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
942               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
943               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
944 
945     /* offset 0: u,v -> {U, V, 1.0, 1.0} */
946     OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
947               GEN8_VE0_VALID |
948               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
949               (0 << VE0_OFFSET_SHIFT));
950     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
951               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
952               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
953               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
954 
955    /* Disable instancing for all vertex elements. */
956    for (i = 0; i < 3; i++) {
957       OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
958       OUT_BATCH(batch, i);
959       OUT_BATCH(batch, 0);
960    }
961 
962    /* Disable system-generated values. */
963    OUT_BATCH(batch, GEN8_3DSTATE_VF_SGVS | (2 - 2));
964    OUT_BATCH(batch, 0);
965 }
966 
967 static void
gen8_emit_vs_state(VADriverContextP ctx)968 gen8_emit_vs_state(VADriverContextP ctx)
969 {
970     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
971     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
972 
973     /* disable VS constant buffer */
974     BEGIN_BATCH(batch, 11);
975     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
976     OUT_BATCH(batch, 0);
977     OUT_BATCH(batch, 0);
978     /* CS Buffer 0 */
979     OUT_BATCH(batch, 0);
980     OUT_BATCH(batch, 0);
981     /* CS Buffer 1 */
982     OUT_BATCH(batch, 0);
983     OUT_BATCH(batch, 0);
984     /* CS Buffer 2 */
985     OUT_BATCH(batch, 0);
986     OUT_BATCH(batch, 0);
987     /* CS Buffer 3 */
988     OUT_BATCH(batch, 0);
989     OUT_BATCH(batch, 0);
990     ADVANCE_BATCH(batch);
991 
992     BEGIN_BATCH(batch, 9);
993     OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
994     OUT_BATCH(batch, 0); /* without VS kernel */
995     OUT_BATCH(batch, 0);
996     /* VS shader dispatch flag */
997     OUT_BATCH(batch, 0);
998     OUT_BATCH(batch, 0);
999     OUT_BATCH(batch, 0);
1000     /* DW6. VS shader GRF and URB buffer definition */
1001     OUT_BATCH(batch, 0);
1002     OUT_BATCH(batch, 0); /* pass-through */
1003     OUT_BATCH(batch, 0);
1004     ADVANCE_BATCH(batch);
1005 
1006     BEGIN_BATCH(batch, 2);
1007     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
1008     OUT_BATCH(batch, 0);
1009     ADVANCE_BATCH(batch);
1010 
1011     BEGIN_BATCH(batch, 2);
1012     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
1013     OUT_BATCH(batch, 0);
1014     ADVANCE_BATCH(batch);
1015 
1016 }
1017 
1018 /*
1019  * URB layout on GEN8
1020  * ----------------------------------------
1021  * | PS Push Constants (8KB) | VS entries |
1022  * ----------------------------------------
1023  */
1024 static void
gen8_emit_urb(VADriverContextP ctx)1025 gen8_emit_urb(VADriverContextP ctx)
1026 {
1027     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1028     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1029     unsigned int num_urb_entries = 64;
1030 
1031     /* The minimum urb entries is 64 */
1032 
1033     BEGIN_BATCH(batch, 2);
1034     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
1035     OUT_BATCH(batch, 0);
1036     ADVANCE_BATCH(batch);
1037 
1038     BEGIN_BATCH(batch, 2);
1039     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
1040     OUT_BATCH(batch, 0);
1041     ADVANCE_BATCH(batch);
1042 
1043     BEGIN_BATCH(batch, 2);
1044     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
1045     OUT_BATCH(batch, 0);
1046     ADVANCE_BATCH(batch);
1047 
1048     BEGIN_BATCH(batch, 2);
1049     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
1050     OUT_BATCH(batch, 0);
1051     ADVANCE_BATCH(batch);
1052 
1053     /* Size is 8Kbs and base address is 0Kb */
1054     BEGIN_BATCH(batch, 2);
1055     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
1056     /* Size is 8Kbs and base address is 0Kb */
1057     OUT_BATCH(batch,
1058 		(0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
1059 		(8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
1060     ADVANCE_BATCH(batch);
1061 
1062     BEGIN_BATCH(batch, 2);
1063     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
1064     OUT_BATCH(batch,
1065               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
1066               (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
1067               (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1068    ADVANCE_BATCH(batch);
1069 
1070    BEGIN_BATCH(batch, 2);
1071    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
1072    OUT_BATCH(batch,
1073              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1074              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1075    ADVANCE_BATCH(batch);
1076 
1077    BEGIN_BATCH(batch, 2);
1078    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
1079    OUT_BATCH(batch,
1080              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1081              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1082    ADVANCE_BATCH(batch);
1083 
1084    BEGIN_BATCH(batch, 2);
1085    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
1086    OUT_BATCH(batch,
1087              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
1088              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
1089    ADVANCE_BATCH(batch);
1090 }
1091 
1092 static void
gen8_emit_bypass_state(VADriverContextP ctx)1093 gen8_emit_bypass_state(VADriverContextP ctx)
1094 {
1095     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1096     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1097 
1098     /* bypass GS */
1099     BEGIN_BATCH(batch, 11);
1100     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
1101     OUT_BATCH(batch, 0);
1102     OUT_BATCH(batch, 0);
1103     OUT_BATCH(batch, 0);
1104     OUT_BATCH(batch, 0);
1105     OUT_BATCH(batch, 0);
1106     OUT_BATCH(batch, 0);
1107     OUT_BATCH(batch, 0);
1108     OUT_BATCH(batch, 0);
1109     OUT_BATCH(batch, 0);
1110     OUT_BATCH(batch, 0);
1111     ADVANCE_BATCH(batch);
1112 
1113     BEGIN_BATCH(batch, 10);
1114     OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
1115     /* GS shader address */
1116     OUT_BATCH(batch, 0); /* without GS kernel */
1117     OUT_BATCH(batch, 0);
1118     /* DW3. GS shader dispatch flag */
1119     OUT_BATCH(batch, 0);
1120     OUT_BATCH(batch, 0);
1121     OUT_BATCH(batch, 0);
1122     /* DW6. GS shader GRF and URB offset/length */
1123     OUT_BATCH(batch, 0);
1124     OUT_BATCH(batch, 0); /* pass-through */
1125     OUT_BATCH(batch, 0);
1126     OUT_BATCH(batch, 0);
1127     ADVANCE_BATCH(batch);
1128 
1129     BEGIN_BATCH(batch, 2);
1130     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
1131     OUT_BATCH(batch, 0);
1132     ADVANCE_BATCH(batch);
1133 
1134     BEGIN_BATCH(batch, 2);
1135     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
1136     OUT_BATCH(batch, 0);
1137     ADVANCE_BATCH(batch);
1138 
1139     /* disable HS */
1140     BEGIN_BATCH(batch, 11);
1141     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
1142     OUT_BATCH(batch, 0);
1143     OUT_BATCH(batch, 0);
1144     OUT_BATCH(batch, 0);
1145     OUT_BATCH(batch, 0);
1146     OUT_BATCH(batch, 0);
1147     OUT_BATCH(batch, 0);
1148     OUT_BATCH(batch, 0);
1149     OUT_BATCH(batch, 0);
1150     OUT_BATCH(batch, 0);
1151     OUT_BATCH(batch, 0);
1152     ADVANCE_BATCH(batch);
1153 
1154     BEGIN_BATCH(batch, 9);
1155     OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
1156     OUT_BATCH(batch, 0);
1157     /*DW2. HS pass-through */
1158     OUT_BATCH(batch, 0);
1159     /*DW3. HS shader address */
1160     OUT_BATCH(batch, 0);
1161     OUT_BATCH(batch, 0);
1162     /*DW5. HS shader flag. URB offset/length and so on */
1163     OUT_BATCH(batch, 0);
1164     OUT_BATCH(batch, 0);
1165     OUT_BATCH(batch, 0);
1166     OUT_BATCH(batch, 0);
1167     ADVANCE_BATCH(batch);
1168 
1169     BEGIN_BATCH(batch, 2);
1170     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
1171     OUT_BATCH(batch, 0);
1172     ADVANCE_BATCH(batch);
1173 
1174     BEGIN_BATCH(batch, 2);
1175     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
1176     OUT_BATCH(batch, 0);
1177     ADVANCE_BATCH(batch);
1178 
1179     /* Disable TE */
1180     BEGIN_BATCH(batch, 4);
1181     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
1182     OUT_BATCH(batch, 0);
1183     OUT_BATCH(batch, 0);
1184     OUT_BATCH(batch, 0);
1185     ADVANCE_BATCH(batch);
1186 
1187     /* Disable DS */
1188     BEGIN_BATCH(batch, 11);
1189     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
1190     OUT_BATCH(batch, 0);
1191     OUT_BATCH(batch, 0);
1192     OUT_BATCH(batch, 0);
1193     OUT_BATCH(batch, 0);
1194     OUT_BATCH(batch, 0);
1195     OUT_BATCH(batch, 0);
1196     OUT_BATCH(batch, 0);
1197     OUT_BATCH(batch, 0);
1198     OUT_BATCH(batch, 0);
1199     OUT_BATCH(batch, 0);
1200     ADVANCE_BATCH(batch);
1201 
1202     BEGIN_BATCH(batch, 9);
1203     OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
1204     /* DW1. DS shader pointer */
1205     OUT_BATCH(batch, 0);
1206     OUT_BATCH(batch, 0);
1207     /* DW3-5. DS shader dispatch flag.*/
1208     OUT_BATCH(batch, 0);
1209     OUT_BATCH(batch, 0);
1210     OUT_BATCH(batch, 0);
1211     /* DW6-7. DS shader pass-through, GRF,URB offset/Length,Thread Number*/
1212     OUT_BATCH(batch, 0);
1213     OUT_BATCH(batch, 0);
1214     /* DW8. DS shader output URB */
1215     OUT_BATCH(batch, 0);
1216     ADVANCE_BATCH(batch);
1217 
1218     BEGIN_BATCH(batch, 2);
1219     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
1220     OUT_BATCH(batch, 0);
1221     ADVANCE_BATCH(batch);
1222 
1223     BEGIN_BATCH(batch, 2);
1224     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
1225     OUT_BATCH(batch, 0);
1226     ADVANCE_BATCH(batch);
1227 
1228     /* Disable STREAMOUT */
1229     BEGIN_BATCH(batch, 5);
1230     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
1231     OUT_BATCH(batch, 0);
1232     OUT_BATCH(batch, 0);
1233     OUT_BATCH(batch, 0);
1234     OUT_BATCH(batch, 0);
1235     ADVANCE_BATCH(batch);
1236 }
1237 
1238 static void
gen8_emit_invarient_states(VADriverContextP ctx)1239 gen8_emit_invarient_states(VADriverContextP ctx)
1240 {
1241     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1242     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1243 
1244     BEGIN_BATCH(batch, 1);
1245     OUT_BATCH(batch, RCMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1246     ADVANCE_BATCH(batch);
1247 
1248     BEGIN_BATCH(batch, 2);
1249     OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
1250     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1251               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1252     ADVANCE_BATCH(batch);
1253 
1254     /* Update 3D Multisample pattern */
1255     BEGIN_BATCH(batch, 9);
1256     OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
1257     OUT_BATCH(batch, 0);
1258     OUT_BATCH(batch, 0);
1259     OUT_BATCH(batch, 0);
1260     OUT_BATCH(batch, 0);
1261     OUT_BATCH(batch, 0);
1262     OUT_BATCH(batch, 0);
1263     OUT_BATCH(batch, 0);
1264     OUT_BATCH(batch, 0);
1265     ADVANCE_BATCH(batch);
1266 
1267 
1268     BEGIN_BATCH(batch, 2);
1269     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1270     OUT_BATCH(batch, 1);
1271     ADVANCE_BATCH(batch);
1272 
1273     /* Set system instruction pointer */
1274     BEGIN_BATCH(batch, 3);
1275     OUT_BATCH(batch, RCMD_STATE_SIP | 0);
1276     OUT_BATCH(batch, 0);
1277     OUT_BATCH(batch, 0);
1278     ADVANCE_BATCH(batch);
1279 }
1280 
1281 static void
gen8_emit_clip_state(VADriverContextP ctx)1282 gen8_emit_clip_state(VADriverContextP ctx)
1283 {
1284     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1285     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1286 
1287     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
1288     OUT_BATCH(batch, 0);
1289     OUT_BATCH(batch, 0); /* pass-through */
1290     OUT_BATCH(batch, 0);
1291 }
1292 
1293 static void
gen8_emit_sf_state(VADriverContextP ctx)1294 gen8_emit_sf_state(VADriverContextP ctx)
1295 {
1296     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1297     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1298 
1299     BEGIN_BATCH(batch, 5);
1300     OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
1301     OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
1302     OUT_BATCH(batch, 0);
1303     OUT_BATCH(batch, 0);
1304     OUT_BATCH(batch, 0);
1305     ADVANCE_BATCH(batch);
1306 
1307 
1308     BEGIN_BATCH(batch, 4);
1309     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
1310     OUT_BATCH(batch,
1311 	      (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
1312 	      (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
1313               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
1314               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
1315               (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
1316     OUT_BATCH(batch, 0);
1317     OUT_BATCH(batch, 0);
1318     ADVANCE_BATCH(batch);
1319 
1320     /* SBE for backend setup */
1321     BEGIN_BATCH(batch, 11);
1322     OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
1323     OUT_BATCH(batch, 0);
1324     OUT_BATCH(batch, 0);
1325     OUT_BATCH(batch, 0);
1326     OUT_BATCH(batch, 0);
1327     OUT_BATCH(batch, 0);
1328     OUT_BATCH(batch, 0);
1329     OUT_BATCH(batch, 0);
1330     OUT_BATCH(batch, 0);
1331     OUT_BATCH(batch, 0);
1332     OUT_BATCH(batch, 0);
1333     ADVANCE_BATCH(batch);
1334 
1335     BEGIN_BATCH(batch, 4);
1336     OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
1337     OUT_BATCH(batch, 0);
1338     OUT_BATCH(batch, 0);
1339     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
1340     ADVANCE_BATCH(batch);
1341 }
1342 
1343 static void
gen8_emit_wm_state(VADriverContextP ctx,int kernel)1344 gen8_emit_wm_state(VADriverContextP ctx, int kernel)
1345 {
1346     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1347     struct media_render_state *render_state = &drv_ctx->render_state;
1348     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1349     unsigned int num_samples = 0;
1350     unsigned int max_threads;
1351 
1352     max_threads = render_state->max_wm_threads - 2;
1353 
1354     BEGIN_BATCH(batch, 2);
1355     OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
1356     OUT_BATCH(batch,
1357               (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
1358     ADVANCE_BATCH(batch);
1359 
1360     if (kernel == PS_KERNEL) {
1361         BEGIN_BATCH(batch, 2);
1362         OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
1363         OUT_BATCH(batch, GEN8_PS_BLEND_HAS_WRITEABLE_RT);
1364         ADVANCE_BATCH(batch);
1365     } else if (kernel == PS_SUBPIC_KERNEL) {
1366         BEGIN_BATCH(batch, 2);
1367         OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
1368         OUT_BATCH(batch,
1369                   (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
1370                    GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
1371                    (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
1372                    (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
1373                    (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
1374                    (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
1375         ADVANCE_BATCH(batch);
1376     }
1377 
1378     BEGIN_BATCH(batch, 2);
1379     OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
1380     OUT_BATCH(batch,
1381               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
1382     ADVANCE_BATCH(batch);
1383 
1384     BEGIN_BATCH(batch, 11);
1385     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
1386     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
1387     OUT_BATCH(batch, 0);
1388     /*DW3-4. Constant buffer 0 */
1389     OUT_BATCH(batch, render_state->curbe_offset);
1390     OUT_BATCH(batch, 0);
1391 
1392     /*DW5-10. Constant buffer 1-3 */
1393     OUT_BATCH(batch, 0);
1394     OUT_BATCH(batch, 0);
1395     OUT_BATCH(batch, 0);
1396     OUT_BATCH(batch, 0);
1397     OUT_BATCH(batch, 0);
1398     OUT_BATCH(batch, 0);
1399     ADVANCE_BATCH(batch);
1400 
1401     BEGIN_BATCH(batch, 12);
1402     OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
1403     /* PS shader address */
1404     OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);
1405 
1406     OUT_BATCH(batch, 0);
1407     /* DW3. PS shader flag .Binding table cnt/sample cnt */
1408     OUT_BATCH(batch,
1409               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
1410               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
1411     /* DW4-5. Scatch space */
1412     OUT_BATCH(batch, 0); /* scratch space base offset */
1413     OUT_BATCH(batch, 0);
1414     /* DW6. PS shader threads. */
1415     OUT_BATCH(batch,
1416               ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
1417               GEN7_PS_PUSH_CONSTANT_ENABLE |
1418               GEN7_PS_16_DISPATCH_ENABLE);
1419     /* DW7. PS shader GRF */
1420     OUT_BATCH(batch,
1421               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
1422     OUT_BATCH(batch, 0); /* kernel 1 pointer */
1423     OUT_BATCH(batch, 0);
1424     OUT_BATCH(batch, 0); /* kernel 2 pointer */
1425     OUT_BATCH(batch, 0);
1426     ADVANCE_BATCH(batch);
1427 
1428     BEGIN_BATCH(batch, 2);
1429     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
1430     OUT_BATCH(batch, RENDER_BINDING_TABLE_OFFSET);
1431     ADVANCE_BATCH(batch);
1432 }
1433 
1434 static void
gen8_emit_depth_buffer_state(VADriverContextP ctx)1435 gen8_emit_depth_buffer_state(VADriverContextP ctx)
1436 {
1437     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1438     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1439 
1440     BEGIN_BATCH(batch, 8);
1441     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
1442     OUT_BATCH(batch,
1443               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
1444               (I965_SURFACE_NULL << 29));
1445     /* DW2-3. Depth Buffer Address */
1446     OUT_BATCH(batch, 0);
1447     OUT_BATCH(batch, 0);
1448     /* DW4-7. Surface structure */
1449     OUT_BATCH(batch, 0);
1450     OUT_BATCH(batch, 0);
1451     OUT_BATCH(batch, 0);
1452     OUT_BATCH(batch, 0);
1453     ADVANCE_BATCH(batch);
1454 
1455     /* Update the Hier Depth buffer */
1456     BEGIN_BATCH(batch, 5);
1457     OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
1458     OUT_BATCH(batch, 0);
1459     OUT_BATCH(batch, 0);
1460     OUT_BATCH(batch, 0);
1461     OUT_BATCH(batch, 0);
1462     ADVANCE_BATCH(batch);
1463 
1464     /* Update the stencil buffer */
1465     BEGIN_BATCH(batch, 5);
1466     OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
1467     OUT_BATCH(batch, 0);
1468     OUT_BATCH(batch, 0);
1469     OUT_BATCH(batch, 0);
1470     OUT_BATCH(batch, 0);
1471     ADVANCE_BATCH(batch);
1472 
1473     BEGIN_BATCH(batch, 3);
1474     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
1475     OUT_BATCH(batch, 0);
1476     OUT_BATCH(batch, 0);
1477     ADVANCE_BATCH(batch);
1478 }
1479 
1480 static void
gen8_emit_depth_stencil_state(VADriverContextP ctx)1481 gen8_emit_depth_stencil_state(VADriverContextP ctx)
1482 {
1483     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1484     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1485 
1486     BEGIN_BATCH(batch, 3);
1487     OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
1488     OUT_BATCH(batch, 0);
1489     OUT_BATCH(batch, 0);
1490     ADVANCE_BATCH(batch);
1491 }
1492 
1493 static void
gen8_emit_wm_hz_op(VADriverContextP ctx)1494 gen8_emit_wm_hz_op(VADriverContextP ctx)
1495 {
1496     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1497     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1498 
1499     BEGIN_BATCH(batch, 5);
1500     OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
1501     OUT_BATCH(batch, 0);
1502     OUT_BATCH(batch, 0);
1503     OUT_BATCH(batch, 0);
1504     OUT_BATCH(batch, 0);
1505     ADVANCE_BATCH(batch);
1506 }
1507 
1508 static void
gen8_emit_viewport_state_pointers(VADriverContextP ctx)1509 gen8_emit_viewport_state_pointers(VADriverContextP ctx)
1510 {
1511     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1512     struct media_render_state *render_state = &drv_ctx->render_state;
1513     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1514 
1515     BEGIN_BATCH(batch, 2);
1516     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
1517     OUT_BATCH(batch, render_state->cc_viewport_offset);
1518     ADVANCE_BATCH(batch);
1519 
1520     BEGIN_BATCH(batch, 2);
1521     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
1522     OUT_BATCH(batch, 0);
1523     ADVANCE_BATCH(batch);
1524 }
1525 
1526 static void
gen8_emit_sampler_state_pointers(VADriverContextP ctx)1527 gen8_emit_sampler_state_pointers(VADriverContextP ctx)
1528 {
1529     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1530     struct media_render_state *render_state = &drv_ctx->render_state;
1531     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1532 
1533     BEGIN_BATCH(batch, 2);
1534     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
1535     OUT_BATCH(batch, render_state->sampler_offset);
1536     ADVANCE_BATCH(batch);
1537 }
1538 
1539 
1540 static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)1541 gen7_emit_drawing_rectangle(VADriverContextP ctx)
1542 {
1543     i965_render_drawing_rectangle(ctx);
1544 }
1545 
1546 static void
gen8_render_emit_states(VADriverContextP ctx,int kernel)1547 gen8_render_emit_states(VADriverContextP ctx, int kernel)
1548 {
1549     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1550     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1551 
1552     media_batchbuffer_start_atomic(batch, 0x1000);
1553     media_batchbuffer_emit_mi_flush(batch);
1554     gen8_emit_invarient_states(ctx);
1555     gen8_emit_state_base_address(ctx);
1556     gen8_emit_viewport_state_pointers(ctx);
1557     gen8_emit_urb(ctx);
1558     gen8_emit_cc_state_pointers(ctx);
1559     gen8_emit_sampler_state_pointers(ctx);
1560     gen8_emit_wm_hz_op(ctx);
1561     gen8_emit_bypass_state(ctx);
1562     gen8_emit_vs_state(ctx);
1563     gen8_emit_clip_state(ctx);
1564     gen8_emit_sf_state(ctx);
1565     gen8_emit_depth_stencil_state(ctx);
1566     gen8_emit_wm_state(ctx, kernel);
1567     gen8_emit_depth_buffer_state(ctx);
1568     gen7_emit_drawing_rectangle(ctx);
1569     gen8_emit_vertex_element_state(ctx);
1570     gen8_emit_vertices(ctx);
1571     media_batchbuffer_end_atomic(batch);
1572 }
1573 
1574 static void
gen8_render_put_surface(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect,unsigned int flags)1575 gen8_render_put_surface(
1576     VADriverContextP   ctx,
1577     struct object_surface *obj_surface,
1578     const VARectangle *src_rect,
1579     const VARectangle *dst_rect,
1580     unsigned int       flags
1581 )
1582 {
1583     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1584     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1585 
1586     gen8_render_initialize(ctx);
1587     gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
1588     gen8_clear_dest_region(ctx);
1589     gen8_render_emit_states(ctx, PS_KERNEL);
1590     media_batchbuffer_flush(batch);
1591 }
1592 
1593 static void
gen8_subpicture_render_blend_state(VADriverContextP ctx)1594 gen8_subpicture_render_blend_state(VADriverContextP ctx)
1595 {
1596     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1597     struct media_render_state *render_state = &drv_ctx->render_state;
1598     struct gen8_global_blend_state *global_blend_state;
1599     struct gen8_blend_state_rt *blend_state;
1600     unsigned char *cc_ptr;
1601 
1602     dri_bo_map(render_state->dynamic_state.bo, 1);
1603     assert(render_state->dynamic_state.bo->virtual);
1604 
1605     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1606 			render_state->blend_state_offset;
1607 
1608     global_blend_state = (struct gen8_global_blend_state*) cc_ptr;
1609 
1610     memset(global_blend_state, 0, render_state->blend_state_size);
1611     /* Global blend state + blend_state for Render Target */
1612     blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
1613     blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
1614     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1615     blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1616     blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
1617     blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
1618     blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
1619     blend_state->blend0.colorbuf_blend = 1;
1620     blend_state->blend1.post_blend_clamp_enable = 1;
1621     blend_state->blend1.pre_blend_clamp_enable = 1;
1622     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
1623 
1624     dri_bo_unmap(render_state->dynamic_state.bo);
1625 }
1626 
1627 static void
gen8_subpic_render_upload_constants(VADriverContextP ctx,struct object_surface * obj_surface)1628 gen8_subpic_render_upload_constants(VADriverContextP ctx,
1629                                     struct object_surface *obj_surface)
1630 {
1631     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1632     struct media_render_state *render_state = &drv_ctx->render_state;
1633     float *constant_buffer;
1634     float global_alpha = 1.0;
1635     unsigned int index = obj_surface->subpic_render_idx;
1636     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1637     unsigned char *cc_ptr;
1638 
1639     if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
1640         global_alpha = obj_subpic->global_alpha;
1641     }
1642 
1643 
1644     dri_bo_map(render_state->dynamic_state.bo, 1);
1645     assert(render_state->dynamic_state.bo->virtual);
1646 
1647     cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
1648 				render_state->curbe_offset;
1649 
1650     constant_buffer = (float *) cc_ptr;
1651     *constant_buffer = global_alpha;
1652 
1653     dri_bo_unmap(render_state->dynamic_state.bo);
1654 }
1655 
1656 static void
gen8_subpicture_render_setup_states(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect)1657 gen8_subpicture_render_setup_states(
1658     VADriverContextP   ctx,
1659     struct object_surface *obj_surface,
1660     const VARectangle *src_rect,
1661     const VARectangle *dst_rect
1662 )
1663 {
1664     gen8_render_dest_surface_state(ctx, 0);
1665     gen8_subpic_render_src_surfaces_state(ctx, obj_surface);
1666     gen8_render_sampler(ctx);
1667     gen8_render_cc_viewport(ctx);
1668     gen8_render_color_calc_state(ctx);
1669     gen8_subpicture_render_blend_state(ctx);
1670     gen8_subpic_render_upload_constants(ctx, obj_surface);
1671     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
1672 }
1673 
1674 static void
gen8_render_put_subpicture(VADriverContextP ctx,struct object_surface * obj_surface,const VARectangle * src_rect,const VARectangle * dst_rect)1675 gen8_render_put_subpicture(
1676     VADriverContextP   ctx,
1677     struct object_surface *obj_surface,
1678     const VARectangle *src_rect,
1679     const VARectangle *dst_rect
1680 )
1681 {
1682     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1683     MEDIA_BATCH_BUFFER *batch = drv_ctx->render_batch;
1684     unsigned int index = obj_surface->subpic_render_idx;
1685     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1686 
1687     assert(obj_subpic);
1688     gen8_render_initialize(ctx);
1689     gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
1690     gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
1691     media_batchbuffer_flush(batch);
1692 }
1693 
1694 static void
gen8_render_terminate(VADriverContextP ctx)1695 gen8_render_terminate(VADriverContextP ctx)
1696 {
1697     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1698     struct media_render_state *render_state = &drv_ctx->render_state;
1699 
1700     dri_bo_unreference(render_state->vb.vertex_buffer);
1701     render_state->vb.vertex_buffer = NULL;
1702 
1703     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1704     render_state->wm.surface_state_binding_table_bo = NULL;
1705 
1706     if (render_state->instruction_state.bo) {
1707         dri_bo_unreference(render_state->instruction_state.bo);
1708         render_state->instruction_state.bo = NULL;
1709     }
1710 
1711     if (render_state->dynamic_state.bo) {
1712         dri_bo_unreference(render_state->dynamic_state.bo);
1713         render_state->dynamic_state.bo = NULL;
1714     }
1715 
1716     if (render_state->indirect_state.bo) {
1717         dri_bo_unreference(render_state->indirect_state.bo);
1718         render_state->indirect_state.bo = NULL;
1719     }
1720 
1721     if (render_state->draw_region) {
1722         dri_bo_unreference(render_state->draw_region->bo);
1723         free(render_state->draw_region);
1724         render_state->draw_region = NULL;
1725     }
1726 }
1727 
1728 bool
media_drv_gen8_render_init(VADriverContextP ctx)1729 media_drv_gen8_render_init(VADriverContextP ctx)
1730 {
1731     MEDIA_DRV_CONTEXT *drv_ctx = (MEDIA_DRV_CONTEXT *) (ctx->pDriverData);
1732     struct media_render_state *render_state = &drv_ctx->render_state;
1733     int i, kernel_size;
1734     unsigned int kernel_offset, end_offset;
1735     unsigned char *kernel_ptr;
1736     struct media_render_kernel *kernel;
1737 
1738     render_state->render_put_surface = gen8_render_put_surface;
1739     render_state->render_put_subpicture = gen8_render_put_subpicture;
1740     render_state->render_terminate = gen8_render_terminate;
1741     render_state->max_wm_threads = 64;
1742 
1743     memcpy(render_state->render_kernels, render_kernels_gen8,
1744            sizeof(render_state->render_kernels));
1745 
1746     kernel_size = 4096;
1747 
1748     for (i = 0; i < sizeof(render_kernels_gen8) / sizeof(struct media_render_kernel); i++) {
1749         kernel = &render_state->render_kernels[i];
1750 
1751         if (!kernel->size)
1752             continue;
1753 
1754         kernel_size += ALIGN(kernel->size, 64);
1755     }
1756 
1757     render_state->instruction_state.bo = dri_bo_alloc(drv_ctx->drv_data.bufmgr,
1758                                   "kernel shader",
1759                                   kernel_size,
1760                                   0x1000);
1761     if (render_state->instruction_state.bo == NULL) {
1762         return false;
1763     }
1764 
1765     assert(render_state->instruction_state.bo);
1766 
1767     render_state->instruction_state.bo_size = kernel_size;
1768     render_state->instruction_state.end_offset = 0;
1769     end_offset = 0;
1770 
1771     dri_bo_map(render_state->instruction_state.bo, 1);
1772     kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
1773     for (i = 0; i < sizeof(render_kernels_gen8) / sizeof(struct media_render_kernel); i++) {
1774         kernel = &render_state->render_kernels[i];
1775         kernel_offset = end_offset;
1776         kernel->kernel_offset = kernel_offset;
1777 
1778         if (!kernel->size)
1779             continue;
1780 
1781         memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);
1782 
1783         end_offset += ALIGN(kernel->size, ALIGNMENT);
1784     }
1785 
1786     render_state->instruction_state.end_offset = end_offset;
1787 
1788     dri_bo_unmap(render_state->instruction_state.bo);
1789 
1790     return true;
1791 }
1792