/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"
#include "i965_yuv_coefs.h"

#include "i965_render.h"
#include "i965_post_processing.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] = {
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   ((nreg + 15) / 16 - 1)
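
/*
 * Note: GRF space is handed to threads in blocks of 16 registers, and the
 * hardware expects the block count in minus-one form, so e.g. 48 GRFs
 * round up to 3 blocks and encode as 2.
 */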

static const uint32_t ps_kernel_static[][4] = {
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] = {
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* On IRONLAKE */
static const uint32_t sf_kernel_static_gen5[][4] = {
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] = {
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] = {
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] = {
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] = {
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};


#define SURFACE_STATE_PADDED_SIZE   MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)

#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET        SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
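
/*
 * Layout of the surface-state/binding-table BO implied by the macros above:
 * MAX_RENDER_SURFACES padded surface-state blocks come first, immediately
 * followed by the binding table, whose entries are byte offsets back into
 * the same BO.
 */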

/* Bit-cast a float to its IEEE-754 bit pattern so it can be emitted as a
 * raw dword in the batch (see i965_render_constant_color()). */
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL,
    PS_CLEAR_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    },

    // Not used
    {
        "PS_CLEAR",
        PS_CLEAR_KERNEL,
        NULL,
        0,
        0
    }
};
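
/*
 * Every generation uses the same four-slot kernel table, indexed by the
 * enum above; only the kernel binaries differ. Haswell reuses the gen7 SF
 * and subpicture kernels and swaps in its own pixel-shader kernel.
 */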

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4
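
/*
 * Only VS, SF and CS get URB space: the GS and CLIP stages are disabled in
 * this pipeline (see i965_render_pipelined_pointers()), so their entry
 * counts are zero.
 */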

static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_info))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0; /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}


static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0; /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* 0: ALPHATEST_UNORM8, store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;  /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dest_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}


static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* WHITE */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo *bo,
    unsigned long offset,
    unsigned int width,
    unsigned int height,
    unsigned int pitch,
    unsigned int format,
    unsigned int flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" (only programmed for Haswell in this file) */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo *bo,
    unsigned long offset,
    int width,
    int height,
    int pitch,
    int format,
    unsigned int flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD | VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}


static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int index,
    dri_bo *region,
    unsigned long offset,
    int w,
    int h,
    int pitch,
    int format,
    unsigned int flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    struct object_image *obj_image = obj_subpic->obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);
    subpic_region = obj_image->bo;
    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}
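
/*
 * Each vertex below packs four floats, a texture coordinate pair followed
 * by a screen coordinate pair, consumed as the two R32G32_FLOAT elements
 * programmed in i965_render_vertex_elements(). The three vertices form a
 * RECTLIST (bottom-right, bottom-left, top-left); the hardware derives the
 * fourth corner.
 */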
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx = (float)output_rect->width / obj_surface->orig_width;
        const float sy = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI 3.1415926
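
/*
 * The hue attribute is expressed in degrees and converted to radians in
 * i965_render_upload_constants() using this truncated value of pi; <math.h>
 * is already included, so M_PI would be a higher-precision alternative.
 */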

static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    const float *yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    yuv_to_rgb = (float *)constant_buffer + 8;
    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(flags & VA_SRC_COLOR_MASK),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_surface_render_state_setup(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}


static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0); /* disable GS */
    OUT_BATCH(batch, 0); /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) | /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));          /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_vertex_elements(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 5);
        OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
        /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (0 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
        OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
                  VE0_VALID |
                  (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
                  (8 << VE0_OFFSET_SHIFT));
        OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
                  (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
                  (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
                  (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill palette: bits 0-23 hold the color, bits 24-31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}
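
/*
 * Emit the vertex-buffer packet and a single three-vertex RECTLIST draw
 * covering the destination rectangle; the vertex data itself was uploaded
 * earlier by i965_fill_vertex_buffer().
 */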
static void
i965_render_startup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
    OUT_BATCH(batch,
              (0 << VB0_BUFFER_INDEX_SHIFT) |
              VB0_VERTEXDATA |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);

    if (IS_IRONLAKE(i965->intel.device_info))
        OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
    else
        OUT_BATCH(batch, 3);

    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              CMD_3DPRIMITIVE |
              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
              (0 << 9) |
              4);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0); /* index buffer offset, ignored */
    ADVANCE_BATCH(batch);
}

static void
i965_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    if (IS_GEN6(i965->intel.device_info) ||
        IS_GEN7(i965->intel.device_info)) {
        intel_batchbuffer_start_atomic_blt(batch, 24);
        BEGIN_BLT_BATCH(batch, 6);
    } else {
        intel_batchbuffer_start_atomic(batch, 24);
        BEGIN_BATCH(batch, 6);
    }

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_surface_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    i965_clear_dest_region(ctx);
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}

static void
i965_subpic_render_pipeline_setup(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    i965_render_pipeline_select(ctx);
    i965_render_state_sip(ctx);
    i965_render_state_base_address(ctx);
    i965_render_binding_table_pointers(ctx);
    i965_render_constant_color(ctx);
    i965_render_pipelined_pointers(ctx);
    i965_render_urb_layout(ctx);
    i965_render_cs_urb_layout(ctx);
    i965_render_constant_buffer(ctx);
    i965_render_drawing_rectangle(ctx);
    i965_render_vertex_elements(ctx);
    i965_render_startup(ctx);
    intel_batchbuffer_end_atomic(batch);
}


static void
i965_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* VS */
    dri_bo_unreference(render_state->vs.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vs state",
                      sizeof(struct i965_vs_unit_state),
                      64);
    assert(bo);
    render_state->vs.state = bo;

    /* GS */
    /* CLIP */
    /* SF */
    dri_bo_unreference(render_state->sf.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sf state",
                      sizeof(struct i965_sf_unit_state),
                      64);
    assert(bo);
    render_state->sf.state = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    dri_bo_unreference(render_state->wm.sampler);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "sampler state",
                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
                      64);
    assert(bo);
    render_state->wm.sampler = bo;
    render_state->wm.sampler_count = 0;

    dri_bo_unreference(render_state->wm.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "wm state",
                      sizeof(struct i965_wm_unit_state),
                      64);
    assert(bo);
    render_state->wm.state = bo;

    /* COLOR CALCULATOR */
    dri_bo_unreference(render_state->cc.state);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "color calc state",
                      sizeof(struct i965_cc_unit_state),
                      64);
    assert(bo);
    render_state->cc.state = bo;

    dri_bo_unreference(render_state->cc.viewport);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "cc viewport",
                      sizeof(struct i965_cc_viewport),
                      64);
    assert(bo);
    render_state->cc.viewport = bo;
}
1709
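/*
 * Pre-GEN6 PutSurface: rebuild the state buffers, fill in the surface,
 * sampler and unit state for this frame, emit the pipeline and flush.
 */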
1710 static void
1711 i965_render_put_surface(
1712 VADriverContextP ctx,
1713 struct object_surface *obj_surface,
1714 const VARectangle *src_rect,
1715 const VARectangle *dst_rect,
1716 unsigned int flags
1717 )
1718 {
1719 struct i965_driver_data *i965 = i965_driver_data(ctx);
1720 struct intel_batchbuffer *batch = i965->batch;
1721
1722 i965_render_initialize(ctx);
1723 i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
1724 i965_surface_render_pipeline_setup(ctx);
1725 intel_batchbuffer_flush(batch);
1726 }
1727
1728 static void
1729 i965_render_put_subpicture(
1730 VADriverContextP ctx,
1731 struct object_surface *obj_surface,
1732 const VARectangle *src_rect,
1733 const VARectangle *dst_rect
1734 )
1735 {
1736 struct i965_driver_data *i965 = i965_driver_data(ctx);
1737 struct intel_batchbuffer *batch = i965->batch;
1738 unsigned int index = obj_surface->subpic_render_idx;
1739 struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
1740
1741 assert(obj_subpic);
1742
1743 i965_render_initialize(ctx);
1744 i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
1745 i965_subpic_render_pipeline_setup(ctx);
1746 i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
1747 intel_batchbuffer_flush(batch);
1748 }
1749
1750 /*
1751 * for GEN6+
1752 */
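/*
 * GEN6 drops the per-unit VS/SF/WM state blocks used by older GENs;
 * that state is programmed directly with 3DSTATE_* commands instead,
 * so only surface/sampler, CC, blend and depth-stencil buffers are
 * allocated here.
 */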
1753 static void
1754 gen6_render_initialize(VADriverContextP ctx)
1755 {
1756 struct i965_driver_data *i965 = i965_driver_data(ctx);
1757 struct i965_render_state *render_state = &i965->render_state;
1758 dri_bo *bo;
1759
1760 /* VERTEX BUFFER */
1761 dri_bo_unreference(render_state->vb.vertex_buffer);
1762 bo = dri_bo_alloc(i965->intel.bufmgr,
1763 "vertex buffer",
1764 4096,
1765 4096);
1766 assert(bo);
1767 render_state->vb.vertex_buffer = bo;
1768
1769 /* WM */
1770 dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
1771 bo = dri_bo_alloc(i965->intel.bufmgr,
1772 "surface state & binding table",
1773 (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
1774 4096);
1775 assert(bo);
1776 render_state->wm.surface_state_binding_table_bo = bo;
1777
1778 dri_bo_unreference(render_state->wm.sampler);
1779 bo = dri_bo_alloc(i965->intel.bufmgr,
1780 "sampler state",
1781 MAX_SAMPLERS * sizeof(struct i965_sampler_state),
1782 4096);
1783 assert(bo);
1784 render_state->wm.sampler = bo;
1785 render_state->wm.sampler_count = 0;
1786
1787 /* COLOR CALCULATOR */
1788 dri_bo_unreference(render_state->cc.state);
1789 bo = dri_bo_alloc(i965->intel.bufmgr,
1790 "color calc state",
1791 sizeof(struct gen6_color_calc_state),
1792 4096);
1793 assert(bo);
1794 render_state->cc.state = bo;
1795
1796 /* CC VIEWPORT */
1797 dri_bo_unreference(render_state->cc.viewport);
1798 bo = dri_bo_alloc(i965->intel.bufmgr,
1799 "cc viewport",
1800 sizeof(struct i965_cc_viewport),
1801 4096);
1802 assert(bo);
1803 render_state->cc.viewport = bo;
1804
1805 /* BLEND STATE */
1806 dri_bo_unreference(render_state->cc.blend);
1807 bo = dri_bo_alloc(i965->intel.bufmgr,
1808 "blend state",
1809 sizeof(struct gen6_blend_state),
1810 4096);
1811 assert(bo);
1812 render_state->cc.blend = bo;
1813
1814 /* DEPTH & STENCIL STATE */
1815 dri_bo_unreference(render_state->cc.depth_stencil);
1816 bo = dri_bo_alloc(i965->intel.bufmgr,
1817 "depth & stencil state",
1818 sizeof(struct gen6_depth_stencil_state),
1819 4096);
1820 assert(bo);
1821 render_state->cc.depth_stencil = bo;
1822 }
1823
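/*
 * Program the color-calculator constant to an easy-to-spot magenta.
 * Nothing in this path blends against the constant color, so the value
 * is presumably just a debugging aid.
 */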
1824 static void
1825 gen6_render_color_calc_state(VADriverContextP ctx)
1826 {
1827 struct i965_driver_data *i965 = i965_driver_data(ctx);
1828 struct i965_render_state *render_state = &i965->render_state;
1829 struct gen6_color_calc_state *color_calc_state;
1830
1831 dri_bo_map(render_state->cc.state, 1);
1832 assert(render_state->cc.state->virtual);
1833 color_calc_state = render_state->cc.state->virtual;
1834 memset(color_calc_state, 0, sizeof(*color_calc_state));
1835 color_calc_state->constant_r = 1.0;
1836 color_calc_state->constant_g = 0.0;
1837 color_calc_state->constant_b = 1.0;
1838 color_calc_state->constant_a = 1.0;
1839 dri_bo_unmap(render_state->cc.state);
1840 }
1841
1842 static void
1843 gen6_render_blend_state(VADriverContextP ctx)
1844 {
1845 struct i965_driver_data *i965 = i965_driver_data(ctx);
1846 struct i965_render_state *render_state = &i965->render_state;
1847 struct gen6_blend_state *blend_state;
1848
1849 dri_bo_map(render_state->cc.blend, 1);
1850 assert(render_state->cc.blend->virtual);
1851 blend_state = render_state->cc.blend->virtual;
1852 memset(blend_state, 0, sizeof(*blend_state));
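/* Logic op 0xc is COPY: route the source straight to the target with no
 * blending. */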
1853 blend_state->blend1.logic_op_enable = 1;
1854 blend_state->blend1.logic_op_func = 0xc;
1855 dri_bo_unmap(render_state->cc.blend);
1856 }
1857
1858 static void
1859 gen6_render_depth_stencil_state(VADriverContextP ctx)
1860 {
1861 struct i965_driver_data *i965 = i965_driver_data(ctx);
1862 struct i965_render_state *render_state = &i965->render_state;
1863 struct gen6_depth_stencil_state *depth_stencil_state;
1864
1865 dri_bo_map(render_state->cc.depth_stencil, 1);
1866 assert(render_state->cc.depth_stencil->virtual);
1867 depth_stencil_state = render_state->cc.depth_stencil->virtual;
1868 memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
1869 dri_bo_unmap(render_state->cc.depth_stencil);
1870 }
1871
1872 static void
1873 gen6_render_setup_states(
1874 VADriverContextP ctx,
1875 struct object_surface *obj_surface,
1876 const VARectangle *src_rect,
1877 const VARectangle *dst_rect,
1878 unsigned int flags
1879 )
1880 {
1881 i965_render_dest_surface_state(ctx, 0);
1882 i965_render_src_surfaces_state(ctx, obj_surface, flags);
1883 i965_render_sampler(ctx);
1884 i965_render_cc_viewport(ctx);
1885 gen6_render_color_calc_state(ctx);
1886 gen6_render_blend_state(ctx);
1887 gen6_render_depth_stencil_state(ctx);
1888 i965_render_upload_constants(ctx, obj_surface, flags);
1889 i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
1890 }
1891
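/*
 * "Invariant" per-frame state: select the 3D pipeline, force one sample
 * per pixel and zero the system instruction pointer.
 */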
1892 static void
1893 gen6_emit_invarient_states(VADriverContextP ctx)
1894 {
1895 struct i965_driver_data *i965 = i965_driver_data(ctx);
1896 struct intel_batchbuffer *batch = i965->batch;
1897
1898 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
1899
1900 OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
1901 OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
1902 GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
1903 OUT_BATCH(batch, 0);
1904
1905 OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
1906 OUT_BATCH(batch, 1);
1907
1908 /* Set system instruction pointer */
1909 OUT_BATCH(batch, CMD_STATE_SIP | 0);
1910 OUT_BATCH(batch, 0);
1911 }
1912
1913 static void
1914 gen6_emit_state_base_address(VADriverContextP ctx)
1915 {
1916 struct i965_driver_data *i965 = i965_driver_data(ctx);
1917 struct intel_batchbuffer *batch = i965->batch;
1918 struct i965_render_state *render_state = &i965->render_state;
1919
1920 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
1921 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
1922 OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
1923 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
1924 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
1925 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
1926 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
1927 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
1928 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
1929 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
1930 }
1931
1932 static void
1933 gen6_emit_viewport_state_pointers(VADriverContextP ctx)
1934 {
1935 struct i965_driver_data *i965 = i965_driver_data(ctx);
1936 struct intel_batchbuffer *batch = i965->batch;
1937 struct i965_render_state *render_state = &i965->render_state;
1938
1939 OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
1940 GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
1941 (4 - 2));
1942 OUT_BATCH(batch, 0);
1943 OUT_BATCH(batch, 0);
1944 OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1945 }
1946
1947 static void
1948 gen6_emit_urb(VADriverContextP ctx)
1949 {
1950 struct i965_driver_data *i965 = i965_driver_data(ctx);
1951 struct intel_batchbuffer *batch = i965->batch;
1952
1953 OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
1954 OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
1955 (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
1956 OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
1957 (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
1958 }
1959
1960 static void
1961 gen6_emit_cc_state_pointers(VADriverContextP ctx)
1962 {
1963 struct i965_driver_data *i965 = i965_driver_data(ctx);
1964 struct intel_batchbuffer *batch = i965->batch;
1965 struct i965_render_state *render_state = &i965->render_state;
1966
1967 OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
1968 OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1969 OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1970 OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
1971 }
1972
1973 static void
1974 gen6_emit_sampler_state_pointers(VADriverContextP ctx)
1975 {
1976 struct i965_driver_data *i965 = i965_driver_data(ctx);
1977 struct intel_batchbuffer *batch = i965->batch;
1978 struct i965_render_state *render_state = &i965->render_state;
1979
1980 OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
1981 GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
1982 (4 - 2));
1983 OUT_BATCH(batch, 0); /* VS */
1984 OUT_BATCH(batch, 0); /* GS */
1985 OUT_RELOC(batch, render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
1986 }
1987
1988 static void
1989 gen6_emit_binding_table(VADriverContextP ctx)
1990 {
1991 struct i965_driver_data *i965 = i965_driver_data(ctx);
1992 struct intel_batchbuffer *batch = i965->batch;
1993
1994 /* Binding table pointers */
1995 OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
1996 GEN6_BINDING_TABLE_MODIFY_PS |
1997 (4 - 2));
1998 OUT_BATCH(batch, 0); /* vs */
1999 OUT_BATCH(batch, 0); /* gs */
2000 /* Only the PS uses the binding table */
2001 OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2002 }
2003
2004 static void
2005 gen6_emit_depth_buffer_state(VADriverContextP ctx)
2006 {
2007 struct i965_driver_data *i965 = i965_driver_data(ctx);
2008 struct intel_batchbuffer *batch = i965->batch;
2009
2010 OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
2011 OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
2012 (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
2013 OUT_BATCH(batch, 0);
2014 OUT_BATCH(batch, 0);
2015 OUT_BATCH(batch, 0);
2016 OUT_BATCH(batch, 0);
2017 OUT_BATCH(batch, 0);
2018
2019 OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
2020 OUT_BATCH(batch, 0);
2021 }
2022
2023 static void
2024 gen6_emit_drawing_rectangle(VADriverContextP ctx)
2025 {
2026 i965_render_drawing_rectangle(ctx);
2027 }
2028
2029 static void
2030 gen6_emit_vs_state(VADriverContextP ctx)
2031 {
2032 struct i965_driver_data *i965 = i965_driver_data(ctx);
2033 struct intel_batchbuffer *batch = i965->batch;
2034
2035 /* disable VS constant buffer */
2036 OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
2037 OUT_BATCH(batch, 0);
2038 OUT_BATCH(batch, 0);
2039 OUT_BATCH(batch, 0);
2040 OUT_BATCH(batch, 0);
2041
2042 OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2043 OUT_BATCH(batch, 0); /* without VS kernel */
2044 OUT_BATCH(batch, 0);
2045 OUT_BATCH(batch, 0);
2046 OUT_BATCH(batch, 0);
2047 OUT_BATCH(batch, 0); /* pass-through */
2048 }
2049
2050 static void
2051 gen6_emit_gs_state(VADriverContextP ctx)
2052 {
2053 struct i965_driver_data *i965 = i965_driver_data(ctx);
2054 struct intel_batchbuffer *batch = i965->batch;
2055
2056 /* disable GS constant buffer */
2057 OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
2058 OUT_BATCH(batch, 0);
2059 OUT_BATCH(batch, 0);
2060 OUT_BATCH(batch, 0);
2061 OUT_BATCH(batch, 0);
2062
2063 OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2064 OUT_BATCH(batch, 0); /* without GS kernel */
2065 OUT_BATCH(batch, 0);
2066 OUT_BATCH(batch, 0);
2067 OUT_BATCH(batch, 0);
2068 OUT_BATCH(batch, 0);
2069 OUT_BATCH(batch, 0); /* pass-through */
2070 }
2071
2072 static void
2073 gen6_emit_clip_state(VADriverContextP ctx)
2074 {
2075 struct i965_driver_data *i965 = i965_driver_data(ctx);
2076 struct intel_batchbuffer *batch = i965->batch;
2077
2078 OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2079 OUT_BATCH(batch, 0);
2080 OUT_BATCH(batch, 0); /* pass-through */
2081 OUT_BATCH(batch, 0);
2082 }
2083
2084 static void
2085 gen6_emit_sf_state(VADriverContextP ctx)
2086 {
2087 struct i965_driver_data *i965 = i965_driver_data(ctx);
2088 struct intel_batchbuffer *batch = i965->batch;
2089
2090 OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
2091 OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
2092 (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
2093 (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
2094 OUT_BATCH(batch, 0);
2095 OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2096 OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
2097 OUT_BATCH(batch, 0);
2098 OUT_BATCH(batch, 0);
2099 OUT_BATCH(batch, 0);
2100 OUT_BATCH(batch, 0);
2101 OUT_BATCH(batch, 0); /* DW9 */
2102 OUT_BATCH(batch, 0);
2103 OUT_BATCH(batch, 0);
2104 OUT_BATCH(batch, 0);
2105 OUT_BATCH(batch, 0);
2106 OUT_BATCH(batch, 0); /* DW14 */
2107 OUT_BATCH(batch, 0);
2108 OUT_BATCH(batch, 0);
2109 OUT_BATCH(batch, 0);
2110 OUT_BATCH(batch, 0);
2111 OUT_BATCH(batch, 0); /* DW19 */
2112 }
2113
2114 static void
2115 gen6_emit_wm_state(VADriverContextP ctx, int kernel)
2116 {
2117 struct i965_driver_data *i965 = i965_driver_data(ctx);
2118 struct intel_batchbuffer *batch = i965->batch;
2119 struct i965_render_state *render_state = &i965->render_state;
2120
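/* Point PS push-constant buffer 0 at the CURBE.  The relocation delta
 * lands in the low bits of the pointer dword, where the GEN6 packet
 * encodes the buffer read length. */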
2121 OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
2122 GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
2123 (5 - 2));
2124 OUT_RELOC(batch,
2125 render_state->curbe.bo,
2126 I915_GEM_DOMAIN_INSTRUCTION, 0,
2127 (URB_CS_ENTRY_SIZE - 1));
2128 OUT_BATCH(batch, 0);
2129 OUT_BATCH(batch, 0);
2130 OUT_BATCH(batch, 0);
2131
2132 OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
2133 OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
2134 I915_GEM_DOMAIN_INSTRUCTION, 0,
2135 0);
2136 OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) |
2137 (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2138 OUT_BATCH(batch, 0);
2139 OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
2140 OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
2141 GEN6_3DSTATE_WM_DISPATCH_ENABLE |
2142 GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
2143 OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
2144 GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2145 OUT_BATCH(batch, 0);
2146 OUT_BATCH(batch, 0);
2147 }
2148
2149 static void
2150 gen6_emit_vertex_element_state(VADriverContextP ctx)
2151 {
2152 struct i965_driver_data *i965 = i965_driver_data(ctx);
2153 struct intel_batchbuffer *batch = i965->batch;
2154
2155 /* Set up our vertex elements, sourced from the single vertex buffer. */
2156 OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2157 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2158 OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2159 GEN6_VE0_VALID |
2160 (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2161 (0 << VE0_OFFSET_SHIFT));
2162 OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2163 (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2164 (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2165 (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2166 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2167 OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2168 GEN6_VE0_VALID |
2169 (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2170 (8 << VE0_OFFSET_SHIFT));
2171 OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2172 (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2173 (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2174 (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2175 }
2176
2177 static void
2178 gen6_emit_vertices(VADriverContextP ctx)
2179 {
2180 struct i965_driver_data *i965 = i965_driver_data(ctx);
2181 struct intel_batchbuffer *batch = i965->batch;
2182 struct i965_render_state *render_state = &i965->render_state;
2183
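/* Three (x, y, s, t) float vertices, 16 bytes apart, drawn as a
 * RECTLIST; the hardware derives the fourth corner itself.  The second
 * relocation is the vertex buffer end address. */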
2184 BEGIN_BATCH(batch, 11);
2185 OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
2186 OUT_BATCH(batch,
2187 (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2188 GEN6_VB0_VERTEXDATA |
2189 ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2190 OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2191 OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2192 OUT_BATCH(batch, 0);
2193
2194 OUT_BATCH(batch,
2195 CMD_3DPRIMITIVE |
2196 _3DPRIMITIVE_VERTEX_SEQUENTIAL |
2197 (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
2198 (0 << 9) |
2199 4);
2200 OUT_BATCH(batch, 3); /* vertex count per instance */
2201 OUT_BATCH(batch, 0); /* start vertex offset */
2202 OUT_BATCH(batch, 1); /* single instance */
2203 OUT_BATCH(batch, 0); /* start instance location */
2204 OUT_BATCH(batch, 0); /* index buffer offset, ignored */
2205 ADVANCE_BATCH(batch);
2206 }
2207
2208 static void
2209 gen6_render_emit_states(VADriverContextP ctx, int kernel)
2210 {
2211 struct i965_driver_data *i965 = i965_driver_data(ctx);
2212 struct intel_batchbuffer *batch = i965->batch;
2213
2214 intel_batchbuffer_start_atomic(batch, 0x1000);
2215 intel_batchbuffer_emit_mi_flush(batch);
2216 gen6_emit_invarient_states(ctx);
2217 gen6_emit_state_base_address(ctx);
2218 gen6_emit_viewport_state_pointers(ctx);
2219 gen6_emit_urb(ctx);
2220 gen6_emit_cc_state_pointers(ctx);
2221 gen6_emit_sampler_state_pointers(ctx);
2222 gen6_emit_vs_state(ctx);
2223 gen6_emit_gs_state(ctx);
2224 gen6_emit_clip_state(ctx);
2225 gen6_emit_sf_state(ctx);
2226 gen6_emit_wm_state(ctx, kernel);
2227 gen6_emit_binding_table(ctx);
2228 gen6_emit_depth_buffer_state(ctx);
2229 gen6_emit_drawing_rectangle(ctx);
2230 gen6_emit_vertex_element_state(ctx);
2231 gen6_emit_vertices(ctx);
2232 intel_batchbuffer_end_atomic(batch);
2233 }
2234
2235 static void
2236 gen6_render_put_surface(
2237 VADriverContextP ctx,
2238 struct object_surface *obj_surface,
2239 const VARectangle *src_rect,
2240 const VARectangle *dst_rect,
2241 unsigned int flags
2242 )
2243 {
2244 struct i965_driver_data *i965 = i965_driver_data(ctx);
2245 struct intel_batchbuffer *batch = i965->batch;
2246
2247 gen6_render_initialize(ctx);
2248 gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
2249 i965_clear_dest_region(ctx);
2250 gen6_render_emit_states(ctx, PS_KERNEL);
2251 intel_batchbuffer_flush(batch);
2252 }
2253
2254 static void
2255 gen6_subpicture_render_blend_state(VADriverContextP ctx)
2256 {
2257 struct i965_driver_data *i965 = i965_driver_data(ctx);
2258 struct i965_render_state *render_state = &i965->render_state;
2259 struct gen6_blend_state *blend_state;
2260
2261 dri_bo_unmap(render_state->cc.state);
2262 dri_bo_map(render_state->cc.blend, 1);
2263 assert(render_state->cc.blend->virtual);
2264 blend_state = render_state->cc.blend->virtual;
2265 memset(blend_state, 0, sizeof(*blend_state));
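/* Classic source-over blending for the subpicture:
 * dst = src.alpha * src + (1 - src.alpha) * dst,
 * clamped to [0, 1] both before and after blending. */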
2266 blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
2267 blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
2268 blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
2269 blend_state->blend0.blend_enable = 1;
2270 blend_state->blend1.post_blend_clamp_enable = 1;
2271 blend_state->blend1.pre_blend_clamp_enable = 1;
2272 blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
2273 dri_bo_unmap(render_state->cc.blend);
2274 }
2275
2276 static void
2277 gen6_subpicture_render_setup_states(
2278 VADriverContextP ctx,
2279 struct object_surface *obj_surface,
2280 const VARectangle *src_rect,
2281 const VARectangle *dst_rect
2282 )
2283 {
2284 i965_render_dest_surface_state(ctx, 0);
2285 i965_subpic_render_src_surfaces_state(ctx, obj_surface);
2286 i965_render_sampler(ctx);
2287 i965_render_cc_viewport(ctx);
2288 gen6_render_color_calc_state(ctx);
2289 gen6_subpicture_render_blend_state(ctx);
2290 gen6_render_depth_stencil_state(ctx);
2291 i965_subpic_render_upload_constants(ctx, obj_surface);
2292 i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
2293 }
2294
2295 static void
2296 gen6_render_put_subpicture(
2297 VADriverContextP ctx,
2298 struct object_surface *obj_surface,
2299 const VARectangle *src_rect,
2300 const VARectangle *dst_rect
2301 )
2302 {
2303 struct i965_driver_data *i965 = i965_driver_data(ctx);
2304 struct intel_batchbuffer *batch = i965->batch;
2305 unsigned int index = obj_surface->subpic_render_idx;
2306 struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
2307
2308 assert(obj_subpic);
2309 gen6_render_initialize(ctx);
2310 gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
2311 gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
2312 i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
2313 intel_batchbuffer_flush(batch);
2314 }
2315
2316 /*
2317 * for GEN7
2318 */
2319 static void
2320 gen7_render_initialize(VADriverContextP ctx)
2321 {
2322 struct i965_driver_data *i965 = i965_driver_data(ctx);
2323 struct i965_render_state *render_state = &i965->render_state;
2324 dri_bo *bo;
2325
2326 /* VERTEX BUFFER */
2327 dri_bo_unreference(render_state->vb.vertex_buffer);
2328 bo = dri_bo_alloc(i965->intel.bufmgr,
2329 "vertex buffer",
2330 4096,
2331 4096);
2332 assert(bo);
2333 render_state->vb.vertex_buffer = bo;
2334
2335 /* WM */
2336 dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
2337 bo = dri_bo_alloc(i965->intel.bufmgr,
2338 "surface state & binding table",
2339 (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
2340 4096);
2341 assert(bo);
2342 render_state->wm.surface_state_binding_table_bo = bo;
2343
2344 dri_bo_unreference(render_state->wm.sampler);
2345 bo = dri_bo_alloc(i965->intel.bufmgr,
2346 "sampler state",
2347 MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
2348 4096);
2349 assert(bo);
2350 render_state->wm.sampler = bo;
2351 render_state->wm.sampler_count = 0;
2352
2353 /* COLOR CALCULATOR */
2354 dri_bo_unreference(render_state->cc.state);
2355 bo = dri_bo_alloc(i965->intel.bufmgr,
2356 "color calc state",
2357 sizeof(struct gen6_color_calc_state),
2358 4096);
2359 assert(bo);
2360 render_state->cc.state = bo;
2361
2362 /* CC VIEWPORT */
2363 dri_bo_unreference(render_state->cc.viewport);
2364 bo = dri_bo_alloc(i965->intel.bufmgr,
2365 "cc viewport",
2366 sizeof(struct i965_cc_viewport),
2367 4096);
2368 assert(bo);
2369 render_state->cc.viewport = bo;
2370
2371 /* BLEND STATE */
2372 dri_bo_unreference(render_state->cc.blend);
2373 bo = dri_bo_alloc(i965->intel.bufmgr,
2374 "blend state",
2375 sizeof(struct gen6_blend_state),
2376 4096);
2377 assert(bo);
2378 render_state->cc.blend = bo;
2379
2380 /* DEPTH & STENCIL STATE */
2381 dri_bo_unreference(render_state->cc.depth_stencil);
2382 bo = dri_bo_alloc(i965->intel.bufmgr,
2383 "depth & stencil state",
2384 sizeof(struct gen6_depth_stencil_state),
2385 4096);
2386 assert(bo);
2387 render_state->cc.depth_stencil = bo;
2388 }
2389
2390 /*
2391 * for GEN8 -- ALIGNMENT is not used by the GEN7 helpers below; it is presumably kept for GEN8 render code that lives elsewhere
2392 */
2393 #define ALIGNMENT 64
2394
2395 static void
2396 gen7_render_color_calc_state(VADriverContextP ctx)
2397 {
2398 struct i965_driver_data *i965 = i965_driver_data(ctx);
2399 struct i965_render_state *render_state = &i965->render_state;
2400 struct gen6_color_calc_state *color_calc_state;
2401
2402 dri_bo_map(render_state->cc.state, 1);
2403 assert(render_state->cc.state->virtual);
2404 color_calc_state = render_state->cc.state->virtual;
2405 memset(color_calc_state, 0, sizeof(*color_calc_state));
2406 color_calc_state->constant_r = 1.0;
2407 color_calc_state->constant_g = 0.0;
2408 color_calc_state->constant_b = 1.0;
2409 color_calc_state->constant_a = 1.0;
2410 dri_bo_unmap(render_state->cc.state);
2411 }
2412
2413 static void
2414 gen7_render_blend_state(VADriverContextP ctx)
2415 {
2416 struct i965_driver_data *i965 = i965_driver_data(ctx);
2417 struct i965_render_state *render_state = &i965->render_state;
2418 struct gen6_blend_state *blend_state;
2419
2420 dri_bo_map(render_state->cc.blend, 1);
2421 assert(render_state->cc.blend->virtual);
2422 blend_state = render_state->cc.blend->virtual;
2423 memset(blend_state, 0, sizeof(*blend_state));
2424 blend_state->blend1.logic_op_enable = 1;
2425 blend_state->blend1.logic_op_func = 0xc;
2426 blend_state->blend1.pre_blend_clamp_enable = 1;
2427 dri_bo_unmap(render_state->cc.blend);
2428 }
2429
2430 static void
2431 gen7_render_depth_stencil_state(VADriverContextP ctx)
2432 {
2433 struct i965_driver_data *i965 = i965_driver_data(ctx);
2434 struct i965_render_state *render_state = &i965->render_state;
2435 struct gen6_depth_stencil_state *depth_stencil_state;
2436
2437 dri_bo_map(render_state->cc.depth_stencil, 1);
2438 assert(render_state->cc.depth_stencil->virtual);
2439 depth_stencil_state = render_state->cc.depth_stencil->virtual;
2440 memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
2441 dri_bo_unmap(render_state->cc.depth_stencil);
2442 }
2443
2444 static void
2445 gen7_render_sampler(VADriverContextP ctx)
2446 {
2447 struct i965_driver_data *i965 = i965_driver_data(ctx);
2448 struct i965_render_state *render_state = &i965->render_state;
2449 struct gen7_sampler_state *sampler_state;
2450 int i;
2451
2452 assert(render_state->wm.sampler_count > 0);
2453 assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
2454
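/* Every sampler gets bilinear filtering with all three coordinates
 * clamped at the surface edge -- sensible defaults for video scaling. */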
2455 dri_bo_map(render_state->wm.sampler, 1);
2456 assert(render_state->wm.sampler->virtual);
2457 sampler_state = render_state->wm.sampler->virtual;
2458 for (i = 0; i < render_state->wm.sampler_count; i++) {
2459 memset(sampler_state, 0, sizeof(*sampler_state));
2460 sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
2461 sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
2462 sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2463 sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2464 sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
2465 sampler_state++;
2466 }
2467
2468 dri_bo_unmap(render_state->wm.sampler);
2469 }
2470
2471
2472 static void
2473 gen7_render_setup_states(
2474 VADriverContextP ctx,
2475 struct object_surface *obj_surface,
2476 const VARectangle *src_rect,
2477 const VARectangle *dst_rect,
2478 unsigned int flags
2479 )
2480 {
2481 i965_render_dest_surface_state(ctx, 0);
2482 i965_render_src_surfaces_state(ctx, obj_surface, flags);
2483 gen7_render_sampler(ctx);
2484 i965_render_cc_viewport(ctx);
2485 gen7_render_color_calc_state(ctx);
2486 gen7_render_blend_state(ctx);
2487 gen7_render_depth_stencil_state(ctx);
2488 i965_render_upload_constants(ctx, obj_surface, flags);
2489 i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
2490 }
2491
2492
2493 static void
2494 gen7_emit_invarient_states(VADriverContextP ctx)
2495 {
2496 struct i965_driver_data *i965 = i965_driver_data(ctx);
2497 struct intel_batchbuffer *batch = i965->batch;
2498
2499 BEGIN_BATCH(batch, 1);
2500 OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
2501 ADVANCE_BATCH(batch);
2502
2503 BEGIN_BATCH(batch, 4);
2504 OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
2505 OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
2506 GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
2507 OUT_BATCH(batch, 0);
2508 OUT_BATCH(batch, 0);
2509 ADVANCE_BATCH(batch);
2510
2511 BEGIN_BATCH(batch, 2);
2512 OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
2513 OUT_BATCH(batch, 1);
2514 ADVANCE_BATCH(batch);
2515
2516 /* Set system instruction pointer */
2517 BEGIN_BATCH(batch, 2);
2518 OUT_BATCH(batch, CMD_STATE_SIP | 0);
2519 OUT_BATCH(batch, 0);
2520 ADVANCE_BATCH(batch);
2521 }
2522
2523 static void
2524 gen7_emit_state_base_address(VADriverContextP ctx)
2525 {
2526 struct i965_driver_data *i965 = i965_driver_data(ctx);
2527 struct intel_batchbuffer *batch = i965->batch;
2528 struct i965_render_state *render_state = &i965->render_state;
2529
2530 OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
2531 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
2532 OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
2533 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
2534 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
2535 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
2536 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
2537 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
2538 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
2539 OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
2540 }
2541
2542 static void
2543 gen7_emit_viewport_state_pointers(VADriverContextP ctx)
2544 {
2545 struct i965_driver_data *i965 = i965_driver_data(ctx);
2546 struct intel_batchbuffer *batch = i965->batch;
2547 struct i965_render_state *render_state = &i965->render_state;
2548
2549 BEGIN_BATCH(batch, 2);
2550 OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
2551 OUT_RELOC(batch,
2552 render_state->cc.viewport,
2553 I915_GEM_DOMAIN_INSTRUCTION, 0,
2554 0);
2555 ADVANCE_BATCH(batch);
2556
2557 BEGIN_BATCH(batch, 2);
2558 OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
2559 OUT_BATCH(batch, 0);
2560 ADVANCE_BATCH(batch);
2561 }
2562
2563 /*
2564 * URB layout on GEN7
2565 * ----------------------------------------
2566 * | PS Push Constants (8KB) | VS entries |
2567 * ----------------------------------------
2568 */
2569 static void
2570 gen7_emit_urb(VADriverContextP ctx)
2571 {
2572 struct i965_driver_data *i965 = i965_driver_data(ctx);
2573 struct intel_batchbuffer *batch = i965->batch;
2574 unsigned int num_urb_entries = 32;
2575
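/* Haswell has a larger URB than Ivybridge, so more VS entries can be
 * allocated. */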
2576 if (IS_HASWELL(i965->intel.device_info))
2577 num_urb_entries = 64;
2578
2579 BEGIN_BATCH(batch, 2);
2580 OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
2581 OUT_BATCH(batch, 8); /* in 1KBs */
2582 ADVANCE_BATCH(batch);
2583
2584 BEGIN_BATCH(batch, 2);
2585 OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
2586 OUT_BATCH(batch,
2587 (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
2588 (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
2589 (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2590 ADVANCE_BATCH(batch);
2591
2592 BEGIN_BATCH(batch, 2);
2593 OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
2594 OUT_BATCH(batch,
2595 (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2596 (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2597 ADVANCE_BATCH(batch);
2598
2599 BEGIN_BATCH(batch, 2);
2600 OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
2601 OUT_BATCH(batch,
2602 (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2603 (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2604 ADVANCE_BATCH(batch);
2605
2606 BEGIN_BATCH(batch, 2);
2607 OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
2608 OUT_BATCH(batch,
2609 (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
2610 (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
2611 ADVANCE_BATCH(batch);
2612 }
2613
2614 static void
2615 gen7_emit_cc_state_pointers(VADriverContextP ctx)
2616 {
2617 struct i965_driver_data *i965 = i965_driver_data(ctx);
2618 struct intel_batchbuffer *batch = i965->batch;
2619 struct i965_render_state *render_state = &i965->render_state;
2620
2621 BEGIN_BATCH(batch, 2);
2622 OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
2623 OUT_RELOC(batch,
2624 render_state->cc.state,
2625 I915_GEM_DOMAIN_INSTRUCTION, 0,
2626 1);
2627 ADVANCE_BATCH(batch);
2628
2629 BEGIN_BATCH(batch, 2);
2630 OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
2631 OUT_RELOC(batch,
2632 render_state->cc.blend,
2633 I915_GEM_DOMAIN_INSTRUCTION, 0,
2634 1);
2635 ADVANCE_BATCH(batch);
2636
2637 BEGIN_BATCH(batch, 2);
2638 OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
2639 OUT_RELOC(batch,
2640 render_state->cc.depth_stencil,
2641 I915_GEM_DOMAIN_INSTRUCTION, 0,
2642 1);
2643 ADVANCE_BATCH(batch);
2644 }
2645
2646 static void
2647 gen7_emit_sampler_state_pointers(VADriverContextP ctx)
2648 {
2649 struct i965_driver_data *i965 = i965_driver_data(ctx);
2650 struct intel_batchbuffer *batch = i965->batch;
2651 struct i965_render_state *render_state = &i965->render_state;
2652
2653 BEGIN_BATCH(batch, 2);
2654 OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
2655 OUT_RELOC(batch,
2656 render_state->wm.sampler,
2657 I915_GEM_DOMAIN_INSTRUCTION, 0,
2658 0);
2659 ADVANCE_BATCH(batch);
2660 }
2661
2662 static void
2663 gen7_emit_binding_table(VADriverContextP ctx)
2664 {
2665 struct i965_driver_data *i965 = i965_driver_data(ctx);
2666 struct intel_batchbuffer *batch = i965->batch;
2667
2668 BEGIN_BATCH(batch, 2);
2669 OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
2670 OUT_BATCH(batch, BINDING_TABLE_OFFSET);
2671 ADVANCE_BATCH(batch);
2672 }
2673
2674 static void
2675 gen7_emit_depth_buffer_state(VADriverContextP ctx)
2676 {
2677 struct i965_driver_data *i965 = i965_driver_data(ctx);
2678 struct intel_batchbuffer *batch = i965->batch;
2679
2680 BEGIN_BATCH(batch, 7);
2681 OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
2682 OUT_BATCH(batch,
2683 (I965_DEPTHFORMAT_D32_FLOAT << 18) |
2684 (I965_SURFACE_NULL << 29));
2685 OUT_BATCH(batch, 0);
2686 OUT_BATCH(batch, 0);
2687 OUT_BATCH(batch, 0);
2688 OUT_BATCH(batch, 0);
2689 OUT_BATCH(batch, 0);
2690 ADVANCE_BATCH(batch);
2691
2692 BEGIN_BATCH(batch, 3);
2693 OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
2694 OUT_BATCH(batch, 0);
2695 OUT_BATCH(batch, 0);
2696 ADVANCE_BATCH(batch);
2697 }
2698
2699 static void
2700 gen7_emit_drawing_rectangle(VADriverContextP ctx)
2701 {
2702 i965_render_drawing_rectangle(ctx);
2703 }
2704
2705 static void
2706 gen7_emit_vs_state(VADriverContextP ctx)
2707 {
2708 struct i965_driver_data *i965 = i965_driver_data(ctx);
2709 struct intel_batchbuffer *batch = i965->batch;
2710
2711 /* disable VS constant buffer */
2712 OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
2713 OUT_BATCH(batch, 0);
2714 OUT_BATCH(batch, 0);
2715 OUT_BATCH(batch, 0);
2716 OUT_BATCH(batch, 0);
2717 OUT_BATCH(batch, 0);
2718 OUT_BATCH(batch, 0);
2719
2720 OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
2721 OUT_BATCH(batch, 0); /* without VS kernel */
2722 OUT_BATCH(batch, 0);
2723 OUT_BATCH(batch, 0);
2724 OUT_BATCH(batch, 0);
2725 OUT_BATCH(batch, 0); /* pass-through */
2726 }
2727
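/*
 * GEN7 expects every pipeline stage to be programmed even when idle:
 * explicitly disable GS, HS, TE, DS and stream-out so that only the
 * pass-through VS and the PS participate.
 */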
2728 static void
2729 gen7_emit_bypass_state(VADriverContextP ctx)
2730 {
2731 struct i965_driver_data *i965 = i965_driver_data(ctx);
2732 struct intel_batchbuffer *batch = i965->batch;
2733
2734 /* bypass GS */
2735 BEGIN_BATCH(batch, 7);
2736 OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
2737 OUT_BATCH(batch, 0);
2738 OUT_BATCH(batch, 0);
2739 OUT_BATCH(batch, 0);
2740 OUT_BATCH(batch, 0);
2741 OUT_BATCH(batch, 0);
2742 OUT_BATCH(batch, 0);
2743 ADVANCE_BATCH(batch);
2744
2745 BEGIN_BATCH(batch, 7);
2746 OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
2747 OUT_BATCH(batch, 0); /* without GS kernel */
2748 OUT_BATCH(batch, 0);
2749 OUT_BATCH(batch, 0);
2750 OUT_BATCH(batch, 0);
2751 OUT_BATCH(batch, 0);
2752 OUT_BATCH(batch, 0); /* pass-through */
2753 ADVANCE_BATCH(batch);
2754
2755 BEGIN_BATCH(batch, 2);
2756 OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
2757 OUT_BATCH(batch, 0);
2758 ADVANCE_BATCH(batch);
2759
2760 /* disable HS */
2761 BEGIN_BATCH(batch, 7);
2762 OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
2763 OUT_BATCH(batch, 0);
2764 OUT_BATCH(batch, 0);
2765 OUT_BATCH(batch, 0);
2766 OUT_BATCH(batch, 0);
2767 OUT_BATCH(batch, 0);
2768 OUT_BATCH(batch, 0);
2769 ADVANCE_BATCH(batch);
2770
2771 BEGIN_BATCH(batch, 7);
2772 OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
2773 OUT_BATCH(batch, 0);
2774 OUT_BATCH(batch, 0);
2775 OUT_BATCH(batch, 0);
2776 OUT_BATCH(batch, 0);
2777 OUT_BATCH(batch, 0);
2778 OUT_BATCH(batch, 0);
2779 ADVANCE_BATCH(batch);
2780
2781 BEGIN_BATCH(batch, 2);
2782 OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
2783 OUT_BATCH(batch, 0);
2784 ADVANCE_BATCH(batch);
2785
2786 /* Disable TE */
2787 BEGIN_BATCH(batch, 4);
2788 OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
2789 OUT_BATCH(batch, 0);
2790 OUT_BATCH(batch, 0);
2791 OUT_BATCH(batch, 0);
2792 ADVANCE_BATCH(batch);
2793
2794 /* Disable DS */
2795 BEGIN_BATCH(batch, 7);
2796 OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
2797 OUT_BATCH(batch, 0);
2798 OUT_BATCH(batch, 0);
2799 OUT_BATCH(batch, 0);
2800 OUT_BATCH(batch, 0);
2801 OUT_BATCH(batch, 0);
2802 OUT_BATCH(batch, 0);
2803 ADVANCE_BATCH(batch);
2804
2805 BEGIN_BATCH(batch, 6);
2806 OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
2807 OUT_BATCH(batch, 0);
2808 OUT_BATCH(batch, 0);
2809 OUT_BATCH(batch, 0);
2810 OUT_BATCH(batch, 0);
2811 OUT_BATCH(batch, 0);
2812 ADVANCE_BATCH(batch);
2813
2814 BEGIN_BATCH(batch, 2);
2815 OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
2816 OUT_BATCH(batch, 0);
2817 ADVANCE_BATCH(batch);
2818
2819 /* Disable STREAMOUT */
2820 BEGIN_BATCH(batch, 3);
2821 OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
2822 OUT_BATCH(batch, 0);
2823 OUT_BATCH(batch, 0);
2824 ADVANCE_BATCH(batch);
2825 }
2826
2827 static void
2828 gen7_emit_clip_state(VADriverContextP ctx)
2829 {
2830 struct i965_driver_data *i965 = i965_driver_data(ctx);
2831 struct intel_batchbuffer *batch = i965->batch;
2832
2833 OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
2834 OUT_BATCH(batch, 0);
2835 OUT_BATCH(batch, 0); /* pass-through */
2836 OUT_BATCH(batch, 0);
2837 }
2838
2839 static void
2840 gen7_emit_sf_state(VADriverContextP ctx)
2841 {
2842 struct i965_driver_data *i965 = i965_driver_data(ctx);
2843 struct intel_batchbuffer *batch = i965->batch;
2844
2845 BEGIN_BATCH(batch, 14);
2846 OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
2847 OUT_BATCH(batch,
2848 (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
2849 (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
2850 (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
2851 OUT_BATCH(batch, 0);
2852 OUT_BATCH(batch, 0);
2853 OUT_BATCH(batch, 0); /* DW4 */
2854 OUT_BATCH(batch, 0);
2855 OUT_BATCH(batch, 0);
2856 OUT_BATCH(batch, 0);
2857 OUT_BATCH(batch, 0);
2858 OUT_BATCH(batch, 0); /* DW9 */
2859 OUT_BATCH(batch, 0);
2860 OUT_BATCH(batch, 0);
2861 OUT_BATCH(batch, 0);
2862 OUT_BATCH(batch, 0);
2863 ADVANCE_BATCH(batch);
2864
2865 BEGIN_BATCH(batch, 7);
2866 OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
2867 OUT_BATCH(batch, 0);
2868 OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
2869 OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
2870 OUT_BATCH(batch, 0);
2871 OUT_BATCH(batch, 0);
2872 OUT_BATCH(batch, 0);
2873 ADVANCE_BATCH(batch);
2874 }
2875
2876 static void
2877 gen7_emit_wm_state(VADriverContextP ctx, int kernel)
2878 {
2879 struct i965_driver_data *i965 = i965_driver_data(ctx);
2880 struct intel_batchbuffer *batch = i965->batch;
2881 struct i965_render_state *render_state = &i965->render_state;
2882 unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
2883 unsigned int num_samples = 0;
2884
2885 if (IS_HASWELL(i965->intel.device_info)) {
2886 max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
2887 num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
2888 }
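/* Haswell moved the PS max-thread-count field within 3DSTATE_PS and
 * takes an explicit sample mask, hence the overrides above. */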
2889
2890 BEGIN_BATCH(batch, 3);
2891 OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
2892 OUT_BATCH(batch,
2893 GEN7_WM_DISPATCH_ENABLE |
2894 GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
2895 OUT_BATCH(batch, 0);
2896 ADVANCE_BATCH(batch);
2897
2898 BEGIN_BATCH(batch, 7);
2899 OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
2900 OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
2901 OUT_BATCH(batch, 0);
2902 OUT_RELOC(batch,
2903 render_state->curbe.bo,
2904 I915_GEM_DOMAIN_INSTRUCTION, 0,
2905 0);
2906 OUT_BATCH(batch, 0);
2907 OUT_BATCH(batch, 0);
2908 OUT_BATCH(batch, 0);
2909 ADVANCE_BATCH(batch);
2910
2911 BEGIN_BATCH(batch, 8);
2912 OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
2913 OUT_RELOC(batch,
2914 render_state->render_kernels[kernel].bo,
2915 I915_GEM_DOMAIN_INSTRUCTION, 0,
2916 0);
2917 OUT_BATCH(batch,
2918 (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
2919 (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
2920 OUT_BATCH(batch, 0); /* scratch space base offset */
2921 OUT_BATCH(batch,
2922 ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
2923 GEN7_PS_PUSH_CONSTANT_ENABLE |
2924 GEN7_PS_ATTRIBUTE_ENABLE |
2925 GEN7_PS_16_DISPATCH_ENABLE);
2926 OUT_BATCH(batch,
2927 (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
2928 OUT_BATCH(batch, 0); /* kernel 1 pointer */
2929 OUT_BATCH(batch, 0); /* kernel 2 pointer */
2930 ADVANCE_BATCH(batch);
2931 }
2932
2933 static void
2934 gen7_emit_vertex_element_state(VADriverContextP ctx)
2935 {
2936 struct i965_driver_data *i965 = i965_driver_data(ctx);
2937 struct intel_batchbuffer *batch = i965->batch;
2938
2939 /* Set up our vertex elements, sourced from the single vertex buffer. */
2940 OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
2941 /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
2942 OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2943 GEN6_VE0_VALID |
2944 (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2945 (0 << VE0_OFFSET_SHIFT));
2946 OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2947 (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2948 (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2949 (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2950 /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
2951 OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
2952 GEN6_VE0_VALID |
2953 (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
2954 (8 << VE0_OFFSET_SHIFT));
2955 OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
2956 (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
2957 (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
2958 (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
2959 }
2960
2961 static void
2962 gen7_emit_vertices(VADriverContextP ctx)
2963 {
2964 struct i965_driver_data *i965 = i965_driver_data(ctx);
2965 struct intel_batchbuffer *batch = i965->batch;
2966 struct i965_render_state *render_state = &i965->render_state;
2967
2968 BEGIN_BATCH(batch, 5);
2969 OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
2970 OUT_BATCH(batch,
2971 (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
2972 GEN6_VB0_VERTEXDATA |
2973 GEN7_VB0_ADDRESS_MODIFYENABLE |
2974 ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
2975 OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
2976 OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
2977 OUT_BATCH(batch, 0);
2978 ADVANCE_BATCH(batch);
2979
2980 BEGIN_BATCH(batch, 7);
2981 OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
2982 OUT_BATCH(batch,
2983 _3DPRIM_RECTLIST |
2984 GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
2985 OUT_BATCH(batch, 3); /* vertex count per instance */
2986 OUT_BATCH(batch, 0); /* start vertex offset */
2987 OUT_BATCH(batch, 1); /* single instance */
2988 OUT_BATCH(batch, 0); /* start instance location */
2989 OUT_BATCH(batch, 0);
2990 ADVANCE_BATCH(batch);
2991 }
2992
2993 static void
2994 gen7_render_emit_states(VADriverContextP ctx, int kernel)
2995 {
2996 struct i965_driver_data *i965 = i965_driver_data(ctx);
2997 struct intel_batchbuffer *batch = i965->batch;
2998
2999 intel_batchbuffer_start_atomic(batch, 0x1000);
3000 intel_batchbuffer_emit_mi_flush(batch);
3001 gen7_emit_invarient_states(ctx);
3002 gen7_emit_state_base_address(ctx);
3003 gen7_emit_viewport_state_pointers(ctx);
3004 gen7_emit_urb(ctx);
3005 gen7_emit_cc_state_pointers(ctx);
3006 gen7_emit_sampler_state_pointers(ctx);
3007 gen7_emit_bypass_state(ctx);
3008 gen7_emit_vs_state(ctx);
3009 gen7_emit_clip_state(ctx);
3010 gen7_emit_sf_state(ctx);
3011 gen7_emit_wm_state(ctx, kernel);
3012 gen7_emit_binding_table(ctx);
3013 gen7_emit_depth_buffer_state(ctx);
3014 gen7_emit_drawing_rectangle(ctx);
3015 gen7_emit_vertex_element_state(ctx);
3016 gen7_emit_vertices(ctx);
3017 intel_batchbuffer_end_atomic(batch);
3018 }
3019
3020
3021 static void
3022 gen7_render_put_surface(
3023 VADriverContextP ctx,
3024 struct object_surface *obj_surface,
3025 const VARectangle *src_rect,
3026 const VARectangle *dst_rect,
3027 unsigned int flags
3028 )
3029 {
3030 struct i965_driver_data *i965 = i965_driver_data(ctx);
3031 struct intel_batchbuffer *batch = i965->batch;
3032
3033 gen7_render_initialize(ctx);
3034 gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
3035 i965_clear_dest_region(ctx);
3036 gen7_render_emit_states(ctx, PS_KERNEL);
3037 intel_batchbuffer_flush(batch);
3038 }
3039
3040
3041 static void
3042 gen7_subpicture_render_blend_state(VADriverContextP ctx)
3043 {
3044 struct i965_driver_data *i965 = i965_driver_data(ctx);
3045 struct i965_render_state *render_state = &i965->render_state;
3046 struct gen6_blend_state *blend_state;
3047
3048 dri_bo_unmap(render_state->cc.state);
3049 dri_bo_map(render_state->cc.blend, 1);
3050 assert(render_state->cc.blend->virtual);
3051 blend_state = render_state->cc.blend->virtual;
3052 memset(blend_state, 0, sizeof(*blend_state));
3053 blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
3054 blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
3055 blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
3056 blend_state->blend0.blend_enable = 1;
3057 blend_state->blend1.post_blend_clamp_enable = 1;
3058 blend_state->blend1.pre_blend_clamp_enable = 1;
3059 blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
3060 dri_bo_unmap(render_state->cc.blend);
3061 }
3062
3063 static void
3064 gen7_subpicture_render_setup_states(
3065 VADriverContextP ctx,
3066 struct object_surface *obj_surface,
3067 const VARectangle *src_rect,
3068 const VARectangle *dst_rect
3069 )
3070 {
3071 i965_render_dest_surface_state(ctx, 0);
3072 i965_subpic_render_src_surfaces_state(ctx, obj_surface);
3073 i965_render_sampler(ctx);
3074 i965_render_cc_viewport(ctx);
3075 gen7_render_color_calc_state(ctx);
3076 gen7_subpicture_render_blend_state(ctx);
3077 gen7_render_depth_stencil_state(ctx);
3078 i965_subpic_render_upload_constants(ctx, obj_surface);
3079 i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
3080 }
3081
3082 static void
3083 gen7_render_put_subpicture(
3084 VADriverContextP ctx,
3085 struct object_surface *obj_surface,
3086 const VARectangle *src_rect,
3087 const VARectangle *dst_rect
3088 )
3089 {
3090 struct i965_driver_data *i965 = i965_driver_data(ctx);
3091 struct intel_batchbuffer *batch = i965->batch;
3092 unsigned int index = obj_surface->subpic_render_idx;
3093 struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
3094
3095 assert(obj_subpic);
3096 gen7_render_initialize(ctx);
3097 gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
3098 gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
3099 i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
3100 intel_batchbuffer_flush(batch);
3101 }
3102
3103
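/*
 * Public PutSurface entry point: give the post-processing pipeline a
 * chance to color-convert/scale into a temporary surface first, render
 * whichever surface results through the GEN-specific hook, then destroy
 * the temporary surface.
 */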
3104 void
3105 intel_render_put_surface(
3106 VADriverContextP ctx,
3107 struct object_surface *obj_surface,
3108 const VARectangle *src_rect,
3109 const VARectangle *dst_rect,
3110 unsigned int flags
3111 )
3112 {
3113 struct i965_driver_data *i965 = i965_driver_data(ctx);
3114 struct i965_render_state *render_state = &i965->render_state;
3115 int has_done_scaling = 0;
3116 VARectangle calibrated_rect;
3117 VASurfaceID out_surface_id = i965_post_processing(ctx,
3118 obj_surface,
3119 src_rect,
3120 dst_rect,
3121 flags,
3122 &has_done_scaling,
3123 &calibrated_rect);
3124
3125 assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
3126
3127 if (out_surface_id != VA_INVALID_ID) {
3128 struct object_surface *new_obj_surface = SURFACE(out_surface_id);
3129
3130 if (new_obj_surface && new_obj_surface->bo)
3131 obj_surface = new_obj_surface;
3132
3133 if (has_done_scaling)
3134 src_rect = &calibrated_rect;
3135 }
3136
3137 render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
3138
3139 if (out_surface_id != VA_INVALID_ID)
3140 i965_DestroySurfaces(ctx, &out_surface_id, 1);
3141 }
3142
3143 void
3144 intel_render_put_subpicture(
3145 VADriverContextP ctx,
3146 struct object_surface *obj_surface,
3147 const VARectangle *src_rect,
3148 const VARectangle *dst_rect
3149 )
3150 {
3151 struct i965_driver_data *i965 = i965_driver_data(ctx);
3152 struct i965_render_state *render_state = &i965->render_state;
3153
3154 render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
3155 }
3156
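/*
 * Drop every reference taken in genx_render_init() and the per-frame
 * *_render_initialize() helpers, NULLing the pointers so that terminate
 * is safe against partially initialized state.
 */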
3157 static void
3158 genx_render_terminate(VADriverContextP ctx)
3159 {
3160 int i;
3161 struct i965_driver_data *i965 = i965_driver_data(ctx);
3162 struct i965_render_state *render_state = &i965->render_state;
3163
3164 dri_bo_unreference(render_state->curbe.bo);
3165 render_state->curbe.bo = NULL;
3166
3167 for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3168 struct i965_kernel *kernel = &render_state->render_kernels[i];
3169
3170 dri_bo_unreference(kernel->bo);
3171 kernel->bo = NULL;
3172 }
3173
3174 dri_bo_unreference(render_state->vb.vertex_buffer);
3175 render_state->vb.vertex_buffer = NULL;
3176 dri_bo_unreference(render_state->vs.state);
3177 render_state->vs.state = NULL;
3178 dri_bo_unreference(render_state->sf.state);
3179 render_state->sf.state = NULL;
3180 dri_bo_unreference(render_state->wm.sampler);
3181 render_state->wm.sampler = NULL;
3182 dri_bo_unreference(render_state->wm.state);
3183 render_state->wm.state = NULL;
3184 dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
render_state->wm.surface_state_binding_table_bo = NULL;
3185 dri_bo_unreference(render_state->cc.viewport);
3186 render_state->cc.viewport = NULL;
3187 dri_bo_unreference(render_state->cc.state);
3188 render_state->cc.state = NULL;
3189 dri_bo_unreference(render_state->cc.blend);
3190 render_state->cc.blend = NULL;
3191 dri_bo_unreference(render_state->cc.depth_stencil);
3192 render_state->cc.depth_stencil = NULL;
3193
3194 if (render_state->draw_region) {
3195 dri_bo_unreference(render_state->draw_region->bo);
3196 free(render_state->draw_region);
3197 render_state->draw_region = NULL;
3198 }
3199 }
3200
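/*
 * Select the shader binaries and PutSurface/PutSubpicture hooks for the
 * detected GPU generation, upload the kernels into buffer objects, and
 * allocate the shared constant (CURBE) buffer.
 */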
3201 bool
3202 genx_render_init(VADriverContextP ctx)
3203 {
3204 struct i965_driver_data *i965 = i965_driver_data(ctx);
3205 struct i965_render_state *render_state = &i965->render_state;
3206 int i;
3207
3208 /* kernel */
3209 assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
3210 sizeof(render_kernels_gen5[0])));
3211 assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
3212 sizeof(render_kernels_gen6[0])));
3213
3214 if (IS_GEN7(i965->intel.device_info)) {
3215 memcpy(render_state->render_kernels,
3216 (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
3217 sizeof(render_state->render_kernels));
3218 render_state->render_put_surface = gen7_render_put_surface;
3219 render_state->render_put_subpicture = gen7_render_put_subpicture;
3220 } else if (IS_GEN6(i965->intel.device_info)) {
3221 memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
3222 render_state->render_put_surface = gen6_render_put_surface;
3223 render_state->render_put_subpicture = gen6_render_put_subpicture;
3224 } else if (IS_IRONLAKE(i965->intel.device_info)) {
3225 memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
3226 render_state->render_put_surface = i965_render_put_surface;
3227 render_state->render_put_subpicture = i965_render_put_subpicture;
3228 } else {
3229 memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
3230 render_state->render_put_surface = i965_render_put_surface;
3231 render_state->render_put_subpicture = i965_render_put_subpicture;
3232 }
3233
3234 render_state->render_terminate = genx_render_terminate;
3235
3236 for (i = 0; i < NUM_RENDER_KERNEL; i++) {
3237 struct i965_kernel *kernel = &render_state->render_kernels[i];
3238
3239 if (!kernel->size)
3240 continue;
3241
3242 kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
3243 kernel->name,
3244 kernel->size, 0x1000);
3245 assert(kernel->bo);
3246 dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
3247 }
3248
3249 /* constant buffer */
3250 render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
3251 "constant buffer",
3252 4096, 64);
3253 assert(render_state->curbe.bo);
3254
3255 return true;
3256 }
3257
3258 bool
3259 i965_render_init(VADriverContextP ctx)
3260 {
3261 struct i965_driver_data *i965 = i965_driver_data(ctx);
3262
3263 return i965->codec_info->render_init(ctx);
3264 }
3265
3266 void
3267 i965_render_terminate(VADriverContextP ctx)
3268 {
3269 struct i965_driver_data *i965 = i965_driver_data(ctx);
3270 struct i965_render_state *render_state = &i965->render_state;
3271
3272 render_state->render_terminate(ctx);
3273 }
3274