/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_drv_video.h"
#include "i965_gpe_utils.h"

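/*
 * Helpers for driving the GEN media (GPGPU) pipeline: pipeline selection,
 * base-address/VFE/CURBE/interface-descriptor programming, surface and
 * binding-table setup, GPE resource wrappers and MI command emission used
 * by the video encoding/processing kernels.
 */
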
static void
i965_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}

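/*
 * Emit a 10-dword STATE_BASE_ADDRESS. Only the surface state base is
 * relocated to the surface-state/binding-table BO; the remaining bases and
 * upper bounds are left at zero, with the modify bit set so the programmed
 * values take effect.
 */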
static void
gen6_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 10);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Base Address */
    OUT_RELOC(batch,
              gpe_context->surface_state_binding_table.bo,
              I915_GEM_DOMAIN_INSTRUCTION,
              0,
              BASE_ADDRESS_MODIFY); /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */

    ADVANCE_BATCH(batch);
}

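/*
 * MEDIA_VFE_STATE configures the fixed-function thread dispatcher:
 * maximum thread count, URB entry count/size and the CURBE allocation.
 * vfe_desc5/6/7 carry the scoreboard control dwords.
 */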
static void
gen6_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 8);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0); /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |  /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);       /* MEDIA Mode */
    OUT_BATCH(batch, 0); /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |    /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);   /* CURBE Allocation Size */
    /* vfe_desc5/6/7 decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

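/*
 * MEDIA_CURBE_LOAD points the constant URB entry (CURBE) at the constant
 * buffer; on gen6/7 the CURBE BO is referenced directly via a relocation.
 */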
static void
gen6_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe.length);
    OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

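/*
 * Load the interface descriptor remap table (IDRT); the total length is
 * max_entries * entry_size bytes, referenced via a relocation on gen6/7.
 */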
static void
gen6_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
    OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

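/*
 * Copy the kernel descriptors into the context and upload each kernel
 * binary into its own 4KB-aligned BO.
 */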
void
i965_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    for (i = 0; i < num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size,
                                  0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }
}

void
i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    int i;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = NULL;

    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = NULL;

    for (i = 0; i < gpe_context->num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }
}

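/*
 * (Re)allocate the per-context BOs: surface state & binding table,
 * interface descriptor table and CURBE, each 4KB-aligned.
 */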
void
i965_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    dri_bo_unreference(gpe_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "interface descriptor table",
                      gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
                      4096);
    assert(bo);
    gpe_context->idrt.bo = bo;

    dri_bo_unreference(gpe_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "curbe buffer",
                      gpe_context->curbe.length,
                      4096);
    assert(bo);
    gpe_context->curbe.bo = bo;
}

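/*
 * Set up the gen6/7 media pipeline: flush, select the MEDIA pipeline, then
 * program base addresses, VFE state, CURBE and the descriptor table.
 */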
void
gen6_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen6_gpe_state_base_address(ctx, gpe_context, batch);
    gen6_gpe_vfe_state(ctx, gpe_context, batch);
    gen6_gpe_curbe_load(ctx, gpe_context, batch);
    gen6_gpe_idrt(ctx, gpe_context, batch);
}

static void
gen8_gpe_pipeline_end(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct intel_batchbuffer *batch)
{
    /* Nothing to do */
}

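/*
 * The tiling helpers below translate an I915_TILING_* mode into the
 * tiled_surface/tile_walk fields of the various SURFACE_STATE layouts.
 */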
static void
i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

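/*
 * Fill a SURFACE_STATE (advanced/state2 layout) for an NV12 surface:
 * width, height and pitch are programmed minus one, chroma is interleaved
 * and the UV plane is addressed through the x/y cb offsets.
 */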
static void
i965_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct i965_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    i965_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

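/*
 * Write the surface state into the shared surface-state/binding-table BO,
 * emit a relocation for the surface address and store the surface state
 * offset into the binding-table slot.
 */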
378
379 void
i965_gpe_surface2_setup(VADriverContextP ctx,struct i965_gpe_context * gpe_context,struct object_surface * obj_surface,unsigned long binding_table_offset,unsigned long surface_state_offset)380 i965_gpe_surface2_setup(VADriverContextP ctx,
381 struct i965_gpe_context *gpe_context,
382 struct object_surface *obj_surface,
383 unsigned long binding_table_offset,
384 unsigned long surface_state_offset)
385 {
386 struct i965_surface_state2 *ss;
387 dri_bo *bo;
388
389 bo = gpe_context->surface_state_binding_table.bo;
390 dri_bo_map(bo, 1);
391 assert(bo->virtual);
392
393 ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
394 i965_gpe_set_surface2_state(ctx, obj_surface, ss);
395 dri_bo_emit_reloc(bo,
396 I915_GEM_DOMAIN_RENDER, 0,
397 0,
398 surface_state_offset + offsetof(struct i965_surface_state2, ss0),
399 obj_surface->bo);
400
401 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
402 dri_bo_unmap(bo);
403 }
404
405 static void
i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,struct object_surface * obj_surface,struct i965_surface_state * ss)406 i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
407 struct object_surface *obj_surface,
408 struct i965_surface_state *ss)
409 {
410 int w, h, w_pitch;
411 unsigned int tiling, swizzle;
412
413 dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
414 w = obj_surface->orig_width;
415 h = obj_surface->orig_height;
416 w_pitch = obj_surface->width;
417
418 memset(ss, 0, sizeof(*ss));
419 /* ss0 */
420 ss->ss0.surface_type = I965_SURFACE_2D;
421 ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
422 /* ss1 */
423 ss->ss1.base_addr = obj_surface->bo->offset;
424 /* ss2 */
425 ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
426 ss->ss2.height = h - 1;
427 /* ss3 */
428 ss->ss3.pitch = w_pitch - 1;
429 i965_gpe_set_surface_tiling(ss, tiling);
430 }
431
432 void
i965_gpe_media_rw_surface_setup(VADriverContextP ctx,struct i965_gpe_context * gpe_context,struct object_surface * obj_surface,unsigned long binding_table_offset,unsigned long surface_state_offset,int write_enabled)433 i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
434 struct i965_gpe_context *gpe_context,
435 struct object_surface *obj_surface,
436 unsigned long binding_table_offset,
437 unsigned long surface_state_offset,
438 int write_enabled)
439 {
440 struct i965_surface_state *ss;
441 dri_bo *bo;
442
443 bo = gpe_context->surface_state_binding_table.bo;
444 dri_bo_map(bo, True);
445 assert(bo->virtual);
446
447 ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
448 i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
449 dri_bo_emit_reloc(bo,
450 I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
451 0,
452 surface_state_offset + offsetof(struct i965_surface_state, ss1),
453 obj_surface->bo);
454
455 *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
456 dri_bo_unmap(bo);
457 }
458
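/*
 * For buffer surfaces the entry count minus one is split across the
 * width, height and depth fields of SURFACE_STATE.
 */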
static void
i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct i965_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.render_cache_read_mode = 1;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
i965_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen7_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen7_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen7_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen7_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen7_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

static void
gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                         struct object_surface *obj_surface,
                                         struct gen7_surface_state *ss)
{
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

void
gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                     struct i965_gpe_context *gpe_context,
                                     struct object_surface *obj_surface,
                                     unsigned long binding_table_offset,
                                     unsigned long surface_state_offset,
                                     int write_enabled)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen7_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

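/*
 * The gen8+ surface state variants carry 48-bit graphics addresses split
 * into low/high dwords and program the MOCS field (PTE-based caching) on
 * gen9/gen10.
 */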
static void
gen8_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen8_surface_state2 *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12
           || obj_surface->fourcc == VA_FOURCC_P010);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss5: memory object control state */
    if (IS_GEN9(i965->intel.device_info) ||
        IS_GEN10(i965->intel.device_info))
        ss->ss5.surface_object_mocs = GEN9_CACHE_PTE;

    /* ss6/ss7: 48-bit surface base address */
    ss->ss6.base_addr = (uint32_t)obj_surface->bo->offset64;
    ss->ss7.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen8_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen8_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen8_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss1: memory object control state */
    if (IS_GEN9(i965->intel.device_info) ||
        IS_GEN10(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8/ss9: 48-bit surface base address */
    ss->ss8.base_addr = (uint32_t)obj_surface->bo->offset64;
    ss->ss9.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32);
    /* ss2 */
    ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

static void
gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                        struct object_surface *obj_surface,
                                        struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;
    uint64_t base_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss1: memory object control state */
    if (IS_GEN9(i965->intel.device_info) ||
        IS_GEN10(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8/ss9: 48-bit surface base address */
    base_offset = obj_surface->bo->offset64 + cbcr_offset;
    ss->ss8.base_addr = (uint32_t) base_offset;
    ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
    /* ss2 */
    ss->ss2.width = w / 4 - 1; /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

void
gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset,
                                int write_enabled)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct object_surface *obj_surface,
                                    unsigned long binding_table_offset,
                                    unsigned long surface_state_offset,
                                    int write_enabled)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12
           || obj_surface->fourcc == VA_FOURCC_P010);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, True);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, write_enabled ? I915_GEM_DOMAIN_RENDER : 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen8_surface_state *ss)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1: memory object control state */
    if (IS_GEN9(i965->intel.device_info) ||
        IS_GEN10(i965->intel.device_info))
        ss->ss1.surface_mocs = GEN9_CACHE_PTE;

    /* ss8/ss9: 48-bit surface base address */
    ss->ss8.base_addr = (uint32_t)buffer_surface->bo->offset64;
    ss->ss9.base_addr_high = (uint32_t)(buffer_surface->bo->offset64 >> 32);
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

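/*
 * gen8 STATE_BASE_ADDRESS is 16 dwords with 64-bit bases for the surface,
 * dynamic, indirect-object and instruction state.
 */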
static void
gen8_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 16);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4. Surface state base address */
    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */

    /* DW6. Dynamic state base address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW8. Indirect object base address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
                    I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW10. Instruction base address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
                    I915_GEM_DOMAIN_INSTRUCTION,
                    0, BASE_ADDRESS_MODIFY);
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW12. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound

    /*
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          //LLC Coherent Base Address
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //LLC Coherent Upper Bound
    */

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 9);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
    /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 | /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |  /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);       /* MEDIA Mode */
    OUT_BATCH(batch, 0); /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |    /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);   /* CURBE Allocation Size */

    /* vfe_desc5/6/7 decide whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

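/*
 * On gen8+ the CURBE and the interface descriptor table live inside the
 * dynamic state BO, so the load commands take offsets relative to the
 * dynamic state base address instead of relocations.
 */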
static void
gen8_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, ALIGN(gpe_context->curbe.length, 64));
    OUT_BATCH(batch, gpe_context->curbe.offset);

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 6);

    OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
    OUT_BATCH(batch, gpe_context->idrt.offset);

    ADVANCE_BATCH(batch);
}

void
gen8_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen8_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}

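/*
 * gen8 context init sub-allocates the CURBE, interface descriptors and
 * sampler states from a single dynamic-state BO at 64-byte aligned
 * offsets; curbe/idrt/sampler all hold references on that BO.
 */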
void
gen8_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    int bo_size;
    unsigned int start_offset, end_offset;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    bo_size = gpe_context->idrt.max_entries * ALIGN(gpe_context->idrt.entry_size, 64) +
              ALIGN(gpe_context->curbe.length, 64) +
              gpe_context->sampler.max_entries * ALIGN(gpe_context->sampler.entry_size, 64);
    dri_bo_unreference(gpe_context->dynamic_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic state",
                      bo_size,
                      4096);
    assert(bo);
    gpe_context->dynamic_state.bo = bo;
    gpe_context->dynamic_state.bo_size = bo_size;

    end_offset = 0;
    gpe_context->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    start_offset = ALIGN(end_offset, 64);
    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = bo;
    dri_bo_reference(gpe_context->curbe.bo);
    gpe_context->curbe.offset = start_offset;
    end_offset = start_offset + gpe_context->curbe.length;

    /* Interface descriptor offset */
    start_offset = ALIGN(end_offset, 64);
    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = bo;
    dri_bo_reference(gpe_context->idrt.bo);
    gpe_context->idrt.offset = start_offset;
    end_offset = start_offset + ALIGN(gpe_context->idrt.entry_size, 64) * gpe_context->idrt.max_entries;

    /* Sampler state offset */
    start_offset = ALIGN(end_offset, 64);
    dri_bo_unreference(gpe_context->sampler.bo);
    gpe_context->sampler.bo = bo;
    dri_bo_reference(gpe_context->sampler.bo);
    gpe_context->sampler.offset = start_offset;
    end_offset = start_offset + ALIGN(gpe_context->sampler.entry_size, 64) * gpe_context->sampler.max_entries;

    /* Update the end offset of the dynamic state */
    gpe_context->dynamic_state.end_offset = end_offset;
}

void
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->instruction_state.bo);
    gpe_context->instruction_state.bo = NULL;

    dri_bo_unreference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo = NULL;

    dri_bo_unreference(gpe_context->indirect_state.bo);
    gpe_context->indirect_state.bo = NULL;

    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = NULL;

    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = NULL;

    dri_bo_unreference(gpe_context->sampler.bo);
    gpe_context->sampler.bo = NULL;
}

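/*
 * Pack all kernel binaries into one instruction-state BO, each at a
 * 64-byte aligned offset recorded in kernel->kernel_offset.
 */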
void
gen8_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i, kernel_size = 0;
    unsigned int kernel_offset, end_offset;
    unsigned char *kernel_ptr;
    struct i965_kernel *kernel;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    for (i = 0; i < num_kernels; i++) {
        kernel = &gpe_context->kernels[i];

        kernel_size += ALIGN(kernel->size, 64);
    }

    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                     "kernel shader",
                                                     kernel_size,
                                                     0x1000);
    if (gpe_context->instruction_state.bo == NULL) {
        WARN_ONCE("failed to allocate buffer space for the kernel shader\n");
        return;
    }

    gpe_context->instruction_state.bo_size = kernel_size;
    gpe_context->instruction_state.end_offset = 0;
    end_offset = 0;

    dri_bo_map(gpe_context->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
    for (i = 0; i < num_kernels; i++) {
        kernel_offset = ALIGN(end_offset, 64);
        kernel = &gpe_context->kernels[i];
        kernel->kernel_offset = kernel_offset;

        if (kernel->size) {
            memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

            end_offset = kernel_offset + kernel->size;
        }
    }

    gpe_context->instruction_state.end_offset = end_offset;

    dri_bo_unmap(gpe_context->instruction_state.bo);
}

static void
gen9_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    BEGIN_BATCH(batch, 19);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY); //General State Base Address
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4. Surface state base address */
    OUT_RELOC64(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4)); /* Surface state base address */

    /* DW6. Dynamic state base address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC64(batch, gpe_context->dynamic_state.bo,
                    I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                    I915_GEM_DOMAIN_RENDER,
                    BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4));
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW8. Indirect object base address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC64(batch, gpe_context->indirect_state.bo,
                    I915_GEM_DOMAIN_SAMPLER,
                    0, BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4));
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW10. Instruction base address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC64(batch, gpe_context->instruction_state.bo,
                    I915_GEM_DOMAIN_INSTRUCTION,
                    0, BASE_ADDRESS_MODIFY | (i965->intel.mocs_state << 4));
    else {
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0);
    }

    /* DW12. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //General State Access Upper Bound
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Dynamic State Access Upper Bound
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Indirect Object Access Upper Bound
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); //Instruction Access Upper Bound

    /* The bindless surface state address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0xFFFFF000);

    ADVANCE_BATCH(batch);
}

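/*
 * gen9 PIPELINE_SELECT requires mask bits for each field it changes: here
 * the MEDIA pipeline is selected with DOP clock gating off and media
 * force-awake on; gen9_gpe_pipeline_end reverses both.
 */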
static void
gen9_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
              GEN9_PIPELINE_SELECTION_MASK |
              GEN9_MEDIA_DOP_GATE_OFF |
              GEN9_MEDIA_DOP_GATE_MASK |
              GEN9_FORCE_MEDIA_AWAKE_ON |
              GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}

void
gen9_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    gen9_gpe_select(ctx, gpe_context, batch);
    gen9_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}

void
gen9_gpe_pipeline_end(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
              GEN9_PIPELINE_SELECTION_MASK |
              GEN9_MEDIA_DOP_GATE_ON |
              GEN9_MEDIA_DOP_GATE_MASK |
              GEN9_FORCE_MEDIA_AWAKE_OFF |
              GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}

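/*
 * i965_gpe_resource helpers: wrap an object_surface or dri_bo as a 2D or
 * buffer resource, taking a reference on the BO, and manage its CPU
 * mapping.
 */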
Bool
i965_allocate_gpe_resource(dri_bufmgr *bufmgr,
                           struct i965_gpe_resource *res,
                           int size,
                           const char *name)
{
    if (!res || !size)
        return false;

    res->size = size;
    res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
    res->map = NULL;

    return (res->bo != NULL);
}

void
i965_object_surface_to_2d_gpe_resource_with_align(struct i965_gpe_resource *res,
                                                  struct object_surface *obj_surface,
                                                  unsigned int alignment)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = ALIGN(obj_surface->orig_width, (1 << alignment));
    res->height = ALIGN(obj_surface->orig_height, (1 << alignment));
    res->pitch = obj_surface->width;
    res->size = obj_surface->size;
    res->cb_cr_pitch = obj_surface->cb_cr_pitch;
    res->x_cb_offset = obj_surface->x_cb_offset;
    res->y_cb_offset = obj_surface->y_cb_offset;
    res->bo = obj_surface->bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(obj_surface->bo, &res->tiling, &swizzle);
}

void
i965_object_surface_to_2d_gpe_resource(struct i965_gpe_resource *res,
                                       struct object_surface *obj_surface)
{
    i965_object_surface_to_2d_gpe_resource_with_align(res, obj_surface, 0);
}

void
i965_dri_object_to_buffer_gpe_resource(struct i965_gpe_resource *res,
                                       dri_bo *bo)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_BUFFER;
    res->width = bo->size;
    res->height = 1;
    res->pitch = res->width;
    res->size = res->pitch * res->height;
    res->bo = bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
}

void
i965_dri_object_to_2d_gpe_resource(struct i965_gpe_resource *res,
                                   dri_bo *bo,
                                   unsigned int width,
                                   unsigned int height,
                                   unsigned int pitch)
{
    unsigned int swizzle;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = width;
    res->height = height;
    res->pitch = pitch;
    res->size = res->pitch * res->height;
    res->bo = bo;
    res->map = NULL;

    dri_bo_reference(res->bo);
    dri_bo_get_tiling(res->bo, &res->tiling, &swizzle);
}

void
i965_zero_gpe_resource(struct i965_gpe_resource *res)
{
    if (res->bo) {
        dri_bo_map(res->bo, 1);
        memset(res->bo->virtual, 0, res->size);
        dri_bo_unmap(res->bo);
    }
}

void
i965_free_gpe_resource(struct i965_gpe_resource *res)
{
    dri_bo_unreference(res->bo);
    res->bo = NULL;
    res->map = NULL;
}

void *
i965_map_gpe_resource(struct i965_gpe_resource *res)
{
    int ret;

    if (res->bo) {
        ret = dri_bo_map(res->bo, 1);

        if (ret == 0)
            res->map = res->bo->virtual;
        else
            res->map = NULL;
    } else
        res->map = NULL;

    return res->map;
}

void
i965_unmap_gpe_resource(struct i965_gpe_resource *res)
{
    if (res->bo && res->map)
        dri_bo_unmap(res->bo);

    res->map = NULL;
}

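/*
 * MI command helpers. MI_FLUSH_DW optionally performs a post-sync QWord
 * write to params->bo at params->offset when a BO is supplied.
 */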
1506 void
gen8_gpe_mi_flush_dw(VADriverContextP ctx,struct intel_batchbuffer * batch,struct gpe_mi_flush_dw_parameter * params)1507 gen8_gpe_mi_flush_dw(VADriverContextP ctx,
1508 struct intel_batchbuffer *batch,
1509 struct gpe_mi_flush_dw_parameter *params)
1510 {
1511 int video_pipeline_cache_invalidate = 0;
1512 int post_sync_operation = MI_FLUSH_DW_NOWRITE;
1513
1514 if (params->video_pipeline_cache_invalidate)
1515 video_pipeline_cache_invalidate = MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE;
1516
1517 if (params->bo)
1518 post_sync_operation = MI_FLUSH_DW_WRITE_QWORD;
1519
1520 __OUT_BATCH(batch, (MI_FLUSH_DW2 |
1521 video_pipeline_cache_invalidate |
1522 post_sync_operation |
1523 (5 - 2))); /* Always use PPGTT */
1524
1525 if (params->bo) {
1526 __OUT_RELOC64(batch,
1527 params->bo,
1528 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1529 params->offset);
1530 } else {
1531 __OUT_BATCH(batch, 0);
1532 __OUT_BATCH(batch, 0);
1533 }
1534
1535 __OUT_BATCH(batch, params->dw0);
1536 __OUT_BATCH(batch, params->dw1);
1537 }
1538
1539 void
gen8_gpe_mi_store_data_imm(VADriverContextP ctx,struct intel_batchbuffer * batch,struct gpe_mi_store_data_imm_parameter * params)1540 gen8_gpe_mi_store_data_imm(VADriverContextP ctx,
1541 struct intel_batchbuffer *batch,
1542 struct gpe_mi_store_data_imm_parameter *params)
1543 {
1544 if (params->is_qword) {
1545 __OUT_BATCH(batch, MI_STORE_DATA_IMM |
1546 (1 << 21) |
1547 (5 - 2)); /* Always use PPGTT */
1548 } else {
1549 __OUT_BATCH(batch, MI_STORE_DATA_IMM | (4 - 2)); /* Always use PPGTT */
1550 }
1551
1552 __OUT_RELOC64(batch,
1553 params->bo,
1554 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
1555 params->offset);
1556 __OUT_BATCH(batch, params->dw0);
1557
1558 if (params->is_qword)
1559 __OUT_BATCH(batch, params->dw1);
1560 }

void
gen8_gpe_mi_store_register_mem(VADriverContextP ctx,
                               struct intel_batchbuffer *batch,
                               struct gpe_mi_store_register_mem_parameter *params)
{
    __OUT_BATCH(batch, (MI_STORE_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
    __OUT_BATCH(batch, params->mmio_offset);
    __OUT_RELOC64(batch,
                  params->bo,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                  params->offset);
}

void
gen8_gpe_mi_load_register_mem(VADriverContextP ctx,
                              struct intel_batchbuffer *batch,
                              struct gpe_mi_load_register_mem_parameter *params)
{
    __OUT_BATCH(batch, (MI_LOAD_REGISTER_MEM | (4 - 2))); /* Always use PPGTT */
    __OUT_BATCH(batch, params->mmio_offset);
    __OUT_RELOC64(batch,
                  params->bo,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                  params->offset);
}

void
gen8_gpe_mi_load_register_imm(VADriverContextP ctx,
                              struct intel_batchbuffer *batch,
                              struct gpe_mi_load_register_imm_parameter *params)
{
    __OUT_BATCH(batch, (MI_LOAD_REGISTER_IMM | (3 - 2)));
    __OUT_BATCH(batch, params->mmio_offset);
    __OUT_BATCH(batch, params->data);
}

void
gen8_gpe_mi_load_register_reg(VADriverContextP ctx,
                              struct intel_batchbuffer *batch,
                              struct gpe_mi_load_register_reg_parameter *params)
{
    __OUT_BATCH(batch, (MI_LOAD_REGISTER_REG | (3 - 2)));
    __OUT_BATCH(batch, params->src_mmio_offset);
    __OUT_BATCH(batch, params->dst_mmio_offset);
}
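
/*
 * Sketch of a round trip through the MI register helpers above: store an
 * MMIO register to memory with SRM, then reload it with LRM. The register
 * offset and bo ("some_mmio_reg", "scratch_bo") are placeholders; real
 * callers use register offsets taken from the hardware PRM.
 *
 *     struct gpe_mi_store_register_mem_parameter srm;
 *     struct gpe_mi_load_register_mem_parameter lrm;
 *
 *     memset(&srm, 0, sizeof(srm));
 *     srm.mmio_offset = some_mmio_reg;
 *     srm.bo = scratch_bo;
 *     srm.offset = 0;
 *     gen8_gpe_mi_store_register_mem(ctx, batch, &srm);
 *
 *     memset(&lrm, 0, sizeof(lrm));
 *     lrm.mmio_offset = some_mmio_reg;
 *     lrm.bo = scratch_bo;
 *     lrm.offset = 0;
 *     gen8_gpe_mi_load_register_mem(ctx, batch, &lrm);
 */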

void
gen9_gpe_mi_math(VADriverContextP ctx,
                 struct intel_batchbuffer *batch,
                 struct gpe_mi_math_parameter *params)
{
    __OUT_BATCH(batch, (MI_MATH | (params->num_instructions - 1)));
    /* Each ALU instruction is a single DWORD, hence the * 4 byte count */
    intel_batchbuffer_data(batch, params->instruction_list, params->num_instructions * 4);
}
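
/*
 * Sketch of an MI_MATH payload. Each ALU instruction is one DWORD encoding
 * (opcode, operand1, operand2); the symbolic MI_ALU(...) notation below
 * stands in for the encodings documented in the hardware PRM and is not
 * defined in this driver, so treat this as pseudocode:
 *
 *     unsigned int alu[4];
 *
 *     alu[0] = MI_ALU(LOAD, SRCA, REG0);   // SRCA <- GPR0
 *     alu[1] = MI_ALU(LOAD, SRCB, REG1);   // SRCB <- GPR1
 *     alu[2] = MI_ALU(ADD, 0, 0);          // ACCU <- SRCA + SRCB
 *     alu[3] = MI_ALU(STORE, REG2, ACCU);  // GPR2 <- ACCU
 *
 *     struct gpe_mi_math_parameter math_param;
 *
 *     memset(&math_param, 0, sizeof(math_param));
 *     math_param.num_instructions = 4;
 *     math_param.instruction_list = alu;
 *     gen9_gpe_mi_math(ctx, batch, &math_param);
 */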

void
gen9_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
                                         struct intel_batchbuffer *batch,
                                         struct gpe_mi_conditional_batch_buffer_end_parameter *params)
{
    /* "ENANBLED" [sic] follows the macro's spelling in the driver headers */
    int compare_mask_mode_enabled = MI_COMPARE_MASK_MODE_ENANBLED;

    if (params->compare_mask_mode_disabled)
        compare_mask_mode_enabled = 0;

    __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
                        (1 << 21) |
                        compare_mask_mode_enabled |
                        (4 - 2))); /* Always use PPGTT */
    __OUT_BATCH(batch, params->compare_data);
    __OUT_RELOC64(batch,
                  params->bo,
                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
                  params->offset);
}

void
gen8_gpe_mi_batch_buffer_start(VADriverContextP ctx,
                               struct intel_batchbuffer *batch,
                               struct gpe_mi_batch_buffer_start_parameter *params)
{
    __OUT_BATCH(batch, (MI_BATCH_BUFFER_START |
                        (!!params->is_second_level << 22) | /* second-level batch */
                        (!params->use_global_gtt << 8) |    /* PPGTT unless global GTT requested */
                        (1 << 0)));                         /* DWord length (3 - 2) */
    __OUT_RELOC64(batch,
                  params->bo,
                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
                  params->offset);
}
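
/*
 * Sketch of chaining into a second-level batch, as the encoders do for
 * per-slice command buffers ("slice_batch_bo" is a hypothetical name):
 *
 *     struct gpe_mi_batch_buffer_start_parameter bbs;
 *
 *     memset(&bbs, 0, sizeof(bbs));
 *     bbs.is_second_level = 1;  // return here when the nested batch ends
 *     bbs.use_global_gtt = 0;   // PPGTT addressing
 *     bbs.bo = slice_batch_bo;
 *     bbs.offset = 0;
 *     gen8_gpe_mi_batch_buffer_start(ctx, batch, &bbs);
 */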

void
gen8_gpe_context_set_dynamic_buffer(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct gpe_dynamic_state_parameter *ds)
{
    if (!ds->bo || !gpe_context)
        return;

    dri_bo_unreference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo = ds->bo;
    dri_bo_reference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo_size = ds->bo_size;

    /* curbe buffer is a part of the dynamic buffer */
    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = ds->bo;
    dri_bo_reference(gpe_context->curbe.bo);
    gpe_context->curbe.offset = ds->curbe_offset;

    /* idrt buffer is a part of the dynamic buffer */
    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = ds->bo;
    dri_bo_reference(gpe_context->idrt.bo);
    gpe_context->idrt.offset = ds->idrt_offset;

    /* sampler buffer is a part of the dynamic buffer */
    dri_bo_unreference(gpe_context->sampler.bo);
    gpe_context->sampler.bo = ds->bo;
    dri_bo_reference(gpe_context->sampler.bo);
    gpe_context->sampler.offset = ds->sampler_offset;
}
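
/*
 * Sketch of how a caller typically carves one allocation into the dynamic
 * state regions consumed above; the offsets and sizes are illustrative only
 * and must respect the hardware alignment rules (e.g. 64-byte-aligned CURBE):
 *
 *     struct gpe_dynamic_state_parameter ds;
 *
 *     memset(&ds, 0, sizeof(ds));
 *     ds.bo = dri_bo_alloc(bufmgr, "dynamic state", 0x4000, 0x1000);
 *     ds.bo_size = 0x4000;
 *     ds.curbe_offset = 0;        // CURBE at the start
 *     ds.idrt_offset = 0x1000;    // interface descriptors next
 *     ds.sampler_offset = 0x2000; // sampler state last
 *     gen8_gpe_context_set_dynamic_buffer(ctx, gpe_context, &ds);
 *     dri_bo_unreference(ds.bo);  // the context now holds its own references
 */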

void *
i965_gpe_context_map_curbe(struct i965_gpe_context *gpe_context)
{
    dri_bo_map(gpe_context->curbe.bo, 1);

    return (char *)gpe_context->curbe.bo->virtual + gpe_context->curbe.offset;
}

void
i965_gpe_context_unmap_curbe(struct i965_gpe_context *gpe_context)
{
    dri_bo_unmap(gpe_context->curbe.bo);
}
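
/*
 * Typical CURBE update, assuming "struct my_curbe_data" is a kernel-specific
 * layout defined by the caller (hypothetical name):
 *
 *     struct my_curbe_data *curbe;
 *
 *     curbe = i965_gpe_context_map_curbe(gpe_context);
 *     if (curbe) {
 *         memset(curbe, 0, sizeof(*curbe));
 *         curbe->some_field = value;   // hypothetical field
 *         i965_gpe_context_unmap_curbe(gpe_context);
 *     }
 */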

void
gen9_gpe_reset_binding_table(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context)
{
    unsigned int *binding_table;
    unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
    int i;

    dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
    binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);

    for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
        *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN9;
    }

    dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}

void
gen8_gpe_setup_interface_data(VADriverContextP ctx,
                              struct i965_gpe_context *gpe_context)
{
    struct gen8_interface_descriptor_data *desc;
    int i;
    dri_bo *bo;
    unsigned char *desc_ptr;

    bo = gpe_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc_ptr = (unsigned char *)bo->virtual + gpe_context->idrt.offset;
    desc = (struct gen8_interface_descriptor_data *)desc_ptr;

    for (i = 0; i < gpe_context->num_kernels; i++) {
        struct i965_kernel *kernel;

        kernel = &gpe_context->kernels[i];
        assert(sizeof(*desc) == 32);

        /* Set up the descriptor table */
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0;
        desc->desc3.sampler_state_pointer = (gpe_context->sampler.offset >> 5);
        desc->desc4.binding_table_entry_count = 0;
        desc->desc4.binding_table_pointer = (gpe_context->surface_state_binding_table.binding_table_offset >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = ALIGN(gpe_context->curbe.length, 32) >> 5; // in registers

        desc++;
    }

    dri_bo_unmap(bo);
}

static void
gen9_gpe_set_surface_tiling(struct gen9_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen9_gpe_set_surface2_tiling(struct gen9_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen9_gpe_set_2d_surface_state(struct gen9_surface_state *ss,
                              unsigned int cacheability_control,
                              unsigned int format,
                              unsigned int tiling,
                              unsigned int width,
                              unsigned int height,
                              unsigned int pitch,
                              uint64_t base_offset,
                              unsigned int y_offset)
{
    memset(ss, 0, sizeof(*ss));

    /* Always set to 1 (align-4 mode) */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    ss->ss0.surface_format = format;
    ss->ss0.surface_type = I965_SURFACE_2D;

    ss->ss1.surface_mocs = cacheability_control;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    ss->ss5.y_offset = y_offset;

    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;

    ss->ss8.base_addr = (uint32_t)base_offset;
    ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);

    gen9_gpe_set_surface_tiling(ss, tiling);
}

/* This is only for NV12 format */
static void
gen9_gpe_set_adv_surface_state(struct gen9_surface_state2 *ss,
                               unsigned int v_direction,
                               unsigned int cacheability_control,
                               unsigned int format,
                               unsigned int tiling,
                               unsigned int width,
                               unsigned int height,
                               unsigned int pitch,
                               uint64_t base_offset,
                               unsigned int y_cb_offset)
{
    memset(ss, 0, sizeof(*ss));

    ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
    ss->ss1.width = width - 1;
    ss->ss1.height = height - 1;

    ss->ss2.surface_format = format;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = pitch - 1;

    ss->ss3.y_offset_for_cb = y_cb_offset;

    ss->ss5.surface_object_mocs = cacheability_control;

    ss->ss6.base_addr = (uint32_t)base_offset;
    ss->ss7.base_addr_high = (uint32_t)(base_offset >> 32);

    gen9_gpe_set_surface2_tiling(ss, tiling);
}

static void
gen9_gpe_set_buffer2_surface_state(struct gen9_surface_state *ss,
                                   unsigned int cacheability_control,
                                   unsigned int format,
                                   unsigned int size,
                                   unsigned int pitch,
                                   uint64_t base_offset)
{
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_format = format;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;

    ss->ss1.surface_mocs = cacheability_control;

    /* (size - 1) is split across three fields: bits 6:0, 20:7 and 27:21 */
    ss->ss2.width = (size - 1) & 0x7F;
    ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;

    ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21;
    ss->ss3.pitch = pitch - 1;

    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;

    ss->ss8.base_addr = (uint32_t)base_offset;
    ss->ss9.base_addr_high = (uint32_t)(base_offset >> 32);
}
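
/*
 * Worked example of the (size - 1) field split used above. For a buffer of
 * 0x12345 bytes, size - 1 = 0x12344, so:
 *
 *     width  =  0x12344        & 0x7F   = 0x44  (bits  6:0)
 *     height = (0x12344 >> 7)  & 0x3FFF = 0x246 (bits 20:7)
 *     depth  = (0x12344 >> 21) & 0x7F   = 0     (bits 27:21)
 *
 * The hardware reassembles the element count as
 * (depth << 21 | height << 7 | width) + 1.
 */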

void
gen9_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
                             struct i965_gpe_surface *gpe_surface,
                             int index)
{
    char *buf;
    unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
    unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
                                        index * SURFACE_STATE_PADDED_SIZE_GEN9;
    unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
                                        index * 4;
    struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;

    dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);

    dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
    buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
    *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;

    if (gpe_surface->is_2d_surface && gpe_surface->is_override_offset) {
        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);

        width = gpe_resource->width;
        height = gpe_resource->height;
        pitch = gpe_resource->pitch;

        if (gpe_surface->is_media_block_rw) {
            if (gpe_surface->is_16bpp)
                width = (ALIGN(width * 2, 4) >> 2);
            else
                width = (ALIGN(width, 4) >> 2);
        }

        gen9_gpe_set_2d_surface_state(ss,
                                      gpe_surface->cacheability_control,
                                      gpe_surface->format,
                                      tiling,
                                      width, height, pitch,
                                      gpe_resource->bo->offset64 + gpe_surface->offset,
                                      0);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          gpe_surface->offset,
                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
                          gpe_resource->bo);
    } else if (gpe_surface->is_2d_surface && gpe_surface->is_uv_surface) {
        unsigned int cbcr_offset;
        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);

        width = gpe_resource->width;
        height = gpe_resource->height / 2;
        pitch = gpe_resource->pitch;

        if (gpe_surface->is_media_block_rw) {
            if (gpe_surface->is_16bpp)
                width = (ALIGN(width * 2, 4) >> 2);
            else
                width = (ALIGN(width, 4) >> 2);
        }

        if (tiling == I915_TILING_Y)
            tile_alignment = 32;
        else if (tiling == I915_TILING_X)
            tile_alignment = 8;
        else
            tile_alignment = 1;

        y_offset = (gpe_resource->y_cb_offset % tile_alignment);
        cbcr_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;

        gen9_gpe_set_2d_surface_state(ss,
                                      gpe_surface->cacheability_control,
                                      I965_SURFACEFORMAT_R16_UINT,
                                      tiling,
                                      width, height, pitch,
                                      gpe_resource->bo->offset64 + cbcr_offset,
                                      y_offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          cbcr_offset,
                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
                          gpe_resource->bo);
    } else if (gpe_surface->is_2d_surface) {
        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);

        width = gpe_resource->width;
        height = gpe_resource->height;
        pitch = gpe_resource->pitch;

        if (gpe_surface->is_media_block_rw) {
            if (gpe_surface->is_16bpp)
                width = (ALIGN(width * 2, 4) >> 2);
            else
                width = (ALIGN(width, 4) >> 2);
        }

        gen9_gpe_set_2d_surface_state(ss,
                                      gpe_surface->cacheability_control,
                                      gpe_surface->format,
                                      tiling,
                                      width, height, pitch,
                                      gpe_resource->bo->offset64,
                                      y_offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
                          gpe_resource->bo);
    } else if (gpe_surface->is_adv_surface) {
        struct gen9_surface_state2 *ss = (struct gen9_surface_state2 *)(buf + surface_state_offset);

        width = gpe_resource->width;
        height = gpe_resource->height;
        pitch = gpe_resource->pitch;

        gen9_gpe_set_adv_surface_state(ss,
                                       gpe_surface->v_direction,
                                       gpe_surface->cacheability_control,
                                       MFX_SURFACE_PLANAR_420_8,
                                       tiling,
                                       width, height, pitch,
                                       gpe_resource->bo->offset64,
                                       gpe_resource->y_cb_offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          surface_state_offset + offsetof(struct gen9_surface_state2, ss6),
                          gpe_resource->bo);
    } else {
        struct gen9_surface_state *ss = (struct gen9_surface_state *)(buf + surface_state_offset);
        unsigned int format;

        assert(gpe_surface->is_buffer);

        if (gpe_surface->is_raw_buffer) {
            format = I965_SURFACEFORMAT_RAW;
            pitch = 1;
        } else {
            format = I965_SURFACEFORMAT_R32_UINT;
            pitch = sizeof(unsigned int);
        }

        gen9_gpe_set_buffer2_surface_state(ss,
                                           gpe_surface->cacheability_control,
                                           format,
                                           gpe_surface->size,
                                           pitch,
                                           gpe_resource->bo->offset64 + gpe_surface->offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          gpe_surface->offset,
                          surface_state_offset + offsetof(struct gen9_surface_state, ss8),
                          gpe_resource->bo);
    }

    dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}

bool
i965_gpe_allocate_2d_resource(dri_bufmgr *bufmgr,
                              struct i965_gpe_resource *res,
                              int width,
                              int height,
                              int pitch,
                              const char *name)
{
    int bo_size;

    if (!res)
        return false;

    res->type = I965_GPE_RESOURCE_2D;
    res->width = width;
    res->height = height;
    res->pitch = pitch;

    bo_size = ALIGN(height, 16) * pitch;
    res->size = bo_size;

    res->bo = dri_bo_alloc(bufmgr, name, res->size, 4096);
    res->map = NULL;

    return res->bo != NULL; /* report allocation failure instead of returning true unconditionally */
}

void
gen8_gpe_media_state_flush(VADriverContextP ctx,
                           struct i965_gpe_context *gpe_context,
                           struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 2);

    OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH | (2 - 2));
    OUT_BATCH(batch, 0);

    ADVANCE_BATCH(batch);
}

void
gen8_gpe_media_object(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct intel_batchbuffer *batch,
                      struct gpe_media_object_parameter *param)
{
    int batch_size, subdata_size;

    batch_size = 6;
    subdata_size = 0;
    if (param->pinline_data && param->inline_size) {
        subdata_size = ALIGN(param->inline_size, 4);
        batch_size += subdata_size / 4;
    }
    BEGIN_BATCH(batch, batch_size);
    OUT_BATCH(batch, CMD_MEDIA_OBJECT | (batch_size - 2));
    OUT_BATCH(batch, param->interface_offset);
    OUT_BATCH(batch, param->use_scoreboard << 21);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (param->scoreboard_y << 16 |
                      param->scoreboard_x));
    OUT_BATCH(batch, param->scoreboard_mask);

    if (subdata_size)
        intel_batchbuffer_data(batch, param->pinline_data, subdata_size);

    ADVANCE_BATCH(batch);
}

void
gen8_gpe_media_object_walker(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct intel_batchbuffer *batch,
                             struct gpe_media_object_walker_parameter *param)
{
    int walker_length;

    walker_length = 17;
    if (param->inline_size)
        walker_length += ALIGN(param->inline_size, 4) / 4;
    BEGIN_BATCH(batch, walker_length);
    OUT_BATCH(batch, CMD_MEDIA_OBJECT_WALKER | (walker_length - 2));
    OUT_BATCH(batch, param->interface_offset);
    OUT_BATCH(batch, param->use_scoreboard << 21);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, (param->group_id_loop_select << 8 |
                      param->scoreboard_mask)); // DW5
    OUT_BATCH(batch, (param->color_count_minus1 << 24 |
                      param->middle_loop_extra_steps << 16 |
                      param->mid_loop_unit_y << 12 |
                      param->mid_loop_unit_x << 8));
    OUT_BATCH(batch, ((param->global_loop_exec_count & 0x3ff) << 16 |
                      (param->local_loop_exec_count & 0x3ff)));
    OUT_BATCH(batch, param->block_resolution.value);
    OUT_BATCH(batch, param->local_start.value);
    OUT_BATCH(batch, 0); // DW10
    OUT_BATCH(batch, param->local_outer_loop_stride.value);
    OUT_BATCH(batch, param->local_inner_loop_unit.value);
    OUT_BATCH(batch, param->global_resolution.value);
    OUT_BATCH(batch, param->global_start.value);
    OUT_BATCH(batch, param->global_outer_loop_stride.value);
    OUT_BATCH(batch, param->global_inner_loop_unit.value);

    if (param->pinline_data && param->inline_size)
        intel_batchbuffer_data(batch, param->pinline_data, ALIGN(param->inline_size, 4));

    ADVANCE_BATCH(batch);
}

void
intel_vpp_init_media_object_walker_parameter(struct intel_vpp_kernel_walker_parameter *kernel_walker_param,
                                             struct gpe_media_object_walker_parameter *walker_param)
{
    memset(walker_param, 0, sizeof(*walker_param));

    walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;

    walker_param->block_resolution.x = kernel_walker_param->resolution_x;
    walker_param->block_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_resolution.x = kernel_walker_param->resolution_x;
    walker_param->global_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
    walker_param->global_outer_loop_stride.y = 0;

    walker_param->global_inner_loop_unit.x = 0;
    walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;

    walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
    walker_param->global_loop_exec_count = 0xFFFF; //MAX VALUE

    if (kernel_walker_param->no_dependency) {
        /* The no_dependency is used for VPP */
        walker_param->scoreboard_mask = 0;
        walker_param->use_scoreboard = 0;
        // Raster scan walking pattern
        walker_param->local_outer_loop_stride.x = 0;
        walker_param->local_outer_loop_stride.y = 1;
        walker_param->local_inner_loop_unit.x = 1;
        walker_param->local_inner_loop_unit.y = 0;
        walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
        walker_param->local_end.y = 0;
    } else {
        walker_param->local_end.x = 0;
        walker_param->local_end.y = 0;

        // 26 degree
        walker_param->scoreboard_mask = 0x0F;
        walker_param->local_outer_loop_stride.x = 1;
        walker_param->local_outer_loop_stride.y = 0;
        walker_param->local_inner_loop_unit.x = -2;
        walker_param->local_inner_loop_unit.y = 1;
    }
}
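
/*
 * Usage sketch: a VPP caller typically works in 16x16 blocks, so the
 * resolution fed in here is the frame size divided by the block size
 * ("width"/"height" are hypothetical pixel dimensions):
 *
 *     struct intel_vpp_kernel_walker_parameter kernel_walker_param;
 *     struct gpe_media_object_walker_parameter walker_param;
 *
 *     memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
 *     kernel_walker_param.resolution_x = ALIGN(width, 16) / 16;
 *     kernel_walker_param.resolution_y = ALIGN(height, 16) / 16;
 *     kernel_walker_param.no_dependency = 1; // raster scan, no scoreboard
 *
 *     intel_vpp_init_media_object_walker_parameter(&kernel_walker_param,
 *                                                  &walker_param);
 *     gen8_gpe_media_object_walker(ctx, gpe_context, batch, &walker_param);
 */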

void
gen8_gpe_reset_binding_table(VADriverContextP ctx, struct i965_gpe_context *gpe_context)
{
    unsigned int *binding_table;
    unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset;
    int i;

    dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
    binding_table = (unsigned int*)((char *)gpe_context->surface_state_binding_table.bo->virtual + binding_table_offset);

    for (i = 0; i < gpe_context->surface_state_binding_table.max_entries; i++) {
        *(binding_table + i) = gpe_context->surface_state_binding_table.surface_state_offset + i * SURFACE_STATE_PADDED_SIZE_GEN8;
    }

    dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}

static void
gen8_gpe_set_2d_surface_state(struct gen8_surface_state *ss,
                              unsigned int vert_line_stride_offset,
                              unsigned int vert_line_stride,
                              unsigned int cacheability_control,
                              unsigned int format,
                              unsigned int tiling,
                              unsigned int width,
                              unsigned int height,
                              unsigned int pitch,
                              unsigned int base_offset,
                              unsigned int y_offset)
{
    memset(ss, 0, sizeof(*ss));

    ss->ss0.vert_line_stride_ofs = vert_line_stride_offset;
    ss->ss0.vert_line_stride = vert_line_stride;
    ss->ss0.surface_format = format;
    ss->ss0.surface_type = I965_SURFACE_2D;

    ss->ss1.surface_mocs = cacheability_control;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    ss->ss5.y_offset = y_offset;

    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;

    ss->ss8.base_addr = base_offset;

    gen8_gpe_set_surface_tiling(ss, tiling);
}

static void
gen8_gpe_set_adv_surface_state(struct gen8_surface_state2 *ss,
                               unsigned int v_direction,
                               unsigned int cacheability_control,
                               unsigned int format,
                               unsigned int tiling,
                               unsigned int width,
                               unsigned int height,
                               unsigned int pitch,
                               unsigned int base_offset,
                               unsigned int y_cb_offset)
{
    memset(ss, 0, sizeof(*ss));

    ss->ss1.cbcr_pixel_offset_v_direction = v_direction;
    ss->ss1.width = width - 1;
    ss->ss1.height = height - 1;

    ss->ss2.surface_format = format;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = pitch - 1;

    ss->ss3.y_offset_for_cb = y_cb_offset;

    ss->ss5.surface_object_mocs = cacheability_control;

    ss->ss6.base_addr = base_offset;

    gen8_gpe_set_surface2_tiling(ss, tiling);
}

static void
gen8_gpe_set_buffer2_surface_state(struct gen8_surface_state *ss,
                                   unsigned int cacheability_control,
                                   unsigned int format,
                                   unsigned int size,
                                   unsigned int pitch,
                                   unsigned int base_offset)
{
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_format = format;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;

    ss->ss1.surface_mocs = cacheability_control;

    /* (size - 1) is split across three fields: bits 6:0, 20:7 and 27:21 */
    ss->ss2.width = (size - 1) & 0x7F;
    ss->ss2.height = ((size - 1) & 0x1FFF80) >> 7;

    ss->ss3.depth = ((size - 1) & 0xFE00000) >> 21;
    ss->ss3.pitch = pitch - 1;

    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;

    ss->ss8.base_addr = base_offset;
}

void
gen8_gpe_context_add_surface(struct i965_gpe_context *gpe_context,
                             struct i965_gpe_surface *gpe_surface,
                             int index)
{
    char *buf;
    unsigned int tiling, swizzle, width, height, pitch, tile_alignment, y_offset = 0;
    unsigned int surface_state_offset = gpe_context->surface_state_binding_table.surface_state_offset +
                                        index * SURFACE_STATE_PADDED_SIZE_GEN8;
    unsigned int binding_table_offset = gpe_context->surface_state_binding_table.binding_table_offset +
                                        index * 4;
    struct i965_gpe_resource *gpe_resource = gpe_surface->gpe_resource;

    dri_bo_get_tiling(gpe_resource->bo, &tiling, &swizzle);

    dri_bo_map(gpe_context->surface_state_binding_table.bo, 1);
    buf = (char *)gpe_context->surface_state_binding_table.bo->virtual;
    *((unsigned int *)(buf + binding_table_offset)) = surface_state_offset;

    if (gpe_surface->is_2d_surface) {
        struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
        unsigned int target_offset;

        width = gpe_resource->width;
        height = gpe_resource->height;
        pitch = gpe_resource->pitch;

        if (gpe_surface->is_override_offset) {
            y_offset = 0;
            target_offset = gpe_surface->offset;
        } else if (gpe_surface->is_uv_surface) {
            height /= 2;

            if (tiling == I915_TILING_Y)
                tile_alignment = 32;
            else if (tiling == I915_TILING_X)
                tile_alignment = 8;
            else
                tile_alignment = 1;

            y_offset = (gpe_resource->y_cb_offset % tile_alignment);
            target_offset = ALIGN_FLOOR(gpe_resource->y_cb_offset, tile_alignment) * pitch;
        } else {
            y_offset = 0;
            target_offset = 0;
        }

        if (gpe_surface->is_media_block_rw) {
            width = (ALIGN(width, 4) >> 2);
        }

        gen8_gpe_set_2d_surface_state(ss,
                                      gpe_surface->vert_line_stride_offset,
                                      gpe_surface->vert_line_stride,
                                      gpe_surface->cacheability_control,
                                      gpe_surface->format,
                                      tiling,
                                      width, height, pitch,
                                      gpe_resource->bo->offset64 + target_offset,
                                      y_offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          target_offset,
                          surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                          gpe_resource->bo);
    } else if (gpe_surface->is_adv_surface) {
        struct gen8_surface_state2 *ss = (struct gen8_surface_state2 *)(buf + surface_state_offset);

        width = gpe_resource->width;
        height = gpe_resource->height;
        pitch = gpe_resource->pitch;

        gen8_gpe_set_adv_surface_state(ss,
                                       gpe_surface->v_direction,
                                       gpe_surface->cacheability_control,
                                       MFX_SURFACE_PLANAR_420_8,
                                       tiling,
                                       width, height, pitch,
                                       gpe_resource->bo->offset64,
                                       gpe_resource->y_cb_offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
                          gpe_resource->bo);
    } else {
        struct gen8_surface_state *ss = (struct gen8_surface_state *)(buf + surface_state_offset);
        unsigned int format;

        assert(gpe_surface->is_buffer);

        if (gpe_surface->is_raw_buffer) {
            format = I965_SURFACEFORMAT_RAW;
            pitch = 1;
        } else {
            format = I965_SURFACEFORMAT_R32_UINT;
            pitch = sizeof(unsigned int);
        }

        gen8_gpe_set_buffer2_surface_state(ss,
                                           gpe_surface->cacheability_control,
                                           format,
                                           gpe_surface->size,
                                           pitch,
                                           gpe_resource->bo->offset64 + gpe_surface->offset);

        dri_bo_emit_reloc(gpe_context->surface_state_binding_table.bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          gpe_surface->offset,
                          surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                          gpe_resource->bo);
    }

    dri_bo_unmap(gpe_context->surface_state_binding_table.bo);
}

void
gen8_gpe_mi_conditional_batch_buffer_end(VADriverContextP ctx,
                                         struct intel_batchbuffer *batch,
                                         struct gpe_mi_conditional_batch_buffer_end_parameter *param)
{
    __OUT_BATCH(batch, (MI_CONDITIONAL_BATCH_BUFFER_END |
                        (1 << 21) |
                        (4 - 2))); /* Always use PPGTT */
    __OUT_BATCH(batch, param->compare_data);
    __OUT_RELOC64(batch,
                  param->bo,
                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, 0,
                  param->offset);
}

void
gen8_gpe_mi_copy_mem_mem(VADriverContextP ctx,
                         struct intel_batchbuffer *batch,
                         struct gpe_mi_copy_mem_parameter *param)
{
    /* MI_COPY_MEM_MEM transfers a single DWORD from src to dst */
    __OUT_BATCH(batch, (MI_COPY_MEM_MEM |
                        (0 << 22) |
                        (0 << 21) |
                        (5 - 2))); /* Always use PPGTT for src and dst */
    __OUT_RELOC64(batch,
                  param->dst_bo,
                  I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                  param->dst_offset);
    __OUT_RELOC64(batch,
                  param->src_bo,
                  I915_GEM_DOMAIN_RENDER, 0,
                  param->src_offset);
}
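
/*
 * Since MI_COPY_MEM_MEM moves one DWORD per command, copying a QWORD takes
 * two of them; a sketch ("src_bo"/"dst_bo" are placeholder names):
 *
 *     struct gpe_mi_copy_mem_parameter copy_param;
 *
 *     memset(&copy_param, 0, sizeof(copy_param));
 *     copy_param.src_bo = src_bo;
 *     copy_param.dst_bo = dst_bo;
 *     copy_param.src_offset = 0;
 *     copy_param.dst_offset = 0;
 *     gen8_gpe_mi_copy_mem_mem(ctx, batch, &copy_param);  // low DWORD
 *
 *     copy_param.src_offset = 4;
 *     copy_param.dst_offset = 4;
 *     gen8_gpe_mi_copy_mem_mem(ctx, batch, &copy_param);  // high DWORD
 */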

void
gen8_gpe_pipe_control(VADriverContextP ctx,
                      struct intel_batchbuffer *batch,
                      struct gpe_pipe_control_parameter *param)
{
    int render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
    int dc_flush_enable = 0;
    int state_cache_invalidation_enable = 0;
    int constant_cache_invalidation_enable = 0;
    int vf_cache_invalidation_enable = 0;
    int instruction_cache_invalidation_enable = 0;
    int post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
    int use_global_gtt = CMD_PIPE_CONTROL_GLOBAL_GTT_GEN8;
    int cs_stall_enable = !param->disable_cs_stall;

    switch (param->flush_mode) {
    case PIPE_CONTROL_FLUSH_WRITE_CACHE:
        render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
        dc_flush_enable = CMD_PIPE_CONTROL_DC_FLUSH;
        break;

    case PIPE_CONTROL_FLUSH_READ_CACHE:
        render_target_cache_flush_enable = 0;
        state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
        constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
        vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
        instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
        break;

    case PIPE_CONTROL_FLUSH_NONE:
    default:
        render_target_cache_flush_enable = 0;
        break;
    }

    if (param->bo) {
        post_sync_operation = CMD_PIPE_CONTROL_WRITE_QWORD;
        use_global_gtt = CMD_PIPE_CONTROL_LOCAL_PGTT_GEN8;
    } else {
        post_sync_operation = CMD_PIPE_CONTROL_NOWRITE;
        render_target_cache_flush_enable = CMD_PIPE_CONTROL_WC_FLUSH;
        state_cache_invalidation_enable = CMD_PIPE_CONTROL_SC_INVALIDATION_GEN8;
        constant_cache_invalidation_enable = CMD_PIPE_CONTROL_CC_INVALIDATION_GEN8;
        vf_cache_invalidation_enable = CMD_PIPE_CONTROL_VFC_INVALIDATION_GEN8;
        instruction_cache_invalidation_enable = CMD_PIPE_CONTROL_IS_FLUSH;
    }

    __OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));
    __OUT_BATCH(batch, (render_target_cache_flush_enable |
                        dc_flush_enable |
                        state_cache_invalidation_enable |
                        constant_cache_invalidation_enable |
                        vf_cache_invalidation_enable |
                        instruction_cache_invalidation_enable |
                        post_sync_operation |
                        use_global_gtt |
                        cs_stall_enable |
                        CMD_PIPE_CONTROL_FLUSH_ENABLE));

    if (param->bo) {
        __OUT_RELOC64(batch,
                      param->bo,
                      I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_RENDER,
                      param->offset);
    } else {
        __OUT_BATCH(batch, 0);
        __OUT_BATCH(batch, 0);
    }

    __OUT_BATCH(batch, param->dw0);
    __OUT_BATCH(batch, param->dw1);
}
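
/*
 * Fence sketch: flush the write caches and have PIPE_CONTROL post a QWORD
 * to a status bo once everything ahead of it has landed ("status_bo" and
 * "fence_value" are hypothetical):
 *
 *     struct gpe_pipe_control_parameter pc_param;
 *
 *     memset(&pc_param, 0, sizeof(pc_param));
 *     pc_param.flush_mode = PIPE_CONTROL_FLUSH_WRITE_CACHE;
 *     pc_param.disable_cs_stall = 0;   // keep the CS stall for ordering
 *     pc_param.bo = status_bo;         // selects CMD_PIPE_CONTROL_WRITE_QWORD
 *     pc_param.offset = 0;
 *     pc_param.dw0 = fence_value;      // payload written after the flush
 *     gen8_gpe_pipe_control(ctx, batch, &pc_param);
 */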

void
i965_init_media_object_walker_parameter(struct gpe_encoder_kernel_walker_parameter *kernel_walker_param,
                                        struct gpe_media_object_walker_parameter *walker_param)
{
    memset(walker_param, 0, sizeof(*walker_param));

    walker_param->use_scoreboard = kernel_walker_param->use_scoreboard;

    walker_param->block_resolution.x = kernel_walker_param->resolution_x;
    walker_param->block_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_resolution.x = kernel_walker_param->resolution_x;
    walker_param->global_resolution.y = kernel_walker_param->resolution_y;

    walker_param->global_outer_loop_stride.x = kernel_walker_param->resolution_x;
    walker_param->global_outer_loop_stride.y = 0;

    walker_param->global_inner_loop_unit.x = 0;
    walker_param->global_inner_loop_unit.y = kernel_walker_param->resolution_y;

    walker_param->local_loop_exec_count = 0xFFFF;  //MAX VALUE
    walker_param->global_loop_exec_count = 0xFFFF; //MAX VALUE

    if (kernel_walker_param->no_dependency) {
        walker_param->scoreboard_mask = 0;
        // Raster scan walking pattern
        walker_param->local_outer_loop_stride.x = 0;
        walker_param->local_outer_loop_stride.y = 1;
        walker_param->local_inner_loop_unit.x = 1;
        walker_param->local_inner_loop_unit.y = 0;
        walker_param->local_end.x = kernel_walker_param->resolution_x - 1;
        walker_param->local_end.y = 0;
    } else if (kernel_walker_param->use_vertical_raster_scan) {
        walker_param->scoreboard_mask = 0x1;
        walker_param->use_scoreboard = 0;
        // Vertical raster scan walking pattern
        walker_param->local_outer_loop_stride.x = 1;
        walker_param->local_outer_loop_stride.y = 0;
        walker_param->local_inner_loop_unit.x = 0;
        walker_param->local_inner_loop_unit.y = 1;
        walker_param->local_end.x = 0;
        walker_param->local_end.y = kernel_walker_param->resolution_y - 1;
    } else {
        walker_param->local_end.x = 0;
        walker_param->local_end.y = 0;

        if (kernel_walker_param->walker_degree == WALKER_45Z_DEGREE) {
            // 45z degree, used for VP9
            walker_param->scoreboard_mask = 0x0F;

            walker_param->global_loop_exec_count = 0x3FF;
            walker_param->local_loop_exec_count = 0x3FF;

            walker_param->global_resolution.x = (unsigned int)(kernel_walker_param->resolution_x / 2.f) + 1;
            walker_param->global_resolution.y = 2 * kernel_walker_param->resolution_y;

            walker_param->global_start.x = 0;
            walker_param->global_start.y = 0;

            walker_param->global_outer_loop_stride.x = walker_param->global_resolution.x;
            walker_param->global_outer_loop_stride.y = 0;

            walker_param->global_inner_loop_unit.x = 0;
            walker_param->global_inner_loop_unit.y = walker_param->global_resolution.y;

            walker_param->block_resolution.x = walker_param->global_resolution.x;
            walker_param->block_resolution.y = walker_param->global_resolution.y;

            walker_param->local_start.x = 0;
            walker_param->local_start.y = 0;

            walker_param->local_outer_loop_stride.x = 1;
            walker_param->local_outer_loop_stride.y = 0;

            walker_param->local_inner_loop_unit.x = -1;
            walker_param->local_inner_loop_unit.y = 4;

            walker_param->middle_loop_extra_steps = 3;
            walker_param->mid_loop_unit_x = 0;
            walker_param->mid_loop_unit_y = 1;
        } else if (kernel_walker_param->walker_degree == WALKER_45_DEGREE) {
            walker_param->scoreboard_mask = 0x03;
            // 45-degree order in the local loop
            walker_param->local_outer_loop_stride.x = 1;
            walker_param->local_outer_loop_stride.y = 0;
            walker_param->local_inner_loop_unit.x = -1;
            walker_param->local_inner_loop_unit.y = 1;
        } else if (kernel_walker_param->walker_degree == WALKER_26Z_DEGREE) {
            // 26z degree, used for HEVC
            walker_param->scoreboard_mask = 0x7f;

            // z order in the local loop
            walker_param->local_outer_loop_stride.x = 0;
            walker_param->local_outer_loop_stride.y = 1;
            walker_param->local_inner_loop_unit.x = 1;
            walker_param->local_inner_loop_unit.y = 0;

            walker_param->block_resolution.x = 2;
            walker_param->block_resolution.y = 2;

            walker_param->global_outer_loop_stride.x = 2;
            walker_param->global_outer_loop_stride.y = 0;

            walker_param->global_inner_loop_unit.x = 0xFFF - 4 + 1; /* -4 in the 12-bit two's-complement field */
            walker_param->global_inner_loop_unit.y = 2;
        } else {
            // 26 degree
            walker_param->scoreboard_mask = 0x0F;
            walker_param->local_outer_loop_stride.x = 1;
            walker_param->local_outer_loop_stride.y = 0;
            walker_param->local_inner_loop_unit.x = -2;
            walker_param->local_inner_loop_unit.y = 1;
        }
    }
}

void
i965_add_2d_gpe_surface(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        int is_uv_surface,
                        int is_media_block_rw,
                        unsigned int format,
                        int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct i965_gpe_resource gpe_resource;
    struct i965_gpe_surface gpe_surface;

    memset(&gpe_surface, 0, sizeof(gpe_surface));

    i965_object_surface_to_2d_gpe_resource(&gpe_resource, obj_surface);
    gpe_surface.gpe_resource = &gpe_resource;
    gpe_surface.is_2d_surface = 1;
    gpe_surface.is_uv_surface = !!is_uv_surface;
    gpe_surface.is_media_block_rw = !!is_media_block_rw;

    gpe_surface.cacheability_control = i965->intel.mocs_state;
    gpe_surface.format = format;

    if (gpe_surface.is_media_block_rw) {
        if (obj_surface->fourcc == VA_FOURCC_P010)
            gpe_surface.is_16bpp = 1;
    }

    gpe->context_add_surface(gpe_context, &gpe_surface, index);
    i965_free_gpe_resource(&gpe_resource);
}

void
i965_add_adv_gpe_surface(VADriverContextP ctx,
                         struct i965_gpe_context *gpe_context,
                         struct object_surface *obj_surface,
                         int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct i965_gpe_resource gpe_resource;
    struct i965_gpe_surface gpe_surface;

    memset(&gpe_surface, 0, sizeof(gpe_surface));

    i965_object_surface_to_2d_gpe_resource(&gpe_resource, obj_surface);
    gpe_surface.gpe_resource = &gpe_resource;
    gpe_surface.is_adv_surface = 1;
    gpe_surface.cacheability_control = i965->intel.mocs_state;
    gpe_surface.v_direction = 2;

    gpe->context_add_surface(gpe_context, &gpe_surface, index);
    i965_free_gpe_resource(&gpe_resource);
}

void
i965_add_buffer_gpe_surface(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct i965_gpe_resource *gpe_buffer,
                            int is_raw_buffer,
                            unsigned int size,
                            unsigned int offset,
                            int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct i965_gpe_surface gpe_surface;

    memset(&gpe_surface, 0, sizeof(gpe_surface));

    gpe_surface.gpe_resource = gpe_buffer;
    gpe_surface.is_buffer = 1;
    gpe_surface.is_raw_buffer = !!is_raw_buffer;
    gpe_surface.cacheability_control = i965->intel.mocs_state;
    gpe_surface.size = size;
    gpe_surface.offset = offset;

    gpe->context_add_surface(gpe_context, &gpe_surface, index);
}

void
i965_add_buffer_2d_gpe_surface(VADriverContextP ctx,
                               struct i965_gpe_context *gpe_context,
                               struct i965_gpe_resource *gpe_buffer,
                               int is_media_block_rw,
                               unsigned int format,
                               int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;
    struct i965_gpe_surface gpe_surface;

    memset(&gpe_surface, 0, sizeof(gpe_surface));

    gpe_surface.gpe_resource = gpe_buffer;
    gpe_surface.is_2d_surface = 1;
    gpe_surface.is_media_block_rw = !!is_media_block_rw;
    gpe_surface.cacheability_control = i965->intel.mocs_state;
    gpe_surface.format = format;

    gpe->context_add_surface(gpe_context, &gpe_surface, index);
}

void
gen9_add_dri_buffer_gpe_surface(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                dri_bo *bo,
                                int is_raw_buffer,
                                unsigned int size,
                                unsigned int offset,
                                int index)
{
    struct i965_gpe_resource gpe_resource;

    i965_dri_object_to_buffer_gpe_resource(&gpe_resource, bo);
    i965_add_buffer_gpe_surface(ctx,
                                gpe_context,
                                &gpe_resource,
                                is_raw_buffer,
                                size,
                                offset,
                                index);

    i965_free_gpe_resource(&gpe_resource);
}

bool
i965_gpe_table_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_gpe_table *gpe = &i965->gpe_table;

    if (IS_GEN8(i965->intel.device_info)) {
        gpe->context_init = gen8_gpe_context_init;
        gpe->context_destroy = gen8_gpe_context_destroy;
        gpe->context_add_surface = gen8_gpe_context_add_surface;
        gpe->reset_binding_table = gen8_gpe_reset_binding_table;
        gpe->load_kernels = gen8_gpe_load_kernels;
        gpe->setup_interface_data = gen8_gpe_setup_interface_data;
        gpe->set_dynamic_buffer = gen8_gpe_context_set_dynamic_buffer;
        gpe->media_object = gen8_gpe_media_object;
        gpe->media_object_walker = gen8_gpe_media_object_walker;
        gpe->media_state_flush = gen8_gpe_media_state_flush;
        gpe->pipe_control = gen8_gpe_pipe_control;
        gpe->pipeline_end = gen8_gpe_pipeline_end;
        gpe->pipeline_setup = gen8_gpe_pipeline_setup;
        gpe->mi_conditional_batch_buffer_end = gen8_gpe_mi_conditional_batch_buffer_end;
        gpe->mi_batch_buffer_start = gen8_gpe_mi_batch_buffer_start;
        gpe->mi_load_register_reg = gen8_gpe_mi_load_register_reg;
        gpe->mi_load_register_imm = gen8_gpe_mi_load_register_imm;
        gpe->mi_load_register_mem = gen8_gpe_mi_load_register_mem;
        gpe->mi_store_register_mem = gen8_gpe_mi_store_register_mem;
        gpe->mi_store_data_imm = gen8_gpe_mi_store_data_imm;
        gpe->mi_flush_dw = gen8_gpe_mi_flush_dw;
        gpe->mi_copy_mem_mem = gen8_gpe_mi_copy_mem_mem;
    } else if (IS_GEN9(i965->intel.device_info) ||
               IS_GEN10(i965->intel.device_info)) {
        gpe->context_init = gen8_gpe_context_init;
        gpe->context_destroy = gen8_gpe_context_destroy;
        gpe->context_add_surface = gen9_gpe_context_add_surface;
        gpe->reset_binding_table = gen9_gpe_reset_binding_table;
        gpe->load_kernels = gen8_gpe_load_kernels;
        gpe->setup_interface_data = gen8_gpe_setup_interface_data;
        gpe->set_dynamic_buffer = gen8_gpe_context_set_dynamic_buffer;
        gpe->media_object = gen8_gpe_media_object;
        gpe->media_object_walker = gen8_gpe_media_object_walker;
        gpe->media_state_flush = gen8_gpe_media_state_flush;
        gpe->pipe_control = gen8_gpe_pipe_control;
        gpe->pipeline_end = gen9_gpe_pipeline_end;
        gpe->pipeline_setup = gen9_gpe_pipeline_setup;
        gpe->mi_conditional_batch_buffer_end = gen9_gpe_mi_conditional_batch_buffer_end;
        gpe->mi_batch_buffer_start = gen8_gpe_mi_batch_buffer_start;
        gpe->mi_load_register_reg = gen8_gpe_mi_load_register_reg;
        gpe->mi_load_register_imm = gen8_gpe_mi_load_register_imm;
        gpe->mi_load_register_mem = gen8_gpe_mi_load_register_mem;
        gpe->mi_store_register_mem = gen8_gpe_mi_store_register_mem;
        gpe->mi_store_data_imm = gen8_gpe_mi_store_data_imm;
        gpe->mi_flush_dw = gen8_gpe_mi_flush_dw;
        gpe->mi_copy_mem_mem = gen8_gpe_mi_copy_mem_mem;
    } else {
        // TODO: for other platforms
    }

    return true;
}
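
/*
 * Callers are expected to go through the table rather than the gen-specific
 * symbols, so one code path serves Gen8 through Gen10; a sketch of a typical
 * dispatch sequence (assuming the pipeline_setup/pipeline_end entries take
 * the same ctx/gpe_context/batch arguments as the media helpers above):
 *
 *     struct i965_driver_data *i965 = i965_driver_data(ctx);
 *     struct i965_gpe_table *gpe = &i965->gpe_table;
 *
 *     gpe->pipeline_setup(ctx, gpe_context, batch);
 *     gpe->media_object_walker(ctx, gpe_context, batch, &walker_param);
 *     gpe->media_state_flush(ctx, gpe_context, batch);
 *     gpe->pipeline_end(ctx, gpe_context, batch);
 */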

void
i965_gpe_table_terminate(VADriverContextP ctx)
{
    /* Nothing to release: the table holds only function pointers */
}