/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"

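/* Hard cap on the size of a batch buffer: 0x400000 bytes = 4 MiB. */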
#define MAX_BATCH_SIZE 0x400000


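/*
 * Local copies of the kernel execbuf flags that select a specific BSD
 * (video) ring on hardware that has two of them.  With the default value
 * the kernel picks a ring itself, ping-ponging submissions between the two.
 */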
#define LOCAL_I915_EXEC_BSD_MASK    (3 << 13)
#define LOCAL_I915_EXEC_BSD_DEFAULT (0 << 13) /* default ping-pong mode */
#define LOCAL_I915_EXEC_BSD_RING0   (1 << 13)
#define LOCAL_I915_EXEC_BSD_RING1   (2 << 13)

static void
intel_batchbuffer_reset(struct intel_batchbuffer *batch, int buffer_size)
{
    struct intel_driver_data *intel = batch->intel;
    int batch_size = buffer_size;
    int ring_flag;

    ring_flag = batch->flag & I915_EXEC_RING_MASK;

    assert(ring_flag == I915_EXEC_RENDER ||
           ring_flag == I915_EXEC_BLT ||
           ring_flag == I915_EXEC_BSD ||
           ring_flag == I915_EXEC_VEBOX);

    dri_bo_unreference(batch->buffer);
    batch->buffer = dri_bo_alloc(intel->bufmgr,
                                 "batch buffer",
                                 batch_size,
                                 0x1000);
    assert(batch->buffer);
    dri_bo_map(batch->buffer, 1);
    assert(batch->buffer->virtual);
    batch->map = batch->buffer->virtual;
    batch->size = batch_size;
    batch->ptr = batch->map;
    batch->atomic = 0;
}

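/* Bytes still available for commands.  BATCH_RESERVED keeps room for the
 * padding and MI_BATCH_BUFFER_END that intel_batchbuffer_flush() appends. */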
static unsigned int
intel_batchbuffer_space(struct intel_batchbuffer *batch)
{
    return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
}


struct intel_batchbuffer *
intel_batchbuffer_new(struct intel_driver_data *intel, int flag, int buffer_size)
{
    struct intel_batchbuffer *batch = calloc(1, sizeof(*batch));
    int ring_flag;

    assert(batch);

    ring_flag = flag & I915_EXEC_RING_MASK;
    assert(ring_flag == I915_EXEC_RENDER ||
           ring_flag == I915_EXEC_BSD ||
           ring_flag == I915_EXEC_BLT ||
           ring_flag == I915_EXEC_VEBOX);

    if (buffer_size < BATCH_SIZE) {
        buffer_size = BATCH_SIZE;
    }

    /* the buffer size must not exceed MAX_BATCH_SIZE (4 MiB) */
    if (buffer_size > MAX_BATCH_SIZE) {
        buffer_size = MAX_BATCH_SIZE;
    }

    batch->intel = intel;
    batch->flag = flag;
    batch->run = drm_intel_bo_mrb_exec;

    /* Gen6 render-ring flushes need a scratch BO for the PIPE_CONTROL
     * workaround write; see intel_batchbuffer_emit_mi_flush(). */
    if (IS_GEN6(intel->device_info) &&
        flag == I915_EXEC_RENDER)
        batch->wa_render_bo = dri_bo_alloc(intel->bufmgr,
                                           "wa scratch",
                                           4096,
                                           4096);
    else
        batch->wa_render_bo = NULL;

    intel_batchbuffer_reset(batch, buffer_size);

    return batch;
}

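/*
 * A minimal usage sketch of the batch buffer lifecycle (names as used in
 * this file; error handling omitted):
 *
 *     struct intel_batchbuffer *batch =
 *         intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
 *
 *     BEGIN_BATCH(batch, 2);
 *     OUT_BATCH(batch, MI_FLUSH);
 *     OUT_BATCH(batch, 0);              // pad dword (MI_NOOP)
 *     ADVANCE_BATCH(batch);
 *
 *     intel_batchbuffer_flush(batch);   // submits and resets the buffer
 *     intel_batchbuffer_free(batch);
 */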
void intel_batchbuffer_free(struct intel_batchbuffer *batch)
{
    if (batch->map) {
        dri_bo_unmap(batch->buffer);
        batch->map = NULL;
    }

    dri_bo_unreference(batch->buffer);
    dri_bo_unreference(batch->wa_render_bo);
    free(batch);
}

void
intel_batchbuffer_flush(struct intel_batchbuffer *batch)
{
    unsigned int used = batch->ptr - batch->map;

    if (used == 0) {
        return;
    }

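    /* Pad with one extra zero dword if needed so that, together with the
     * MI_BATCH_BUFFER_END emitted below, the total batch length stays
     * QWord (8-byte) aligned, as the hardware requires. */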
    if ((used & 4) == 0) {
        *(unsigned int*)batch->ptr = 0;
        batch->ptr += 4;
    }

    *(unsigned int*)batch->ptr = MI_BATCH_BUFFER_END;
    batch->ptr += 4;
    dri_bo_unmap(batch->buffer);
    used = batch->ptr - batch->map;
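    /* Submit through batch->run (drm_intel_bo_mrb_exec(), as set up in
     * intel_batchbuffer_new()); the low bits of batch->flag select the
     * target ring. */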
    batch->run(batch->buffer, used, 0, 0, 0, batch->flag);
    intel_batchbuffer_reset(batch, batch->size);
}

void
intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, unsigned int x)
{
    assert(intel_batchbuffer_space(batch) >= 4);
    *(unsigned int *)batch->ptr = x;
    batch->ptr += 4;
}

void
intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, dri_bo *bo,
                             uint32_t read_domains, uint32_t write_domains,
                             uint32_t delta)
{
    assert(batch->ptr - batch->map < batch->size);
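    /* Record the relocation with libdrm, then write the presumed address so
     * the kernel can skip patching if the target BO does not move. */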
    dri_bo_emit_reloc(batch->buffer, read_domains, write_domains,
                      delta, batch->ptr - batch->map, bo);
    intel_batchbuffer_emit_dword(batch, bo->offset + delta);
}

void
intel_batchbuffer_emit_reloc64(struct intel_batchbuffer *batch, dri_bo *bo,
                               uint32_t read_domains, uint32_t write_domains,
                               uint32_t delta)
{
    assert(batch->ptr - batch->map < batch->size);
    dri_bo_emit_reloc(batch->buffer, read_domains, write_domains,
                      delta, batch->ptr - batch->map, bo);

    /* Using the old buffer offset, write in what the right data would be, in
     * case the buffer doesn't move and we can short-circuit the relocation
     * processing in the kernel.
     */
    uint64_t offset = bo->offset64 + delta;
    intel_batchbuffer_emit_dword(batch, (unsigned int)offset);
    intel_batchbuffer_emit_dword(batch, (unsigned int)(offset >> 32));
}

void
intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
                                unsigned int size)
{
    assert(size < batch->size - 8);

    if (intel_batchbuffer_space(batch) < size) {
        intel_batchbuffer_flush(batch);
    }
}

void
intel_batchbuffer_data(struct intel_batchbuffer *batch,
                       void *data,
                       unsigned int size)
{
    assert((size & 3) == 0);
    intel_batchbuffer_require_space(batch, size);

    assert(batch->ptr);
    memcpy(batch->ptr, data, size);
    batch->ptr += size;
}

void
intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch)
{
    struct intel_driver_data *intel = batch->intel;
    int ring_flag;

    ring_flag = batch->flag & I915_EXEC_RING_MASK;

    if (IS_GEN6(intel->device_info) ||
        IS_GEN7(intel->device_info) ||
        IS_GEN8(intel->device_info) ||
        IS_GEN9(intel->device_info) ||
        IS_GEN10(intel->device_info)) {
        if (ring_flag == I915_EXEC_RENDER) {
            if (IS_GEN8(intel->device_info) || IS_GEN9(intel->device_info) ||
                IS_GEN10(intel->device_info)) {
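                /* On Gen8+ PIPE_CONTROL is six dwords long: the post-sync
                 * write address field grew to 64 bits. */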
                BEGIN_BATCH(batch, 6);
                OUT_BATCH(batch, CMD_PIPE_CONTROL | (6 - 2));

                OUT_BATCH(batch,
                          CMD_PIPE_CONTROL_CS_STALL |
                          CMD_PIPE_CONTROL_WC_FLUSH |
                          CMD_PIPE_CONTROL_TC_FLUSH |
                          CMD_PIPE_CONTROL_DC_FLUSH |
                          CMD_PIPE_CONTROL_NOWRITE);
                OUT_BATCH(batch, 0); /* write address */
                OUT_BATCH(batch, 0);
                OUT_BATCH(batch, 0); /* write data */
                OUT_BATCH(batch, 0);
                ADVANCE_BATCH(batch);
            } else if (IS_GEN6(intel->device_info)) {
                assert(batch->wa_render_bo);

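                /* Sandy Bridge workaround: before the flushing PIPE_CONTROL,
                 * emit one PIPE_CONTROL that stalls at the scoreboard and one
                 * that performs a post-sync write to the scratch BO. */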
                BEGIN_BATCH(batch, 4 * 3);

                OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
                OUT_BATCH(batch,
                          CMD_PIPE_CONTROL_CS_STALL |
                          CMD_PIPE_CONTROL_STALL_AT_SCOREBOARD);
                OUT_BATCH(batch, 0); /* address */
                OUT_BATCH(batch, 0); /* write data */

                OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
                OUT_BATCH(batch, CMD_PIPE_CONTROL_WRITE_QWORD);
                OUT_RELOC(batch,
                          batch->wa_render_bo,
                          I915_GEM_DOMAIN_INSTRUCTION,
                          I915_GEM_DOMAIN_INSTRUCTION,
                          0);
                OUT_BATCH(batch, 0); /* write data */

                /* now, finally, the real flush */
                OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));
                OUT_BATCH(batch,
                          CMD_PIPE_CONTROL_WC_FLUSH |
                          CMD_PIPE_CONTROL_TC_FLUSH |
                          CMD_PIPE_CONTROL_NOWRITE);
                OUT_BATCH(batch, 0); /* write address */
                OUT_BATCH(batch, 0); /* write data */
                ADVANCE_BATCH(batch);
            } else {
                BEGIN_BATCH(batch, 4);
                OUT_BATCH(batch, CMD_PIPE_CONTROL | (4 - 2));

                OUT_BATCH(batch,
                          CMD_PIPE_CONTROL_WC_FLUSH |
                          CMD_PIPE_CONTROL_TC_FLUSH |
                          CMD_PIPE_CONTROL_DC_FLUSH |
                          CMD_PIPE_CONTROL_NOWRITE);
                OUT_BATCH(batch, 0); /* write address */
                OUT_BATCH(batch, 0); /* write data */
                ADVANCE_BATCH(batch);
            }

        } else {
            if (ring_flag == I915_EXEC_BLT) {
                BEGIN_BLT_BATCH(batch, 4);
                OUT_BLT_BATCH(batch, MI_FLUSH_DW);
                OUT_BLT_BATCH(batch, 0);
                OUT_BLT_BATCH(batch, 0);
                OUT_BLT_BATCH(batch, 0);
                ADVANCE_BLT_BATCH(batch);
            } else if (ring_flag == I915_EXEC_VEBOX) {
                BEGIN_VEB_BATCH(batch, 4);
                OUT_VEB_BATCH(batch, MI_FLUSH_DW);
                OUT_VEB_BATCH(batch, 0);
                OUT_VEB_BATCH(batch, 0);
                OUT_VEB_BATCH(batch, 0);
                ADVANCE_VEB_BATCH(batch);
            } else {
                assert(ring_flag == I915_EXEC_BSD);
                BEGIN_BCS_BATCH(batch, 4);
                OUT_BCS_BATCH(batch, MI_FLUSH_DW | MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE);
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
                ADVANCE_BCS_BATCH(batch);
            }
        }
    } else {
        if (ring_flag == I915_EXEC_RENDER) {
            BEGIN_BATCH(batch, 1);
            OUT_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
            ADVANCE_BATCH(batch);
        } else {
            assert(ring_flag == I915_EXEC_BSD);
            BEGIN_BCS_BATCH(batch, 1);
            OUT_BCS_BATCH(batch, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
            ADVANCE_BCS_BATCH(batch);
        }
    }
}

void
intel_batchbuffer_begin_batch(struct intel_batchbuffer *batch, int total)
{
    batch->emit_total = total * 4;
    batch->emit_start = batch->ptr;
}

void
intel_batchbuffer_advance_batch(struct intel_batchbuffer *batch)
{
    assert(batch->emit_total == (batch->ptr - batch->emit_start));
}

void
intel_batchbuffer_check_batchbuffer_flag(struct intel_batchbuffer *batch, int flag)
{
    int ring_flag;

    ring_flag = flag & I915_EXEC_RING_MASK;

    if (ring_flag != I915_EXEC_RENDER &&
        ring_flag != I915_EXEC_BLT &&
        ring_flag != I915_EXEC_BSD &&
        ring_flag != I915_EXEC_VEBOX)
        return;

    if (batch->flag == flag)
        return;

    intel_batchbuffer_flush(batch);
    batch->flag = flag;
}

int
intel_batchbuffer_check_free_space(struct intel_batchbuffer *batch, int size)
{
    return intel_batchbuffer_space(batch) >= size;
}

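/*
 * "Atomic" sections: switching to the requested ring and reserving the whole
 * size up front guarantees that no implicit flush can split the commands
 * emitted before intel_batchbuffer_end_atomic().
 */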
static void
intel_batchbuffer_start_atomic_helper(struct intel_batchbuffer *batch,
                                      int flag,
                                      unsigned int size)
{
    assert(!batch->atomic);
    intel_batchbuffer_check_batchbuffer_flag(batch, flag);
    intel_batchbuffer_require_space(batch, size);
    batch->atomic = 1;
}

void
intel_batchbuffer_start_atomic(struct intel_batchbuffer *batch, unsigned int size)
{
    intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_RENDER, size);
}

void
intel_batchbuffer_start_atomic_blt(struct intel_batchbuffer *batch, unsigned int size)
{
    intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_BLT, size);
}

void
intel_batchbuffer_start_atomic_bcs(struct intel_batchbuffer *batch, unsigned int size)
{
    intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_BSD, size);
}

void
intel_batchbuffer_start_atomic_veb(struct intel_batchbuffer *batch, unsigned int size)
{
    intel_batchbuffer_start_atomic_helper(batch, I915_EXEC_VEBOX, size);
}

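/*
 * On hardware with two BSD (video) rings the caller can pin work to a
 * specific ring: BSD_RING0/BSD_RING1 map to the LOCAL_I915_EXEC_BSD_*
 * flags above, while the default leaves the choice to the kernel.
 */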
void intel_batchbuffer_start_atomic_bcs_override(struct intel_batchbuffer *batch, unsigned int size,
                                                 bsd_ring_flag override_flag)
{
    uint32_t ring_flag;

    switch (override_flag) {
    case BSD_RING0:
        ring_flag = I915_EXEC_BSD | LOCAL_I915_EXEC_BSD_RING0;
        break;
    case BSD_RING1:
        ring_flag = I915_EXEC_BSD | LOCAL_I915_EXEC_BSD_RING1;
        break;
    default:
        ring_flag = I915_EXEC_BSD | LOCAL_I915_EXEC_BSD_DEFAULT;
        break;
    }
    intel_batchbuffer_start_atomic_helper(batch, ring_flag, size);
}


void
intel_batchbuffer_end_atomic(struct intel_batchbuffer *batch)
{
    assert(batch->atomic);
    batch->atomic = 0;
}

int
intel_batchbuffer_used_size(struct intel_batchbuffer *batch)
{
    return batch->ptr - batch->map;
}

void
intel_batchbuffer_align(struct intel_batchbuffer *batch, unsigned int alignment)
{
    int used = batch->ptr - batch->map;
    int pad_size;

    assert((alignment & 3) == 0);
    pad_size = ALIGN(used, alignment) - used;
    assert((pad_size & 3) == 0);
    assert(intel_batchbuffer_space(batch) >= pad_size);

    while (pad_size >= 4) {
        intel_batchbuffer_emit_dword(batch, 0);
        pad_size -= 4;
    }
}